Hi, I would like to share with you some improvements to the Document class.
I changed from ArrayList to Dictionary<string, Fieldable>. It avoid box/unbox, and it is better to iterate by field name. I tested and the functionality is the same as before, but with more performance. I appreciate some feedback. I do not know if this list accept attached file, so I send attached and in mail body (below). Regards, Leo
######### Document.cs ###################
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using System.Collections.Generic;

// for javadoc
using IndexReader = Lucene.Net.Index.IndexReader;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Searcher = Lucene.Net.Search.Searcher;

namespace Lucene.Net.Documents
{
    /// <summary>Documents are the unit of indexing and search.
    ///
    /// A Document is a set of fields. Each field has a name and a textual value.
    /// A field may be <see cref="Fieldable.IsStored()">stored</see> with the document, in which
    /// case it is returned with search hits on the document. Thus each document
    /// should typically contain one or more stored fields which uniquely identify
    /// it.
    ///
    /// <para>Note that fields which are <i>not</i> <see cref="Fieldable.IsStored()">stored</see> are
    /// <i>not</i> available in documents retrieved from the index, e.g. with
    /// <see cref="ScoreDoc.doc"/>, <see cref="Searcher.Doc(int)"/> or
    /// <see cref="IndexReader.Document(int)"/>.</para>
    /// </summary>
    [Serializable]
    public sealed class Document
    {
        /// <summary>Enumerator behind the deprecated <see cref="Document.Fields()"/> method.
        /// It walks a snapshot of the document's fields (the list produced by
        /// <c>FieldControl.GetAllLists()</c>), so every element it yields is a
        /// <see cref="Fieldable"/>.
        /// </summary>
        private class AnonymousClassEnumeration : System.Collections.IEnumerator
        {
            public AnonymousClassEnumeration(Document enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }

            private void InitBlock(Document enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
                iter = CreateIterator();
            }

            /// <summary>Builds an iterator over the flattened field list. Enumerating the
            /// dictionary itself would yield key/value pairs instead of fields.</summary>
            private System.Collections.IEnumerator CreateIterator()
            {
                return enclosingInstance.fields.GetAllLists().GetEnumerator();
            }

            private System.Object tempAuxObj;

            /// <summary>Advances to the next field; returns false once all fields were visited.</summary>
            public bool MoveNext()
            {
                bool result = HasMoreElements();
                if (result)
                {
                    tempAuxObj = NextElement();
                }
                return result;
            }

            /// <summary>Rewinds the enumeration by taking a fresh snapshot of the fields.</summary>
            public void Reset()
            {
                iter = CreateIterator();
                tempAuxObj = null;
            }

            /// <summary>The field fetched by the last successful <see cref="MoveNext"/>, or null before that.</summary>
            public System.Object Current
            {
                get { return tempAuxObj; }
            }

            private Document enclosingInstance;

            public Document Enclosing_Instance
            {
                get { return enclosingInstance; }
            }

            internal System.Collections.IEnumerator iter;

            public bool HasMoreElements()
            {
                return iter.MoveNext();
            }

            public System.Object NextElement()
            {
                return iter.Current;
            }
        }

        /// <summary>Field storage keyed by field name. Each name maps to the list of fields
        /// added under that name, in the order they were added. A name is only present
        /// while its list holds at least one field, provided all changes go through
        /// <see cref="Add(Fieldable)"/>, <see cref="RemoveOne"/> and <see cref="RemoveAll"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): because fields are grouped by name, <see cref="GetAllLists"/> returns
        /// them name by name rather than in global insertion order - confirm that no caller
        /// depends on the interleaved order of differently named fields.
        /// </remarks>
        [Serializable]
        internal class FieldControl : Dictionary<string, List<Fieldable>>
        {
            public FieldControl()
            {
            }

            /// <summary>Deserialization constructor. It is required because the dictionary base
            /// class uses custom serialization and the enclosing <see cref="Document"/> is
            /// marked serializable.</summary>
            protected FieldControl(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
                : base(info, context)
            {
            }

            /// <summary>Appends a field to the list kept for its name, creating the list on first use.</summary>
            /// <exception cref="ArgumentNullException">if <paramref name="item"/> is null</exception>
            public void Add(Fieldable item)
            {
                if (item == null)
                {
                    throw new ArgumentNullException("item");
                }

                string key = item.Name();
                List<Fieldable> sameName;
                if (!TryGetValue(key, out sameName))
                {
                    sameName = new List<Fieldable>();
                    base.Add(key, sameName);
                }
                sameName.Add(item);
            }

            /// <summary>Removes the first field added under <paramref name="name"/>, if any.</summary>
            public void RemoveOne(string name)
            {
                List<Fieldable> sameName;
                if (!TryGetValue(name, out sameName))
                {
                    return;
                }

                if (sameName.Count > 0)
                {
                    sameName.RemoveAt(0);
                }

                // Drop the entry once it is empty; otherwise GetFirst would index into an empty list.
                if (sameName.Count == 0)
                {
                    base.Remove(name);
                }
            }

            /// <summary>Removes every field stored under <paramref name="name"/>; no-op when absent.</summary>
            public void RemoveAll(string name)
            {
                base.Remove(name);
            }

            /// <summary>Returns the fields stored under <paramref name="name"/>, or a new empty list
            /// when there are none. When the name exists, the returned list is the internal one,
            /// so callers must not modify it.</summary>
            public List<Fieldable> GetList(string name)
            {
                List<Fieldable> sameName;
                if (TryGetValue(name, out sameName))
                {
                    return sameName;
                }
                return new List<Fieldable>();
            }

            /// <summary>Returns the first field added under <paramref name="name"/>, or null if none exists.</summary>
            public Fieldable GetFirst(string name)
            {
                List<Fieldable> sameName;
                if (TryGetValue(name, out sameName) && sameName.Count > 0)
                {
                    return sameName[0];
                }
                return null;
            }

            /// <summary>Returns a new list with all fields of all names, grouped by name.</summary>
            public List<Fieldable> GetAllLists()
            {
                List<Fieldable> all = new List<Fieldable>();
                foreach (List<Fieldable> sameName in Values)
                {
                    all.AddRange(sameName);
                }
                return all;
            }

            /// <summary>Applies <paramref name="constraint"/> to every field stored under
            /// <paramref name="name"/> and collects the non-null results, in field order.</summary>
            public List<T> GetListWithConstraint<T>(string name, Func<T> constraint) where T : class
            {
                List<Fieldable> candidates = GetList(name);
                List<T> accepted = new List<T>(candidates.Count);
                foreach (Fieldable item in candidates)
                {
                    T projected = constraint(item);
                    if (projected != null)
                    {
                        accepted.Add(projected);
                    }
                }
                return accepted;
            }

            /// <summary>Applies <paramref name="constraint"/> to the fields stored under
            /// <paramref name="name"/> and returns the first non-null result, or null if
            /// there is none.</summary>
            public T GetFirstWithConstraint<T>(string name, Func<T> constraint) where T : class
            {
                foreach (Fieldable item in GetList(name))
                {
                    T projected = constraint(item);
                    if (projected != null)
                    {
                        return projected;
                    }
                }
                return null;
            }

            /// <summary>Maps a field to a value of type T; returning null rejects the field.</summary>
            public delegate T Func<T>(Fieldable item) where T : class;
        }

        internal FieldControl fields = new FieldControl();
        private float boost = 1.0f;

        /// <summary>Constructs a new document with no fields. </summary>
        public Document()
        {
        }

        /// <summary>Sets a boost factor for hits on any field of this document. This value
        /// will be multiplied into the score of all hits on this document.
        ///
        /// <para>The default value is 1.0.</para>
        ///
        /// <para>Values are multiplied into the value of <see cref="Fieldable.GetBoost()"/> of
        /// each field in this document. Thus, this method in effect sets a default
        /// boost for the fields of this document.</para>
        /// </summary>
        /// <seealso cref="Fieldable.SetBoost(float)"/>
        public void SetBoost(float boost)
        {
            this.boost = boost;
        }

        /// <summary>Returns, at indexing time, the boost factor as set by <see cref="SetBoost(float)"/>.
        ///
        /// <para>Note that once a document is indexed this value is no longer available
        /// from the index. At search time, for retrieved documents, this method always
        /// returns 1. This however does not mean that the boost value set at indexing
        /// time was ignored - it was just combined with other indexing time factors and
        /// stored elsewhere, for better indexing and search performance. (For more
        /// information see the "norm(t,d)" part of the scoring formula in
        /// <see cref="Lucene.Net.Search.Similarity"/>.)</para>
        /// </summary>
        /// <seealso cref="SetBoost(float)"/>
        public float GetBoost()
        {
            return boost;
        }

        /// <summary><para>Adds a field to a document. Several fields may be added with
        /// the same name. In this case, if the fields are indexed, their text is
        /// treated as though appended for the purposes of search.</para>
        /// <para>Note that add like the removeField(s) methods only makes sense
        /// prior to adding a document to an index. These methods cannot
        /// be used to change the content of an existing index! In order to achieve this,
        /// a document has to be deleted from an index and a new changed version of that
        /// document has to be added.</para>
        /// </summary>
        public void Add(Fieldable field)
        {
            fields.Add(field);
        }

        /// <summary><para>Removes field with the specified name from the document.
        /// If multiple fields exist with this name, this method removes the first field that has been added.
        /// If there is no field with the specified name, the document remains unchanged.</para>
        /// <para>Note that the removeField(s) methods like the add method only make sense
        /// prior to adding a document to an index. These methods cannot
        /// be used to change the content of an existing index! In order to achieve this,
        /// a document has to be deleted from an index and a new changed version of that
        /// document has to be added.</para>
        /// </summary>
        public void RemoveField(System.String name)
        {
            fields.RemoveOne(name);
        }

        /// <summary><para>Removes all fields with the given name from the document.
        /// If there is no field with the specified name, the document remains unchanged.</para>
        /// <para>Note that the removeField(s) methods like the add method only make sense
        /// prior to adding a document to an index. These methods cannot
        /// be used to change the content of an existing index! In order to achieve this,
        /// a document has to be deleted from an index and a new changed version of that
        /// document has to be added.</para>
        /// </summary>
        public void RemoveFields(System.String name)
        {
            fields.RemoveAll(name);
        }

        /// <summary>Returns a field with the given name if any exist in this document, or
        /// null. If multiple fields exists with this name, this method returns the
        /// first value added. Null is also returned when that first field is not a
        /// <see cref="Field"/> instance.
        /// Do not use this method with lazy loaded fields.
        /// </summary>
        public Field GetField(System.String name)
        {
            return fields.GetFirst(name) as Field;
        }

        /// <summary>Returns a field with the given name if any exist in this document, or
        /// null. If multiple fields exists with this name, this method returns the
        /// first value added.
        /// </summary>
        public Fieldable GetFieldable(System.String name)
        {
            return fields.GetFirst(name);
        }

        /// <summary>Returns the string value of the field with the given name if any exist in
        /// this document, or null. If multiple fields exist with this name, this
        /// method returns the first value added. If only binary fields with this name
        /// exist, returns null.
        /// </summary>
        public System.String Get(System.String name)
        {
            return fields.GetFirstWithConstraint<string>(name, delegate(Fieldable fieldable)
            {
                if (!fieldable.IsBinary())
                {
                    return fieldable.StringValue();
                }
                return null;
            });
        }

        /// <summary>Returns an Enumeration of all the fields in a document.</summary>
        /// <deprecated> use <see cref="GetFields()"/> instead
        /// </deprecated>
        [Obsolete("Use GetFields() instead")]
        public System.Collections.IEnumerator Fields()
        {
            return new AnonymousClassEnumeration(this);
        }

        /// <summary>Returns a List of all the fields in a document. The list is a new
        /// copy; fields are grouped by name and keep their insertion order within a name.
        /// <para>Note that fields which are <i>not</i> <see cref="Fieldable.IsStored()">stored</see> are
        /// <i>not</i> available in documents retrieved from the
        /// index, e.g. <see cref="Searcher.Doc(int)"/> or
        /// <see cref="IndexReader.Document(int)"/>.</para>
        /// </summary>
        public System.Collections.IList GetFields()
        {
            return fields.GetAllLists();
        }

        /// <summary> Returns an array of <see cref="Field"/>s with the given name.
        /// Do not use with lazy loaded fields: an entry that is not a
        /// <see cref="Field"/> instance appears as null in the result.
        /// This method returns an empty array when there are no
        /// matching fields. It never returns null.
        /// </summary>
        /// <param name="name">the name of the field
        /// </param>
        /// <returns> a <code>Field[]</code> array
        /// </returns>
        public Field[] GetFields(System.String name)
        {
            List<Fieldable> fieldables = fields.GetList(name);
            Field[] result = new Field[fieldables.Count];
            for (int i = 0; i < result.Length; i++)
            {
                result[i] = fieldables[i] as Field;
            }
            return result;
        }

        /// <summary> Returns an array of <see cref="Fieldable"/>s with the given name.
        /// This method returns an empty array when there are no
        /// matching fields. It never returns null.
        /// </summary>
        /// <param name="name">the name of the field
        /// </param>
        /// <returns> a <code>Fieldable[]</code> array
        /// </returns>
        public Fieldable[] GetFieldables(System.String name)
        {
            return fields.GetList(name).ToArray();
        }

        /// <summary> Returns an array of values of the field specified as the method parameter.
        /// Binary fields are skipped.
        /// This method returns an empty array when there are no
        /// matching fields. It never returns null.
        /// </summary>
        /// <param name="name">the name of the field
        /// </param>
        /// <returns> a <code>String[]</code> of field values
        /// </returns>
        public System.String[] GetValues(System.String name)
        {
            return fields.GetListWithConstraint<string>(name, delegate(Fieldable fieldable)
            {
                if (!fieldable.IsBinary())
                {
                    return fieldable.StringValue();
                }
                return null;
            }).ToArray();
        }

        private static readonly byte[][] NO_BYTES = new byte[0][];

        /// <summary> Returns an array of byte arrays for the fields that have the name specified
        /// as the method parameter. This method returns an empty
        /// array when there are no matching fields. It never
        /// returns null.
        /// </summary>
        /// <param name="name">the name of the field
        /// </param>
        /// <returns> a <code>byte[][]</code> of binary field values
        /// </returns>
        public byte[][] GetBinaryValues(System.String name)
        {
            // Select the binary fields first and read their values afterwards, so that a
            // binary field still gets a slot in the result even if its value is null.
            List<Fieldable> binaryFields = fields.GetListWithConstraint<Fieldable>(name, delegate(Fieldable fieldable)
            {
                if (fieldable.IsBinary())
                {
                    return fieldable;
                }
                return null;
            });

            if (binaryFields.Count == 0)
            {
                return NO_BYTES;
            }

            byte[][] values = new byte[binaryFields.Count][];
            for (int i = 0; i < values.Length; i++)
            {
                values[i] = binaryFields[i].BinaryValue();
            }
            return values;
        }

        /// <summary> Returns an array of bytes for the first (or only) field that has the name
        /// specified as the method parameter. This method will return <code>null</code>
        /// if no binary fields with the specified name are available.
        /// There may be non-binary fields with the same name.
        /// </summary>
        /// <param name="name">the name of the field.
        /// </param>
        /// <returns> a <code>byte[]</code> containing the binary field value or <code>null</code>
        /// </returns>
        public byte[] GetBinaryValue(System.String name)
        {
            return fields.GetFirstWithConstraint<byte[]>(name, delegate(Fieldable fieldable)
            {
                if (fieldable.IsBinary())
                {
                    return fieldable.BinaryValue();
                }
                return null;
            });
        }

        /// <summary>Prints the fields of a document for human consumption. </summary>
        public override System.String ToString()
        {
            List<Fieldable> fieldables = fields.GetAllLists();
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            buffer.Append("Document<");
            // Iterate over the flattened list: fields.Count is the number of distinct
            // names, which is smaller than the number of fields when a name repeats.
            for (int i = 0; i < fieldables.Count; i++)
            {
                if (i > 0)
                {
                    buffer.Append(" ");
                }
                buffer.Append(fieldables[i].ToString());
            }
            buffer.Append(">");
            return buffer.ToString();
        }

        /// <summary>Exposes a copy of the complete field list to unit tests.</summary>
        public System.Collections.IList fields_ForNUnit
        {
            get { return fields.GetAllLists(); }
        }
    }
}