I've modified FieldCacheImpl to use the actual stored value when a field is both tokenized and stored. For keyword fields, the behavior is unchanged.
Index: FieldCacheImpl.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheI mpl.java,v retrieving revision 1.3 diff -u -r1.3 FieldCacheImpl.java --- FieldCacheImpl.java 21 Jul 2004 19:05:46 -0000 1.3 +++ FieldCacheImpl.java 28 Jul 2004 17:45:41 -0000 @@ -25,6 +25,8 @@ import java.util.Map; import java.util.WeakHashMap; import java.util.HashMap; +import org.apache.lucene.document.Field; +import java.util.Arrays; /** * Expert: The default cache implementation, storing all values in memory. @@ -80,6 +82,29 @@ } } + class FieldEntry implements Comparable { + String val; + int ind; + FieldEntry(int ind, String val) + { + this.ind = ind; + this.val = val; + } + public String getVal() + { + return val; + } + public int getInd() + { + return ind; + } + public int compareTo(Object obj) + { + return val.compareToIgnoreCase(((FieldEntry)obj).getVal()); + } +} + + /** The internal cache. Maps Entry to array of interpreted term values. **/ final Map cache = new WeakHashMap(); @@ -240,54 +265,92 @@ if (ret == null) { final int[] retArray = new int[reader.maxDoc()]; String[] mterms = new String[reader.maxDoc()+1]; - if (retArray.length > 0) { - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field, "")); - int t = 0; // current term number - - // an entry for documents that have no terms in this field - // should a document with no terms be at top or bottom? - // this puts them at the top - if it is changed, FieldDocSortedHitQueue - // needs to change as well. 
- mterms[t++] = null; - try { - if (termEnum.term() == null) { - throw new RuntimeException ("no terms in field " + field); - } - do { - Term term = termEnum.term(); - if (term.field() != field) break; - - // store term text - // we expect that there is at most one term per document - if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \"" + field + "\""); - mterms[t] = term.text(); - - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = t; - } - - t++; - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + Field docField = reader.document(0).getField(field); + if (docField.isStored() && docField.isTokenized()) { + // Fill entries + FieldEntry[] entries = new FieldEntry[reader.maxDoc()]; + for (int i=0; i<reader.maxDoc(); i++) { + String fieldValue; + if (!reader.isDeleted(i)) + fieldValue = reader.document(i).get(field); + else + fieldValue = ""; + entries[i] = new FieldEntry (i,fieldValue); } - if (t == 0) { - // if there are no terms, make the term array - // have a single null entry - mterms = new String[1]; - } else if (t < mterms.length) { - // if there are less terms than documents, - // trim off the dead array space - String[] terms = new String[t]; - System.arraycopy (mterms, 0, terms, 0, t); - mterms = terms; + Arrays.sort(entries); + for (int i=0;i<reader.maxDoc();i++) + { + int ind = entries[i].getInd(); + retArray[ind] = i; + mterms[ind]=entries[i].getVal(); } } + else + { + if (retArray.length > 0) + { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms(new Term(field, "")); + int t = 0; // current term number + + // an entry for documents that have no terms in this field + // should a document with no terms be at top or bottom? + // this puts them at the top - if it is changed, FieldDocSortedHitQueue + // needs to change as well. 
+ mterms[t++] = null; + + try + { + if (termEnum.term() == null) + { + throw new RuntimeException("no terms in field " + field); + } + do + { + Term term = termEnum.term(); + if (term.field() != field) + break; + + // store term text + // we expect that there is at most one term per document + if (t >= mterms.length) + throw new RuntimeException("there are more terms than documents in field \"" + field + + "\""); + mterms[t] = term.text(); + termDocs.seek(termEnum); + while (termDocs.next()) + { + retArray[termDocs.doc()] = t; + } + + t++; + } + while (termEnum.next()); + } + finally + { + termDocs.close(); + termEnum.close(); + } + + if (t == 0) + { + // if there are no terms, make the term array + // have a single null entry + mterms = new String[1]; + } + else if (t < mterms.length) + { + // if there are less terms than documents, + // trim off the dead array space + String[] terms = new String[t]; + System.arraycopy(mterms, 0, terms, 0, t); + mterms = terms; + } + } + } StringIndex value = new StringIndex (retArray, mterms); store (reader, field, STRING_INDEX, value); return value; @@ -309,7 +372,7 @@ // inherit javadocs public Object getAuto (IndexReader reader, String field) throws IOException { - field = field.intern(); + field = field.intern(); Object ret = lookup (reader, field, SortField.AUTO); if (ret == null) { TermEnum enumerator = reader.terms (new Term (field, "")); -----Original Message----- From: Doug Cutting [mailto:[EMAIL PROTECTED] Sent: Monday, July 26, 2004 14:10 PM To: Lucene Developers List Subject: Re: Problem with Sort logic ? Aviran wrote: > Do you think that another FieldCache implementation will be > beneficiary for those who want to sort on any field other than > keyword. I bet that a lot of developers will want to have the ability > to sort on ANY field, without having to duplicate all the fields as > keywords. Yes, it would probably be useful to have the option to cache values of unindexed fields, or stored and tokenized fields. 
Doug --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]