[ https://issues.apache.org/jira/browse/LUCENE-2649?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12911157#action_12911157 ]
Ryan McKinley commented on LUCENE-2649: --------------------------------------- Here is the code for ByteValues that: # optionally stores the BitSet via static config # does not cache a real BitSet unless only some docs match # calculates numDocs/numTerms {code:java} @Override protected ByteValues createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = entryKey; String field = entry.field; ByteParser parser = (ByteParser) entry.custom; if (parser == null) { return wrapper.getByteValues(reader, field, FieldCache.DEFAULT_BYTE_PARSER); } int numDocs = 0; int numTerms = 0; int maxDoc = reader.maxDoc(); final byte[] retArray = new byte[maxDoc]; Bits valid = null; Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); final Bits delDocs = MultiFields.getDeletedDocs(reader); final OpenBitSet validBits = new OpenBitSet( maxDoc ); DocsEnum docs = null; try { while(true) { final BytesRef term = termsEnum.next(); if (term == null) { break; } final byte termval = parser.parseByte(term); docs = termsEnum.docs(delDocs, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { break; } retArray[docID] = termval; validBits.set( docID ); numDocs++; } numTerms++; } } catch (StopFillCacheException stop) {} // If all non-deleted docs are valid we don't need the bitset in memory if( numDocs > 0 && CachedArray.CACHE_VALID_ARRAY_BITS ) { boolean matchesAllDocs = true; for( int i=0; i<maxDoc; i++ ) { if( !delDocs.get(i) && !validBits.get(i) ) { matchesAllDocs = false; break; } } if( matchesAllDocs ) { valid = new Bits.MatchAllBits( maxDoc ); } else { valid = validBits; } } } if( numDocs < 1 ) { valid = new Bits.MatchNoBits( maxDoc ); } return new ByteValues( retArray, valid, numDocs, numTerms ); } {code} > FieldCache should include a BitSet for matching docs > ---------------------------------------------------- > > Key: LUCENE-2649 > URL: https://issues.apache.org/jira/browse/LUCENE-2649 > Project: Lucene - Java > Issue Type: Improvement > Reporter: Ryan McKinley > Fix For: 4.0 > > Attachments: LUCENE-2649-FieldCacheWithBitSet.patch, > LUCENE-2649-FieldCacheWithBitSet.patch, LUCENE-2649-FieldCacheWithBitSet.patch > > > The FieldCache returns an array representing the values for each doc. > However there is no way to know if the doc actually has a value. > This should be changed to return an object representing the values *and* a > BitSet for all valid docs. -- This message is automatically generated by JIRA. - You can reply to this email to add a comment to the issue online. --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org