[
https://issues.apache.org/jira/browse/LUCENE-2649?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12911157#action_12911157
]
Ryan McKinley commented on LUCENE-2649:
---------------------------------------
Here is the code for ByteValues that:
# optionally stores the BitSet via static config
# does not cache a real BitSet unless only some docs match
# calculates numDocs/numTerms
{code:java}
/**
 * Builds the cached {@link ByteValues} for the field named by {@code entryKey}.
 *
 * <p>Walks every term of the field, parses it to a byte, and stores that byte
 * at the position of each document the term occurs in. Along the way it counts
 * the visited postings ({@code numDocs}) and terms ({@code numTerms}) and
 * records which documents received a value.
 *
 * <p>The "valid docs" bits handed to the result are chosen as follows:
 * <ul>
 *   <li>no document received a value: {@code Bits.MatchNoBits};</li>
 *   <li>{@code CachedArray.CACHE_VALID_ARRAY_BITS} is off: {@code null};</li>
 *   <li>every non-deleted document received a value: {@code Bits.MatchAllBits},
 *       so the real bit set does not have to be kept in memory;</li>
 *   <li>otherwise: the actual bit set of documents that have a value.</li>
 * </ul>
 *
 * @param reader   the reader whose terms and postings are scanned
 * @param entryKey cache key; supplies the field name and an optional custom
 *                 {@link ByteParser}
 * @return the per-document byte values plus validity bits and counts
 * @throws IOException if reading terms or postings fails
 */
@Override
protected ByteValues createValue(IndexReader reader, Entry entryKey) throws IOException {
  final String field = entryKey.field;
  final ByteParser parser = (ByteParser) entryKey.custom;
  if (parser == null) {
    // No custom parser supplied: go back through the wrapper with the default one.
    return wrapper.getByteValues(reader, field, FieldCache.DEFAULT_BYTE_PARSER);
  }

  int numDocs = 0;
  int numTerms = 0;
  final int maxDoc = reader.maxDoc();
  final byte[] retArray = new byte[maxDoc];
  Bits valid = null;

  final Terms terms = MultiFields.getTerms(reader, field);
  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator();
    // May be null when the reader has no deleted documents; every use below
    // must tolerate that.
    final Bits delDocs = MultiFields.getDeletedDocs(reader);
    final OpenBitSet validBits = new OpenBitSet(maxDoc);
    DocsEnum docs = null;
    try {
      while (true) {
        final BytesRef term = termsEnum.next();
        if (term == null) {
          break;
        }
        final byte termval = parser.parseByte(term);
        docs = termsEnum.docs(delDocs, docs);
        while (true) {
          final int docID = docs.nextDoc();
          if (docID == DocsEnum.NO_MORE_DOCS) {
            break;
          }
          retArray[docID] = termval;
          validBits.set(docID);
          // NOTE(review): this counts postings, so a document carrying more
          // than one term in this field is counted once per term - confirm
          // that callers only rely on this for single-valued fields.
          numDocs++;
        }
        numTerms++;
      }
    } catch (StopFillCacheException ignored) {
      // Deliberately ignored: presumably thrown by the parser to end the fill
      // early; whatever was collected so far is kept.
    }

    // If all non-deleted docs are valid we don't need the bitset in memory.
    if (numDocs > 0 && CachedArray.CACHE_VALID_ARRAY_BITS) {
      boolean matchesAllDocs = true;
      for (int i = 0; i < maxDoc; i++) {
        // A null delDocs means nothing is deleted, so every doc counts as live.
        final boolean deleted = delDocs != null && delDocs.get(i);
        if (!deleted && !validBits.get(i)) {
          matchesAllDocs = false;
          break;
        }
      }
      if (matchesAllDocs) {
        valid = new Bits.MatchAllBits(maxDoc);
      } else {
        valid = validBits;
      }
    }
  }

  if (numDocs < 1) {
    valid = new Bits.MatchNoBits(maxDoc);
  }
  return new ByteValues(retArray, valid, numDocs, numTerms);
}
{code}
> FieldCache should include a BitSet for matching docs
> ----------------------------------------------------
>
> Key: LUCENE-2649
> URL: https://issues.apache.org/jira/browse/LUCENE-2649
> Project: Lucene - Java
> Issue Type: Improvement
> Reporter: Ryan McKinley
> Fix For: 4.0
>
> Attachments: LUCENE-2649-FieldCacheWithBitSet.patch,
> LUCENE-2649-FieldCacheWithBitSet.patch, LUCENE-2649-FieldCacheWithBitSet.patch
>
>
> The FieldCache returns an array representing the values for each doc.
> However there is no way to know if the doc actually has a value.
> This should be changed to return an object representing the values *and* a
> BitSet for all valid docs.
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]