stefanvodita commented on code in PR #12966: URL: https://github.com/apache/lucene/pull/12966#discussion_r1518798043
########## lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java: ##########
@@ -76,6 +111,78 @@ public int compare(FacetResult a, FacetResult b) {
     this.config = config;
     this.fc = fc;
     parents = taxoReader.getParallelTaxonomyArrays().parents();
+    valueComparator = Comparator.comparingInt((x) -> (int) x);
+  }
+
+  /** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */
+  private boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) {
+    if (taxoReader.getSize() < 1024) {
+      // small number of unique values: use an array
+      return false;
+    }
+
+    if (fc == null) {
+      // counting all docs: use an array
+      return false;
+    }
+
+    int maxDoc = 0;
+    int sumTotalHits = 0;
+    for (FacetsCollector.MatchingDocs docs : fc.getMatchingDocs()) {
+      sumTotalHits += docs.totalHits;
+      maxDoc += docs.context.reader().maxDoc();
+    }
+
+    // if our result set is < 10% of the index, we collect sparsely (use hash map):
+    return sumTotalHits < maxDoc / 10;
+  }
+
+  protected void initializeValueCounters() {
+    if (initialized) {
+      return;
+    }
+    initialized = true;
+    assert sparseCounts == null && counts == null;
+    if (useHashTable(fc, taxoReader)) {
+      sparseCounts = new IntIntHashMap();
+    } else {
+      counts = new int[taxoReader.getSize()];
+    }
+  }
+
+  /** Set the count for this ordinal to {@code newValue}. */
+  protected void setCount(int ordinal, int newValue) {
+    if (sparseCounts != null) {
+      sparseCounts.put(ordinal, newValue);
+    } else {
+      counts[ordinal] = newValue;
+    }
+  }
+
+  /** Get the count for this ordinal. */
+  protected int getCount(int ordinal) {
+    if (sparseCounts != null) {
+      return sparseCounts.get(ordinal);
+    } else {
+      return counts[ordinal];
+    }
+  }
+
+  /** Get the aggregation value for this ordinal. */
+  protected Number getAggregationValue(int ordinal) {
+    // By default, this is just the count.

Review Comment:
   It's a good point, but I think it's better for the default behaviour to be getting counts.
   We need the `getAggregationValue` level of abstraction to be able to call `getValue` with different signatures for `IntTaxonomyFacets` and `FloatTaxonomyFacets`.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org