stefanvodita commented on code in PR #12966: URL: https://github.com/apache/lucene/pull/12966#discussion_r1518965858
########## lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java: ########## @@ -142,6 +249,301 @@ DimConfig verifyDim(String dim) { return dimConfig; } + /** + * Roll-up the aggregation values from {@code childOrdinal} to {@code ordinal}. Overrides should + * probably call this to update the counts. Overriding allows us to work with primitive types for + * the aggregation values, keeping aggregation efficient. + */ + protected void updateValueFromRollup(int ordinal, int childOrdinal) throws IOException { + setCount(ordinal, getCount(ordinal) + rollup(childOrdinal)); + } + + /** + * Return a {@link TopOrdAndNumberQueue} of the appropriate type, i.e. a {@link TopOrdAndIntQueue} + * or a {@link org.apache.lucene.facet.TopOrdAndFloatQueue}. + */ + protected TopOrdAndNumberQueue makeTopOrdAndNumberQueue(int topN) { + return new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); + } + + // TODO: We don't need this if we're okay with having an integer -1 in the results even for float + // aggregations. + /** Return the value for a missing aggregation, i.e. {@code -1} or {@code -1f}. */ + protected Number missingAggregationValue() { + return -1; + } + + /** Rolls up any single-valued hierarchical dimensions. */ + void rollup() throws IOException { + if (initialized == false) { + return; + } + + // Rollup any necessary dims: + int[] children = null; + for (Map.Entry<String, FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) { + String dim = ent.getKey(); + FacetsConfig.DimConfig ft = ent.getValue(); + if (ft.hierarchical && ft.multiValued == false) { + int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); + // It can be -1 if this field was declared in the + // config but never indexed: + if (dimRootOrd > 0) { + if (children == null) { + // lazy init + children = getChildren(); + } + updateValueFromRollup(dimRootOrd, children[dimRootOrd]); + } + } + } + } + + private int rollup(int ord) throws IOException { + int[] children = getChildren(); + int[] siblings = getSiblings(); + int aggregatedValue = 0; + while (ord != TaxonomyReader.INVALID_ORDINAL) { + int currentValue = getCount(ord); + int newValue = currentValue + rollup(children[ord]); + setCount(ord, newValue); + aggregatedValue += getCount(ord); + ord = siblings[ord]; + } + return aggregatedValue; + } + + /** + * Create a FacetResult for the provided dim + path and intermediate results. Does the extra work + * of resolving ordinals -> labels, etc. Will return null if there are no children. + */ + private FacetResult createFacetResult( + TopChildrenForPath topChildrenForPath, String dim, String... path) throws IOException { + // If the intermediate result is null or there are no children, we return null: + if (topChildrenForPath == null || topChildrenForPath.childCount == 0) { + return null; + } + + TopOrdAndNumberQueue q = topChildrenForPath.childQueue; + assert q != null; + + LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + int[] ordinals = new int[labelValues.length]; + Number[] values = new Number[labelValues.length]; + + for (int i = labelValues.length - 1; i >= 0; i--) { + TopOrdAndNumberQueue.OrdAndValue ordAndValue = q.pop(); + assert ordAndValue != null; + ordinals[i] = ordAndValue.ord; + values[i] = ordAndValue.value; + } + + FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals); + // The path component we're interested in is the one immediately after the provided path. We + // add 1 here to also account for the dim: + int childComponentIdx = path.length + 1; + for (int i = 0; i < labelValues.length; i++) { + labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]); + } + + return new FacetResult( + dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount); + } + + @Override + public FacetResult getAllChildren(String dim, String... path) throws IOException { + DimConfig dimConfig = verifyDim(dim); + FacetLabel cp = new FacetLabel(dim, path); + int dimOrd = taxoReader.getOrdinal(cp); + if (dimOrd == -1) { + return null; + } + + if (initialized == false) { + return null; + } + + Number aggregatedValue = 0; + int aggregatedCount = 0; + + IntArrayList ordinals = new IntArrayList(); + List<Number> ordValues = new ArrayList<>(); + + if (sparseCounts != null) { + for (IntIntCursor ordAndCount : sparseCounts) { + int ord = ordAndCount.key; + int count = ordAndCount.value; + Number value = getAggregationValue(ord); + if (parents[ord] == dimOrd && count > 0) { + aggregatedCount += count; + aggregatedValue = aggregate(aggregatedValue, value); + ordinals.add(ord); + ordValues.add(value); + } + } + } else { + int[] children = getChildren(); + int[] siblings = getSiblings(); + int ord = children[dimOrd]; + while (ord != TaxonomyReader.INVALID_ORDINAL) { + int count = counts[ord]; + Number value = getAggregationValue(ord); + if (count > 0) { + aggregatedCount += count; + aggregatedValue = aggregate(aggregatedValue, value); + ordinals.add(ord); + ordValues.add(value); + } + ord = siblings[ord]; + } + } + + if (aggregatedCount == 0) { + return null; + } + + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + aggregatedValue = getAggregationValue(dimOrd); + } else { + // Our aggregated value is not correct, in general: + aggregatedValue = missingAggregationValue(); + } + } else { + // Our aggregateddim value is accurate, so we keep it + } + + // TODO: It would be nice if TaxonomyReader let us pass in a buffer + size so we didn't have to + // do an array copy here: + FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray()); + + LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()]; + for (int i = 0; i < ordValues.size(); i++) { + labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], ordValues.get(i)); + } + return new FacetResult(dim, path, aggregatedValue, labelValues, ordinals.size()); + } + + private TopOrdAndNumberQueue.OrdAndValue insertIntoQueue( Review Comment: Good point. Added to #13175, where we can target improvements related to the way we access these queues. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org