stefanvodita commented on code in PR #12966:
URL: https://github.com/apache/lucene/pull/12966#discussion_r1518965319


##########
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java:
##########
@@ -142,6 +249,301 @@ DimConfig verifyDim(String dim) {
     return dimConfig;
   }
 
+  /**
+   * Roll-up the aggregation values from {@code childOrdinal} to {@code 
ordinal}. Overrides should
+   * probably call this to update the counts. Overriding allows us to work 
with primitive types for
+   * the aggregation values, keeping aggregation efficient.
+   */
+  protected void updateValueFromRollup(int ordinal, int childOrdinal) throws 
IOException {
+    setCount(ordinal, getCount(ordinal) + rollup(childOrdinal));
+  }
+
+  /**
+   * Return a {@link TopOrdAndNumberQueue} of the appropriate type, i.e. a 
{@link TopOrdAndIntQueue}
+   * or a {@link org.apache.lucene.facet.TopOrdAndFloatQueue}.
+   */
+  protected TopOrdAndNumberQueue makeTopOrdAndNumberQueue(int topN) {
+    return new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
+  }
+
+  // TODO: We don't need this if we're okay with having an integer -1 in the 
results even for float
+  // aggregations.
+  /** Return the value for a missing aggregation, i.e. {@code -1} or {@code 
-1f}. */
+  protected Number missingAggregationValue() {
+    return -1;
+  }
+
+  /** Rolls up any single-valued hierarchical dimensions. */
+  void rollup() throws IOException {
+    if (initialized == false) {
+      return;
+    }
+
+    // Rollup any necessary dims:
+    int[] children = null;
+    for (Map.Entry<String, FacetsConfig.DimConfig> ent : 
config.getDimConfigs().entrySet()) {
+      String dim = ent.getKey();
+      FacetsConfig.DimConfig ft = ent.getValue();
+      if (ft.hierarchical && ft.multiValued == false) {
+        int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
+        // It can be -1 if this field was declared in the
+        // config but never indexed:
+        if (dimRootOrd > 0) {
+          if (children == null) {
+            // lazy init
+            children = getChildren();
+          }
+          updateValueFromRollup(dimRootOrd, children[dimRootOrd]);
+        }
+      }
+    }
+  }
+
+  private int rollup(int ord) throws IOException {
+    int[] children = getChildren();
+    int[] siblings = getSiblings();
+    int aggregatedValue = 0;
+    while (ord != TaxonomyReader.INVALID_ORDINAL) {
+      int currentValue = getCount(ord);
+      int newValue = currentValue + rollup(children[ord]);
+      setCount(ord, newValue);
+      aggregatedValue += getCount(ord);
+      ord = siblings[ord];
+    }
+    return aggregatedValue;
+  }
+
+  /**
+   * Create a FacetResult for the provided dim + path and intermediate 
results. Does the extra work
+   * of resolving ordinals -> labels, etc. Will return null if there are no 
children.
+   */
+  private FacetResult createFacetResult(
+      TopChildrenForPath topChildrenForPath, String dim, String... path) 
throws IOException {
+    // If the intermediate result is null or there are no children, we return 
null:
+    if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
+      return null;
+    }
+
+    TopOrdAndNumberQueue q = topChildrenForPath.childQueue;
+    assert q != null;
+
+    LabelAndValue[] labelValues = new LabelAndValue[q.size()];
+    int[] ordinals = new int[labelValues.length];
+    Number[] values = new Number[labelValues.length];
+
+    for (int i = labelValues.length - 1; i >= 0; i--) {
+      TopOrdAndNumberQueue.OrdAndValue ordAndValue = q.pop();
+      assert ordAndValue != null;
+      ordinals[i] = ordAndValue.ord;
+      values[i] = ordAndValue.value;
+    }
+
+    FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
+    // The path component we're interested in is the one immediately after the 
provided path. We
+    // add 1 here to also account for the dim:
+    int childComponentIdx = path.length + 1;
+    for (int i = 0; i < labelValues.length; i++) {
+      labelValues[i] = new 
LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
+    }
+
+    return new FacetResult(
+        dim, path, topChildrenForPath.pathValue, labelValues, 
topChildrenForPath.childCount);
+  }
+
+  @Override
+  public FacetResult getAllChildren(String dim, String... path) throws 
IOException {
+    DimConfig dimConfig = verifyDim(dim);
+    FacetLabel cp = new FacetLabel(dim, path);
+    int dimOrd = taxoReader.getOrdinal(cp);
+    if (dimOrd == -1) {
+      return null;
+    }
+
+    if (initialized == false) {
+      return null;
+    }
+
+    Number aggregatedValue = 0;
+    int aggregatedCount = 0;
+
+    IntArrayList ordinals = new IntArrayList();
+    List<Number> ordValues = new ArrayList<>();
+
+    if (sparseCounts != null) {
+      for (IntIntCursor ordAndCount : sparseCounts) {
+        int ord = ordAndCount.key;
+        int count = ordAndCount.value;
+        Number value = getAggregationValue(ord);
+        if (parents[ord] == dimOrd && count > 0) {
+          aggregatedCount += count;
+          aggregatedValue = aggregate(aggregatedValue, value);
+          ordinals.add(ord);
+          ordValues.add(value);
+        }
+      }
+    } else {
+      int[] children = getChildren();
+      int[] siblings = getSiblings();
+      int ord = children[dimOrd];
+      while (ord != TaxonomyReader.INVALID_ORDINAL) {
+        int count = counts[ord];
+        Number value = getAggregationValue(ord);
+        if (count > 0) {
+          aggregatedCount += count;
+          aggregatedValue = aggregate(aggregatedValue, value);
+          ordinals.add(ord);
+          ordValues.add(value);
+        }
+        ord = siblings[ord];
+      }
+    }
+
+    if (aggregatedCount == 0) {
+      return null;
+    }
+
+    if (dimConfig.multiValued) {
+      if (dimConfig.requireDimCount) {
+        aggregatedValue = getAggregationValue(dimOrd);
+      } else {
+        // Our aggregated value is not correct, in general:
+        aggregatedValue = missingAggregationValue();
+      }
+    } else {
+      // Our aggregated dim value is accurate, so we keep it
+    }
+
+    // TODO: It would be nice if TaxonomyReader let us pass in a buffer + size 
so we didn't have to
+    // do an array copy here:
+    FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray());
+
+    LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()];
+    for (int i = 0; i < ordValues.size(); i++) {
+      labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], 
ordValues.get(i));
+    }
+    return new FacetResult(dim, path, aggregatedValue, labelValues, 
ordinals.size());
+  }
+
+  private TopOrdAndNumberQueue.OrdAndValue insertIntoQueue(
+      TopOrdAndNumberQueue q,
+      int topN,
+      TopOrdAndNumberQueue.OrdAndValue bottomOrdAndValue,
+      TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue,
+      int ord,
+      Number value) {
+    if (incomingOrdAndValue == null) {
+      incomingOrdAndValue = new TopOrdAndNumberQueue.OrdAndValue();
+    }
+    incomingOrdAndValue.ord = ord;
+    incomingOrdAndValue.value = value;
+
+    if (q.size() < topN || q.lessThan(bottomOrdAndValue, incomingOrdAndValue)) 
{
+      incomingOrdAndValue = q.insertWithOverflow(incomingOrdAndValue);
+      bottomOrdAndValue.ord = q.top().ord;
+      bottomOrdAndValue.value = q.top().value;
+    }
+    return incomingOrdAndValue;
+  }
+
+  /**
+   * Determine the top-n children for a specified dimension + path. Results 
are in an intermediate
+   * form.
+   */
+  protected TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int 
pathOrd, int topN)

Review Comment:
   I would like to avoid making API changes in this PR. It's an interesting 
question whether all `Facets` should have this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to