romseygeek commented on code in PR #15727:
URL: https://github.com/apache/lucene/pull/15727#discussion_r2832281453


##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -210,6 +246,7 @@ public int nextDoc() throws IOException {
     int nextParentDocID = parents.nextSetBit(childWithValues.docID());
     collector.reset();
     seen = true;
+    int childrenWithValuesCount = 1;

Review Comment:
   Nit: let's keep track of value counts inside the Accumulator?



##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinSortField.java:
##########
@@ -47,69 +47,169 @@
  */
 public class ToParentBlockJoinSortField extends SortField {
 
-  private final boolean order;
+  private final boolean reverseChildren;
   private final BitSetProducer parentFilter;
   private final BitSetProducer childFilter;
+  private final Object childMissingValue;
+
+  /**
+   * Create ToParentBlockJoinSortField. The parent and child document ordering 
is based on the flag
+   * reverse. Missing values are treated as the default for the type.
+   *
+   * @param field The sort field on the nested / child level.
+   * @param type The sort type on the nested / child level.
+   * @param reverse Whether natural order should be reversed on both child and 
parent levels.
+   * @param parentFilter Filter that identifies the parent documents.
+   * @param childFilter Filter that defines which child documents participates 
in sorting.
+   */
+  public ToParentBlockJoinSortField(
+      String field,
+      Type type,
+      boolean reverse,
+      BitSetProducer parentFilter,
+      BitSetProducer childFilter) {
+    this(field, type, reverse, null, null, parentFilter, childFilter);
+  }
+
+  /**
+   * Create ToParentBlockJoinSortField. Missing values are treated as the 
default for the type.
+   *
+   * @param field The sort field on the nested / child level.
+   * @param type The sort type on the nested / child level.
+   * @param reverseParents Whether natural order should be reversed on the 
parent level.
+   * @param reverseChildren Whether natural order should be reversed on the 
nested / child document
+   *     level.
+   * @param parentFilter Filter that identifies the parent documents.
+   * @param childFilter Filter that defines which child documents participates 
in sorting.
+   */
+  public ToParentBlockJoinSortField(
+      String field,
+      Type type,
+      boolean reverseParents,
+      boolean reverseChildren,
+      BitSetProducer parentFilter,
+      BitSetProducer childFilter) {
+    this(field, type, reverseParents, reverseChildren, null, null, 
parentFilter, childFilter);
+  }
 
   /**
    * Create ToParentBlockJoinSortField. The parent document ordering is based 
on child document
    * ordering (reverse).
    *
    * @param field The sort field on the nested / child level.
    * @param type The sort type on the nested / child level.
-   * @param reverse Whether natural order should be reversed on the nested / 
child level.
+   * @param reverse Whether natural order should be reversed on both child and 
parent levels.
+   * @param parentMissingValue The missing value for parent documents that 
have no child documents.
+   *     For Type.STRING use {@link SortField#STRING_FIRST} or {@link 
SortField#STRING_LAST}. For
+   *     numeric types use the corresponding boxed type (e.g. {@link Integer} 
for Type.INT). Pass
+   *     {@code null} for default behavior.
+   * @param childMissingValue The missing value for child documents that lack 
the sort field. This
+   *     value participates in the min/max selection among siblings. Type 
constraints are the same
+   *     as for {@code parentMissingValue}. Pass {@code null} to skip children 
without a value.
    * @param parentFilter Filter that identifies the parent documents.
    * @param childFilter Filter that defines which child documents participates 
in sorting.
    */
   public ToParentBlockJoinSortField(
       String field,
       Type type,
       boolean reverse,
+      Object parentMissingValue,
+      Object childMissingValue,
       BitSetProducer parentFilter,
       BitSetProducer childFilter) {
-    super(field, type, reverse);
-    switch (getType()) {
-      case STRING:
-      case DOUBLE:
-      case FLOAT:
-      case LONG:
-      case INT:
-        // ok
-        break;
-      case CUSTOM:
-      case DOC:
-      case REWRITEABLE:
-      case STRING_VAL:
-      case SCORE:
-      default:
-        throw new UnsupportedOperationException("Sort type " + type + " is not 
supported");
-    }
-    this.order = reverse;
-    this.parentFilter = parentFilter;
-    this.childFilter = childFilter;
+    this(
+        field,
+        type,
+        reverse,
+        reverse,
+        parentMissingValue,
+        childMissingValue,
+        parentFilter,
+        childFilter);
   }
 
   /**
    * Create ToParentBlockJoinSortField.
    *
    * @param field The sort field on the nested / child level.
    * @param type The sort type on the nested / child level.
-   * @param reverse Whether natural order should be reversed on the nested / 
child document level.
-   * @param order Whether natural order should be reversed on the parent level.
+   * @param reverseParents Whether natural order should be reversed on the 
parent level.
+   * @param reverseChildren Whether natural order should be reversed on the 
nested / child document
+   *     level.
+   * @param parentMissingValue The missing value for parent documents whose 
children do not have a
+   *     value for the sort field. For Type.STRING use {@link 
SortField#STRING_FIRST} or {@link
+   *     SortField#STRING_LAST}. For numeric types use the corresponding boxed 
type (e.g. {@link
+   *     Integer} for Type.INT). Pass {@code null} for default behavior.
+   * @param childMissingValue The missing value for child documents that lack 
the sort field. This
+   *     value participates in the min/max selection among siblings. Type 
constraints are the same
+   *     as for {@code parentMissingValue}. Pass {@code null} to skip children 
without a value.
    * @param parentFilter Filter that identifies the parent documents.
    * @param childFilter Filter that defines which child documents participates 
in sorting.
    */
   public ToParentBlockJoinSortField(
       String field,
       Type type,
-      boolean reverse,
-      boolean order,
+      boolean reverseParents,
+      boolean reverseChildren,
+      Object parentMissingValue,
+      Object childMissingValue,
       BitSetProducer parentFilter,
       BitSetProducer childFilter) {
-    super(field, type, reverse);
-    this.order = order;
+    super(field, type, reverseParents, parentMissingValue);
+    validate(type, childMissingValue);
+    this.reverseChildren = reverseChildren;
     this.parentFilter = parentFilter;
     this.childFilter = childFilter;
+    this.childMissingValue = childMissingValue;
+  }
+
+  private static void validate(Type type, Object childMissingValue) {
+    switch (type) {

Review Comment:
   Nit: we can use switch expressions here (ie, `case STRING -> 
validateMissingValue()`)



##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -283,10 +346,19 @@ public boolean advanceExact(int targetParentDocID) throws 
IOException {
 
     for (int doc = childWithValues.docID(); doc < docID; doc = 
childWithValues.nextDoc()) {
       collector.increment();
+      childrenWithValuesCount++;
+    }
+    hasChildWithMissingValue = childrenWithValuesCount < totalChildren;
+    if (hasChildWithMissingValue && missingWithChildWithoutValue) {
+      return false;
     }
     return true;
   }
 
+  private int getTotalChildrenCount(int prevParentDocID) {
+    return docID - prevParentDocID - 1;

Review Comment:
   I think I'd inline this, it's a bit confusing to read otherwise as it's a 
mix of new inputs and state.



##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -218,9 +255,15 @@ public int nextDoc() throws IOException {
         break;
       }
       collector.increment();
+      childrenWithValuesCount++;
     }
 
     docID = nextParentDocID;
+    int prevParentDocID = parents.prevSetBit(docID - 1);
+    hasChildWithMissingValue = childrenWithValuesCount < 
getTotalChildrenCount(prevParentDocID);
+    if (hasChildWithMissingValue && missingWithChildWithoutValue) {
+      docID = nextDoc();

Review Comment:
   Can we avoid recursion here?



##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -176,22 +196,38 @@ public long cost() {
 
   private ToParentDocValues(
       DocIdSetIterator values, BitSet parents, DocIdSetIterator children, 
Accumulator collect) {
+    this(values, parents, children, false, collect);
+  }
+
+  private ToParentDocValues(
+      DocIdSetIterator values,
+      BitSet parents,
+      DocIdSetIterator children,
+      boolean missingWithChildWithoutValue,
+      Accumulator collect) {
     this.parents = parents;
     childWithValues = 
ConjunctionUtils.intersectIterators(Arrays.asList(children, values));
     this.collector = collect;
+    this.missingWithChildWithoutValue = missingWithChildWithoutValue;

Review Comment:
   Can we rename this to something like 'skipParentsWithMissingChildValues'? 
'missingWithChildWithoutValue' is giving me a headache trying to parse it :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to