romseygeek commented on code in PR #15727:
URL: https://github.com/apache/lucene/pull/15727#discussion_r2832281453
##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -210,6 +246,7 @@ public int nextDoc() throws IOException {
int nextParentDocID = parents.nextSetBit(childWithValues.docID());
collector.reset();
seen = true;
+ int childrenWithValuesCount = 1;
Review Comment:
Nit: let's keep track of value counts inside the Accumulator?
##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinSortField.java:
##########
@@ -47,69 +47,169 @@
*/
public class ToParentBlockJoinSortField extends SortField {
- private final boolean order;
+ private final boolean reverseChildren;
private final BitSetProducer parentFilter;
private final BitSetProducer childFilter;
+ private final Object childMissingValue;
+
+ /**
+ * Create ToParentBlockJoinSortField. The parent and child document ordering
is based on the flag
+ * reverse. Missing values are treated as the default for the type.
+ *
+ * @param field The sort field on the nested / child level.
+ * @param type The sort type on the nested / child level.
+ * @param reverse Whether natural order should be reversed on both child and
parent levels.
+ * @param parentFilter Filter that identifies the parent documents.
+ * @param childFilter Filter that defines which child documents participates
in sorting.
+ */
+ public ToParentBlockJoinSortField(
+ String field,
+ Type type,
+ boolean reverse,
+ BitSetProducer parentFilter,
+ BitSetProducer childFilter) {
+ this(field, type, reverse, null, null, parentFilter, childFilter);
+ }
+
+ /**
+ * Create ToParentBlockJoinSortField. Missing values are treated as the
default for the type.
+ *
+ * @param field The sort field on the nested / child level.
+ * @param type The sort type on the nested / child level.
+ * @param reverseParents Whether natural order should be reversed on the
parent level.
+ * @param reverseChildren Whether natural order should be reversed on the
nested / child document
+ * level.
+ * @param parentFilter Filter that identifies the parent documents.
+ * @param childFilter Filter that defines which child documents participates
in sorting.
+ */
+ public ToParentBlockJoinSortField(
+ String field,
+ Type type,
+ boolean reverseParents,
+ boolean reverseChildren,
+ BitSetProducer parentFilter,
+ BitSetProducer childFilter) {
+ this(field, type, reverseParents, reverseChildren, null, null,
parentFilter, childFilter);
+ }
/**
* Create ToParentBlockJoinSortField. The parent document ordering is based
on child document
* ordering (reverse).
*
* @param field The sort field on the nested / child level.
* @param type The sort type on the nested / child level.
- * @param reverse Whether natural order should be reversed on the nested /
child level.
+ * @param reverse Whether natural order should be reversed on both child and
parent levels.
+ * @param parentMissingValue The missing value for parent documents that
have no child documents.
+ * For Type.STRING use {@link SortField#STRING_FIRST} or {@link
SortField#STRING_LAST}. For
+ * numeric types use the corresponding boxed type (e.g. {@link Integer}
for Type.INT). Pass
+ * {@code null} for default behavior.
+ * @param childMissingValue The missing value for child documents that lack
the sort field. This
+ * value participates in the min/max selection among siblings. Type
constraints are the same
+ * as for {@code parentMissingValue}. Pass {@code null} to skip children
without a value.
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child documents participates
in sorting.
*/
public ToParentBlockJoinSortField(
String field,
Type type,
boolean reverse,
+ Object parentMissingValue,
+ Object childMissingValue,
BitSetProducer parentFilter,
BitSetProducer childFilter) {
- super(field, type, reverse);
- switch (getType()) {
- case STRING:
- case DOUBLE:
- case FLOAT:
- case LONG:
- case INT:
- // ok
- break;
- case CUSTOM:
- case DOC:
- case REWRITEABLE:
- case STRING_VAL:
- case SCORE:
- default:
- throw new UnsupportedOperationException("Sort type " + type + " is not
supported");
- }
- this.order = reverse;
- this.parentFilter = parentFilter;
- this.childFilter = childFilter;
+ this(
+ field,
+ type,
+ reverse,
+ reverse,
+ parentMissingValue,
+ childMissingValue,
+ parentFilter,
+ childFilter);
}
/**
* Create ToParentBlockJoinSortField.
*
* @param field The sort field on the nested / child level.
* @param type The sort type on the nested / child level.
- * @param reverse Whether natural order should be reversed on the nested /
child document level.
- * @param order Whether natural order should be reversed on the parent level.
+ * @param reverseParents Whether natural order should be reversed on the
parent level.
+ * @param reverseChildren Whether natural order should be reversed on the
nested / child document
+ * level.
+ * @param parentMissingValue The missing value for parent documents whose
children do not have a
+ * value for the sort field. For Type.STRING use {@link
SortField#STRING_FIRST} or {@link
+ * SortField#STRING_LAST}. For numeric types use the corresponding boxed
type (e.g. {@link
+ * Integer} for Type.INT). Pass {@code null} for default behavior.
+ * @param childMissingValue The missing value for child documents that lack
the sort field. This
+ * value participates in the min/max selection among siblings. Type
constraints are the same
+ * as for {@code parentMissingValue}. Pass {@code null} to skip children
without a value.
* @param parentFilter Filter that identifies the parent documents.
* @param childFilter Filter that defines which child documents participates
in sorting.
*/
public ToParentBlockJoinSortField(
String field,
Type type,
- boolean reverse,
- boolean order,
+ boolean reverseParents,
+ boolean reverseChildren,
+ Object parentMissingValue,
+ Object childMissingValue,
BitSetProducer parentFilter,
BitSetProducer childFilter) {
- super(field, type, reverse);
- this.order = order;
+ super(field, type, reverseParents, parentMissingValue);
+ validate(type, childMissingValue);
+ this.reverseChildren = reverseChildren;
this.parentFilter = parentFilter;
this.childFilter = childFilter;
+ this.childMissingValue = childMissingValue;
+ }
+
+ private static void validate(Type type, Object childMissingValue) {
+ switch (type) {
Review Comment:
Nit: we can use switch expressions here (ie, `case STRING ->
validateMissingValue()`)
##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -283,10 +346,19 @@ public boolean advanceExact(int targetParentDocID) throws
IOException {
for (int doc = childWithValues.docID(); doc < docID; doc =
childWithValues.nextDoc()) {
collector.increment();
+ childrenWithValuesCount++;
+ }
+ hasChildWithMissingValue = childrenWithValuesCount < totalChildren;
+ if (hasChildWithMissingValue && missingWithChildWithoutValue) {
+ return false;
}
return true;
}
+ private int getTotalChildrenCount(int prevParentDocID) {
+ return docID - prevParentDocID - 1;
Review Comment:
I think I'd inline this, it's a bit confusing to read otherwise as it's a
mix of new inputs and state.
##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -218,9 +255,15 @@ public int nextDoc() throws IOException {
break;
}
collector.increment();
+ childrenWithValuesCount++;
}
docID = nextParentDocID;
+ int prevParentDocID = parents.prevSetBit(docID - 1);
+ hasChildWithMissingValue = childrenWithValuesCount <
getTotalChildrenCount(prevParentDocID);
+ if (hasChildWithMissingValue && missingWithChildWithoutValue) {
+ docID = nextDoc();
Review Comment:
Can we avoid recursion here?
##########
lucene/join/src/java/org/apache/lucene/search/join/ToParentDocValues.java:
##########
@@ -176,22 +196,38 @@ public long cost() {
private ToParentDocValues(
DocIdSetIterator values, BitSet parents, DocIdSetIterator children,
Accumulator collect) {
+ this(values, parents, children, false, collect);
+ }
+
+ private ToParentDocValues(
+ DocIdSetIterator values,
+ BitSet parents,
+ DocIdSetIterator children,
+ boolean missingWithChildWithoutValue,
+ Accumulator collect) {
this.parents = parents;
childWithValues =
ConjunctionUtils.intersectIterators(Arrays.asList(children, values));
this.collector = collect;
+ this.missingWithChildWithoutValue = missingWithChildWithoutValue;
Review Comment:
Can we rename this to something like 'skipParentsWithMissingChildValues'?
'missingWithChildWithoutValue' is giving me a headache trying to parse it :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]