amansinha100 commented on code in PR #6062:
URL: https://github.com/apache/hive/pull/6062#discussion_r2332073383


##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java:
##########
@@ -135,43 +142,76 @@ private List<RexNode> getResidualFilterNodes(Filter 
filter, Join join) {
     List<RexNode> aboveFilters = 
RelOptUtil.conjunctions(filter.getCondition());
     boolean hasNullFilterOnRightSide = false;
     List<RexNode> filterList = new ArrayList<>();
+    final ImmutableBitSet notNullColumnsFromRightSide = 
getNotNullColumnsFromRightSide(join);
+
     for (RexNode filterNode : aboveFilters) {
-      if (filterNode.getKind() == SqlKind.IS_NULL) {
-        // Null filter from right side table can be removed and its a 
pre-condition for anti join conversion.
-        if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, 
Collections.singletonList(filterNode))
-            && isStrong(((RexCall) filterNode).getOperands().get(0))) {
-          hasNullFilterOnRightSide = true;
-        } else {
-          filterList.add(filterNode);
-        }
-      } else {
-        if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, 
Collections.singletonList(filterNode))) {
-          // If some non null condition is present from right side, we can not 
convert the join to anti join as
-          // anti join does not project the fields from right side.
-          return null;
-        } else {
-          filterList.add(filterNode);
-        }
+      final ImmutableBitSet usedFields = 
RelOptUtil.InputFinder.bits(filterNode);
+      boolean usesFieldFromRHS = usedFields.intersects(rhsFields);
+
+      if(!usesFieldFromRHS) {
+        // Only LHS fields or constants, so the filterNode is part of the 
residual filter
+        filterList.add(filterNode);
+        continue;
+      }
+
+      // In the following we check for filter nodes that let us deduce that
+      // "an (originally) not-null column of RHS IS NULL because the LHS row 
will not be matched"
+
+      boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
+      if (!usesRHSFieldsOnly) {
+        // If there is a mix between LHS and RHS fields, don't convert to 
anti-join
+        return Optional.empty();
+      }
+
+      if(filterNode.getKind() != SqlKind.IS_NULL) {
+        return Optional.empty();
+      }
+
+      // Null filter from right side table can be removed and it is a 
pre-condition for anti join conversion.
+      RexNode arg = ((RexCall) filterNode).getOperands().get(0);
+      if (isStrong(arg, notNullColumnsFromRightSide)) {
+        hasNullFilterOnRightSide = true;
+      } else if(!isStrong(arg, rhsFields)) {
+        // if all RHS fields are null and the IS NULL is still not fulfilled, 
bail out
+        return Optional.empty();
       }
     }
 
     if (!hasNullFilterOnRightSide) {
-      return null;
+      return Optional.empty();
+    }
+    return Optional.of(filterList);
+  }
+
+  private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
+    // we need to shift the indices of the second child to the right
+    int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
+
+    ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+    List<RelDataTypeField> rhsFields = 
joinRel.getInput(1).getRowType().getFieldList();
+    for(RelDataTypeField field : rhsFields) {
+      if(!field.getType().isNullable()) {
+        builder.set(shift+field.getIndex());
+      }
     }
-    return filterList;
+    return builder.build();
   }
 
-  private boolean isStrong(RexNode rexNode) {
-    AtomicBoolean hasCast = new AtomicBoolean(false);

Review Comment:
   Yes, I couldn't think of a compelling reason why AtomicBoolean was used in 
the first place. The updated change looks good. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to