DRILL-1335: Fix merge join operator when compare null against null value.

Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/cd9eaa88
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/cd9eaa88
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/cd9eaa88

Branch: refs/heads/master
Commit: cd9eaa88fee4b01706a9237fb160aad5cb59f9c8
Parents: 746a0c7
Author: Jinfeng Ni <[email protected]>
Authored: Sat Jun 21 16:04:12 2014 -0700
Committer: Jacques Nadeau <[email protected]>
Committed: Wed Aug 27 13:33:48 2014 -0700

----------------------------------------------------------------------
 .../exec/physical/impl/join/MergeJoinBatch.java |  39 ++---
 .../impl/join/TestMergeJoinMulCondition.java    |  48 +++++-
 .../test/resources/join/merge_join_nullkey.json | 171 +++++++++++++++++++
 3 files changed, 228 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd9eaa88/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
index 24ca463..7a6273c 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/MergeJoinBatch.java
@@ -499,40 +499,21 @@ public class MergeJoinBatch extends 
AbstractRecordBatch<MergeJoinPOP> {
       cg.getSetupBlock().assign(JExpr._this().ref(incomingRecordBatch), 
JExpr._this().ref(incomingRightRecordBatch));
       ClassGenerator.HoldingContainer compareRightExprHolder = 
cg.addExpr(materializedRightExpr, false);
 
-      if (compareLeftExprHolder.isOptional() && 
compareRightExprHolder.isOptional()) {
-        // handle null == null
-        cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0))
-            .cand(compareRightExprHolder.getIsSet().eq(JExpr.lit(0))))
-            ._then()
-            ._return(JExpr.lit(0));
-
-        // handle null == !null
-        cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0))
-            .cor(compareRightExprHolder.getIsSet().eq(JExpr.lit(0))))
-            ._then()
-            ._return(JExpr.lit(1));
-
-      } else if (compareLeftExprHolder.isOptional()) {
-        // handle null == required (null is less than any value)
-        
cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0)))
-            ._then()
-            ._return(JExpr.lit(-1));
-
-      } else if (compareRightExprHolder.isOptional()) {
-        // handle required == null (null is less than any value)
-        
cg.getEvalBlock()._if(compareRightExprHolder.getIsSet().eq(JExpr.lit(0)))
-            ._then()
-            ._return(JExpr.lit(1));
-      }
-
       LogicalExpression fh = 
FunctionGenerationHelper.getComparator(compareLeftExprHolder,
         compareRightExprHolder,
         context.getFunctionRegistry());
       HoldingContainer out = cg.addExpr(fh, false);
 
-      //If not 0, it means not equal. We return this out value.
-      JConditional jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));
-      jc._then()._return(out.getValue());
+      // If not 0, it means not equal. We return this out value.
+      // Null compares to Null should returns null (unknown). In such case, we 
return 1 to indicate they are not equal. 
+      if (compareLeftExprHolder.isOptional() && 
compareRightExprHolder.isOptional()) {
+        JConditional jc = 
cg.getEvalBlock()._if(compareLeftExprHolder.getIsSet().eq(JExpr.lit(0)).
+                                    
cand(compareRightExprHolder.getIsSet().eq(JExpr.lit(0))));
+        jc._then()._return(JExpr.lit(1));
+        
jc._elseif(out.getValue().ne(JExpr.lit(0)))._then()._return(out.getValue());
+      } else {
+        
cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)))._then()._return(out.getValue());
+      }
     }
 
     //Pass the equality check for all the join conditions. Finally, return 0.

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd9eaa88/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinMulCondition.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinMulCondition.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinMulCondition.java
index d9cfa5c..bf402d1 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinMulCondition.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/join/TestMergeJoinMulCondition.java
@@ -37,7 +37,6 @@ import org.junit.rules.TestRule;
 import com.google.common.base.Charsets;
 import com.google.common.io.Files;
 
-//@Ignore("Currently returns wrong result.  Stopped working when incoming 
became more than one result set.")
 public class TestMergeJoinMulCondition extends PopUnitTestBase {
   static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(TestMergeJoinMulCondition.class);
 
@@ -72,4 +71,51 @@ public class TestMergeJoinMulCondition extends 
PopUnitTestBase {
     }
   }
 
+  @Test
+  // The physical plan is obtained through sql:
+  // alter session set `planner.enable_hashjoin`=false;
+  // select * from cp.`region.json` t1, cp.`region.json` t2 where t1.non_exist 
= t2.non_exist2 ;
+  public void testMergeJoinInnerNullKey() throws Exception {
+    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
+
+    try(Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
+        DrillClient client = new DrillClient(CONFIG, 
serviceSet.getCoordinator());) {
+
+      bit1.run();
+      client.connect();
+      List<QueryResultBatch> results = 
client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
+          
Files.toString(FileUtils.getResourceAsFile("/join/merge_join_nullkey.json"), 
Charsets.UTF_8).replace("${JOIN_TYPE}", "INNER"));
+      int count = 0;
+      for(QueryResultBatch b : results) {
+        if (b.getHeader().getRowCount() != 0)
+          count += b.getHeader().getRowCount();
+        b.release();
+      }
+      assertEquals(0, count);
+    }
+  }
+
+  @Test
+  // The physical plan is obtained through sql:
+  // alter session set `planner.enable_hashjoin`=false;
+  // select * from cp.`region.json` t1 left outer join cp.`region.json` t2 on  
t1.non_exist = t2.non_exist2 ;
+  public void testMergeJoinLeftOuterNullKey() throws Exception {
+    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
+
+    try(Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
+        DrillClient client = new DrillClient(CONFIG, 
serviceSet.getCoordinator());) {
+
+      bit1.run();
+      client.connect();
+      List<QueryResultBatch> results = 
client.runQuery(org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
+          
Files.toString(FileUtils.getResourceAsFile("/join/merge_join_nullkey.json"), 
Charsets.UTF_8).replace("${JOIN_TYPE}", "LEFT"));
+      int count = 0;
+      for(QueryResultBatch b : results) {
+        if (b.getHeader().getRowCount() != 0)
+          count += b.getHeader().getRowCount();
+        b.release();
+      }
+      assertEquals(110, count);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/cd9eaa88/exec/java-exec/src/test/resources/join/merge_join_nullkey.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/join/merge_join_nullkey.json 
b/exec/java-exec/src/test/resources/join/merge_join_nullkey.json
new file mode 100644
index 0000000..b283dda
--- /dev/null
+++ b/exec/java-exec/src/test/resources/join/merge_join_nullkey.json
@@ -0,0 +1,171 @@
+{
+  "head" : {
+    "version" : 1,
+    "generator" : {
+      "type" : "DefaultSqlHandler",
+      "info" : ""
+    },
+    "type" : "APACHE_DRILL_PHYSICAL",
+    "options" : [ ],
+    "queue" : 0,
+    "resultMode" : "EXEC"
+  },
+  "graph" : [ {
+    "pop" : "fs-scan",
+    "@id" : 11,
+    "files" : [ "/region.json" ],
+    "storage" : {
+      "type" : "file",
+      "enabled" : true,
+      "connection" : "classpath:///",
+      "workspaces" : null,
+      "formats" : {
+        "json" : {
+          "type" : "json"
+        },
+        "parquet" : {
+          "type" : "parquet"
+        }
+      }
+    },
+    "format" : {
+      "type" : "json"
+    },
+    "selectionRoot" : "/region.json",
+    "cost" : 18.0
+  }, {
+    "pop" : "project",
+    "@id" : 9,
+    "exprs" : [ {
+      "ref" : "`T2¦¦*`",
+      "expr" : "`*`"
+    } ],
+    "child" : 11,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "external-sort",
+    "@id" : 7,
+    "child" : 9,
+    "orderings" : [ {
+      "order" : "ASC",
+      "expr" : "`T2¦¦non_exist`",
+      "nullDirection" : "UNSPECIFIED"
+    } ],
+    "reverse" : false,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "selection-vector-remover",
+    "@id" : 5,
+    "child" : 7,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "fs-scan",
+    "@id" : 10,
+    "files" : [ "/region.json" ],
+    "storage" : {
+      "type" : "file",
+      "enabled" : true,
+      "connection" : "classpath:///",
+      "workspaces" : null,
+      "formats" : {
+        "json" : {
+          "type" : "json"
+        },
+        "parquet" : {
+          "type" : "parquet"
+        }
+      }
+    },
+    "format" : {
+      "type" : "json"
+    },
+    "selectionRoot" : "/region.json",
+    "cost" : 18.0
+  }, {
+    "pop" : "project",
+    "@id" : 8,
+    "exprs" : [ {
+      "ref" : "`T3¦¦*`",
+      "expr" : "`*`"
+    } ],
+    "child" : 10,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "external-sort",
+    "@id" : 6,
+    "child" : 8,
+    "orderings" : [ {
+      "order" : "ASC",
+      "expr" : "`T3¦¦non_exist2`",
+      "nullDirection" : "UNSPECIFIED"
+    } ],
+    "reverse" : false,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "selection-vector-remover",
+    "@id" : 4,
+    "child" : 6,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "merge-join",
+    "@id" : 3,
+    "left" : 5,
+    "right" : 4,
+    "conditions" : [ {
+      "relationship" : "==",
+      "left" : "`T2¦¦non_exist`",
+      "right" : "`T3¦¦non_exist2`"
+    } ],
+    "joinType" : "${JOIN_TYPE}",
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "project",
+    "@id" : 2,
+    "exprs" : [ {
+      "ref" : "`T2¦¦*`",
+      "expr" : "`T2¦¦*`"
+    }, {
+      "ref" : "`T3¦¦*`",
+      "expr" : "`T3¦¦*`"
+    } ],
+    "child" : 3,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "project",
+    "@id" : 1,
+    "exprs" : [ {
+      "ref" : "`*`",
+      "expr" : "`T2¦¦*`"
+    }, {
+      "ref" : "`*0`",
+      "expr" : "`T3¦¦*`"
+    } ],
+    "child" : 2,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  }, {
+    "pop" : "screen",
+    "@id" : 0,
+    "child" : 1,
+    "initialAllocation" : 1000000,
+    "maxAllocation" : 10000000000,
+    "cost" : 18.0
+  } ]
+}

Reply via email to