DRILL-1125: Apply casts to handle different input types in the join condition


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/1c40b5e7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/1c40b5e7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/1c40b5e7

Branch: refs/heads/master
Commit: 1c40b5e745ab20da6725568c9e4790f34ae73c8f
Parents: e22803a
Author: Mehant Baid <[email protected]>
Authored: Sun Sep 21 15:03:56 2014 -0700
Committer: Steven Phillips <[email protected]>
Committed: Mon Sep 29 18:21:44 2014 -0700

----------------------------------------------------------------------
 .../exec/expr/ExpressionTreeMaterializer.java   |  7 ++-
 .../physical/impl/common/ChainedHashTable.java  | 61 +++++++++++++++++++-
 .../drill/exec/resolver/TypeCastRules.java      |  3 +-
 .../org/apache/drill/TestExampleQueries.java    |  6 ++
 4 files changed, 70 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
index 2854c14..2a5afce 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
@@ -20,6 +20,7 @@ package org.apache.drill.exec.expr;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.expression.BooleanOperator;
 import org.apache.drill.common.expression.CastExpression;
 import org.apache.drill.common.expression.ConvertExpression;
@@ -298,8 +299,10 @@ public class ExpressionTreeMaterializer {
           // Implicitly cast the else expression
           newElseExpr = addCastExpression(newElseExpr, 
conditions.expression.getMajorType(), registry);
         } else {
-          assert false: "Incorrect least restrictive type computed, 
leastRestrictive: " +
-              leastRestrictive.toString() + " thenType: " + 
thenType.toString() + " elseType: " + elseType;
+          /* Cannot cast one of the two expressions to make the output type of 
if and else expression
+           * to be the same. Raise error.
+           */
+          throw new DrillRuntimeException("Case expression should have similar 
output type on all its branches");
         }
       }
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
index f77407e..792b7ab 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
@@ -18,11 +18,17 @@
 package org.apache.drill.exec.physical.impl.common;
 
 import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
 
 import org.apache.drill.common.expression.ErrorCollector;
 import org.apache.drill.common.expression.ErrorCollectorImpl;
 import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.FunctionCall;
+import org.apache.drill.common.expression.ExpressionPosition;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.logical.data.NamedExpression;
+import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.compile.sig.GeneratorMapping;
@@ -43,6 +49,7 @@ import org.apache.drill.exec.record.MaterializedField;
 import org.apache.drill.exec.record.RecordBatch;
 import org.apache.drill.exec.record.TypedFieldId;
 import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.resolver.TypeCastRules;
 import org.apache.drill.exec.vector.ValueVector;
 
 import com.sun.codemodel.JConditional;
@@ -183,8 +190,17 @@ public class ChainedHashTable {
     }
     setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds);
 
-    setupGetHash(cg /* use top level code generator for getHash */,  
GetHashIncomingBuildMapping, keyExprsBuild);
-    setupGetHash(cg /* use top level code generator for getHash */,  
GetHashIncomingProbeMapping, keyExprsProbe);
+    /* Before generating the code for hashing the build and probe expressions
+     * examine the expressions to make sure they are of the same type, add 
casts if necessary.
+     * If they are not of the same type, hashing the same value of different 
types will yield different hash values.
+     * NOTE: We add the cast only for the hash function, comparator function 
can handle the case
+     * when expressions are different (for eg we have comparator functions 
that compare bigint and float8)
+     * However for the hash to work correctly we would need to apply the cast.
+     */
+    addLeastRestrictiveCasts(keyExprsBuild, keyExprsProbe);
+
+    setupGetHash(cg /* use top level code generator for getHash */,  
GetHashIncomingBuildMapping, keyExprsBuild, false);
+    setupGetHash(cg /* use top level code generator for getHash */,  
GetHashIncomingProbeMapping, keyExprsProbe, true);
 
     HashTable ht = context.getImplementationClass(top);
     ht.setup(htConfig, context, allocator, incomingBuild, incomingProbe, 
outgoing, htContainerOrig);
@@ -261,7 +277,46 @@ public class ChainedHashTable {
     }
   }
 
-  private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet 
incomingMapping, LogicalExpression[] keyExprs) throws SchemaChangeException {
+  private void addLeastRestrictiveCasts(LogicalExpression[] keyExprsBuild, 
LogicalExpression[] keyExprsProbe) {
+
+    // If we don't have probe expressions then nothing to do get out
+    if (keyExprsProbe == null) {
+      return;
+    }
+
+    assert keyExprsBuild.length == keyExprsProbe.length;
+
+    for (int i = 0; i < keyExprsBuild.length; i++) {
+      MinorType buildType = keyExprsBuild[i].getMajorType().getMinorType();
+      MinorType probeType = keyExprsProbe[i].getMajorType().getMinorType();
+
+      if (buildType != probeType) {
+        // We need to add a cast to one of the expressions
+        List<MinorType> types = new LinkedList<>();
+        types.add(buildType);
+        types.add(probeType);
+        MinorType result = TypeCastRules.getLeastRestrictiveType(types);
+
+        // Add the cast
+        List<LogicalExpression> args = new LinkedList<>();
+        if (result == null) {
+          throw new DrillRuntimeException(String.format("Join conditions 
cannot be compared failing build expression: %s failing probe expression: %s",
+              keyExprsBuild[i].getMajorType().toString(), 
keyExprsProbe[i].getMajorType().toString()));
+        }
+        else if (result != buildType) {
+          // Add a cast expression on top of the build expression
+          args.add(keyExprsBuild[i]);
+          FunctionCall castCall = new FunctionCall("cast" + 
result.toString().toUpperCase(), args, ExpressionPosition.UNKNOWN);
+          keyExprsBuild[i] = ExpressionTreeMaterializer.materialize(castCall, 
incomingBuild, new ErrorCollectorImpl(), context.getFunctionRegistry());
+        } else if (result != probeType) {
+          args.add(keyExprsProbe[i]);
+          FunctionCall castCall = new FunctionCall("cast" + 
result.toString().toUpperCase(), args, ExpressionPosition.UNKNOWN);
+          keyExprsProbe[i] = ExpressionTreeMaterializer.materialize(castCall, 
incomingProbe, new ErrorCollectorImpl(), context.getFunctionRegistry());
+        }
+      }
+    }
+  }
+  private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet 
incomingMapping, LogicalExpression[] keyExprs, boolean isProbe) throws 
SchemaChangeException {
 
     cg.setMappingSet(incomingMapping);
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
index 7969d49..609b3a9 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.expression.FunctionCall;
 import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MajorType;
@@ -806,7 +805,7 @@ public class TypeCastRules {
         result = next;
         resultPrec = nextPrec;
       } else {
-        throw new DrillRuntimeException("Case expression branches have 
different output types ");
+        return null;
       }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java 
b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index 388d057..342bef1 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -475,4 +475,10 @@ public class TestExampleQueries extends BaseTestQuery{
     test("select case when n_nationkey > 0 and n_nationkey < 2 then 
concat(n_name, '_abc') when n_nationkey >=2 and n_nationkey < 4 then '_EFG' 
else concat(n_name,'_XYZ') end from cp.`tpch/nation.parquet` ;");
   }
 
+  @Test // tests join condition that has different input types
+  public void testJoinCondWithDifferentTypes() throws Exception {
+    test("select t1.department_description from cp.`department.json` t1, 
cp.`employee.json` t2 where (cast(t1.department_id as double)) = 
t2.department_id");
+    test("select t1.full_name from cp.`employee.json` t1, cp.`department.json` 
t2 where cast(t1.department_id as double) = t2.department_id and 
cast(t1.position_id as bigint) = t2.department_id");
+    test("select t1.full_name from cp.`employee.json` t1, cp.`department.json` 
t2 where t1.department_id = t2.department_id and t1.position_id = 
t2.department_id");
+  }
 }

Reply via email to