DRILL-1125: Apply casts to handle different input types in the join condition
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/1c40b5e7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/1c40b5e7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/1c40b5e7 Branch: refs/heads/master Commit: 1c40b5e745ab20da6725568c9e4790f34ae73c8f Parents: e22803a Author: Mehant Baid <[email protected]> Authored: Sun Sep 21 15:03:56 2014 -0700 Committer: Steven Phillips <[email protected]> Committed: Mon Sep 29 18:21:44 2014 -0700 ---------------------------------------------------------------------- .../exec/expr/ExpressionTreeMaterializer.java | 7 ++- .../physical/impl/common/ChainedHashTable.java | 61 +++++++++++++++++++- .../drill/exec/resolver/TypeCastRules.java | 3 +- .../org/apache/drill/TestExampleQueries.java | 6 ++ 4 files changed, 70 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java index 2854c14..2a5afce 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java @@ -20,6 +20,7 @@ package org.apache.drill.exec.expr; import java.util.Arrays; import java.util.List; +import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.expression.BooleanOperator; import org.apache.drill.common.expression.CastExpression; import org.apache.drill.common.expression.ConvertExpression; @@ -298,8 +299,10 @@ public class ExpressionTreeMaterializer { // Implicitly cast the else expression newElseExpr = addCastExpression(newElseExpr, conditions.expression.getMajorType(), registry); } else { - assert false: "Incorrect least restrictive type computed, leastRestrictive: " + - leastRestrictive.toString() + " thenType: " + thenType.toString() + " elseType: " + elseType; + /* Cannot cast one of the two expressions to make the output type of if and else expression + * to be the same. Raise error. + */ + throw new DrillRuntimeException("Case expression should have similar output type on all its branches"); } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java index f77407e..792b7ab 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java @@ -18,11 +18,17 @@ package org.apache.drill.exec.physical.impl.common; import java.io.IOException; +import java.util.LinkedList; +import java.util.List; import org.apache.drill.common.expression.ErrorCollector; import org.apache.drill.common.expression.ErrorCollectorImpl; import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.FunctionCall; +import org.apache.drill.common.expression.ExpressionPosition; +import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.logical.data.NamedExpression; +import org.apache.drill.common.types.TypeProtos; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.common.types.Types; import org.apache.drill.exec.compile.sig.GeneratorMapping; @@ -43,6 +49,7 @@ import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.RecordBatch; import org.apache.drill.exec.record.TypedFieldId; import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.resolver.TypeCastRules; import org.apache.drill.exec.vector.ValueVector; import com.sun.codemodel.JConditional; @@ -183,8 +190,17 @@ public class ChainedHashTable { } setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds); - setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, keyExprsBuild); - setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, keyExprsProbe); + /* Before generating the code for hashing the build and probe expressions + * examine the expressions to make sure they are of the same type, add casts if necessary. + * If they are not of the same type, hashing the same value of different types will yield different hash values. + * NOTE: We add the cast only for the hash function, comparator function can handle the case + * when expressions are different (for eg we have comparator functions that compare bigint and float8) + * However for the hash to work correctly we would need to apply the cast. + */ + addLeastRestrictiveCasts(keyExprsBuild, keyExprsProbe); + + setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, keyExprsBuild, false); + setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, keyExprsProbe, true); HashTable ht = context.getImplementationClass(top); ht.setup(htConfig, context, allocator, incomingBuild, incomingProbe, outgoing, htContainerOrig); @@ -261,7 +277,46 @@ public class ChainedHashTable { } } - private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, LogicalExpression[] keyExprs) throws SchemaChangeException { + private void addLeastRestrictiveCasts(LogicalExpression[] keyExprsBuild, LogicalExpression[] keyExprsProbe) { + + // If we don't have probe expressions then nothing to do get out + if (keyExprsProbe == null) { + return; + } + + assert keyExprsBuild.length == keyExprsProbe.length; + + for (int i = 0; i < keyExprsBuild.length; i++) { + MinorType buildType = keyExprsBuild[i].getMajorType().getMinorType(); + MinorType probeType = keyExprsProbe[i].getMajorType().getMinorType(); + + if (buildType != probeType) { + // We need to add a cast to one of the expressions + List<MinorType> types = new LinkedList<>(); + types.add(buildType); + types.add(probeType); + MinorType result = TypeCastRules.getLeastRestrictiveType(types); + + // Add the cast + List<LogicalExpression> args = new LinkedList<>(); + if (result == null) { + throw new DrillRuntimeException(String.format("Join conditions cannot be compared failing build expression: %s failing probe expression: %s", + keyExprsBuild[i].getMajorType().toString(), keyExprsProbe[i].getMajorType().toString())); + } + else if (result != buildType) { + // Add a cast expression on top of the build expression + args.add(keyExprsBuild[i]); + FunctionCall castCall = new FunctionCall("cast" + result.toString().toUpperCase(), args, ExpressionPosition.UNKNOWN); + keyExprsBuild[i] = ExpressionTreeMaterializer.materialize(castCall, incomingBuild, new ErrorCollectorImpl(), context.getFunctionRegistry()); + } else if (result != probeType) { + args.add(keyExprsProbe[i]); + FunctionCall castCall = new FunctionCall("cast" + result.toString().toUpperCase(), args, ExpressionPosition.UNKNOWN); + keyExprsProbe[i] = ExpressionTreeMaterializer.materialize(castCall, incomingProbe, new ErrorCollectorImpl(), context.getFunctionRegistry()); + } + } + } + } + private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, LogicalExpression[] keyExprs, boolean isProbe) throws SchemaChangeException { cg.setMappingSet(incomingMapping); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java index 7969d49..609b3a9 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.expression.FunctionCall; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MajorType; @@ -806,7 +805,7 @@ public class TypeCastRules { result = next; resultPrec = nextPrec; } else { - throw new DrillRuntimeException("Case expression branches have different output types "); + return null; } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/1c40b5e7/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java index 388d057..342bef1 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java @@ -475,4 +475,10 @@ public class TestExampleQueries extends BaseTestQuery{ test("select case when n_nationkey > 0 and n_nationkey < 2 then concat(n_name, '_abc') when n_nationkey >=2 and n_nationkey < 4 then '_EFG' else concat(n_name,'_XYZ') end from cp.`tpch/nation.parquet` ;"); } + @Test // tests join condition that has different input types + public void testJoinCondWithDifferentTypes() throws Exception { + test("select t1.department_description from cp.`department.json` t1, cp.`employee.json` t2 where (cast(t1.department_id as double)) = t2.department_id"); + test("select t1.full_name from cp.`employee.json` t1, cp.`department.json` t2 where cast(t1.department_id as double) = t2.department_id and cast(t1.position_id as bigint) = t2.department_id"); + test("select t1.full_name from cp.`employee.json` t1, cp.`department.json` t2 where t1.department_id = t2.department_id and t1.position_id = t2.department_id"); + } }
