DRILL-1101: For Union-All, allow strings of different lengths on either side of the union to be treated as compatible.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/d22f325c Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/d22f325c Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/d22f325c Branch: refs/heads/master Commit: d22f325c395c22b3c5a5762e7aa1c379cfda7893 Parents: 9921547 Author: Aman Sinha <[email protected]> Authored: Mon Jul 7 23:17:11 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Tue Jul 8 17:05:33 2014 -0700 ---------------------------------------------------------------------- .../exec/planner/common/DrillRelOptUtil.java | 71 +++++++++++++++++++ .../exec/planner/common/DrillUnionRelBase.java | 19 ++++- .../exec/planner/physical/UnionAllPrule.java | 2 +- .../org/apache/drill/TestExampleQueries.java | 35 +--------- .../java/org/apache/drill/TestUnionAll.java | 73 ++++++++++++++++++++ 5 files changed, 164 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java new file mode 100644 index 0000000..58dc34d --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.common; + +import java.util.List; + +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.sql.type.SqlTypeName; +import org.eigenbase.util.Pair; + +/** + * Utility class that is a subset of the RelOptUtil class and is a placeholder for Drill specific + * static methods that are needed during either logical or physical planning. + */ +public abstract class DrillRelOptUtil { + + // Similar to RelOptUtil.areRowTypesEqual() with the additional check for allowSubstring + public static boolean areRowTypesEqual( + RelDataType rowType1, + RelDataType rowType2, + boolean compareNames, + boolean allowSubstring) { + if (rowType1 == rowType2) { + return true; + } + if (compareNames) { + // if types are not identity-equal, then either the names or + // the types must be different + return false; + } + if (rowType2.getFieldCount() != rowType1.getFieldCount()) { + return false; + } + final List<RelDataTypeField> f1 = rowType1.getFieldList(); + final List<RelDataTypeField> f2 = rowType2.getFieldList(); + for (Pair<RelDataTypeField, RelDataTypeField> pair : Pair.zip(f1, f2)) { + final RelDataType type1 = pair.left.getType(); + final RelDataType type2 = pair.right.getType(); + // If one of the types is ANY comparison should succeed + if (type1.getSqlTypeName() == SqlTypeName.ANY + || type2.getSqlTypeName() == SqlTypeName.ANY) { + continue; + } + if (!type1.equals(type2)) { + if (allowSubstring + && (type1.getSqlTypeName() == SqlTypeName.CHAR && type2.getSqlTypeName() == SqlTypeName.CHAR) + && (type1.getPrecision() <= type2.getPrecision())) { + return true; + } + return false; + } + } + return true; + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java index 6a828e2..fa0465f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java @@ -23,7 +23,12 @@ import org.eigenbase.rel.InvalidRelException; import org.eigenbase.rel.RelNode; import org.eigenbase.rel.UnionRelBase; import org.eigenbase.relopt.RelOptCluster; +import org.eigenbase.relopt.RelOptUtil; import org.eigenbase.relopt.RelTraitSet; +import org.eigenbase.reltype.RelDataType; +import org.eigenbase.reltype.RelDataTypeField; +import org.eigenbase.sql.type.SqlTypeName; +import org.eigenbase.util.Pair; /** * Base class for logical and physical Union implemented in Drill @@ -33,8 +38,20 @@ public abstract class DrillUnionRelBase extends UnionRelBase implements DrillRel public DrillUnionRelBase(RelOptCluster cluster, RelTraitSet traits, List<RelNode> inputs, boolean all) throws InvalidRelException { super(cluster, traits, inputs, all); - if (! this.isHomogeneous(false /* don't compare names */)) { + // if (! this.isHomogeneous(false /* don't compare names */)) { + if (! this.isCompatible(false /* don't compare names */, true /* allow substrings */)) { throw new InvalidRelException("Input row types of the Union are not compatible."); } } + + public boolean isCompatible(boolean compareNames, boolean allowSubstring) { + RelDataType unionType = getRowType(); + for (RelNode input : getInputs()) { + if (! DrillRelOptUtil.areRowTypesEqual( + input.getRowType(), unionType, compareNames, allowSubstring)) { + return false; + } + } + return true; + } } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java index 1ce73a4..bcddce6 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java @@ -43,7 +43,7 @@ public class UnionAllPrule extends Prule { @Override public boolean matches(RelOptRuleCall call) { DrillUnionRel union = (DrillUnionRel) call.rel(0); - return ((! union.isDistinct()) && union.isHomogeneous(false /* don't compare names */)); + return (! union.isDistinct()); } @Override http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java index 57cf072..7943756 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java +++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java @@ -38,8 +38,7 @@ public class TestExampleQueries extends BaseTestQuery{ test("select count(*) from cp.`customer.json` limit 1"); test("select count(*) from cp.`customer.json` limit 1"); } - - + @Test public void testCaseReturnValueVarChar() throws Exception{ test("select case when employee_id < 1000 then 'ABC' else 'DEF' end from cp.`employee.json` limit 5"); @@ -159,38 +158,6 @@ public class TestExampleQueries extends BaseTestQuery{ test("select count(*) as mycnt, count(*) + 2 * count(*) as mycnt2 from cp.`tpch/nation.parquet` where 1 < 2"); } - - @Test // Simple Union-All over two scans - public void testUnionAll1() throws Exception { - test("select n_regionkey from cp.`tpch/nation.parquet` union all select r_regionkey from cp.`tpch/region.parquet`"); - } - - @Test // Union-All over inner joins - public void testUnionAll2() throws Exception { - test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 inner join cp.`tpch/region.parquet` r1 on n1.n_regionkey = r1.r_regionkey where n1.n_nationkey in (1, 2) union all select n2.n_nationkey from cp.`tpch/nation.parquet` n2 inner join cp.`tpch/region.parquet` r2 on n2.n_regionkey = r2.r_regionkey where n2.n_nationkey in (3, 4)"); - } - - @Test // Union-All over grouped aggregates - public void testUnionAll3() throws Exception { - test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 where n1.n_nationkey in (1, 2) group by n1.n_nationkey union all select r1.r_regionkey from cp.`tpch/region.parquet` r1 group by r1.r_regionkey"); - } - - @Test // Chain of Union-Alls - public void testUnionAll4() throws Exception { - test("select n_regionkey from cp.`tpch/nation.parquet` union all select r_regionkey from cp.`tpch/region.parquet` union all select n_nationkey from cp.`tpch/nation.parquet` union all select c_custkey from cp.`tpch/customer.parquet` where c_custkey < 5"); - } - - @Test // Union-All of all columns in the table - public void testUnionAll5() throws Exception { - test("select * from cp.`tpch/region.parquet` r1 union all select * from cp.`tpch/region.parquet` r2"); - } - - @Test - @Ignore // Produces wrong result - public void testUnionAll6() throws Exception { - test("select n_nationkey, n_regionkey from cp.`tpch/nation.parquet` where n_regionkey = 1 union all select r_regionkey, r_regionkey from cp.`tpch/region.parquet` where r_regionkey = 2"); - } - @Test // cast non-exist column from json file. Should return null value. public void testDrill428() throws Exception { http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java b/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java new file mode 100644 index 0000000..4cb6c3e --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestUnionAll extends BaseTestQuery{ + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestUnionAll.class); + + @Test + public void testQuery1() throws Exception { + test("select c_name from cp.`tpch/customer.parquet` union all select c_acctbal from cp.`tpch/customer.parquet`"); + } + + @Test // Simple Union-All over two scans + public void testUnionAll1() throws Exception { + test("select n_regionkey from cp.`tpch/nation.parquet` union all select r_regionkey from cp.`tpch/region.parquet`"); + } + + @Test // Union-All over inner joins + public void testUnionAll2() throws Exception { + test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 inner join cp.`tpch/region.parquet` r1 on n1.n_regionkey = r1.r_regionkey where n1.n_nationkey in (1, 2) union all select n2.n_nationkey from cp.`tpch/nation.parquet` n2 inner join cp.`tpch/region.parquet` r2 on n2.n_regionkey = r2.r_regionkey where n2.n_nationkey in (3, 4)"); + } + + @Test // Union-All over grouped aggregates + public void testUnionAll3() throws Exception { + test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 where n1.n_nationkey in (1, 2) group by n1.n_nationkey union all select r1.r_regionkey from cp.`tpch/region.parquet` r1 group by r1.r_regionkey"); + } + + @Test // Chain of Union-Alls + public void testUnionAll4() throws Exception { + test("select n_regionkey from cp.`tpch/nation.parquet` union all select r_regionkey from cp.`tpch/region.parquet` union all select n_nationkey from cp.`tpch/nation.parquet` union all select c_custkey from cp.`tpch/customer.parquet` where c_custkey < 5"); + } + + @Test // Union-All of all columns in the table + public void testUnionAll5() throws Exception { + test("select * from cp.`tpch/region.parquet` r1 union all select * from cp.`tpch/region.parquet` r2"); + } + + @Test + @Ignore // Produces wrong result + public void testUnionAll6() throws Exception { + test("select n_nationkey, n_regionkey from cp.`tpch/nation.parquet` where n_regionkey = 1 union all select r_regionkey, r_regionkey from cp.`tpch/region.parquet` where r_regionkey = 2"); + } + + @Test // Union-all of two string literals of different lengths + public void testUnionAll7() throws Exception { + test("select 'abc' from cp.`tpch/region.parquet` union all select 'abcdefgh' from cp.`tpch/region.parquet`"); + } + + @Test // Union-all of two character columns of different lengths + public void testUnionAll8() throws Exception { + test("select n_name from cp.`tpch/nation.parquet` union all select r_comment from cp.`tpch/region.parquet`"); + } + + +}
