DRILL-1101: For Union-All, allow strings of different lengths on either side of 
the union to be treated as compatible.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/d22f325c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/d22f325c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/d22f325c

Branch: refs/heads/master
Commit: d22f325c395c22b3c5a5762e7aa1c379cfda7893
Parents: 9921547
Author: Aman Sinha <[email protected]>
Authored: Mon Jul 7 23:17:11 2014 -0700
Committer: Jacques Nadeau <[email protected]>
Committed: Tue Jul 8 17:05:33 2014 -0700

----------------------------------------------------------------------
 .../exec/planner/common/DrillRelOptUtil.java    | 71 +++++++++++++++++++
 .../exec/planner/common/DrillUnionRelBase.java  | 19 ++++-
 .../exec/planner/physical/UnionAllPrule.java    |  2 +-
 .../org/apache/drill/TestExampleQueries.java    | 35 +---------
 .../java/org/apache/drill/TestUnionAll.java     | 73 ++++++++++++++++++++
 5 files changed, 164 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
new file mode 100644
index 0000000..58dc34d
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.common;
+
+import java.util.List;
+
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.sql.type.SqlTypeName;
+import org.eigenbase.util.Pair;
+
+/**
+ * Utility class that is a subset of the RelOptUtil class and is a placeholder 
for Drill specific 
+ * static methods that are needed during either logical or physical planning.
+ */
+public abstract class DrillRelOptUtil {
+
+  // Variant of RelOptUtil.areRowTypesEqual() that can also accept CHAR columns of differing length.
+  public static boolean areRowTypesEqual(
+      RelDataType rowType1,    // first row type (its CHAR precision must be the smaller or equal one)
+      RelDataType rowType2,    // second row type, compared field-by-field against rowType1
+      boolean compareNames,    // true: only identity-equal row types are accepted
+      boolean allowSubstring) {  // true: CHAR(m) in rowType1 matches CHAR(n) in rowType2 when m <= n
+    if (rowType1 == rowType2) {
+      return true;
+    }
+    if (compareNames) {
+      // if types are not identity-equal, then either the names or
+      // the types must be different
+      return false;
+    }
+    if (rowType2.getFieldCount() != rowType1.getFieldCount()) {
+      return false;
+    }
+    final List<RelDataTypeField> f1 = rowType1.getFieldList();
+    final List<RelDataTypeField> f2 = rowType2.getFieldList();
+    for (Pair<RelDataTypeField, RelDataTypeField> pair : Pair.zip(f1, f2)) {
+      final RelDataType type1 = pair.left.getType();
+      final RelDataType type2 = pair.right.getType();
+      // If one of the types is ANY comparison should succeed
+      if (type1.getSqlTypeName() == SqlTypeName.ANY
+        || type2.getSqlTypeName() == SqlTypeName.ANY) {
+        continue;
+      }
+      if (!type1.equals(type2)) {
+        if (allowSubstring
+            && (type1.getSqlTypeName() == SqlTypeName.CHAR && type2.getSqlTypeName() == SqlTypeName.CHAR)
+            && (type1.getPrecision() <= type2.getPrecision())) {
+          continue;  // this column is compatible; the remaining columns must still be checked
+        }
+        return false;
+      }
+    }
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java
index 6a828e2..fa0465f 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillUnionRelBase.java
@@ -23,7 +23,12 @@ import org.eigenbase.rel.InvalidRelException;
 import org.eigenbase.rel.RelNode;
 import org.eigenbase.rel.UnionRelBase;
 import org.eigenbase.relopt.RelOptCluster;
+import org.eigenbase.relopt.RelOptUtil;
 import org.eigenbase.relopt.RelTraitSet;
+import org.eigenbase.reltype.RelDataType;
+import org.eigenbase.reltype.RelDataTypeField;
+import org.eigenbase.sql.type.SqlTypeName;
+import org.eigenbase.util.Pair;
 
 /**
  * Base class for logical and physical Union implemented in Drill
@@ -33,8 +38,20 @@ public abstract class DrillUnionRelBase extends UnionRelBase 
implements DrillRel
   public DrillUnionRelBase(RelOptCluster cluster, RelTraitSet traits,
       List<RelNode> inputs, boolean all) throws InvalidRelException {
     super(cluster, traits, inputs, all);
-    if (! this.isHomogeneous(false /* don't compare names */)) {
+    // if (! this.isHomogeneous(false /* don't compare names */)) {
+    if (! this.isCompatible(false /* don't compare names */, true /* allow 
substrings */)) {
       throw new InvalidRelException("Input row types of the Union are not 
compatible.");
     }
   }
+  
+  public boolean isCompatible(boolean compareNames, boolean allowSubstring) {
+    RelDataType unionType = getRowType();
+    for (RelNode input : getInputs()) {
+      if (! DrillRelOptUtil.areRowTypesEqual(
+          input.getRowType(), unionType, compareNames, allowSubstring)) {
+        return false;
+      }
+    }
+    return true;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java
index 1ce73a4..bcddce6 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/UnionAllPrule.java
@@ -43,7 +43,7 @@ public class UnionAllPrule extends Prule {
   @Override
   public boolean matches(RelOptRuleCall call) {
     DrillUnionRel union = (DrillUnionRel) call.rel(0);
-    return ((! union.isDistinct()) && union.isHomogeneous(false /* don't 
compare names */)); 
+    return (! union.isDistinct()); 
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java 
b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index 57cf072..7943756 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -38,8 +38,7 @@ public class TestExampleQueries extends BaseTestQuery{
     test("select count(*) from cp.`customer.json` limit 1");
     test("select count(*) from cp.`customer.json` limit 1");
   }
-  
-
+ 
   @Test
   public void testCaseReturnValueVarChar() throws Exception{
     test("select case when employee_id < 1000 then 'ABC' else 'DEF' end from 
cp.`employee.json` limit 5");
@@ -159,38 +158,6 @@ public class TestExampleQueries extends BaseTestQuery{
     test("select count(*) as mycnt, count(*) + 2 * count(*) as mycnt2 from 
cp.`tpch/nation.parquet` where 1 < 2");
   }
 
-
-  @Test    // Simple Union-All over two scans
-  public void testUnionAll1() throws Exception {
-    test("select n_regionkey from cp.`tpch/nation.parquet` union all select 
r_regionkey from cp.`tpch/region.parquet`");  
-  }
-
-  @Test  // Union-All over inner joins
-  public void testUnionAll2() throws Exception {
-    test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 inner join 
cp.`tpch/region.parquet` r1 on n1.n_regionkey = r1.r_regionkey where 
n1.n_nationkey in (1, 2)  union all select n2.n_nationkey from 
cp.`tpch/nation.parquet` n2 inner join cp.`tpch/region.parquet` r2 on 
n2.n_regionkey = r2.r_regionkey where n2.n_nationkey in (3, 4)");
-  }
-  
-  @Test  // Union-All over grouped aggregates
-  public void testUnionAll3() throws Exception {
-    test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 where 
n1.n_nationkey in (1, 2) group by n1.n_nationkey union all select 
r1.r_regionkey from cp.`tpch/region.parquet` r1 group by r1.r_regionkey");
-  }
-  
-  @Test    // Chain of Union-Alls
-  public void testUnionAll4() throws Exception {
-    test("select n_regionkey from cp.`tpch/nation.parquet` union all select 
r_regionkey from cp.`tpch/region.parquet` union all select n_nationkey from 
cp.`tpch/nation.parquet` union all select c_custkey from 
cp.`tpch/customer.parquet` where c_custkey < 5");  
-  }
-  
-  @Test  // Union-All of all columns in the table
-  public void testUnionAll5() throws Exception {
-    test("select * from cp.`tpch/region.parquet` r1 union all select * from 
cp.`tpch/region.parquet` r2");
-  }
-  
-  @Test
-  @Ignore // Produces wrong result
-  public void testUnionAll6() throws Exception {
-    test("select n_nationkey, n_regionkey from cp.`tpch/nation.parquet` where 
n_regionkey = 1 union all select r_regionkey, r_regionkey from 
cp.`tpch/region.parquet` where r_regionkey = 2");
-  }  
-
   @Test
   // cast non-exist column from json file. Should return null value. 
   public void testDrill428() throws Exception {

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/d22f325c/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java 
b/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java
new file mode 100644
index 0000000..4cb6c3e
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestUnionAll.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class TestUnionAll extends BaseTestQuery{
+  static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(TestUnionAll.class);
+  
+  @Test
+  public void testQuery1() throws Exception {
+    test("select c_name from cp.`tpch/customer.parquet` union all select 
c_acctbal from cp.`tpch/customer.parquet`"); 
+  }
+
+  @Test    // Simple Union-All over two scans
+  public void testUnionAll1() throws Exception {
+    test("select n_regionkey from cp.`tpch/nation.parquet` union all select 
r_regionkey from cp.`tpch/region.parquet`");  
+  }
+
+  @Test  // Union-All over inner joins
+  public void testUnionAll2() throws Exception {
+    test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 inner join 
cp.`tpch/region.parquet` r1 on n1.n_regionkey = r1.r_regionkey where 
n1.n_nationkey in (1, 2)  union all select n2.n_nationkey from 
cp.`tpch/nation.parquet` n2 inner join cp.`tpch/region.parquet` r2 on 
n2.n_regionkey = r2.r_regionkey where n2.n_nationkey in (3, 4)");
+  }
+  
+  @Test  // Union-All over grouped aggregates
+  public void testUnionAll3() throws Exception {
+    test("select n1.n_nationkey from cp.`tpch/nation.parquet` n1 where 
n1.n_nationkey in (1, 2) group by n1.n_nationkey union all select 
r1.r_regionkey from cp.`tpch/region.parquet` r1 group by r1.r_regionkey");
+  }
+  
+  @Test    // Chain of Union-Alls
+  public void testUnionAll4() throws Exception {
+    test("select n_regionkey from cp.`tpch/nation.parquet` union all select 
r_regionkey from cp.`tpch/region.parquet` union all select n_nationkey from 
cp.`tpch/nation.parquet` union all select c_custkey from 
cp.`tpch/customer.parquet` where c_custkey < 5");  
+  }
+  
+  @Test  // Union-All of all columns in the table
+  public void testUnionAll5() throws Exception {
+    test("select * from cp.`tpch/region.parquet` r1 union all select * from 
cp.`tpch/region.parquet` r2");
+  }
+  
+  @Test
+  @Ignore // Produces wrong result
+  public void testUnionAll6() throws Exception {
+    test("select n_nationkey, n_regionkey from cp.`tpch/nation.parquet` where 
n_regionkey = 1 union all select r_regionkey, r_regionkey from 
cp.`tpch/region.parquet` where r_regionkey = 2");
+  }  
+
+  @Test  // Union-all of two string literals of different lengths
+  public void testUnionAll7() throws Exception {
+    test("select 'abc' from cp.`tpch/region.parquet` union all select 
'abcdefgh' from cp.`tpch/region.parquet`");
+  }
+
+  @Test  // Union-all of two character columns of different lengths
+  public void testUnionAll8() throws Exception {
+    test("select n_name from cp.`tpch/nation.parquet` union all select 
r_comment from cp.`tpch/region.parquet`");  
+  }
+  
+  
+}

Reply via email to