HIVE-17568 : HiveJoinPushTransitivePredicatesRule may exchange predicates which are not valid on the other branch (Zoltan Haindrich via Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aa7edfef Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aa7edfef Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aa7edfef Branch: refs/heads/hive-14535 Commit: aa7edfefe20cede2c37d8c8b6c864c3b6039923f Parents: 6a99cf5 Author: Ashutosh Chauhan <[email protected]> Authored: Mon Sep 25 22:10:58 2017 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Mon Sep 25 22:10:58 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../HiveJoinPushTransitivePredicatesRule.java | 21 ++- .../queries/clientpositive/optimize_join_ptp.q | 16 +++ .../clientpositive/llap/optimize_join_ptp.q.out | 130 +++++++++++++++++++ 4 files changed, 167 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/aa7edfef/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8f52321..32a6418 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -223,6 +223,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ orc_ppd_schema_evol_1b.q,\ orc_ppd_schema_evol_2a.q,\ orc_ppd_schema_evol_2b.q,\ + optimize_join_ptp.q,\ parallel.q,\ partition_pruning.q,\ ptf.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/aa7edfef/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 9cdc5e9..1f186d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -28,7 +28,6 @@ import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories.FilterFactory; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -37,6 +36,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; @@ -181,6 +181,25 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule { } return super.visitCall(call); } + + @Override + public Void visitInputRef(RexInputRef inputRef) { + if (!areTypesCompatible(inputRef.getType(), types.get(inputRef.getIndex()).getType())) { + throw new Util.FoundOne(inputRef); + } + return super.visitInputRef(inputRef); + } + + private boolean areTypesCompatible(RelDataType type1, RelDataType type2) { + if (type1.equals(type2)) { + return true; + } + SqlTypeName sqlType1 = type1.getSqlTypeName(); + if (sqlType1 != null) { + return sqlType1.equals(type2.getSqlTypeName()); + } + return false; + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/aa7edfef/ql/src/test/queries/clientpositive/optimize_join_ptp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/optimize_join_ptp.q b/ql/src/test/queries/clientpositive/optimize_join_ptp.q new file mode 100644 index 0000000..5807ec3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/optimize_join_ptp.q @@ -0,0 +1,16 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; + +create table t1 (v string, k int); +insert into t1 values ('people', 10), ('strangers', 20), ('parents', 30); + +create table t2 (v string, k double); +insert into t2 values ('people', 10), ('strangers', 20), ('parents', 30); + +-- should not produce exceptions +explain +select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15; + +select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15; + + http://git-wip-us.apache.org/repos/asf/hive/blob/aa7edfef/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out new file mode 100644 index 0000000..5dbdeb4 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: create table t1 (v string, k int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (v string, k int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: insert into t1 values ('people', 10), ('strangers', 20), ('parents', 30) +PREHOOK: type: QUERY +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values ('people', 10), ('strangers', 20), ('parents', 30) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.k EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t1.v SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table t2 (v string, k double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (v string, k double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t2 values ('people', 10), ('strangers', 20), ('parents', 30) +PREHOOK: type: QUERY +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values ('people', 10), ('strangers', 20), ('parents', 30) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.k EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t2.v SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain +select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (k < 15) (type: boolean) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: v (type: string), k (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((v = 'people') and k is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: k (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 UDFToDouble(_col1) (type: double) + 1 _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 where t1.k in (select t2.k from t2 where t2.v='people') and t1.k<15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +people 10
