Repository: hive Updated Branches: refs/heads/master fdd103dde -> ec0636c06
HIVE-19727: Fix Signature matching of table aliases (Zoltan Haindrich reviewed by Ashutosh Chauhan) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4750e411 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4750e411 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4750e411 Branch: refs/heads/master Commit: 4750e411b23ba297fb8fe0f752c5438c079b393e Parents: fdd103d Author: Zoltan Haindrich <k...@rxd.hu> Authored: Thu Jun 7 09:59:59 2018 +0200 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Thu Jun 7 09:59:59 2018 +0200 ---------------------------------------------------------------------- .../hadoop/hive/ql/plan/TableScanDesc.java | 24 ++-- .../signature/TestOperatorSignature.java | 8 +- .../spark/dynamic_rdd_cache.q.out | 57 +------- .../results/clientpositive/spark/join22.q.out | 20 +-- .../spark/spark_explainuser_1.q.out | 44 ++----- .../clientpositive/spark/subquery_select.q.out | 21 +-- .../results/clientpositive/spark/union10.q.out | 38 +----- .../results/clientpositive/spark/union11.q.out | 44 +------ .../results/clientpositive/spark/union15.q.out | 48 ++----- .../results/clientpositive/spark/union2.q.out | 21 +-- .../results/clientpositive/spark/union20.q.out | 19 +-- .../results/clientpositive/spark/union25.q.out | 22 +--- .../results/clientpositive/spark/union4.q.out | 19 +-- .../results/clientpositive/spark/union5.q.out | 21 +-- .../results/clientpositive/spark/union9.q.out | 40 +----- .../clientpositive/spark/union_ppr.q.out | 132 +------------------ 16 files changed, 52 insertions(+), 526 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 57df7e2..192c04c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -52,10 +52,10 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD private List<VirtualColumn> virtualCols; private String statsAggKeyPrefix; // stats publishing/aggregating key prefix - /** - * A list of the partition columns of the table. - * Set by the semantic analyzer only in case of the analyze command. - */ + /** + * A list of the partition columns of the table. + * Set by the semantic analyzer only in case of the analyze command. + */ private List<String> partColumns; /** @@ -92,13 +92,13 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD private transient List<String> referencedColumns; public static final String FILTER_EXPR_CONF_STR = - "hive.io.filter.expr.serialized"; + "hive.io.filter.expr.serialized"; public static final String FILTER_TEXT_CONF_STR = - "hive.io.filter.text"; + "hive.io.filter.text"; public static final String FILTER_OBJECT_CONF_STR = - "hive.io.filter.object"; + "hive.io.filter.object"; // input file name (big) to bucket number private Map<String, Integer> bucketFileNameMapping; @@ -157,8 +157,6 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD } @Explain(displayName = "alias") - // FIXME: this might not needed to be in the signature; but in that case the compare shouldn't consider it either! - @Signature public String getAlias() { return alias; } @@ -386,9 +384,9 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD return isMetadataOnly; } - // @Signature + @Signature public String getQualifiedTable() { - return tableMetadata.getFullyQualifiedName(); + return dbName + "." + tableName; } public Table getTableMetadata() { @@ -540,7 +538,7 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD public boolean isSame(OperatorDesc other) { if (getClass().getName().equals(other.getClass().getName())) { TableScanDesc otherDesc = (TableScanDesc) other; - return Objects.equals(getAlias(), otherDesc.getAlias()) && + return Objects.equals(getQualifiedTable(), otherDesc.getQualifiedTable()) && ExprNodeDescUtils.isSame(getFilterExpr(), otherDesc.getFilterExpr()) && getRowLimit() == otherDesc.getRowLimit() && isGatherStats() == otherDesc.isGatherStats(); @@ -549,6 +547,6 @@ public class TableScanDesc extends AbstractOperatorDesc implements IStatsGatherD } public boolean isFullAcidTable() { - return isTranscationalTable() && !getAcidOperationalProperties().isInsertOnly(); + return isTranscationalTable() && !getAcidOperationalProperties().isInsertOnly(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java index b09aafb..9561135 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java @@ -93,7 +93,6 @@ public class TestOperatorSignature { Operator<TableScanDesc> t1a = getTsOp(3); Operator<TableScanDesc> t2 = getTsOp(4); - assertTrue(t1.logicalEquals(t1a)); checkEquals(t1, t1a); checkNotEquals(t1, t2); } @@ -149,11 +148,10 @@ public class TestOperatorSignature { private Operator<TableScanDesc> getTsOp(int i) { Table tblMetadata = new Table("db", "table"); - // FIXME: I think this shouldn't be sensitive to the alias; but currently its included in logicalEquals...check that - TableScanDesc desc = new TableScanDesc("alias"/*+ cCtx.nextOperatorId()*/, tblMetadata); - List<ExprNodeDesc> as = + TableScanDesc desc = new TableScanDesc("alias_" + cCtx.nextOperatorId(), tblMetadata); + List<ExprNodeDesc> as = Lists.newArrayList(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(i)), - new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false)); + new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false)); ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, as); desc.setFilterExpr(f1); Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc); http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 176cd7c..0c7ac4f 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -328,8 +328,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -349,40 +349,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -514,7 +480,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 1) Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 31), Map 7 (PARTITION-LEVEL SORT, 31), Reducer 2 (PARTITION-LEVEL SORT, 31), Reducer 6 (PARTITION-LEVEL SORT, 31) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 6 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -553,23 +519,6 @@ STAGE PLANS: Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/join22.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/join22.q.out b/ql/src/test/results/clientpositive/spark/join22.q.out index f1732ab..75aeffb 100644 --- a/ql/src/test/results/clientpositive/spark/join22.q.out +++ b/ql/src/test/results/clientpositive/spark/join22.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -53,24 +53,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index f8e28e6..103491d 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -741,8 +741,8 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (GROUP) Reducer 3 <- Reducer 2 (SORT), Reducer 5 (SORT), Reducer 7 (SORT) -Reducer 5 <- Map 4 (GROUP) -Reducer 7 <- Map 6 (GROUP) +Reducer 5 <- Map 1 (GROUP) +Reducer 7 <- Map 1 (GROUP) Stage-0 Fetch Operator @@ -772,28 +772,14 @@ Stage-0 Output:["_col0"] Group By Operator [GBY_11] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 4 [GROUP] - GROUP [RS_10] - Group By Operator [GBY_9] (rows=1 width=8) - Output:["_col0"],aggregations:["count(key)"] - Select Operator [SEL_8] (rows=20 width=80) - Output:["key"] - TableScan [TS_7] (rows=20 width=80) - default@cbo_t3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <- Please refer to the previous Map 1 [GROUP] <-Reducer 7 [SORT] SORT [RS_23] Select Operator [SEL_20] (rows=1 width=87) Output:["_col0"] Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 6 [GROUP] - GROUP [RS_18] - Group By Operator [GBY_17] (rows=1 width=8) - Output:["_col0"],aggregations:["count(key)"] - Select Operator [SEL_16] (rows=20 width=80) - Output:["key"] - TableScan [TS_15] (rows=20 width=80) - default@cbo_t3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <- Please refer to the previous Map 1 [GROUP] PREHOOK: query: explain select unionsrc.key, count(1) FROM (select 'max' as key, max(c_int) as value from cbo_t3 s1 UNION ALL @@ -813,8 +799,8 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (GROUP) Reducer 3 <- Reducer 2 (GROUP), Reducer 6 (GROUP), Reducer 8 (GROUP) Reducer 4 <- Reducer 3 (SORT) -Reducer 6 <- Map 5 (GROUP) -Reducer 8 <- Map 7 (GROUP) +Reducer 6 <- Map 1 (GROUP) +Reducer 8 <- Map 1 (GROUP) Stage-0 Fetch Operator @@ -854,14 +840,7 @@ Stage-0 Output:["_col0"] Group By Operator [GBY_11] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 5 [GROUP] - GROUP [RS_10] - Group By Operator [GBY_9] (rows=1 width=8) - Output:["_col0"],aggregations:["count(key)"] - Select Operator [SEL_8] (rows=20 width=80) - Output:["key"] - TableScan [TS_7] (rows=20 width=80) - default@cbo_t3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <- Please refer to the previous Map 1 [GROUP] <-Reducer 8 [GROUP] GROUP [RS_25] PartitionCols:_col0 @@ -871,14 +850,7 @@ Stage-0 Output:["_col0"] Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 7 [GROUP] - GROUP [RS_18] - Group By Operator [GBY_17] (rows=1 width=8) - Output:["_col0"],aggregations:["count(key)"] - Select Operator [SEL_16] (rows=20 width=80) - Output:["key"] - TableScan [TS_15] (rows=20 width=80) - default@cbo_t3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <- Please refer to the previous Map 1 [GROUP] PREHOOK: query: explain select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/subquery_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index 615f508..bee3107 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -5032,7 +5032,7 @@ STAGE PLANS: Reducer 16 <- Map 15 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 13 (GROUP PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 6 (GROUP, 2) Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### @@ -5111,25 +5111,6 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 4 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map 6 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out index 7215fc2..672dc89 100644 --- a/ql/src/test/results/clientpositive/spark/union10.q.out +++ b/ql/src/test/results/clientpositive/spark/union10.q.out @@ -32,8 +32,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -53,40 +53,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union11.q.out b/ql/src/test/results/clientpositive/spark/union11.q.out index 83b343a..b22d41a 100644 --- a/ql/src/test/results/clientpositive/spark/union11.q.out +++ b/ql/src/test/results/clientpositive/spark/union11.q.out @@ -22,8 +22,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 1) Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2), Reducer 7 (GROUP, 2) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,44 +45,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -194,6 +156,6 @@ POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -tst3 1 tst1 1 tst2 1 +tst3 1 http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union15.q.out b/ql/src/test/results/clientpositive/spark/union15.q.out index 28a6227..7bb1b14 100644 --- a/ql/src/test/results/clientpositive/spark/union15.q.out +++ b/ql/src/test/results/clientpositive/spark/union15.q.out @@ -21,7 +21,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 2), Map 5 (GROUP, 2), Reducer 2 (GROUP, 2) + Reducer 3 <- Map 4 (GROUP, 2), Map 4 (GROUP, 2), Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -65,28 +65,6 @@ STAGE PLANS: Statistics: Num rows: 51 Data size: 390 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 390 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 51 Data size: 390 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -152,20 +130,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -278 2 -273 2 -128 2 -255 2 +401 2 tst1 1 -146 2 -369 2 +66 2 +150 2 +128 2 213 2 +255 2 + 20 311 2 -224 2 238 2 -150 2 - 20 -406 2 -66 2 -401 2 +146 2 98 2 +224 2 +273 2 +369 2 +406 2 +278 2 http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union2.q.out b/ql/src/test/results/clientpositive/spark/union2.q.out index 0f1a49c..d867669 100644 --- a/ql/src/test/results/clientpositive/spark/union2.q.out +++ b/ql/src/test/results/clientpositive/spark/union2.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1), Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -36,25 +36,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union20.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union20.q.out b/ql/src/test/results/clientpositive/spark/union20.q.out index cf72b6e..9fb2d43 100644 --- a/ql/src/test/results/clientpositive/spark/union20.q.out +++ b/ql/src/test/results/clientpositive/spark/union20.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 1) Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 6 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -69,23 +69,6 @@ STAGE PLANS: Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union25.q.out b/ql/src/test/results/clientpositive/spark/union25.q.out index d63f0e8..d473d1a 100644 --- a/ql/src/test/results/clientpositive/spark/union25.q.out +++ b/ql/src/test/results/clientpositive/spark/union25.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2), Reducer 4 (GROUP, 2) - Reducer 4 <- Map 3 (GROUP, 2), Map 5 (GROUP, 2) + Reducer 4 <- Map 3 (GROUP, 2), Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -112,26 +112,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out index e9b4a3c..019e460 100644 --- a/ql/src/test/results/clientpositive/spark/union4.q.out +++ b/ql/src/test/results/clientpositive/spark/union4.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -48,23 +48,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union5.q.out b/ql/src/test/results/clientpositive/spark/union5.q.out index b97dd38..0a0e3c7 100644 --- a/ql/src/test/results/clientpositive/spark/union5.q.out +++ b/ql/src/test/results/clientpositive/spark/union5.q.out @@ -18,7 +18,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 1) Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -40,25 +40,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union9.q.out b/ql/src/test/results/clientpositive/spark/union9.q.out index e38e4cd..16186c5 100644 --- a/ql/src/test/results/clientpositive/spark/union9.q.out +++ b/ql/src/test/results/clientpositive/spark/union9.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1), Map 3 (GROUP, 1), Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,44 +38,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1500 Data size: 12000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized - Map 4 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1500 Data size: 12000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/4750e411/ql/src/test/results/clientpositive/spark/union_ppr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union_ppr.q.out b/ql/src/test/results/clientpositive/spark/union_ppr.q.out index 9c9ec6a..46c8246 100644 --- a/ql/src/test/results/clientpositive/spark/union_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/union_ppr.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -157,136 +157,6 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a:_u1-subquery1:x] /srcpart/ds=2008-04-08/hr=12 [a:_u1-subquery1:x] - Map 3 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col3 (type: string) - null sort order: aaaa - sort order: ++++ - Statistics: Num rows: 666 Data size: 7074 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false - Execution mode: vectorized - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a:_u1-subquery2:y] - /srcpart/ds=2008-04-08/hr=12 [a:_u1-subquery2:y] Reducer 2 Execution mode: vectorized Needs Tagging: false