HIVE-20009: Fix runtime stats for merge statement (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78cbf147
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78cbf147
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78cbf147

Branch: refs/heads/master-txnstats
Commit: 78cbf147873752e7955fff37416edba372e2b69a
Parents: 8f57e25
Author: Zoltan Haindrich <k...@rxd.hu>
Authored: Sat Jun 30 09:18:28 2018 -0700
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Sat Jun 30 09:18:28 2018 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../java/org/apache/hadoop/hive/ql/Context.java |   4 +
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  |  10 +-
 .../clientpositive/runtime_stats_merge.q        |  41 ++++
 .../llap/runtime_stats_merge.q.out              | 194 +++++++++++++++++++
 5 files changed, 246 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/78cbf147/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2610bdd..8a64121 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -600,6 +600,7 @@ minillaplocal.query.files=\
   partition_pruning.q,\
   ptf.q,\
   ptf_streaming.q,\
+  runtime_stats_merge.q,\
   quotedid_smb.q,\
   resourceplan.q,\
   results_cache_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/78cbf147/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index bb41e98..3004f9c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -1093,6 +1093,10 @@ public class Context {
     return executionId;
   }
 
+  public void setPlanMapper(PlanMapper planMapper) {
+    this.planMapper = planMapper;
+  }
+
   public PlanMapper getPlanMapper() {
     return planMapper;
   }
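
Illustration, not part of the commit: the setter added to Context above is what lets
the semantic analyzer hand one shared PlanMapper to the context it builds for the
rewritten statement; the first hunk below uses it together with setStatsSource. A
minimal sketch of the propagation pattern follows. The class and method names are
hypothetical; the set/get calls are the ones visible in this diff.

    import java.io.IOException;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Context;

    // Sketch: build the context for a rewritten statement so that runtime
    // statistics recorded for the rewritten plan stay visible to the original
    // query's re-execution machinery.
    final class RewriteContextSketch {
      static Context forRewrittenStatement(Context ctx, HiveConf conf) throws IOException {
        Context rewrittenCtx = new Context(conf);
        rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
        rewrittenCtx.setExplainPlan(ctx.isExplainPlan());
        // New in this patch: share the original query's stats source and plan
        // mapper instead of leaving the rewritten context with fresh,
        // disconnected instances.
        rewrittenCtx.setStatsSource(ctx.getStatsSource());
        rewrittenCtx.setPlanMapper(ctx.getPlanMapper());
        return rewrittenCtx;
      }
    }

Without the last two calls, the rewritten merge plan records its runtime statistics
into a PlanMapper that the re-execution logic never consults, which matches the
symptom named in the commit title.
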
http://git-wip-us.apache.org/repos/asf/hive/blob/78cbf147/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 7925151..d9483f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -536,6 +536,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     }
     rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
     rewrittenCtx.setExplainPlan(ctx.isExplainPlan());
+    rewrittenCtx.setStatsSource(ctx.getStatsSource());
+    rewrittenCtx.setPlanMapper(ctx.getPlanMapper());
     rewrittenCtx.setIsUpdateDeleteMerge(true);
     rewrittenCtx.setCmd(rewrittenQueryStr.toString());
 
@@ -770,7 +772,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
 
   /**
    * This allows us to take an arbitrary ASTNode and turn it back into SQL that produced it.
-   * Since HiveLexer.g is written such that it strips away any ` (back ticks) around 
+   * Since HiveLexer.g is written such that it strips away any ` (back ticks) around
    * quoted identifiers we need to add those back to generated SQL.
    * Additionally, the parser only produces tokens of type Identifier and never
    * QuotedIdentifier (HIVE-6013). So here we just quote all identifiers.
@@ -808,7 +810,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
   /**
    * This allows us to take an arbitrary ASTNode and turn it back into SQL that produced it without
    * needing to understand what it is (except for QuotedIdentifiers)
-   * 
+   *
    */
   private String getMatchedText(ASTNode n) {
     quotedIdenfierHelper.visit(n);
@@ -1096,10 +1098,10 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
         .append("\n SELECT cardinality_violation(")
         .append(getSimpleTableName(target)).append(".ROW__ID");
     addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
-    
+
     rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString)
       .append(" GROUP BY ").append(getSimpleTableName(target)).append(".ROW__ID");
-    
+
     addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, target);
     rewrittenQueryStr.append(" HAVING count(*) > 1");
 
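
Illustration, not part of the commit: the hunk at line 1098 above belongs to the branch
that enforces the SQL MERGE cardinality rule, namely that each target row may be
matched by at most one source row. Condensed into a self-contained sketch (the table
name and ON-clause strings are illustrative; the append sequence is the one shown in
the diff):

    // Sketch of the cardinality-violation branch the analyzer appends to the
    // rewritten MERGE statement.
    final class CardinalityBranchSketch {
      public static void main(String[] args) {
        String target = "lineitem2";
        String onClauseAsString = "sub.L_ORDERKEY = lineitem2.L_ORDERKEY";
        StringBuilder rewrittenQueryStr = new StringBuilder();
        rewrittenQueryStr.append("\n SELECT cardinality_violation(")
            .append(target).append(".ROW__ID");
        // (for a partitioned target, the partition columns are appended here)
        rewrittenQueryStr.append(")\n WHERE ").append(onClauseAsString)
            .append(" GROUP BY ").append(target).append(".ROW__ID");
        // A target ROW__ID matched by more than one source row is an error.
        rewrittenQueryStr.append(" HAVING count(*) > 1");
        System.out.println(rewrittenQueryStr);
      }
    }

This generated branch is why the plans in the new runtime_stats_merge.q.out below
contain a Group By on ROW__ID feeding cardinality_violation() and a write to
default@merge_tmp_table.
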
http://git-wip-us.apache.org/repos/asf/hive/blob/78cbf147/ql/src/test/queries/clientpositive/runtime_stats_merge.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/runtime_stats_merge.q b/ql/src/test/queries/clientpositive/runtime_stats_merge.q
new file mode 100644
index 0000000..e694101
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/runtime_stats_merge.q
@@ -0,0 +1,41 @@
+
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+set hive.support.concurrency=true;
+set hive.explain.user=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=88888888;
+-- set hive.auto.convert.sortmerge.join=true;
+-- set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
+create table lineitem (L_ORDERKEY integer);
+
+insert into lineitem values (1),(2),(3);
+
+create table lineitem2
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem;
+create table lineitem_stage
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem limit 1;
+
+
+analyze table lineitem2 compute statistics for columns;
+analyze table lineitem_stage compute statistics for columns;
+
+explain reoptimization
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete;
+
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/78cbf147/ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out b/ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out
new file mode 100644
index 0000000..02f2134
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/runtime_stats_merge.q.out
@@ -0,0 +1,194 @@
+PREHOOK: query: create table lineitem (L_ORDERKEY integer)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: create table lineitem (L_ORDERKEY integer)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: insert into lineitem values (1),(2),(3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: insert into lineitem values (1),(2),(3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@lineitem
+POSTHOOK: Lineage: lineitem.l_orderkey SCRIPT []
+PREHOOK: query: create table lineitem2
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@lineitem
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem2
+POSTHOOK: query: create table lineitem2
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem2
+POSTHOOK: Lineage: lineitem2.l_orderkey SIMPLE [(lineitem)lineitem.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+PREHOOK: query: create table lineitem_stage
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem limit 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@lineitem
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_stage
+POSTHOOK: query: create table lineitem_stage
+  stored as orc TBLPROPERTIES ('transactional'='true')
+  as select * from lineitem limit 1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_stage
+POSTHOOK: Lineage: lineitem_stage.l_orderkey SIMPLE [(lineitem)lineitem.FieldSchema(name:l_orderkey, type:int, comment:null), ]
+PREHOOK: query: analyze table lineitem2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@lineitem2
+PREHOOK: Output: default@lineitem2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table lineitem2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@lineitem2
+POSTHOOK: Output: default@lineitem2
+#### A masked pattern was here ####
+PREHOOK: query: analyze table lineitem_stage compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@lineitem_stage
+PREHOOK: Output: default@lineitem_stage
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table lineitem_stage compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@lineitem_stage
+POSTHOOK: Output: default@lineitem_stage
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem2
+PREHOOK: Input: default@lineitem_stage
+PREHOOK: Output: default@lineitem2
+PREHOOK: Output: default@merge_tmp_table
+POSTHOOK: query: explain reoptimization
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem2
+POSTHOOK: Input: default@lineitem_stage
+POSTHOOK: Output: default@lineitem2
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(lineitem2)lineitem2.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+PREHOOK: query: explain reoptimization
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+PREHOOK: type: QUERY
+POSTHOOK: query: explain reoptimization
+merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(lineitem2)lineitem2.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+Vertex dependency in root stage
+Map 2 <- Map 1 (BROADCAST_EDGE)
+Reducer 3 <- Map 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 2 (SIMPLE_EDGE)
+
+Stage-4
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.lineitem2"}
+        Stage-3
+          Dependency Collection{}
+            Stage-2
+              Reducer 3 vectorized, llap
+              File Output Operator [FS_61]
+                table:{"name:":"default.lineitem2"}
+                Select Operator [SEL_60] (runtime: rows=1 width=76)
+                  Output:["_col0"]
+                <-Map 2 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_10]
+                    PartitionCols:UDFToInteger(_col0)
+                    Select Operator [SEL_9] (runtime: rows=1 width=76)
+                      Output:["_col0"]
+                      Filter Operator [FIL_32] (runtime: rows=1 width=84)
+                        predicate:(_col4 = _col0)
+                        Map Join Operator [MAPJOIN_48] (runtime: rows=1 width=84)
+                          Conds:FIL_36.l_orderkey=RS_52._col0(Inner),Output:["_col0","_col3","_col4"]
+                        <-Map 1 [BROADCAST_EDGE] vectorized, llap
+                          BROADCAST [RS_52]
+                            PartitionCols:_col0
+                            Select Operator [SEL_51] (runtime: rows=1 width=4)
+                              Output:["_col0"]
+                              Filter Operator [FIL_50] (runtime: rows=1 width=4)
+                                predicate:l_orderkey is not null
+                                TableScan [TS_0] (runtime: rows=1 width=4)
+                                  default@lineitem_stage,lineitem_stage, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"]
+                        <-Filter Operator [FIL_36] (runtime: rows=3 width=4)
+                            predicate:l_orderkey is not null
+                            TableScan [TS_2] (runtime: rows=3 width=4)
+                              default@lineitem2,lineitem2, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey"]
+              Reducer 4 llap
+              File Output Operator [FS_22]
+                table:{"name:":"default.merge_tmp_table"}
+                Select Operator [SEL_21] (runtime: rows=0 width=-1)
+                  Output:["_col0"]
+                  Filter Operator [FIL_33] (runtime: rows=0 width=-1)
+                    predicate:(_col1 > 1L)
+                    Group By Operator [GBY_19] (runtime: rows=1 width=84)
+                      Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
+                    <-Map 2 [SIMPLE_EDGE] llap
+                      SHUFFLE [RS_18]
+                        PartitionCols:_col0
+                        Group By Operator [GBY_17] (runtime: rows=1 width=84)
+                          Output:["_col0","_col1"],aggregations:["count()"],keys:_col3
+                          Select Operator [SEL_16] (runtime: rows=1 width=84)
+                            Output:["_col3"]
+                            Filter Operator [FIL_34] (runtime: rows=1 width=84)
+                              predicate:(_col4 = _col0)
+                              Please refer to the previous Map Join Operator [MAPJOIN_48]
+              File Output Operator [FS_29]
+                Select Operator [SEL_28] (runtime: rows=1 width=424)
+                  Output:["_col0"]
+                  Group By Operator [GBY_27] (runtime: rows=1 width=424)
+                    Output:["_col0"],aggregations:["compute_stats(val, 'hll')"]
+                    Select Operator [SEL_24] (runtime: rows=0 width=-1)
+                      Output:["val"]
+                      Please refer to the previous Select Operator [SEL_21]
+Stage-5
+  Stats Work{}
+    Stage-1
+      Move Operator
+        table:{"name:":"default.merge_tmp_table"}
+        Please refer to the previous Stage-3
+
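
Illustration, not part of the commit: every operator in the plan above is annotated
with the cardinality observed while the statement actually ran, for example rows=3
for the lineitem2 scan and rows=0 width=-1 for the never-taken cardinality-violation
branch; with this patch those observed values are available when the rewritten MERGE
is re-planned. A toy reader for the annotation format (not part of Hive, it only
documents how to read the lines above):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Extracts the observed row count from a "(runtime: rows=N width=M)"
    // annotation as printed by "explain reoptimization".
    final class RuntimeAnnotation {
      private static final Pattern RUNTIME =
          Pattern.compile("runtime: rows=(-?\\d+) width=(-?\\d+)");

      static long observedRows(String operatorLine) {
        Matcher m = RUNTIME.matcher(operatorLine);
        if (!m.find()) {
          throw new IllegalArgumentException("no runtime annotation in: " + operatorLine);
        }
        return Long.parseLong(m.group(1));
      }

      public static void main(String[] args) {
        // Prints 3: the row count observed while scanning lineitem2.
        System.out.println(observedRows("TableScan [TS_2] (runtime: rows=3 width=4)"));
      }
    }
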
+PREHOOK: query: merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem2
+PREHOOK: Input: default@lineitem_stage
+PREHOOK: Output: default@lineitem2
+PREHOOK: Output: default@merge_tmp_table
+POSTHOOK: query: merge into lineitem2 using
+  (select * from lineitem_stage) sub
+  on sub.L_ORDERKEY = lineitem2.L_ORDERKEY
+  when matched then delete
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem2
+POSTHOOK: Input: default@lineitem_stage
+POSTHOOK: Output: default@lineitem2
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(lineitem2)lineitem2.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]