HIVE-10677 : hive.exec.parallel=true has problem when it is used for analyze table column stats (Pengcheng Xiong via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d75a5414 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d75a5414 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d75a5414 Branch: refs/heads/spark Commit: d75a5414552a23a6c07413deee6234bcb2b1ff74 Parents: d823fc8 Author: Pengcheng Xiong <pxi...@hortonworks.com> Authored: Thu May 21 13:30:00 2015 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Sat May 23 12:34:35 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/TaskCompiler.java | 4 +- .../clientpositive/exec_parallel_column_stats.q | 5 ++ .../clientpositive/columnstats_partlvl.q.out | 14 +++--- .../clientpositive/columnstats_partlvl_dp.q.out | 8 +-- .../clientpositive/columnstats_tbllvl.q.out | 14 +++--- .../clientpositive/compute_stats_date.q.out | 2 +- .../display_colstats_tbllvl.q.out | 6 +-- .../exec_parallel_column_stats.q.out | 51 ++++++++++++++++++++ .../temp_table_display_colstats_tbllvl.q.out | 6 +-- 9 files changed, 84 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index b4d5382..ba11e41 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -329,7 +329,9 @@ public abstract class TaskCompiler { colName, colType, isTblLevel); cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); - rootTasks.add(cStatsTask); + // This is a column stats task. According to the semantic, there should be + // only one MR task in the rootTask. + rootTasks.get(0).addDependentTask(cStatsTask); } http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q new file mode 100644 index 0000000..ceacc24 --- /dev/null +++ b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q @@ -0,0 +1,5 @@ +set hive.exec.parallel=true; + +explain analyze table src compute statistics for columns; + +analyze table src compute statistics for columns; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_partlvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 3c22d40..e0c4cfe 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -38,7 +38,7 @@ analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -105,7 +105,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -238,7 +238,7 @@ analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -305,7 +305,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -438,7 +438,7 @@ analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -518,7 +518,7 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -600,7 +600,7 @@ analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 18a6909..aaf9d91 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -76,7 +76,7 @@ analyze table Employee_Part partition (employeeSalary='4000.0', country) compute POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -149,7 +149,7 @@ analyze table Employee_Part partition (employeeSalary='2000.0') compute statisti POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -233,7 +233,7 @@ analyze table Employee_Part partition (employeeSalary) compute statistics for co POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -314,7 +314,7 @@ analyze table Employee_Part partition (employeeSalary,country) compute statistic POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 002823c..19283bb 100644 --- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -46,7 +46,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -104,7 +104,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -223,7 +223,7 @@ analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -323,7 +323,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -447,7 +447,7 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -506,7 +506,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -625,7 +625,7 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/compute_stats_date.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out index 53cc107..b57a862 100644 --- a/ql/src/test/results/clientpositive/compute_stats_date.q.out +++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -56,7 +56,7 @@ analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 53f06d9..7c91248 100644 --- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -62,7 +62,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -120,7 +120,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -295,7 +295,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out new file mode 100644 index 0000000..2e96bb6 --- /dev/null +++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: explain analyze table src compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table src compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src + +PREHOOK: query: analyze table src compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: analyze table src compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index 3f63cbb..cfa88ab 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -70,7 +70,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -128,7 +128,7 @@ TOK_ANALYZE STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0 @@ -303,7 +303,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage - Stage-1 is a root stage + Stage-1 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-0