http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query9.q.out b/ql/src/test/results/clientpositive/perf/spark/query9.q.out index a434501..06ac961 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query9.q.out @@ -145,11 +145,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -168,11 +168,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -191,9 +191,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -212,11 +212,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -235,11 +235,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -258,9 +258,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -279,9 +279,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -300,11 +300,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -323,11 +323,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -346,9 +346,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -367,11 +367,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -390,11 +390,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -413,11 +413,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -436,11 +436,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -459,9 +459,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash
http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query90.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query90.q.out b/ql/src/test/results/clientpositive/perf/spark/query90.q.out index 9f78d64..b0cf966 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query90.q.out @@ -43,9 +43,11 @@ select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3, Stage-4 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-4 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -53,27 +55,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: time_dim - filterExpr: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t_time_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 11 + Map 13 Map Operator Tree: TableScan alias: household_demographics @@ -93,34 +75,34 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-2 Spark Edges: - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 10 <- Reducer 9 (GROUP, 1) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 169), Map 8 (PARTITION-LEVEL SORT, 169) #### A masked pattern was here #### Vertices: - Map 7 + Map 12 + Map Operator Tree: + TableScan + alias: time_dim + filterExpr: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: web_sales @@ -141,40 +123,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 9 + 1 Map 11 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 10 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 11 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 8 + Reducer 10 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -188,67 +148,82 @@ STAGE PLANS: keys: 0 1 + Reducer 9 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 13 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) - Stage: Stage-4 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 7 Map Operator Tree: TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: Map 5 Map Operator Tree: TableScan - alias: time_dim - filterExpr: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t_time_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: household_demographics - filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -257,8 +232,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 6 (PARTITION-LEVEL SORT, 169) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -282,40 +258,67 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 6 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: time_dim + filterExpr: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 7 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -333,7 +336,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 8 + 1 Reducer 10 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) (type: decimal(35,20)) @@ -344,7 +347,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -362,6 +365,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: 100 http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query97.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query97.q.out b/ql/src/test/results/clientpositive/perf/spark/query97.q.out index c4f4804..7e7d791 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -47,17 +47,42 @@ from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP, 437) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 386), Reducer 9 (PARTITION-LEVEL SORT, 386) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 9 <- Reducer 8 (GROUP, 336) #### A masked pattern was here #### Vertices: - Map 5 + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -65,24 +90,18 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 8 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -90,65 +109,18 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 437) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 386), Reducer 7 (PARTITION-LEVEL SORT, 386) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 336) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: catalog_sales @@ -161,30 +133,34 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 8 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -197,7 +173,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -221,7 +197,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -239,7 +215,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out index f541da8..c3da1e0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out @@ -76,27 +76,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 5 + Map 6 Map Operator Tree: TableScan alias: call_center @@ -116,7 +96,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -136,7 +116,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 7 + Map 8 Map Operator Tree: TableScan alias: ship_mode @@ -160,8 +140,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 447) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 2 (GROUP, 447) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -177,67 +158,92 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_call_center_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Map 4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col8 - input vertices: - 1 Map 5 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col8, _col10 - input vertices: - 1 Map 6 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col8, _col10, _col12 - input vertices: - 1 Map 7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 2 Local Work: Map Reduce Local Work - Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col8 + input vertices: + 1 Map 6 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col8, _col10 + input vertices: + 1 Map 7 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col8, _col10, _col12 + input vertices: + 1 Map 8 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -256,7 +262,7 @@ STAGE PLANS: Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/tez/query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index 5b55d44..f8fa17f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -213,9 +213,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_185] PartitionCols:_col0 - Select Operator [SEL_184] (rows=4058 width=1119) + Select Operator [SEL_184] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_183] (rows=4058 width=1119) + Filter Operator [FIL_183] (rows=36524 width=1119) predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -236,7 +236,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_194] Group By Operator [GBY_191] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_186] (rows=4058 width=1119) + Select Operator [SEL_186] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] <-Reducer 7 [BROADCAST_EDGE] vectorized @@ -285,7 +285,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_195] Group By Operator [GBY_192] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_188] (rows=4058 width=1119) + Select Operator [SEL_188] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized @@ -323,7 +323,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_196] Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_190] (rows=4058 width=1119) + Select Operator [SEL_190] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/tez/query14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index b9efa45..61b7070 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -363,9 +363,9 @@ Stage-0 <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1808] PartitionCols:_col0 - Select Operator [SEL_1803] (rows=8116 width=1119) + Select Operator [SEL_1803] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1802] (rows=8116 width=1119) + Filter Operator [FIL_1802] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) TableScan [TS_13] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -386,7 +386,7 @@ Stage-0 SHUFFLE [RS_1814] Group By Operator [GBY_1812] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1805] (rows=8116 width=1119) + Select Operator [SEL_1805] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1803] <-Reducer 35 [CONTAINS] @@ -402,9 +402,9 @@ Stage-0 <-Map 37 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1836] PartitionCols:_col0 - Select Operator [SEL_1831] (rows=8116 width=1119) + Select Operator [SEL_1831] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1830] (rows=8116 width=1119) + Filter Operator [FIL_1830] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) TableScan [TS_24] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -425,7 +425,7 @@ Stage-0 SHUFFLE [RS_1842] Group By Operator [GBY_1840] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1833] (rows=8116 width=1119) + Select Operator [SEL_1833] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1831] <-Reducer 9 [CONTAINS] @@ -441,9 +441,9 @@ Stage-0 <-Map 102 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1626] PartitionCols:_col0 - Select Operator [SEL_1615] (rows=8116 width=1119) + Select Operator [SEL_1615] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1614] (rows=8116 width=1119) + Filter Operator [FIL_1614] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1999 AND 2001) TableScan [TS_97] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -464,7 +464,7 @@ Stage-0 SHUFFLE [RS_1638] Group By Operator [GBY_1633] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1623] (rows=8116 width=1119) + Select Operator [SEL_1623] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized @@ -505,7 +505,7 @@ Stage-0 SHUFFLE [RS_1815] Group By Operator [GBY_1813] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1807] (rows=8116 width=1119) + Select Operator [SEL_1807] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1803] <-Reducer 41 [CONTAINS] @@ -539,7 +539,7 @@ Stage-0 SHUFFLE [RS_1843] Group By Operator [GBY_1841] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1835] (rows=8116 width=1119) + Select Operator [SEL_1835] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1831] <-Reducer 45 [CONTAINS] @@ -573,7 +573,7 @@ Stage-0 SHUFFLE [RS_1639] Group By Operator [GBY_1634] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1625] (rows=8116 width=1119) + Select Operator [SEL_1625] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized @@ -676,7 +676,7 @@ Stage-0 SHUFFLE [RS_1635] Group By Operator [GBY_1630] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1617] (rows=8116 width=1119) + Select Operator [SEL_1617] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 71 [BROADCAST_EDGE] vectorized @@ -742,7 +742,7 @@ Stage-0 SHUFFLE [RS_1636] Group By Operator [GBY_1631] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1619] (rows=8116 width=1119) + Select Operator [SEL_1619] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 74 [BROADCAST_EDGE] vectorized @@ -804,7 +804,7 @@ Stage-0 SHUFFLE [RS_1637] Group By Operator [GBY_1632] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1621] (rows=8116 width=1119) + Select Operator [SEL_1621] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 77 [BROADCAST_EDGE] vectorized http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/tez/query22.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out index cfe780a..ca5c970 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -112,9 +112,9 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_81] PartitionCols:_col0 - Select Operator [SEL_80] (rows=8116 width=1119) + Select Operator [SEL_80] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_79] (rows=8116 width=1119) + Filter Operator [FIL_79] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/tez/query25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index b68c54a..044df14 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -177,9 +177,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_220] (rows=4058 width=1119) + Select Operator [SEL_220] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_217] (rows=4058 width=1119) + Filter Operator [FIL_217] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -200,9 +200,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_219] (rows=4058 width=1119) + Select Operator [SEL_219] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_216] (rows=4058 width=1119) + Filter Operator [FIL_216] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) Please refer to the previous TableScan [TS_3] <-Map 16 [SIMPLE_EDGE] vectorized @@ -255,7 +255,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_229] Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_224] (rows=4058 width=1119) + Select Operator [SEL_224] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_219] <-Reducer 2 [SIMPLE_EDGE]