http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query50.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index b978201..fde0c4e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -124,7 +124,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: store @@ -138,7 +138,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -146,63 +146,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 481), Reducer 2 (PARTITION-LEVEL SORT, 481) - Reducer 4 <- Reducer 3 (GROUP, 529) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 36), Map 9 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 436), Reducer 2 (PARTITION-LEVEL SORT, 436) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 438), Reducer 3 (PARTITION-LEVEL SORT, 438) + Reducer 5 <- Reducer 4 (GROUP, 529) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) - Local Work: - Map Reduce Local Work - Map 10 - Map Operator Tree: - TableScan - alias: d1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan alias: store_returns Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -218,7 +171,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Map 9 + Map 7 Map Operator Tree: TableScan alias: d2 @@ -235,50 +188,113 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int), _col2 (type: int), _col3 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col7, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: int) + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col10 (type: int) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), CASE WHEN (((_col16 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 30) and ((_col16 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 60) and ((_col16 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 90) and ((_col16 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col16 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + outputColumnNames: _col0, _col7, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col7, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 + input vertices: + 1 Map 10 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) - keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - mode: hash + Select Operator + expressions: _col14 (type: string), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: string), CASE WHEN (((_col0 - _col7) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 30) and ((_col0 - _col7) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 60) and ((_col0 - _col7) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col0 - _col7) > 90) and ((_col0 - _col7) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col0 - _col7) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) - sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Group By Operator + aggregations: sum(_col10), sum(_col11), sum(_col12), sum(_col13), sum(_col14) + keys: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + sort order: ++++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4) @@ -292,7 +308,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint) @@ -308,22 +324,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query53.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out index 3a1e948..3bb21ef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out @@ -62,7 +62,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan alias: store @@ -84,11 +84,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 7 (PARTITION-LEVEL SORT, 399) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Reducer 3 (GROUP, 529) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -109,7 +108,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 7 + Map 6 Map Operator Tree: TableScan alias: item @@ -127,7 +126,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int) - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -181,18 +180,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col8, _col11 input vertices: - 1 Map 9 + 1 Map 8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col11 (type: int), _col8 (type: int) + keys: _col8 (type: int), _col11 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -204,58 +203,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 5 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: decimal(17,2)) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: int, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col2 - name: avg - window function: GenericUDAFAverageEvaluatorDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: avg_window_0, _col1, _col2 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: avg_window_0, _col0, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)), _col0 (type: int) - sort order: +++ + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 6 + Reduce Output Operator + key expressions: _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)), _col0 (type: int) + sort order: +++ + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: decimal(17,2)), KEY.reducesinkkey0 (type: decimal(21,6)) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query54.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index 251d7ad..43132bc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 14' is a cross product -Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product -Warning: Map Join MAPJOIN[143][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 14' is a cross product +Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -122,11 +122,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 31 <- Map 30 (GROUP, 2) - Reducer 32 <- Reducer 31 (GROUP, 1) + Reducer 29 <- Map 28 (GROUP, 2) + Reducer 30 <- Reducer 29 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 30 + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -148,7 +148,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reducer 31 + Reducer 29 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -166,7 +166,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 32 + Reducer 30 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -211,23 +211,23 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 1 (GROUP, 2) + Reducer 10 <- Reducer 9 (GROUP, 1) Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398) Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 772), Reducer 17 (PARTITION-LEVEL SORT, 772) - Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 29 (PARTITION-LEVEL SORT, 1) + Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 32 (PARTITION-LEVEL SORT, 1) Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 654), Reducer 23 (PARTITION-LEVEL SORT, 654) Reducer 2 <- Map 1 (GROUP, 2) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 459), Map 24 (PARTITION-LEVEL SORT, 459), Map 25 (PARTITION-LEVEL SORT, 459) - Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 504), Reducer 20 (PARTITION-LEVEL SORT, 504) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 458), Map 24 (PARTITION-LEVEL SORT, 458), Map 25 (PARTITION-LEVEL SORT, 458) + Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 505), Reducer 20 (PARTITION-LEVEL SORT, 505) Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009) Reducer 23 <- Reducer 22 (GROUP, 610) - Reducer 29 <- Map 28 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 32 <- Map 31 (GROUP, 2) + Reducer 4 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (GROUP, 1009) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -330,11 +330,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) + value expressions: _col1 (type: int), _col2 (type: int) Map 24 Map Operator Tree: TableScan @@ -348,45 +348,45 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) + value expressions: _col1 (type: int), _col2 (type: int) Map 25 Map Operator Tree: TableScan - alias: item - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Map 26 Map Operator Tree: TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: i_item_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Map 27 Map Operator Tree: TableScan @@ -405,7 +405,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 28 + Map 31 Map Operator Tree: TableScan alias: date_dim @@ -430,14 +430,18 @@ STAGE PLANS: Reducer 10 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 12 Reduce Operator Tree: Join Operator @@ -455,6 +459,8 @@ STAGE PLANS: Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) Reducer 13 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -464,13 +470,21 @@ STAGE PLANS: 1 _col5 (type: int) outputColumnNames: _col2, _col4, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col2, _col4, _col10 + input vertices: + 1 Reducer 30 + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) Reducer 14 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -478,26 +492,16 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col2, _col4, _col10, _col12 - Statistics: Num rows: 6363893803988 Data size: 7688985619260581 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col4, _col10, _col12 - input vertices: - 1 Reducer 32 + outputColumnNames: _col2, _col4, _col10, _col13 + Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col4, _col11, _col13 Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col12 (type: int) - outputColumnNames: _col0, _col4, _col11, _col13 + Reduce Output Operator + sort order: Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) Reducer 17 Reduce Operator Tree: Join Operator @@ -520,31 +524,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Select Operator + Reduce Output Operator + sort order: Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: int) Reducer 20 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col1, _col2 Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 21 @@ -553,7 +550,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE @@ -599,47 +596,35 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 29 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reducer 3 Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} keys: 0 1 - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 5 + value expressions: _col0 (type: int) + Reducer 32 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -647,28 +632,32 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col6, _col13, _col15 + outputColumnNames: _col0, _col2, _col6, _col13, _col15 Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col13 BETWEEN _col15 AND _col1 (type: boolean) - Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col6 (type: decimal(7,2)) - outputColumnNames: _col2, _col6 + Select Operator + expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col4, _col11, _col13, _col15 + Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col11 BETWEEN _col13 AND _col15 (type: boolean) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col6) - keys: _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col4 (type: decimal(7,2)) + outputColumnNames: _col0, _col4 Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: sum(_col4) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 6 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -692,7 +681,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 7 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -710,7 +699,7 @@ STAGE PLANS: Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: int) - Reducer 8 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: int) @@ -726,6 +715,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query57.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 6b30e66..1b66f54 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -391,47 +391,47 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col8 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 14 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: int) + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) sort order: ++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: int) + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(17,2)) + value expressions: _col1 (type: int), _col5 (type: decimal(17,2)) Reducer 15 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST - partition by: _col1, _col0, _col2, _col3 + order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST + partition by: _col4, _col3, _col2, _col0 raw input shape: window functions: window function definition @@ -442,54 +442,54 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col2, _col1, _col3 + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col5, _col4, _col3 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col4, _col5 + arguments: _col1, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col4 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col5 (type: string), _col4 (type: string), _col3 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -566,52 +566,52 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col8 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 23 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 24 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col1, _col0, _col2 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col4, _col3, _col2 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3, _col4 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -621,7 +621,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -642,52 +642,52 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col10 (type: string), _col11 (type: string), _col8 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) sort order: +++++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col1, _col0, _col2 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col4, _col3, _col2 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col3, _col4 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -697,7 +697,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator