http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query29.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index 456fce7..530a625 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -124,7 +124,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 13 Map Operator Tree: TableScan alias: d2 @@ -146,51 +146,51 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 374), Reducer 10 (PARTITION-LEVEL SORT, 374) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) - Reducer 5 <- Reducer 4 (GROUP, 582) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) + Reducer 4 <- Reducer 3 (GROUP, 640) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) - Map 12 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Map 11 Map Operator Tree: TableScan - alias: d3 + alias: d1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE - Map 13 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Map 12 Map Operator Tree: TableScan alias: store_returns @@ -210,34 +210,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4 input vertices: - 1 Map 14 + 1 Map 13 Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: int) + value expressions: _col4 (type: int) Local Work: Map Reduce Local Work - Map 7 - Map Operator Tree: - TableScan - alias: d1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 14 Map Operator Tree: TableScan alias: item @@ -255,56 +238,73 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) - Map 9 + Map 6 Map Operator Tree: TableScan - alias: catalog_sales - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: d3 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Reducer 10 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col7, _col8, _col9, _col10 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int) - sort order: +++ - Map-reduce partition columns: _col7 (type: int), _col8 (type: int), _col9 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col10 (type: int) + outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22 + input vertices: + 1 Map 15 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) + outputColumnNames: _col1, _col2, _col8, _col13, _col14, _col16, _col21, _col22 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col14 (type: int), _col13 (type: int) + sort order: ++ + Map-reduce partition columns: _col14 (type: int), _col13 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -313,85 +313,57 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col10, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col10 (type: string), _col11 (type: string) + 0 _col1 (type: int), _col2 (type: int) + 1 _col14 (type: int), _col13 (type: int) + outputColumnNames: _col3, _col7, _col8, _col14, _col22, _col27, _col28 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col14), sum(_col22), sum(_col3) + keys: _col7 (type: string), _col8 (type: string), _col27 (type: string), _col28 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col7 (type: int), _col8 (type: int), _col9 (type: int) - outputColumnNames: _col3, _col5, _col10, _col11, _col15, _col22 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col10, _col11, _col15, _col22, _col27, _col28 - input vertices: - 1 Map 15 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), sum(_col22), sum(_col15) - keys: _col10 (type: string), _col11 (type: string), _col27 (type: string), _col28 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) - Reducer 6 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -402,6 +374,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col1, _col3, _col5, _col10, _col11, _col13 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query30.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query30.q.out b/ql/src/test/results/clientpositive/perf/spark/query30.q.out index 399251d..6385984 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query30.q.out @@ -66,37 +66,72 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 11), Map 14 (PARTITION-LEVEL SORT, 11) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 329), Reducer 11 (PARTITION-LEVEL SORT, 329) - Reducer 13 <- Reducer 12 (GROUP PARTITION-LEVEL SORT, 349) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 697), Map 18 (PARTITION-LEVEL SORT, 697) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 11), Map 8 (PARTITION-LEVEL SORT, 11) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 329), Reducer 2 (PARTITION-LEVEL SORT, 329) - Reducer 4 <- Reducer 3 (GROUP, 349) - Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 262), Reducer 4 (PARTITION-LEVEL SORT, 262) - Reducer 6 <- Reducer 17 (PARTITION-LEVEL SORT, 656), Reducer 5 (PARTITION-LEVEL SORT, 656) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 11), Map 17 (PARTITION-LEVEL SORT, 11) + Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 329), Reducer 14 (PARTITION-LEVEL SORT, 329) + Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 11), Map 6 (PARTITION-LEVEL SORT, 11) + Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 329), Reducer 7 (PARTITION-LEVEL SORT, 329) + Reducer 9 <- Reducer 8 (GROUP, 349) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: web_returns - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_day (type: int), c_birth_month (type: int), c_birth_year (type: int), c_birth_country (type: string), c_login (type: string), c_email_address (type: string), c_last_review_date (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 10 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 12 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 13 Map Operator Tree: TableScan alias: web_returns @@ -114,7 +149,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 14 + Map 17 Map Operator Tree: TableScan alias: date_dim @@ -131,7 +166,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map 15 + Map 18 Map Operator Tree: TableScan alias: customer_address @@ -149,25 +184,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 16 - Map Operator Tree: - TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_day (type: int), c_birth_month (type: int), c_birth_year (type: int), c_birth_country (type: string), c_login (type: string), c_email_address (type: string), c_last_review_date (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) - Map 18 + Map 5 Map Operator Tree: TableScan alias: customer_address @@ -184,42 +201,48 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 6 Map Operator Tree: TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: web_returns + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 11 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -235,7 +258,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 12 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -257,7 +280,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 13 + Reducer 16 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -285,7 +308,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,11)), _col1 (type: boolean) - Reducer 17 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -301,7 +324,42 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col18 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col18 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(17,2)) + sort order: +++++++++++++ + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -317,7 +375,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 3 + Reducer 8 Reduce Operator Tree: Join Operator condition map: @@ -339,7 +397,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 9 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -357,60 +415,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col7, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: int), _col15 (type: int), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(17,2)) - sort order: +++++++++++++ - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 7 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query32.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out index 0ceea16..eed3a4d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out @@ -63,7 +63,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 4 Map Operator Tree: TableScan alias: date_dim @@ -86,7 +86,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -108,10 +108,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 308), Map 5 (PARTITION-LEVEL SORT, 308) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 537), Reducer 8 (PARTITION-LEVEL SORT, 537) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 8 <- Map 7 (GROUP, 336) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 520), Reducer 7 (PARTITION-LEVEL SORT, 520) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 336) + Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 171), Reducer 6 (PARTITION-LEVEL SORT, 171) #### A masked pattern was here #### Vertices: Map 1 @@ -126,32 +126,27 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_discount_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: item - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_manufact_id = 269) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan alias: catalog_sales Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -169,7 +164,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 9 + 1 Map 8 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), count(_col2) @@ -185,51 +180,40 @@ STAGE PLANS: value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Local Work: Map Reduce Local Work + Map 9 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manufact_id = 269) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2, _col7 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col2 > _col7) (type: boolean) - Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE + predicate: (_col2 > _col5) (type: boolean) + Statistics: Num rows: 116155905 Data size: 15729842913 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(7,2)) outputColumnNames: _col2 - Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116155905 Data size: 15729842913 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) mode: hash @@ -240,7 +224,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -257,7 +241,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -275,6 +259,22 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,21)) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(38,21)) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index d563a67..80f7ec1 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -71,36 +71,36 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: store - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work Map 9 Map Operator Tree: TableScan - alias: household_demographics - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -215,9 +215,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col1, _col3, _col4 input vertices: 1 Map 8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE @@ -225,7 +225,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col4 input vertices: http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query35.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out index 952e6d1..de7c29d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out @@ -123,8 +123,8 @@ STAGE PLANS: Reducer 14 <- Reducer 13 (GROUP, 169) Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 305), Map 19 (PARTITION-LEVEL SORT, 305) Reducer 18 <- Reducer 17 (GROUP, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 544), Map 7 (PARTITION-LEVEL SORT, 544) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 909), Reducer 2 (PARTITION-LEVEL SORT, 909) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (SORT, 1) @@ -143,11 +143,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) Map 11 Map Operator Tree: TableScan @@ -238,24 +238,6 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: customer_demographics - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cd_demo_sk is not null (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_dep_count (type: int), cd_dep_employed_count (type: int), cd_dep_college_count (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) - Map 8 - Map Operator Tree: - TableScan alias: ca Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -271,6 +253,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: customer_demographics + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_dep_count (type: int), cd_dep_employed_count (type: int), cd_dep_college_count (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) Map 9 Map Operator Tree: TableScan @@ -393,36 +393,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col4 Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) + value expressions: _col0 (type: int), _col4 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col10 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) - outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) + value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) Reducer 4 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query37.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out index b37823c..325861e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -62,7 +62,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) Reducer 3 <- Reducer 2 (GROUP, 671) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -102,7 +102,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) - Map 7 + Map 6 Map Operator Tree: TableScan alias: inventory @@ -120,14 +120,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3 + outputColumnNames: _col1 input vertices: - 0 Map 6 + 1 Map 7 Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work @@ -140,7 +140,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col3 (type: int) + 2 _col1 (type: int) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 633577652 Data size: 85799141554 Basic stats: COMPLETE Column stats: NONE Group By Operator