http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out new file mode 100644 index 0000000..c54b95a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 927), Reducer 8 (PARTITION-LEVEL SORT, 927) + Reducer 4 <- Reducer 3 (GROUP, 369) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col4 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col4, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col7) + keys: _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink +
http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out new file mode 100644 index 0000000..205173a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -0,0 +1,363 @@ +PREHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 9 + Map Operator Tree: + TableScan + alias: call_center + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cc_call_center_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 61350 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 7 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 11 <- Map 10 (GROUP, 24) + Reducer 13 <- Map 12 (GROUP, 305) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 570), Reducer 13 (PARTITION-LEVEL SORT, 570), Reducer 2 (PARTITION-LEVEL SORT, 570) + Reducer 4 <- Reducer 3 (GROUP, 447) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cs1 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 7 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work + Map 10 + Map Operator Tree: + TableScan + alias: cr1 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cr_order_number is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: cr_order_number (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Map 12 + Map Operator Tree: + TableScan + alias: cs2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_order_number is not null and cs_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: cs_order_number (type: int), cs_warehouse_sk (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_state = 'NY') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reducer 11 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 13 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 + input vertices: + 1 Map 9 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + 2 _col1 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col15 + residual filter predicates: {(_col3 <> _col15)} + Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean) + outputColumnNames: _col4, _col5, _col6, _col16 + Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col16 is null (type: boolean) + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6 + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0), sum(_col1), sum(_col2) + mode: partial2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col0 (type: bigint) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out new file mode 100644 index 0000000..7b12a39 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -0,0 +1,409 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 16 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 5 <- Reducer 4 (GROUP, 582) + Reducer 6 <- Reducer 5 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Map 12 + Map Operator Tree: + TableScan + alias: d3 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Map 13 + Map Operator Tree: + TableScan + alias: store_returns + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Map 15 + Map Operator Tree: + TableScan + alias: d2 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: d1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_quarter_name = '2000Q1') and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col7, _col8, _col9, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: int), _col8 (type: int), _col9 (type: int) + sort order: +++ + Map-reduce partition columns: _col7 (type: int), _col8 (type: int), _col9 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col10 (type: int) + Reducer 14 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: int), _col9 (type: string), _col10 (type: string) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col7 (type: int), _col8 (type: int), _col9 (type: int) + outputColumnNames: _col3, _col5, _col9, _col10, _col14, _col21 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col9, _col10, _col14, _col21, _col25 + input vertices: + 1 Map 16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col5), avg(_col5), stddev_samp(_col5), count(_col21), avg(_col21), stddev_samp(_col21), count(_col14), avg(_col14), stddev_samp(_col14) + keys: _col9 (type: string), _col10 (type: string), _col25 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:int>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: bigint), _col7 (type: struct<count:bigint,sum:double,input:int>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: bigint), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: struct<count:bigint,sum:double,variance:double>) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), avg(VALUE._col1), stddev_samp(VALUE._col2), count(VALUE._col3), avg(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7), stddev_samp(VALUE._col8) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), (_col5 / _col4) (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double), (_col8 / _col7) (type: double), _col9 (type: bigint), _col10 (type: double), (_col11 / _col10) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: bigint), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col10 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query18.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out new file mode 100644 index 0000000..88d289c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -0,0 +1,360 @@ +PREHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 337), Reducer 10 (PARTITION-LEVEL SORT, 337) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 374), Reducer 11 (PARTITION-LEVEL SORT, 374) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 428), Map 7 (PARTITION-LEVEL SORT, 428) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 302), Reducer 2 (PARTITION-LEVEL SORT, 302) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 731), Reducer 3 (PARTITION-LEVEL SORT, 731) + Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 14 + Map Operator Tree: + TableScan + alias: cd1 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_dep_count (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Map 15 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: cd2 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cd_demo_sk is not null (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col14 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8, _col14, _col16 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col16 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col14 (type: int) + outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col9, _col10, _col16 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col16 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8 + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 44000000 Data size: 37840889538 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col4, _col6, _col7, _col8 + Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 48400001 Data size: 41624979393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col11, _col16, _col17, _col18, _col19, _col20, _col26 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col11 (type: string), _col8 (type: string), _col7 (type: string), _col6 (type: string), CAST( _col16 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col17 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col19 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col18 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col20 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col4 AS decimal(12,2)) (type: decimal(12,2)), CAST( _col26 AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col4), avg(_col5), avg(_col6), avg(_col7), avg(_col8), avg(_col9), avg(_col10) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) + Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col6 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col7 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col8 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col9 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col10 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>), _col11 (type: struct<count:bigint,sum:decimal(22,2),input:decimal(12,2)>) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3), avg(VALUE._col4), avg(VALUE._col5), avg(VALUE._col6) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)), _col11 (type: decimal(16,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col0 (type: string) + sort order: ++++ + Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(16,6)), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)) + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(16,6)), VALUE._col1 (type: decimal(16,6)), VALUE._col2 (type: decimal(16,6)), VALUE._col3 (type: decimal(16,6)), VALUE._col4 (type: decimal(16,6)), VALUE._col5 (type: decimal(16,6)), VALUE._col6 (type: decimal(16,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink +