http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query78.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index c2e8577..1467c5f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -120,41 +120,57 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 164), Map 14 (PARTITION-LEVEL SORT, 164) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 85), Reducer 11 (PARTITION-LEVEL SORT, 85) - Reducer 13 <- Reducer 12 (GROUP, 93) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 85), Reducer 20 (PARTITION-LEVEL SORT, 85) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 85), Reducer 14 (PARTITION-LEVEL SORT, 85) + Reducer 12 <- Reducer 11 (GROUP, 93) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 164), Map 15 (PARTITION-LEVEL SORT, 164) + Reducer 17 <- Map 10 (PARTITION-LEVEL SORT, 85), Reducer 20 (PARTITION-LEVEL SORT, 85) Reducer 18 <- Reducer 17 (GROUP, 93) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 219), Reducer 8 (PARTITION-LEVEL SORT, 219) Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 177), Map 21 (PARTITION-LEVEL SORT, 177) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219) - Reducer 4 <- Reducer 3 (GROUP, 241) - Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 167), Reducer 4 (PARTITION-LEVEL SORT, 167) - Reducer 6 <- Reducer 18 (PARTITION-LEVEL SORT, 91), Reducer 5 (PARTITION-LEVEL SORT, 91) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 241) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 167), Reducer 3 (PARTITION-LEVEL SORT, 167) + Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 91), Reducer 4 (PARTITION-LEVEL SORT, 91) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 432), Map 9 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Map 10 Map Operator Tree: TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 13 + Map Operator Tree: + TableScan alias: web_sales Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -170,7 +186,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col3 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Map 14 + Map 15 Map Operator Tree: TableScan alias: web_returns @@ -184,40 +200,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Map 15 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map 16 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Map 19 Map Operator Tree: TableScan @@ -250,7 +232,25 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Map 9 Map Operator Tree: TableScan alias: store_returns @@ -264,59 +264,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reducer 11 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col8 is null (type: boolean) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Reducer 12 - Reduce Operator Tree: - Join Operator - condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col5, _col6 + outputColumnNames: _col3, _col4, _col6, _col7, _col8 Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6) - keys: _col2 (type: int), _col1 (type: int) + aggregations: sum(_col6), sum(_col7), sum(_col8) + keys: _col4 (type: int), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE @@ -326,7 +286,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 13 + Reducer 12 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -344,6 +304,29 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col0 (type: int) Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + Reducer 14 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col8 is null (type: boolean) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 17 Reduce Operator Tree: Join Operator @@ -388,25 +371,24 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col8 is null (type: boolean) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col6, _col7, _col8 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6), sum(_col7), sum(_col8) + keys: _col4 (type: int), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 20 Reduce Operator Tree: Join Operator @@ -432,28 +414,6 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col5, _col6 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6) - keys: _col2 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 4 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -470,7 +430,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col0 (type: int) Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -489,7 +449,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -511,7 +471,7 @@ STAGE PLANS: sort order: ++---++++ Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: 2000 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: bigint), KEY.reducesinkkey6 (type: decimal(18,2)), KEY.reducesinkkey7 (type: decimal(18,2)) @@ -527,6 +487,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col8 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col8 is null (type: boolean) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query79.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out index 8dcb7f1..f010369 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out @@ -221,26 +221,26 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col6), sum(_col7) - keys: _col1 (type: int), _col13 (type: string), _col3 (type: int), _col5 (type: int) + keys: _col1 (type: int), _col3 (type: int), _col5 (type: int), _col13 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) sort order: ++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: int), _col0 (type: int), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) + expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query8.q.out b/ql/src/test/results/clientpositive/perf/spark/query8.q.out index 677746b..583abbc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query8.q.out @@ -222,14 +222,31 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 159) - Reducer 4 <- Reducer 3 (GROUP, 109), Reducer 8 (GROUP, 109) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 586), Map 9 (PARTITION-LEVEL SORT, 586) - Reducer 7 <- Reducer 6 (GROUP, 349) - Reducer 8 <- Reducer 7 (GROUP, 59) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 586), Map 9 (PARTITION-LEVEL SORT, 586) + Reducer 11 <- Reducer 10 (GROUP, 349) + Reducer 12 <- Reducer 11 (GROUP, 59) + Reducer 7 <- Map 6 (GROUP, 159) + Reducer 8 <- Reducer 12 (GROUP, 109), Reducer 7 (GROUP, 109) #### A masked pattern was here #### Vertices: - Map 2 + Map 13 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_current_addr_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: customer_address @@ -253,7 +270,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -271,65 +288,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 9 - Map Operator Tree: - TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_current_addr_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 substr(_col2, 1, 2) (type: string) - 1 substr(_col0, 1, 2) (type: string) - Reducer 6 + Reducer 10 Reduce Operator Tree: Join Operator condition map: @@ -351,7 +310,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 7 + Reducer 11 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -378,7 +337,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 8 + Reducer 12 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -398,12 +357,53 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 substr(_col0, 1, 2) (type: string) + 1 substr(_col2, 1, 2) (type: string) Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 14 Map Operator Tree: TableScan alias: store @@ -419,15 +419,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 substr(_col2, 1, 2) (type: string) - 1 substr(_col0, 1, 2) (type: string) - outputColumnNames: _col0, _col1 + 0 substr(_col0, 1, 2) (type: string) + 1 substr(_col2, 1, 2) (type: string) + outputColumnNames: _col1, _col2 input vertices: - 1 Reducer 4 + 0 Reducer 8 Statistics: Num rows: 1874 Data size: 3581903 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col1 (type: int) Local Work: Map Reduce Local Work @@ -435,12 +435,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) - Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 13 <- Reducer 12 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 1 Map Operator Tree: TableScan alias: store_sales @@ -458,7 +458,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Map 14 + Map 5 Map Operator Tree: TableScan alias: date_dim @@ -475,7 +475,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -491,15 +491,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col1 (type: int) - outputColumnNames: _col1, _col6 + outputColumnNames: _col2, _col8 input vertices: - 0 Map 1 + 1 Map 14 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col6) - keys: _col1 (type: string) + aggregations: sum(_col2) + keys: _col8 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE @@ -510,7 +510,7 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 12 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -524,7 +524,7 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 13 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query80.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query80.q.out b/ql/src/test/results/clientpositive/perf/spark/query80.q.out index 051bfd1..9b008eb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query80.q.out @@ -194,7 +194,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-5 depends on stages: Stage-4 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -202,7 +203,25 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 10 + Map Operator Tree: + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 11 Map Operator Tree: TableScan alias: store @@ -225,7 +244,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -248,7 +267,30 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 21 + Map 20 + Map Operator Tree: + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 18 Map Operator Tree: TableScan alias: date_dim @@ -267,11 +309,29 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-5 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: - Map 33 + Map 29 + Map Operator Tree: + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 30 Map Operator Tree: TableScan alias: web_site @@ -290,11 +350,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-6 + Stage: Stage-7 Spark #### A masked pattern was here #### Vertices: - Map 31 + Map 27 Map Operator Tree: TableScan alias: date_dim @@ -316,21 +376,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 19 (PARTITION-LEVEL SORT, 329) - Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 336), Reducer 14 (PARTITION-LEVEL SORT, 336) - Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 408), Reducer 15 (PARTITION-LEVEL SORT, 408) - Reducer 17 <- Map 23 (PARTITION-LEVEL SORT, 447), Reducer 16 (PARTITION-LEVEL SORT, 447) - Reducer 18 <- Reducer 17 (GROUP, 491) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) - Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 164), Map 29 (PARTITION-LEVEL SORT, 164) - Reducer 26 <- Map 30 (PARTITION-LEVEL SORT, 169), Reducer 25 (PARTITION-LEVEL SORT, 169) - Reducer 27 <- Map 32 (PARTITION-LEVEL SORT, 206), Reducer 26 (PARTITION-LEVEL SORT, 206) - Reducer 28 <- Reducer 27 (GROUP, 247) - Reducer 3 <- Map 20 (PARTITION-LEVEL SORT, 437), Reducer 2 (PARTITION-LEVEL SORT, 437) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 531), Reducer 3 (PARTITION-LEVEL SORT, 531) - Reducer 5 <- Reducer 4 (GROUP, 640) - Reducer 6 <- Reducer 18 (GROUP, 1009), Reducer 28 (GROUP, 1009), Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 329), Map 17 (PARTITION-LEVEL SORT, 329) + Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 371), Reducer 13 (PARTITION-LEVEL SORT, 371) + Reducer 15 <- Map 21 (PARTITION-LEVEL SORT, 447), Reducer 14 (PARTITION-LEVEL SORT, 447) + Reducer 16 <- Reducer 15 (GROUP, 491) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 164), Map 26 (PARTITION-LEVEL SORT, 164) + Reducer 24 <- Map 28 (PARTITION-LEVEL SORT, 187), Reducer 23 (PARTITION-LEVEL SORT, 187) + Reducer 25 <- Reducer 24 (GROUP, 247) + Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 483), Reducer 2 (PARTITION-LEVEL SORT, 483) + Reducer 4 <- Reducer 3 (GROUP, 640) + Reducer 5 <- Reducer 16 (GROUP, 1009), Reducer 25 (GROUP, 1009), Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -351,24 +408,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col4 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Map 11 - Map Operator Tree: - TableScan - alias: item - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Map 13 + Map 12 Map Operator Tree: TableScan alias: catalog_sales @@ -386,7 +426,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int), _col4 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Map 19 + Map 17 Map Operator Tree: TableScan alias: catalog_returns @@ -404,24 +444,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) - Map 20 + Map 19 Map Operator Tree: TableScan - alias: promotion - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_promo_sk (type: int) + expressions: i_item_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Map 23 + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE + Map 21 Map Operator Tree: TableScan alias: catalog_page @@ -439,7 +479,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 24 + Map 22 Map Operator Tree: TableScan alias: web_sales @@ -457,7 +497,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col4 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Map 29 + Map 26 Map Operator Tree: TableScan alias: web_returns @@ -475,24 +515,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) - Map 30 - Map Operator Tree: - TableScan - alias: promotion - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((p_channel_tv = 'N') and p_promo_sk is not null) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Map 32 + Map 28 Map Operator Tree: TableScan alias: item @@ -509,7 +532,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 Map Operator Tree: TableScan alias: store_returns @@ -527,7 +550,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) - Reducer 14 + Reducer 13 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -537,41 +562,25 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 15 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 input vertices: - 1 Map 21 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + 1 Map 18 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 16 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 14 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -579,15 +588,25 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col6, _col9, _col10 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + outputColumnNames: _col1, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col5, _col6, _col9, _col10 + input vertices: + 1 Map 20 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 17 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -613,7 +632,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 18 + Reducer 16 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -639,6 +658,8 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -648,13 +669,25 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 25 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 + input vertices: + 1 Map 8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 23 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -664,41 +697,23 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 26 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col9, _col10 input vertices: - 1 Map 31 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + 1 Map 27 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 27 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 24 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -708,35 +723,45 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col18 + outputColumnNames: _col2, _col5, _col6, _col9, _col10 input vertices: - 1 Map 33 - Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 Map 29 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col18 + input vertices: + 1 Map 30 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col2), sum(_col3) - keys: _col0 (type: string) - mode: hash + Select Operator + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 28 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reducer 25 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -769,65 +794,47 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 - input vertices: - 1 Map 10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col18 + outputColumnNames: _col2, _col5, _col6, _col9, _col10 input vertices: - 1 Map 12 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 Map 10 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col9, _col10, _col18 + input vertices: + 1 Map 11 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col2), sum(_col3) - keys: _col0 (type: string) - mode: hash + Select Operator + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col1), sum(_col2), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) - Reducer 5 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(22,2)), _col3 (type: decimal(23,2)) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -852,7 +859,7 @@ STAGE PLANS: Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(32,2)), _col5 (type: decimal(33,2)) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -871,7 +878,7 @@ STAGE PLANS: Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(32,2)), _col4 (type: decimal(33,2)) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(32,2)), VALUE._col2 (type: decimal(33,2))