http://git-wip-us.apache.org/repos/asf/hive/blob/f0b76e24/ql/src/test/results/clientpositive/perf/spark/query57.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 7d53922..5976141 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -269,10 +269,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -329,10 +329,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -369,10 +369,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)) or (d_year = 2000)) and d_date_sk is not null) (type: boolean) + predicate: (((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and (d_year) IN (2000, 1999, 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int) @@ -425,14 +425,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 14 @@ -440,34 +440,34 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) sort order: ++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(17,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(17,2)) Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST - partition by: _col4, _col3, _col2, _col0 + order by: _col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col1, _col0, _col4, _col2 raw input shape: window functions: window function definition @@ -478,55 +478,55 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: int, _col4: int, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col1 ASC NULLS LAST, _col2 ASC NULLS LAST - partition by: _col5, _col4, _col3 + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col5 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col1, _col2 + arguments: _col3, _col4 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col3 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 31942874 Data size: 4325706828 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col4 (type: string), _col3 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: string), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -603,14 +603,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 23 @@ -618,39 +618,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col4, _col3, _col2 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -660,7 +660,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -681,14 +681,14 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col10 (type: string), _col11 (type: string) + keys: _col10 (type: string), _col11 (type: string), _col5 (type: int), _col6 (type: int), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) sort order: +++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 4 @@ -696,39 +696,39 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col2 (type: int), _col3 (type: int) sort order: +++++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string), _col2 (type: string) + Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col4 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: decimal(17,2) + output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: string, _col5: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col0 ASC NULLS LAST, _col1 ASC NULLS LAST - partition by: _col4, _col3, _col2 + order by: _col2 ASC NULLS LAST, _col3 ASC NULLS LAST + partition by: _col1, _col0, _col4 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col0, _col1 + arguments: _col2, _col3 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -738,7 +738,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/f0b76e24/ql/src/test/results/clientpositive/perf/spark/query63.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out index dc51332..1288b30 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -128,10 +128,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: item - filterExpr: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + filterExpr: ((i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_manager_id (type: int) http://git-wip-us.apache.org/repos/asf/hive/blob/f0b76e24/ql/src/test/results/clientpositive/perf/spark/query85.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index dfa2679..ce842c2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,7 +182,8 @@ POSTHOOK: Input: default@web_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -190,26 +191,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 - Map Operator Tree: - TableScan - alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 12 Map Operator Tree: TableScan @@ -225,25 +206,69 @@ STAGE PLANS: Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col14 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 62), Map 9 (PARTITION-LEVEL SORT, 62) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 57), Reducer 2 (PARTITION-LEVEL SORT, 57) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 81), Reducer 3 (PARTITION-LEVEL SORT, 81) - Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 13), Reducer 4 (PARTITION-LEVEL SORT, 13) - Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 167), Reducer 5 (PARTITION-LEVEL SORT, 167) - Reducer 7 <- Reducer 6 (GROUP, 59) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 20), Map 2 (PARTITION-LEVEL SORT, 20) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 31), Reducer 3 (PARTITION-LEVEL SORT, 31) + Reducer 5 <- Map 13 (PARTITION-LEVEL SORT, 184), Reducer 4 (PARTITION-LEVEL SORT, 184) + Reducer 6 <- Map 14 (PARTITION-LEVEL SORT, 15), Reducer 5 (PARTITION-LEVEL SORT, 15) + Reducer 7 <- Map 15 (PARTITION-LEVEL SORT, 7), Reducer 6 (PARTITION-LEVEL SORT, 7) + Reducer 8 <- Reducer 7 (GROUP, 7) + Reducer 9 <- Reducer 8 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: web_returns @@ -263,26 +288,27 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 10 + Map 13 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: cd1 @@ -302,7 +328,7 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: cd2 @@ -321,63 +347,55 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 9 + Map 2 Map Operator Tree: TableScan alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7 + input vertices: + 0 Map 1 + Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14 - Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7 + Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) - Reducer 3 + key expressions: _col2 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col4 (type: int) + Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -385,101 +403,87 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col10, _col12, _col13, _col14 - Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE + 0 _col2 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col5 (type: int) + outputColumnNames: _col5, _col6, _col7, _col11, _col12, _col13, _col14, _col16, _col17 + Statistics: Num rows: 21296393 Data size: 2895693863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: int) + 0 _col14 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col12, _col13, _col14 + outputColumnNames: _col5, _col6, _col7, _col11, _col12, _col13, _col16, _col17, _col19 input vertices: - 1 Map 11 - Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19 - input vertices: - 1 Map 12 - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70278102 Data size: 9555789961 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 4 + 1 Map 12 + Statistics: Num rows: 23426032 Data size: 3185263318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 23426032 Data size: 3185263318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col12 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col6, _col7, _col12, _col13, _col14, _col19, _col21, _col22 - Statistics: Num rows: 77305913 Data size: 10511369184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col5, _col6, _col7, _col11, _col13, _col16, _col17, _col19, _col21 + Statistics: Num rows: 25768635 Data size: 3503789725 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col21 = 'D') and (_col22 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col21 = 'M') and (_col22 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col21 = 'U') and (_col22 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col21) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 100 AND 200) or ((_col21) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 300) or ((_col21) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 8589543 Data size: 1167929636 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int), _col21 (type: string), _col22 (type: string) - sort order: +++ - Map-reduce partition columns: _col3 (type: int), _col21 (type: string), _col22 (type: string) - Statistics: Num rows: 6442158 Data size: 875947239 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 5 + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 8589543 Data size: 1167929636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int), _col21 (type: string), _col22 (type: string) - 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col2, _col6, _col7, _col12, _col14, _col19 - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 7086373 Data size: 963541983 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col14 (type: decimal(7,2)), _col19 (type: string) - Reducer 6 + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col13, _col16, _col17, _col19, _col24, _col25 + Statistics: Num rows: 9448497 Data size: 1284722627 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col24 = 'D') and (_col25 = 'Primary') and _col6 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col6 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200)) (type: boolean) + Statistics: Num rows: 787374 Data size: 107060116 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int), _col24 (type: string), _col25 (type: string) + sort order: +++ + Map-reduce partition columns: _col13 (type: int), _col24 (type: string), _col25 (type: string) + Statistics: Num rows: 787374 Data size: 107060116 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col16 (type: decimal(7,2)), _col17 (type: decimal(7,2)), _col19 (type: string) + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col12, _col14, _col19, _col27 - Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col27) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col27) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col27) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) (type: boolean) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col19 (type: string) - outputColumnNames: _col6, _col7, _col12, _col19 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col19 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) - Reducer 7 + 0 _col13 (type: int), _col24 (type: string), _col25 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col5, _col16, _col17, _col19 + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), count(_col5), sum(_col17), count(_col17), sum(_col16), count(_col16) + keys: _col19 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -487,29 +491,29 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(_col1) / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat