http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 89986fb..a1924ce 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -776,18 +776,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter)))) (type: boolean) + filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter)))) (type: boolean) Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -795,8 +794,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) - predicate: ((key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) and key_int is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) + predicate: ((key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and key_int is not null) (type: boolean) Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_int (type: int) @@ -929,34 +928,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 55 Data size: 216 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 55 Data size: 216 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=55) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 1:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilter(col 1:int) -> binary - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -965,7 +936,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1052,35 +1023,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=55) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilterMerge(col 2:binary) -> binary - className: VectorGroupByOperator - groupByMode: FINAL - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1, 2] - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -1122,18 +1064,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Map 1 <- Reducer 5 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - filterExpr: (key_str is not null and key_int is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean) + filterExpr: (key_str is not null and key_int is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean) Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1141,8 +1082,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) - predicate: ((key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and key_int is not null and key_str is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue)) + predicate: ((key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and key_int is not null and key_str is not null) (type: boolean) Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key_str (type: string), key_int (type: int) @@ -1232,34 +1173,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 53 Data size: 9789 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=53) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 1:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilter(col 1:int) -> binary - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1353,35 +1266,6 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 740 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=53) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilterMerge(col 2:binary) -> binary - className: VectorGroupByOperator - groupByMode: FINAL - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1, 2] - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index b55f2c1..4019b26 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product +Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product +Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 32' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index 6d3d037..fd5f6d7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 32' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with my_customers as ( http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out index 63fa5ce..def822a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out @@ -133,24 +133,21 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) -Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 21 <- Reducer 10 (BROADCAST_EDGE) +Map 22 <- Reducer 9 (BROADCAST_EDGE) Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) -Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 20 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) @@ -160,16 +157,16 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_235] - Limit [LIM_234] (rows=1 width=419) + File Output Operator [FS_223] + Limit [LIM_222] (rows=1 width=419) Number of rows:100 - Select Operator [SEL_233] (rows=1 width=419) + Select Operator [SEL_221] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] - Select Operator [SEL_231] (rows=1 width=419) + SHUFFLE [RS_220] + Select Operator [SEL_219] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_230] (rows=1 width=379) + Group By Operator [GBY_218] (rows=1 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] @@ -183,23 +180,61 @@ Stage-0 Filter Operator [FIL_63] (rows=58 width=379) predicate:(_col15 is not null or _col17 is not null) Merge Join Operator [MERGEJOIN_180] (rows=58 width=379) - Conds:RS_60._col0=RS_229._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + Conds:RS_60._col0=RS_217._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] <-Reducer 5 [ONE_TO_ONE_EDGE] PARTITION_ONLY_SHUFFLE [RS_60] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_179] (rows=58 width=379) - Conds:RS_57._col0=RS_219._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] + Conds:RS_57._col0=RS_209._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] <-Reducer 4 [ONE_TO_ONE_EDGE] FORWARD [RS_57] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_178] (rows=22703 width=375) Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_53] (rows=155827 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_17] (rows=57825495 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=57825495 width=2) + Conds:RS_201._col0=RS_191._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_189] (rows=201 width=12) + predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7) + TableScan [TS_11] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] + PartitionCols:_col0 + Select Operator [SEL_200] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_199] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_8] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_198] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_196] + Group By Operator [GBY_195] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_192] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_174] (rows=228127 width=375) Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 13 [SIMPLE_EDGE] vectorized + <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_188] PartitionCols:_col0 Select Operator [SEL_187] (rows=1861800 width=375) @@ -220,7 +255,7 @@ Stage-0 predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 12 [SIMPLE_EDGE] vectorized + <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_186] PartitionCols:_col0 Select Operator [SEL_185] (rows=116550 width=102) @@ -229,85 +264,36 @@ Stage-0 predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') TableScan [TS_3] (rows=40000000 width=102) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col0 - Group By Operator [GBY_53] (rows=155827 width=2) - Output:["_col0"],keys:_col0 - Select Operator [SEL_17] (rows=57825495 width=2) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_175] (rows=57825495 width=2) - Conds:RS_209._col0=RS_191._col0(Inner),Output:["_col1"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_191] - PartitionCols:_col0 - Select Operator [SEL_190] (rows=201 width=4) - Output:["_col0"] - Filter Operator [FIL_189] (rows=201 width=12) - predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7) - TableScan [TS_11] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=525327388 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=525327388 width=7) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_54_c_c_customer_sk_min) AND DynamicValue(RS_54_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_54_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_8] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=228127 width=4) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_174] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_200] - Group By Operator [GBY_197] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_192] (rows=201 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] - <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_219] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_209] PartitionCols:_col0 - Select Operator [SEL_218] (rows=155827 width=7) + Select Operator [SEL_208] (rows=155827 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_217] (rows=155827 width=3) + Group By Operator [GBY_207] (rows=155827 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Group By Operator [GBY_28] (rows=155827 width=3) Output:["_col0"],keys:_col1 Merge Join Operator [MERGEJOIN_176] (rows=15843227 width=3) - Conds:RS_216._col0=RS_193._col0(Inner),Output:["_col1"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] + Conds:RS_206._col0=RS_193._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_190] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_206] PartitionCols:_col0 - Select Operator [SEL_215] (rows=143930993 width=7) + Select Operator [SEL_205] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=143930993 width=7) - predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_57_c_c_customer_sk_min) AND DynamicValue(RS_57_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_57_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + Filter Operator [FIL_204] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_57_c_c_customer_sk_min) AND DynamicValue(RS_57_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_57_c_c_customer_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_18] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + BROADCAST [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] FORWARD [RS_150] @@ -316,58 +302,36 @@ Stage-0 Select Operator [SEL_148] (rows=22703 width=4) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_178] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] - Group By Operator [GBY_198] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=201 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] - <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_229] + <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_217] PartitionCols:_col0 - Select Operator [SEL_228] (rows=154725 width=7) + Select Operator [SEL_216] (rows=154725 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_227] (rows=154725 width=3) + Group By Operator [GBY_215] (rows=154725 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 Group By Operator [GBY_42] (rows=154725 width=3) Output:["_col0"],keys:_col1 Merge Join Operator [MERGEJOIN_177] (rows=31162251 width=3) - Conds:RS_226._col0=RS_195._col0(Inner),Output:["_col1"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] + Conds:RS_214._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_190] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] PartitionCols:_col0 - Select Operator [SEL_225] (rows=285115246 width=7) + Select Operator [SEL_213] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_224] (rows=285115246 width=7) - predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_212] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_32] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] - Group By Operator [GBY_220] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_199] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=201 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_165] http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out index 741bd90..8b7c09b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out @@ -73,8 +73,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) @@ -87,23 +86,23 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_86] - Limit [LIM_85] (rows=100 width=802) + File Output Operator [FS_81] + Limit [LIM_80] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_84] (rows=138600 width=801) + Select Operator [SEL_79] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_83] - Select Operator [SEL_82] (rows=138600 width=801) + SHUFFLE [RS_78] + Select Operator [SEL_77] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_81] (rows=138600 width=689) + PTF Operator [PTF_76] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_80] (rows=138600 width=689) + Select Operator [SEL_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] + SHUFFLE [RS_74] PartitionCols:_col1 - Group By Operator [GBY_78] (rows=138600 width=689) + Group By Operator [GBY_73] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] @@ -111,13 +110,13 @@ Stage-0 Group By Operator [GBY_16] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 Merge Join Operator [MERGEJOIN_58] (rows=4798568 width=689) - Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] + Conds:RS_12._col1=RS_72._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_69] + SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_68] (rows=138600 width=581) + Select Operator [SEL_71] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_67] (rows=138600 width=581) + Filter Operator [FIL_70] (rows=138600 width=581) predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') TableScan [TS_6] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] @@ -125,7 +124,7 @@ Stage-0 SHUFFLE [RS_12] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_57] (rows=15995224 width=115) - Conds:RS_77._col0=RS_61._col0(Inner),Output:["_col1","_col2"] + Conds:RS_69._col0=RS_61._col0(Inner),Output:["_col1","_col2"] <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 @@ -136,25 +135,14 @@ Stage-0 TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_77] + SHUFFLE [RS_69] PartitionCols:_col0 - Select Operator [SEL_76] (rows=143966864 width=119) + Select Operator [SEL_68] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_75] (rows=143966864 width=119) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) + Filter Operator [FIL_67] (rows=143966864 width=119) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) TableScan [TS_0] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_74] - Group By Operator [GBY_73] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_72] - Group By Operator [GBY_71] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_70] (rows=138600 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_68] <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_66] Group By Operator [GBY_65] (rows=1 width=12) http://git-wip-us.apache.org/repos/asf/hive/blob/e986fc56/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out index 02966e4..ac81e37 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out @@ -115,14 +115,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 7 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) @@ -131,10 +128,10 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_135] - Select Operator [SEL_134] (rows=1 width=344) + File Output Operator [FS_120] + Select Operator [SEL_119] (rows=1 width=344) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_133] (rows=1 width=256) + Group By Operator [GBY_118] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_31] @@ -145,13 +142,13 @@ Stage-0 Filter Operator [FIL_28] (rows=40950 width=44) predicate:((_col22 and _col23 and _col11 and _col15) or (_col24 and _col25 and _col12 and _col16) or (_col26 and _col27 and _col13 and _col16)) Merge Join Operator [MERGEJOIN_97] (rows=218403 width=44) - Conds:RS_25._col2=RS_124._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col15","_col16","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_124] + Conds:RS_25._col2=RS_117._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col15","_col16","_col22","_col23","_col24","_col25","_col26","_col27"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_123] (rows=265971 width=28) + Select Operator [SEL_116] (rows=265971 width=28) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_122] (rows=265971 width=183) + Filter Operator [FIL_115] (rows=265971 width=183) predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) TableScan [TS_12] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] @@ -161,13 +158,13 @@ Stage-0 Filter Operator [FIL_24] (rows=218403 width=44) predicate:((_col18 and _col8) or (_col19 and _col9) or (_col20 and _col10)) Merge Join Operator [MERGEJOIN_96] (rows=291204 width=44) - Conds:RS_21._col4=RS_116._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16","_col18","_col19","_col20"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + Conds:RS_21._col4=RS_114._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16","_col18","_col19","_col20"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] PartitionCols:_col0 - Select Operator [SEL_115] (rows=3529412 width=16) + Select Operator [SEL_113] (rows=3529412 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_114] (rows=3529412 width=187) + Filter Operator [FIL_112] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) TableScan [TS_9] (rows=40000000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] @@ -175,13 +172,13 @@ Stage-0 SHUFFLE [RS_21] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_95] (rows=3300311 width=104) - Conds:RS_18._col3=RS_108._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16"] + Conds:RS_18._col3=RS_111._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_108] + SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_107] (rows=1309 width=12) + Select Operator [SEL_110] (rows=1309 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_106] (rows=1309 width=8) + Filter Operator [FIL_109] (rows=1309 width=8) predicate:(hd_dep_count) IN (3, 1) TableScan [TS_6] (rows=7200 width=8) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] @@ -189,7 +186,7 @@ Stage-0 SHUFFLE [RS_18] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_94] (rows=18152968 width=233) - Conds:RS_100._col0=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Conds:RS_100._col0=RS_108._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 1 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_100] PartitionCols:_col0 @@ -200,47 +197,14 @@ Stage-0 TableScan [TS_0] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] + SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_131] (rows=50840141 width=257) + Select Operator [SEL_107] (rows=50840141 width=257) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_130] (rows=50840141 width=450) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_26_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_26_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_26_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_19_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_19_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_19_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 A ND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_106] (rows=50840141 width=450) + predicate:((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_3] (rows=575995635 width=450) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_113] - Group By Operator [GBY_112] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=1309 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_107] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3529412)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - Group By Operator [GBY_118] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3529412)"] - Select Operator [SEL_117] (rows=3529412 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_115] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=265971 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] <-Reducer 7 [BROADCAST_EDGE] vectorized BROADCAST [RS_105] Group By Operator [GBY_104] (rows=1 width=12)