http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query70.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query70.q.out b/ql/src/test/results/clientpositive/perf/spark/query70.q.out index 4222b52..b719cdb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query70.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select sum(ss_net_profit) as total_sum ,s_state @@ -35,7 +35,7 @@ select ,rank_within_parent limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select sum(ss_net_profit) as total_sum ,s_state @@ -72,6 +72,10 @@ select ,rank_within_parent limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -89,18 +93,40 @@ STAGE PLANS: alias: store filterExpr: (s_state is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 24:string), SelectColumnIsNotNull(col 0:int)) predicate: (s_state is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_county (type: string), s_state (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 23, 24] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -114,18 +140,40 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_state is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 24:string)) predicate: (s_state is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_state (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 24] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -148,81 +196,178 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 22] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 7 Map Operator Tree: TableScan alias: d1 filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 9 Map Operator Tree: TableScan alias: store_sales filterExpr: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 0:int)) predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 22] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 10 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -256,9 +401,23 @@ STAGE PLANS: value expressions: _col1 (type: decimal(17,2)) Reducer 11 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -267,14 +426,29 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) sort order: +- Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No PTF TopN IS false Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 12 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -296,22 +470,47 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:decimal(17,2)] + functionNames: [rank] + native: true + orderExpressions: [col 1:decimal(17,2)] + partitionExpressions: [col 0:string] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessEqualLongScalar(col 2:int, val 5) predicate: (rank_window_0 <= 5) (type: boolean) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reducer 2 Local Work: Map Reduce Local Work + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -338,6 +537,11 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col6 (type: string) Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -365,9 +569,23 @@ STAGE PLANS: value expressions: _col3 (type: decimal(17,2)) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:bigint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -375,19 +593,38 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(17,2)), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 2] Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: (grouping(_col3, 1) + grouping(_col3, 0)) (type: bigint), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string), _col2 (type: decimal(17,2)) sort order: ++- Map-reduce partition columns: (grouping(_col3, 1) + grouping(_col3, 0)) (type: bigint), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: LongColAddLongColumn(col 4:bigint, col 5:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 4:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 5:bigint) -> 6:bigint, IfExprColumnNull(col 5:boolean, col 0:string, null)(children: LongColEqualLongScalar(col 4:bigint, val 0)(children: VectorUDFAdaptor(grouping(_col3, 0)) -> 4:bigint) -> 5:boolean, col 0:string) -> 7:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) Reducer 5 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: decimal(17,2)), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 4, 2, 5] Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -409,29 +646,62 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 2:decimal(17,2)] + functionNames: [rank] + native: true + orderExpressions: [col 2:decimal(17,2)] + partitionExpressions: [LongColAddLongColumn(col 7:bigint, col 8:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 7:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 8:bigint) -> 9:bigint, IfExprColumnNull(col 8:boolean, col 3:string, null)(children: LongColEqualLongScalar(col 7:bigint, val 0)(children: VectorUDFAdaptor(grouping(_col3, 0)) -> 7:bigint) -> 8:boolean, col 3:string) -> 10:string] Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(17,2)), _col0 (type: string), _col1 (type: string), (grouping(_col3, 1) + grouping(_col3, 0)) (type: bigint), rank_window_0 (type: int), CASE WHEN (((grouping(_col3, 1) + grouping(_col3, 0)) = 0)) THEN (_col0) ELSE (null) END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 12, 6, 14] + selectExpressions: LongColAddLongColumn(col 7:bigint, col 11:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 7:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 11:bigint) -> 12:bigint, IfExprColumnNull(col 7:boolean, col 3:string, null)(children: LongColEqualLongScalar(col 13:bigint, val 0)(children: LongColAddLongColumn(col 7:bigint, col 11:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 7:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 11:bigint) -> 13:bigint) -> 7:boolean, col 3:string) -> 14:string Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: bigint), _col5 (type: string), _col4 (type: int) sort order: -++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string) Reducer 6 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 4, 5, 0, 2] Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query71.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out b/ql/src/test/results/clientpositive/perf/spark/query71.q.out index eded78c..2ec7b12 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select i_brand_id brand_id, i_brand brand,t_hour,t_minute, sum(ext_price) ext_price from item, (select ws_ext_sales_price as ext_price, @@ -36,7 +36,7 @@ select i_brand_id brand_id, i_brand brand,t_hour,t_minute, group by i_brand, i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select i_brand_id brand_id, i_brand brand,t_hour,t_minute, sum(ext_price) ext_price from item, (select ws_ext_sales_price as ext_price, @@ -74,6 +74,10 @@ select i_brand_id brand_id, i_brand brand,t_hour,t_minute, group by i_brand, i_brand_id,t_hour,t_minute order by ext_price desc, i_brand_id POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -97,139 +101,305 @@ STAGE PLANS: alias: web_sales filterExpr: (ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 1:int)) predicate: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 23] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 12), FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 11 Map Operator Tree: TableScan alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 1:int)) predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_sold_time_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 15] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 12), FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 14 Map Operator Tree: TableScan alias: item filterExpr: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 20:int, val 1), SelectColumnIsNotNull(col 0:int)) predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 8] Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 15 Map Operator Tree: TableScan alias: time_dim filterExpr: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean) Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 9, values breakfast, dinner), SelectColumnIsNotNull(col 0:int)) predicate: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean) Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4] Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 8 Map Operator Tree: TableScan alias: catalog_sales filterExpr: (cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 1:int)) predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 15, 23] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 12 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -247,6 +417,11 @@ STAGE PLANS: Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -264,6 +439,11 @@ STAGE PLANS: Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -280,6 +460,11 @@ STAGE PLANS: Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col4 (type: int), _col5 (type: string) Reducer 4 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -303,9 +488,23 @@ STAGE PLANS: value expressions: _col4 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:int, col 2:int, col 3:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -313,27 +512,53 @@ STAGE PLANS: Select Operator expressions: _col3 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: decimal(17,2)), _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1, 2, 4, 0] Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: decimal(17,2)), _col5 (type: int) sort order: -+ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: int) Reducer 6 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2, 3, 4, 0] Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: