http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query21.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index c3fde7b..1673061 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -27,7 +27,7 @@ select * ,i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -56,10 +56,6 @@ select * ,i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -77,40 +73,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -124,40 +98,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-08 16:00:00.0, right 1998-05-07 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -175,22 +127,12 @@ STAGE PLANS: alias: inventory filterExpr: (inv_warehouse_sk is not null and inv_item_sk is not null and inv_date_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:int)) predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -198,10 +140,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col2, _col3, _col5 input vertices: 1 Map 5 @@ -210,22 +148,9 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 6 @@ -234,51 +159,23 @@ STAGE PLANS: alias: item filterExpr: (i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterDecimal64ColumnBetween(col 5:decimal(7,2)/DECIMAL_64, decimal64LeftVal 99, decimalLeftVal 0.99, decimal64RightVal 149, decimalRightVal 1.49), SelectColumnIsNotNull(col 0:int)) predicate: (i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -316,72 +213,34 @@ STAGE PLANS: value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsTrue(col 9:boolean)(children: IfExprCondExprNull(col 4:boolean, col 8:boolean, null)(children: LongColGreaterLongScalar(col 2:bigint, val 0) -> 4:boolean, DoubleColumnBetween(col 7:double, left 0.666667, right 1.5)(children: DoubleColDivideDoubleColumn(col 5:double, col 6:double)(children: CastLongToDouble(col 3:bigint) -> 5:double, CastLongToDouble(col 2:bigint) -> 6:double) -> 7:double) -> 8:boolean) -> 9:boolean) predicate: CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END (type: boolean) Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 4 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query22.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out index 3d99a80..2a99cc3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_product_name ,i_brand ,i_class @@ -19,7 +19,7 @@ select i_product_name order by qoh, i_product_name, i_brand, i_class, i_category limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_product_name ,i_brand ,i_class @@ -40,10 +40,6 @@ select i_product_name order by qoh, i_product_name, i_brand, i_class, i_category limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -60,40 +56,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 6 @@ -102,40 +76,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -153,22 +105,12 @@ STAGE PLANS: alias: inventory filterExpr: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 2:int)) predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -176,10 +118,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 5 @@ -190,10 +128,6 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 6 @@ -202,22 +136,9 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 7 @@ -226,49 +147,21 @@ STAGE PLANS: alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 8, 10, 12, 21] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -292,23 +185,9 @@ STAGE PLANS: value expressions: _col5 (type: bigint), _col6 (type: bigint) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:bigint - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6 @@ -317,49 +196,24 @@ STAGE PLANS: Select Operator expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), (_col5 / _col6) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3, 0, 1, 2, 6] - selectExpressions: LongColDivideLongColumn(col 4:bigint, col 5:bigint) -> 6:double Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: double), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: +++++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 4 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 2, 3, 4, 0] Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat