http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query81.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query81.q.out b/ql/src/test/results/clientpositive/perf/spark/query81.q.out index 60186d9..16c1a8f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query81.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression with customer_total_return as (select cr_returning_customer_sk as ctr_customer_sk ,ca_state as ctr_state, @@ -28,7 +28,7 @@ with customer_total_return as ,ca_location_type,ctr_total_return limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression with customer_total_return as (select cr_returning_customer_sk as ctr_customer_sk ,ca_state as ctr_state, @@ -58,6 +58,10 @@ with customer_total_return as ,ca_location_type,ctr_total_return limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -84,159 +88,348 @@ STAGE PLANS: alias: customer filterExpr: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int)) predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 4, 7, 8, 9] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 11 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1998), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 12 Map Operator Tree: TableScan alias: customer_address filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 8:string)) predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 8] Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 13 Map Operator Tree: TableScan alias: catalog_returns filterExpr: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 10:int)) predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 10, 20] Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 17 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1998), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 18 Map Operator Tree: TableScan alias: customer_address filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 8:string)) predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 8] Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: customer_address filterExpr: ((ca_state = 'IL') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 8:string, val IL), SelectColumnIsNotNull(col 0:int)) predicate: ((ca_state = 'IL') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_street_number (type: string), ca_street_name (type: string), ca_street_type (type: string), ca_suite_number (type: string), ca_city (type: string), ca_county (type: string), ca_zip (type: string), ca_country (type: string), ca_gmt_offset (type: decimal(5,2)), ca_location_type (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12] Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: decimal(5,2)), _col11 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: catalog_returns filterExpr: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 10:int), SelectColumnIsNotNull(col 7:int)) predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 10, 20] Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 10 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -260,6 +453,11 @@ STAGE PLANS: Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) Reducer 14 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -276,6 +474,11 @@ STAGE PLANS: Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 15 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -299,9 +502,23 @@ STAGE PLANS: value expressions: _col2 (type: decimal(17,2)) Reducer 16 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -309,9 +526,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), count(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(27,2), VectorUDAFCount(col 2:decimal(17,2)) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 @@ -319,14 +548,28 @@ STAGE PLANS: Select Operator expressions: ((_col1 / _col2) * 1.2) (type: decimal(38,11)), _col0 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 0] + selectExpressions: DecimalColMultiplyDecimalScalar(col 4:decimal(38,13), val 1.2)(children: DecimalColDivideDecimalColumn(col 1:decimal(27,2), col 3:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 4:decimal(38,13)) -> 5:decimal(38,11) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,11)) Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -343,6 +586,11 @@ STAGE PLANS: Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: decimal(5,2)), _col17 (type: string) Reducer 3 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -363,26 +611,52 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(5,2)), KEY.reducesinkkey13 (type: string), KEY.reducesinkkey14 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), 'IL' (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 10, 11, 12, 13, 14] + selectExpressions: ConstantVectorExpression(val IL) -> 15:string Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -399,6 +673,11 @@ STAGE PLANS: Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 8 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -422,9 +701,23 @@ STAGE PLANS: value expressions: _col2 (type: decimal(17,2)) Reducer 9 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -432,11 +725,19 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: string), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(17,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query82.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query82.q.out b/ql/src/test/results/clientpositive/perf/spark/query82.q.out index daadc88..f38469c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query82.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select i_item_id ,i_item_desc ,i_current_price @@ -14,7 +14,7 @@ select i_item_id order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select i_item_id ,i_item_desc ,i_current_price @@ -30,6 +30,10 @@ select i_item_id order by i_item_id limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -46,18 +50,40 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2002-05-29 17:00:00.0, right 2002-07-28 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -75,51 +101,107 @@ STAGE PLANS: alias: store_sales filterExpr: ss_item_sk is not null (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2:int) predicate: ss_item_sk is not null (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_item_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: item filterExpr: ((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnInList(col 13:int, values [437, 129, 727, 663]), FilterDecimal64ColumnBetween(col 5:decimal(7,2)/DECIMAL_64, decimal64LeftVal 3000, decimalLeftVal 3000, decimal64RightVal 6000, decimalRightVal 6000), SelectColumnIsNotNull(col 0:int)) predicate: ((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 4, 5] Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: inventory filterExpr: (inv_quantity_on_hand BETWEEN 100 AND 500 and inv_item_sk is not null and inv_date_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 100, right 500), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:int)) predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) (type: boolean) Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: inv_date_sk (type: int), inv_item_sk (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -127,6 +209,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1 input vertices: 1 Map 7 @@ -135,11 +221,29 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -164,8 +268,21 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -173,21 +290,41 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: decimal(7,2)) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat