http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
index 1886769..9801470 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
@@ -60,6 +60,8 @@ STAGE PLANS:
                     0 key (type: int)
                     1 value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -195,6 +197,8 @@ STAGE PLANS:
                     1 value (type: int)
                     2 key (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -356,6 +360,8 @@ STAGE PLANS:
                     1 value (type: int)
                     2 key (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -538,6 +544,8 @@ STAGE PLANS:
                     1 value (type: int), key (type: int)
                     2 key (type: int), value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int, col 1:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -698,6 +706,8 @@ STAGE PLANS:
                     1 value (type: int), key (type: int)
                     2 key (type: int), value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int, col 1:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -943,6 +953,8 @@ STAGE PLANS:
                     0 key (type: int)
                     1 value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -1078,6 +1090,8 @@ STAGE PLANS:
                     1 value (type: int)
                     2 key (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -1239,6 +1253,8 @@ STAGE PLANS:
                     1 value (type: int)
                     2 key (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -1421,6 +1437,8 @@ STAGE PLANS:
                     1 value (type: int), key (type: int)
                     2 key (type: int), value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int, col 1:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -1581,6 +1599,8 @@ STAGE PLANS:
                     1 value (type: int), key (type: int)
                     2 key (type: int), value (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 0:int, col 1:int
+                      bigTableValueExpressions: col 0:int, col 1:int
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
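The hunks above record two new explain fields: bigTableKeyExpressions (how the big-table key columns are produced) and bigTableValueExpressions (which big-table columns are carried through the join). A minimal sketch of the kind of query behind these plans (assumption: vector_nullsafe_join.q exercises Hive's null-safe equality operator <=>; the table name is illustrative):

    -- Null-safe join: <=> also matches NULL = NULL, so the plan must spell out
    -- the key columns (bigTableKeyExpressions) and the projected columns
    -- (bigTableValueExpressions) that flow through VectorMapJoinOperator.
    SET hive.vectorized.execution.enabled=true;
    SELECT t1.key, t1.value, t2.key, t2.value
    FROM myinput1 t1 JOIN myinput1 t2 ON t1.key <=> t2.value;
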
http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
index 51994da..3232736 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
@@ -38,40 +38,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@over1k
-PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC
+PREHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t1
-POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2)) STORED AS ORC
+POSTHOOK: query: CREATE TABLE t1(`dec` decimal(22,2), value_dec decimal(22,2)) STORED AS ORC
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t1
-PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k
+PREHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k
 PREHOOK: Output: default@t1
-POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k
+POSTHOOK: query: INSERT INTO TABLE t1 select `dec`, cast(d as decimal(22,2)) from over1k
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k
 POSTHOOK: Output: default@t1
 POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ]
-PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC
+POSTHOOK: Lineage: t1.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
+PREHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t2
-POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0)) STORED AS ORC
+POSTHOOK: query: CREATE TABLE t2(`dec` decimal(24,0), value_dec decimal(24,0)) STORED AS ORC
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t2
-PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k
+PREHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k
 PREHOOK: Output: default@t2
-POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k
+POSTHOOK: query: INSERT INTO TABLE t2 select `dec`, cast(d as decimal(24,0)) from over1k
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k
 POSTHOOK: Output: default@t2
 POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(20,2), comment:null), ]
+POSTHOOK: Lineage: t2.value_dec EXPRESSION [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ]
 PREHOOK: query: explain vectorization detail
 select t1.`dec`, t2.`dec` from t1 join t2 on (t1.`dec`=t2.`dec`)
 PREHOOK: type: QUERY
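The join keys in the plans below appear as decimal(26,2) even though neither column has that type; a worked sketch of Hive's decimal widening (the query is the one from the test above):

    -- t1.`dec` is decimal(22,2) (20 integer digits, scale 2); t2.`dec` is
    -- decimal(24,0) (24 integer digits, scale 0). The common key type keeps
    -- the larger of each part: 24 integer digits + scale 2 = decimal(26,2).
    EXPLAIN VECTORIZATION DETAIL
    SELECT t1.`dec`, t2.`dec` FROM t1 JOIN t2 ON (t1.`dec` = t2.`dec`);
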
@@ -96,17 +98,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                      vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: dec (type: decimal(24,0))
                       outputColumnNames: _col0
@@ -114,7 +116,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         Spark Hash Table Sink Vectorization:
                             className: VectorSparkHashTableSinkOperator
@@ -133,9 +135,9 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 1
+                    dataColumnCount: 2
                     includeColumns: [0]
-                    dataColumns: dec:decimal(24,0)
+                    dataColumns: dec:decimal(24,0), value_dec:decimal(24,0)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Local Work:
@@ -149,17 +151,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                      vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: dec (type: decimal(22,2))
                       outputColumnNames: _col0
@@ -167,7 +169,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -175,6 +177,8 @@ STAGE PLANS:
                           0 _col0 (type: decimal(26,2))
                           1 _col0 (type: decimal(26,2))
                         Map Join Vectorization:
+                            bigTableKeyExpressions: col 0:decimal(26,2)
+                            bigTableValueExpressions: col 0:decimal(26,2)
                             className: VectorMapJoinOperator
                             native: false
                             nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
@@ -183,13 +187,13 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1
                         input vertices:
                           1 Map 2
-                        Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
                              className: VectorFileSinkOperator
                              native: false
-                          Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -205,9 +209,9 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 1
+                    dataColumnCount: 2
                     includeColumns: [0]
-                    dataColumns: dec:decimal(22,2)
+                    dataColumns: dec:decimal(22,2), value_dec:decimal(22,2)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Local Work:
@@ -335,6 +339,271 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 9.00 9
 9.00 9
 9.00 9
+PREHOOK: query: explain vectorization detail
+select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:value_dec:decimal(24,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(24,0)), value_dec (type: decimal(24,0))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
+                        keys:
+                          0 _col0 (type: decimal(26,2))
+                          1 _col0 (type: decimal(26,2))
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(24,0), value_dec:decimal(24,0)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:value_dec:decimal(22,2), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(22,2)), value_dec (type: decimal(22,2))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 234976 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: decimal(26,2))
+                          1 _col0 (type: decimal(26,2))
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: col 0:decimal(26,2)
+                            bigTableValueExpressions: col 0:decimal(26,2), col 1:decimal(22,2)
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Optimized Table and Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 1153 Data size: 258473 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(22,2), value_dec:decimal(22,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(24,0)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.`dec`, t1.value_dec, t2.`dec`, t2.value_dec from t1 join t2 on (t1.`dec`=t2.`dec`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+14.00 33.66 14 10
+14.00 33.66 14 22
+14.00 33.66 14 34
+14.00 33.66 14 39
+14.00 33.66 14 42
+14.00 33.66 14 45
+14.00 33.66 14 46
+14.00 33.66 14 49
+14.00 33.66 14 5
+17.00 14.26 17 1
+17.00 14.26 17 14
+17.00 14.26 17 16
+17.00 14.26 17 19
+17.00 14.26 17 2
+17.00 14.26 17 22
+17.00 14.26 17 29
+17.00 14.26 17 3
+17.00 14.26 17 4
+17.00 14.26 17 44
+45.00 23.55 45 1
+45.00 23.55 45 2
+45.00 23.55 45 22
+45.00 23.55 45 24
+45.00 23.55 45 42
+6.00 29.78 6 16
+6.00 29.78 6 28
+6.00 29.78 6 30
+6.00 29.78 6 34
+6.00 29.78 6 36
+6.00 29.78 6 44
+62.00 21.02 62 15
+62.00 21.02 62 15
+62.00 21.02 62 21
+62.00 21.02 62 21
+62.00 21.02 62 22
+62.00 21.02 62 25
+62.00 21.02 62 29
+62.00 21.02 62 3
+62.00 21.02 62 34
+62.00 21.02 62 47
+62.00 21.02 62 47
+62.00 21.02 62 49
+64.00 37.76 64 0
+64.00 37.76 64 10
+64.00 37.76 64 10
+64.00 37.76 64 13
+64.00 37.76 64 23
+64.00 37.76 64 25
+64.00 37.76 64 26
+64.00 37.76 64 27
+64.00 37.76 64 27
+64.00 37.76 64 30
+64.00 37.76 64 32
+64.00 37.76 64 34
+64.00 37.76 64 35
+64.00 37.76 64 38
+64.00 37.76 64 40
+64.00 37.76 64 43
+64.00 37.76 64 5
+64.00 37.76 64 50
+70.00 24.59 70 2
+70.00 24.59 70 25
+70.00 24.59 70 27
+70.00 24.59 70 28
+70.00 24.59 70 3
+70.00 24.59 70 32
+70.00 24.59 70 44
+79.00 15.12 79 1
+79.00 15.12 79 15
+79.00 15.12 79 25
+79.00 15.12 79 30
+79.00 15.12 79 35
+79.00 15.12 79 35
+89.00 15.09 89 1
+89.00 15.09 89 15
+89.00 15.09 89 23
+89.00 15.09 89 27
+89.00 15.09 89 28
+89.00 15.09 89 29
+89.00 15.09 89 30
+89.00 15.09 89 32
+89.00 15.09 89 39
+89.00 15.09 89 40
+89.00 15.09 89 45
+89.00 15.09 89 7
+9.00 48.96 9 12
+9.00 48.96 9 15
+9.00 48.96 9 2
+9.00 48.96 9 2
+9.00 48.96 9 2
+9.00 48.96 9 20
+9.00 48.96 9 20
+9.00 48.96 9 21
+9.00 48.96 9 21
+9.00 48.96 9 26
+9.00 48.96 9 27
+9.00 48.96 9 34
+9.00 48.96 9 38
+9.00 48.96 9 41
+9.00 48.96 9 42
+9.00 48.96 9 45
+9.00 48.96 9 48
+9.00 48.96 9 49
+9.00 48.96 9 5
+9.00 48.96 9 7
+9.00 48.96 9 7
 PREHOOK: query: CREATE TABLE over1k_small(t tinyint,
            si smallint,
            i int,
@@ -344,7 +613,7 @@ PREHOOK: query: CREATE TABLE over1k_small(t tinyint,
            bo boolean,
            s string,
            ts timestamp,
-           `dec` decimal(4,2),
+           `dec` decimal(14,2),
            bin binary)
 ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
 STORED AS TEXTFILE
@@ -360,7 +629,7 @@ POSTHOOK: query: CREATE TABLE over1k_small(t tinyint,
            bo boolean,
            s string,
            ts timestamp,
-           `dec` decimal(4,2),
+           `dec` decimal(14,2),
            bin binary)
 ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
 STORED AS TEXTFILE
@@ -375,40 +644,42 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@over1k_small
-PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC
+PREHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t1_small
-POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(4,2)) STORED AS ORC
+POSTHOOK: query: CREATE TABLE t1_small(`dec` decimal(14,2), value_dec decimal(14,2)) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t1_small
-PREHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small
+PREHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_small
-PREHOOK: Output: default@t1
-POSTHOOK: query: INSERT INTO TABLE t1 select `dec` from over1k_small
+PREHOOK: Output: default@t1_small
+POSTHOOK: query: INSERT INTO TABLE t1_small select `dec`, cast(d as decimal(14,2)) from over1k_small
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_small
-POSTHOOK: Output: default@t1
-POSTHOOK: Lineage: t1.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC
+POSTHOOK: Output: default@t1_small
+POSTHOOK: Lineage: t1_small.dec SIMPLE [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ]
+POSTHOOK: Lineage: t1_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ]
+PREHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@t2_small
-POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(4,0)) STORED AS ORC
+POSTHOOK: query: CREATE TABLE t2_small(`dec` decimal(14,0), value_dec decimal(14,0)) STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t2_small
-PREHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small
+PREHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_small
-PREHOOK: Output: default@t2
-POSTHOOK: query: INSERT INTO TABLE t2 select `dec` from over1k_small
+PREHOOK: Output: default@t2_small
+POSTHOOK: query: INSERT INTO TABLE t2_small select `dec`, cast(d as decimal(14,0)) from over1k_small
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_small
-POSTHOOK: Output: default@t2
-POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
+POSTHOOK: Output: default@t2_small
+POSTHOOK: Lineage: t2_small.dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:dec, type:decimal(14,2), comment:null), ]
+POSTHOOK: Lineage: t2_small.value_dec EXPRESSION [(over1k_small)over1k_small.FieldSchema(name:d, type:double, comment:null), ]
 PREHOOK: query: explain vectorization detail
 select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
 PREHOOK: type: QUERY
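The plans that follow read TEXTFILE tables, so vectorization is enabled through the vectorized row deserializer rather than a vectorized input format, and decimals whose precision fits in 18 digits (a signed 64-bit long) qualify for the DECIMAL_64 column representation. A sketch of the settings involved (assumption: the .q file sets these; only hive.vectorized.use.vector.serde.deserialize is actually visible in the output below):

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.use.vector.serde.deserialize=true;
    -- decimal(14,0) and decimal(14,2) fit in 18 digits, so the row batches
    -- below carry them as decimal(14,x)/DECIMAL_64 columns (scaled longs).
    EXPLAIN VECTORIZATION DETAIL
    SELECT t1_small.`dec`, t2_small.`dec`
    FROM t1_small JOIN t2_small ON (t1_small.`dec` = t2_small.`dec`);
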
@@ -433,46 +704,367 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2_small
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:dec:decimal(4,0), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                      vectorizationSchemaColumns: [0:dec:decimal(14,0)/DECIMAL_64, 1:value_dec:decimal(14,0)/DECIMAL_64, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,0))
+                        predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,0)/DECIMAL_64) -> 3:decimal(14,0))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: dec (type: decimal(4,0))
+                      expressions: dec (type: decimal(14,0))
                       outputColumnNames: _col0
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         Spark Hash Table Sink Vectorization:
                             className: VectorSparkHashTableSinkOperator
                             native: true
                         keys:
-                          0 _col0 (type: decimal(6,2))
-                          1 _col0 (type: decimal(6,2))
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
-                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: dec:decimal(14,0)/DECIMAL_64, value_dec:decimal(14,0)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(14,0)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1_small
+                  Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,2))
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2)
+                            bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2)
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Optimized Table and Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1_small
+PREHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1_small
+POSTHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+89.00 89
+PREHOOK: query: explain vectorization detail
+select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: t2_small
+                  Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,0)/DECIMAL_64, 1:value_dec:decimal(14,0)/DECIMAL_64, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,0)/DECIMAL_64) -> 3:decimal(14,0))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(14,0)/DECIMAL_64, value_dec:decimal(14,0)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(14,0)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1_small
+                  Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,2)/DECIMAL_64, 1:value_dec:decimal(14,2)/DECIMAL_64, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 3:decimal(14,2))(children: ConvertDecimal64ToDecimal(col 0:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2)
+                            bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 3:decimal(14,2)
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Optimized Table and Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(14,2), decimal(16,2), decimal(16,2), decimal(14,0)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1_small
+PREHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1_small
+POSTHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+89.00 15.09 89 15
+PREHOOK: query: explain vectorization detail
+select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select t1_small.`dec`, t2_small.`dec` from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: t2_small
+                  Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,0))
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []]
                 featureSupportInUse: []
-                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 1
+                    dataColumnCount: 2
                     includeColumns: [0]
-                    dataColumns: dec:decimal(4,0)
+                    dataColumns: dec:decimal(14,0), value_dec:decimal(14,0)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Local Work:
@@ -486,32 +1078,34 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1_small
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:dec:decimal(4,2), 1:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                      vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(4,2))
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: dec (type: decimal(4,2))
+                      expressions: dec (type: decimal(14,2))
                       outputColumnNames: _col0
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: decimal(6,2))
-                          1 _col0 (type: decimal(6,2))
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
                         Map Join Vectorization:
+                            bigTableKeyExpressions: col 0:decimal(16,2)
+                            bigTableValueExpressions: col 0:decimal(16,2)
                             className: VectorMapJoinOperator
                             native: false
                             nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
@@ -520,13 +1114,13 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1
                         input vertices:
                           1 Map 2
-                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                        Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
                               className: VectorFileSinkOperator
                               native: false
-                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                          Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -534,17 +1128,18 @@ STAGE PLANS:
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
-                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []]
                 featureSupportInUse: []
-                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 1
+                    dataColumnCount: 2
                     includeColumns: [0]
-                    dataColumns: dec:decimal(4,2)
+                    dataColumns: dec:decimal(14,2), value_dec:decimal(14,2)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Local Work:
@@ -566,3 +1161,166 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_small
 POSTHOOK: Input: default@t2_small
 #### A masked pattern was here ####
+89.00 89
+PREHOOK: query: explain vectorization detail
+select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: t2_small
+                  Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,0), 1:value_dec:decimal(14,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,0))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,0)), value_dec (type: decimal(14,0))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 7044 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(14,0), value_dec:decimal(14,0)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1_small
+                  Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:dec:decimal(14,2), 1:value_dec:decimal(14,2), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:decimal(14,2))
+                    predicate: dec is not null (type: boolean)
+                    Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: dec (type: decimal(14,2)), value_dec (type: decimal(14,2))
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1049 Data size: 11234 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: decimal(16,2))
+                          1 _col0 (type: decimal(16,2))
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: col 0:decimal(16,2)
+                            bigTableValueExpressions: col 0:decimal(16,2), col 1:decimal(14,2)
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true
+                            nativeConditionsNotMet: Optimized Table and Supports Key Types IS false
+                            nativeNotSupportedKeyTypes: DECIMAL
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 1153 Data size: 12357 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [[] is disabled because it is not in hive.vectorized.input.format.supports.enabled []]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: dec:decimal(14,2), value_dec:decimal(14,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [decimal(14,0)]
+        Local Work:
+          Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1_small
+PREHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select t1_small.`dec`, t1_small.value_dec, t2_small.`dec`, t2_small.value_dec from t1_small join t2_small on (t1_small.`dec`=t2_small.`dec`)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1_small
+POSTHOOK: Input: default@t2_small
+#### A masked pattern was here ####
+89.00 15.09 89 15
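Note how the DECIMAL_64 plans above pay for the unsupported DECIMAL map-join key: each ConvertDecimal64ToDecimal wraps a long-encoded column into a full HiveDecimal scratch column (see scratchColumnTypeNames) before VectorMapJoinOperator runs, while the last two plans fall back to plain decimal batches because the text format's DECIMAL_64 support was removed. A hedged sketch of the knob involved (assumption: this setting drives the vectorizationSupportRemovedReasons message; default behavior may differ by release):

    -- Hypothetical repro of the fallback: an empty supports list disables
    -- DECIMAL_64 for the input format, as the removed-reasons line reports.
    SET hive.vectorized.input.format.supports.enabled=;
    EXPLAIN VECTORIZATION DETAIL
    SELECT t1_small.`dec`, t2_small.`dec`
    FROM t1_small JOIN t2_small ON (t1_small.`dec` = t2_small.`dec`);
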
http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
index bafcc70..c65ef71 100644
--- a/ql/src/test/results/clientpositive/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -121,6 +121,7 @@ STAGE PLANS:
                     0 
                     1 
                   Map Join Vectorization:
+                      bigTableValueExpressions: col 0:int, col 1:smallint
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -263,6 +264,7 @@ STAGE PLANS:
                     0 
                     1 
                   Map Join Vectorization:
+                      bigTableValueExpressions: col 0:int, col 1:smallint
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
index 533b729..3f9e90b 100644
--- a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
@@ -169,6 +169,8 @@ STAGE PLANS:
                     0 _col10 (type: binary)
                     1 _col10 (type: binary)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 10:binary
+                      bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:boolean, col 7:string, col 8:timestamp, col 9:decimal(4,2), col 10:binary
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -550,6 +552,8 @@ STAGE PLANS:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 2:int
+                      bigTableValueExpressions: col 10:binary
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
index 9238a8d..c98bb44 100644
--- a/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/vector_char_mapjoin1.q.out
@@ -191,6 +191,8 @@ STAGE PLANS:
                     0 _col1 (type: char(10))
                     1 _col1 (type: char(10))
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 1:char(10)
+                      bigTableValueExpressions: col 0:int, col 1:char(10)
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -322,6 +324,8 @@ STAGE PLANS:
                     0 _col1 (type: char(20))
                     1 _col1 (type: char(20))
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 1:char(20)
+                      bigTableValueExpressions: col 0:int, col 1:char(20)
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -455,6 +459,8 @@ STAGE PLANS:
                     0 UDFToString(_col1) (type: string)
                     1 _col1 (type: string)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: CastStringGroupToString(col 1:char(10)) -> 3:string
+                      bigTableValueExpressions: col 0:int, col 1:char(10)
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
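The last vector_char_mapjoin1 hunk shows a key-side cast: joining a char(10) key against a string key wraps the char column in UDFToString, which vectorizes as CastStringGroupToString into a scratch string column (3:string above). A sketch of the query shape (assumption: table and column names here are illustrative, not the ones in the .q file):

    -- The char side is widened to string so both key columns hash identically.
    SELECT a.c1, b.c1
    FROM char_tbl a JOIN string_tbl b ON cast(a.c1 AS string) = b.c1;
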
http://git-wip-us.apache.org/repos/asf/hive/blob/65cd866e/ql/src/test/results/clientpositive/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
index f8501f6..23ae87e 100644
--- a/ql/src/test/results/clientpositive/vector_complex_join.q.out
+++ b/ql/src/test/results/clientpositive/vector_complex_join.q.out
@@ -87,6 +87,8 @@ STAGE PLANS:
                     0 _col2 (type: int)
                     1 _col0 (type: int)
                   Map Join Vectorization:
+                      bigTableKeyExpressions: col 2:int
+                      bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:string, col 7:string, col 8:timestamp, col 9:timestamp, col 10:boolean, col 11:boolean
                       className: VectorMapJoinOperator
                       native: false
                       nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
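Here the big table carries every projected column through the join, so bigTableValueExpressions lists all twelve columns (col 0:tinyint through col 11:boolean). A sketch of the query shape that produces such a plan (assumption: a wide ORC table like alltypesorc joined to a small complex-typed table on an int key; names are illustrative):

    -- Every selected big-table column becomes a value expression that the
    -- vectorized map join must forward alongside the matched small-table row.
    SELECT *
    FROM alltypesorc a JOIN test t ON a.cint = t.a;
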