This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 4f25b25 HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich) 4f25b25 is described below commit 4f25b25b2ecc042d7d8cb4e92626156dad39f5b9 Author: Naresh P R <prnaresh.nar...@gmail.com> AuthorDate: Tue Jun 11 17:09:59 2019 +0200 HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> --- .../hive/ql/exec/vector/VectorizedBatchUtil.java | 2 + .../queries/clientpositive/vectorized_mapjoin3.q | 22 ++ .../clientpositive/llap/vectorized_mapjoin3.q.out | 298 +++++++++++++++++++++ .../clientpositive/vectorized_mapjoin3.q.out | 262 ++++++++++++++++++ 4 files changed, 584 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 617cbf1..ec24c10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -619,6 +619,8 @@ public class VectorizedBatchUtil { copy[i] = makeLikeColumnVector(src.fields[i]); } return new UnionColumnVector(src.tags.length, copy); + } else if (source instanceof VoidColumnVector) { + return new VoidColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } else throw new HiveException("Column vector class " + source.getClass().getName() + diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q index c216499..989f4cf 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q @@ -7,6 +7,7 @@ set hive.auto.convert.join=true; create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc; 
create temporary table table_6 (int_col_0 int) stored as orc; +CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc; insert into table_19 values (418.9, 1000), @@ -17,6 +18,9 @@ insert into table_19 values insert into table_6 values (1000); +INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST'); SELECT t1.decimal0801_col FROM table_19 t1 @@ -32,6 +36,14 @@ SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null; +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')); set hive.explain.user=false; @@ -51,3 +63,13 @@ EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null; + +EXPLAIN VECTORIZATION DETAIL +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')); diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index e9f7344..193e3d1 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table_6 
+PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_27 +POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_27 PREHOOK: query: insert into table_19 values (418.9, 1000), (418.9, -759), @@ -43,6 +51,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@table_6 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +PREHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_27 +POSTHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_27 +POSTHOOK: Lineage: table_27.f_id SCRIPT [] +POSTHOOK: Lineage: table_27.t_id SCRIPT [] +POSTHOOK: Lineage: table_27.type SCRIPT [] PREHOOK: query: SELECT t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null @@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 +Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION 
ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +J NULL +J NULL +I NULL +I NULL +H NULL +H NULL +G NULL +G NULL +F NULL +F NULL +E NULL +E NULL +D NULL +D NULL +C NULL +C NULL +B NULL +B NULL +A NULL +A NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 @@ -721,3 +789,233 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 3 <- Union 4 (CONTAINS) + Map 5 <- Union 4 (CONTAINS) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 6 (BROADCAST_EDGE), Union 4 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (t_id is not null and f_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string)) + predicate: (f_id is not null and t_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_id (type: string), f_id (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:string + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: t_id:string, f_id:string, type:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + Map 5 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: 
COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat + Map 6 + Map Operator Tree: + TableScan + alias: t1 + filterExpr: ((type = 'TEST') and t_id is not null and f_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 5520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 2:string, val TEST), SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string)) + predicate: ((type = 'TEST') and f_id is not null and t_id is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_id (type: string), f_id (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: t_id:string, f_id:string, type:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 9180 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col3 + input vertices: + 1 Map 6 + Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), null (type: void) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index 0c94b8e..9cf9e2d 100644 --- a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table_6 +PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_27 +POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_27 PREHOOK: query: insert into table_19 values (418.9, 1000), (418.9, -759), @@ -43,6 +51,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@table_6 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT [] +PREHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_27 +POSTHOOK: query: INSERT INTO table_27 values +('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'), +('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_27 +POSTHOOK: Lineage: table_27.f_id SCRIPT [] +POSTHOOK: Lineage: table_27.t_id SCRIPT [] +POSTHOOK: Lineage: table_27.type SCRIPT [] PREHOOK: query: SELECT 
t1.decimal0801_col FROM table_19 t1 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null @@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 +Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +PREHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +A NULL +A NULL +B NULL +B NULL +C NULL +C NULL +D NULL +D NULL +E NULL +E NULL +F NULL +F NULL +G NULL +G NULL +H NULL +H NULL +I NULL +I NULL +J NULL +J NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 @@ -634,3 +702,197 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[25][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@table_27 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT + t1.t_id, + null + FROM table_27 t1 + JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) + JOIN + (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@table_27 +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:$hdt$_1-subquery1:_dummy_table + Fetch Operator + limit: -1 + $hdt$_1:$hdt$_1-subquery2:_dummy_table + Fetch Operator + limit: -1 + $hdt$_2:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:$hdt$_1-subquery1:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 
1 + $hdt$_1:$hdt$_1-subquery2:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + $hdt$_2:t1 + TableScan + alias: t1 + filterExpr: ((type = 'TEST') and t_id is not null and f_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 5520 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((type = 'TEST') and f_id is not null and t_id is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_id (type: string), f_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: t2 + filterExpr: (t_id is not null and f_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string)) + predicate: (f_id is not null and t_id is not null) (type: boolean) + Statistics: Num rows: 10 Data size: 3680 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: t_id (type: string), f_id (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:string, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 9180 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col3 + Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), null (type: void) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + selectExpressions: ConstantVectorExpression(val null) -> 1:void + 
Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 22 Data size: 10098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: t_id:string, f_id:string, type:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +