[hive] branch master updated: HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich)

kgyrtkirk Tue, 11 Jun 2019 08:16:33 -0700

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 4f25b25  HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich)
4f25b25 is described below

commit 4f25b25b2ecc042d7d8cb4e92626156dad39f5b9
Author: Naresh P R <prnaresh.nar...@gmail.com>
AuthorDate: Tue Jun 11 17:09:59 2019 +0200

    HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich)
    
    Signed-off-by: Zoltan Haindrich <k...@rxd.hu>
---
 .../hive/ql/exec/vector/VectorizedBatchUtil.java   |   2 +
 .../queries/clientpositive/vectorized_mapjoin3.q   |  22 ++
 .../clientpositive/llap/vectorized_mapjoin3.q.out  | 298 +++++++++++++++++++++
 .../clientpositive/vectorized_mapjoin3.q.out       | 262 ++++++++++++++++++
 4 files changed, 584 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 617cbf1..ec24c10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -619,6 +619,8 @@ public class VectorizedBatchUtil {
         copy[i] = makeLikeColumnVector(src.fields[i]);
       }
       return new UnionColumnVector(src.tags.length, copy);
+    } else if (source instanceof VoidColumnVector) {
+      return new VoidColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
     } else
       throw new HiveException("Column vector class " +
           source.getClass().getName() +
diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
index c216499..989f4cf 100644
--- a/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
+++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin3.q
@@ -7,6 +7,7 @@ set hive.auto.convert.join=true;
 
 create temporary table table_19 (decimal0801_col decimal(8,1), int_col_1 int) stored as orc;
 create temporary table table_6 (int_col_0 int) stored as orc;
+CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, type STRING) stored as orc;
 
 insert into table_19 values 
 (418.9,        1000),
@@ -17,6 +18,9 @@ insert into table_19 values
 
 insert into table_6 values (1000);
 
+INSERT INTO table_27 values
+('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'),
+('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST');
 
 SELECT t1.decimal0801_col
 FROM table_19 t1
@@ -32,6 +36,14 @@ SELECT t1.decimal0801_col
 FROM table_19 t1
 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null;
 
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'));
 
 set hive.explain.user=false;
 
@@ -51,3 +63,13 @@ EXPLAIN VECTORIZATION DETAIL
 SELECT t1.decimal0801_col
 FROM table_19 t1
 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) = (t1.int_col_1) AND decimal0801_col is not null;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'));
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index e9f7344..193e3d1 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 
int) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@table_6
+PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, 
type STRING) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_27
+POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, 
type STRING) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_27
 PREHOOK: query: insert into table_19 values 
 (418.9,        1000),
 (418.9,        -759),
@@ -43,6 +51,21 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@table_6
 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT []
+PREHOOK: query: INSERT INTO table_27 values
+('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'),
+('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_27
+POSTHOOK: query: INSERT INTO table_27 values
+('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'),
+('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_27
+POSTHOOK: Lineage: table_27.f_id SCRIPT []
+POSTHOOK: Lineage: table_27.t_id SCRIPT []
+POSTHOOK: Lineage: table_27.type SCRIPT []
 PREHOOK: query: SELECT t1.decimal0801_col
 FROM table_19 t1
 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN 
(t1.int_col_1) AND decimal0801_col is not null
@@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19
 POSTHOOK: Input: default@table_6
 #### A masked pattern was here ####
 418.9
+Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' 
is a cross product
+PREHOOK: query: SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@table_27
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@table_27
+#### A masked pattern was here ####
+J      NULL
+J      NULL
+I      NULL
+I      NULL
+H      NULL
+H      NULL
+G      NULL
+G      NULL
+F      NULL
+F      NULL
+E      NULL
+E      NULL
+D      NULL
+D      NULL
+C      NULL
+C      NULL
+B      NULL
+B      NULL
+A      NULL
+A      NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT t1.decimal0801_col
 FROM table_19 t1
@@ -721,3 +789,233 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' 
is a cross product
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@table_27
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@table_27
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 3 <- Union 4 (CONTAINS)
+        Map 5 <- Union 4 (CONTAINS)
+        Reducer 2 <- Map 1 (XPROD_EDGE), Map 6 (BROADCAST_EDGE), Union 4 
(XPROD_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (t_id is not null and f_id is not null) (type: 
boolean)
+                  Statistics: Num rows: 10 Data size: 3680 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:t_id:string, 
1:f_id:string, 2:type:string, 
3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
+                    predicate: (f_id is not null and t_id is not null) (type: 
boolean)
+                    Statistics: Num rows: 10 Data size: 3680 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: t_id (type: string), f_id (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 10 Data size: 3680 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkEmptyKeyOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 0:string, 1:string
+                        Statistics: Num rows: 10 Data size: 3680 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string), _col1 (type: 
string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1]
+                    dataColumns: t_id:string, f_id:string, type:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 90 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      Statistics: Num rows: 2 Data size: 180 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 2 Data size: 180 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                enabledConditionsNotMet: Could not enable vectorization due to 
partition column names size 1 is greater than the number of table column names 
size 0 IS false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.NullRowsInputFormat
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 90 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      Statistics: Num rows: 2 Data size: 180 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 2 Data size: 180 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                enabledConditionsNotMet: Could not enable vectorization due to 
partition column names size 1 is greater than the number of table column names 
size 0 IS false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.NullRowsInputFormat
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  filterExpr: ((type = 'TEST') and t_id is not null and f_id 
is not null) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 5520 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:t_id:string, 
1:f_id:string, 2:type:string, 
3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterStringGroupColEqualStringScalar(col 2:string, val TEST), 
SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
+                    predicate: ((type = 'TEST') and f_id is not null and t_id 
is not null) (type: boolean)
+                    Statistics: Num rows: 5 Data size: 2760 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: t_id (type: string), f_id (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 5 Data size: 2760 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumns: 0:string, 1:string
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 5 Data size: 2760 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: t_id:string, f_id:string, type:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 9180 Basic stats: COMPLETE 
Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string), _col1 (type: string)
+                    1 _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col3
+                  input vertices:
+                    1 Map 6
+                  Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col3 (type: string), null (type: void)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
+        Union 4 
+            Vertex: Union 4
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
diff --git a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
index 0c94b8e..9cf9e2d 100644
--- a/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
@@ -14,6 +14,14 @@ POSTHOOK: query: create temporary table table_6 (int_col_0 
int) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@table_6
+PREHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, 
type STRING) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_27
+POSTHOOK: query: CREATE TEMPORARY TABLE table_27 (t_id STRING, f_id STRING, 
type STRING) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_27
 PREHOOK: query: insert into table_19 values 
 (418.9,        1000),
 (418.9,        -759),
@@ -43,6 +51,21 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: Output: default@table_6
 POSTHOOK: Lineage: table_6.int_col_0 SCRIPT []
+PREHOOK: query: INSERT INTO table_27 values
+('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'),
+('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_27
+POSTHOOK: query: INSERT INTO table_27 values
+('A','F','TEST'),('B','F','TEST'),('C','F','TEST'),('D','F','TEST'),('E','F','TEST'),
+('F','F','TEST'),('G','F','TEST'),('H','F','TEST'),('I','F','TEST'),('J','F','TEST')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_27
+POSTHOOK: Lineage: table_27.f_id SCRIPT []
+POSTHOOK: Lineage: table_27.t_id SCRIPT []
+POSTHOOK: Lineage: table_27.type SCRIPT []
 PREHOOK: query: SELECT t1.decimal0801_col
 FROM table_19 t1
 WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN 
(t1.int_col_1) AND decimal0801_col is not null
@@ -88,6 +111,51 @@ POSTHOOK: Input: default@table_19
 POSTHOOK: Input: default@table_6
 #### A masked pattern was here ####
 418.9
+Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+PREHOOK: query: SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@table_27
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@table_27
+#### A masked pattern was here ####
+A      NULL
+A      NULL
+B      NULL
+B      NULL
+C      NULL
+C      NULL
+D      NULL
+D      NULL
+E      NULL
+E      NULL
+F      NULL
+F      NULL
+G      NULL
+G      NULL
+H      NULL
+H      NULL
+I      NULL
+I      NULL
+J      NULL
+J      NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT t1.decimal0801_col
 FROM table_19 t1
@@ -634,3 +702,197 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@table_27
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT
+  t1.t_id,
+  null
+  FROM table_27 t1
+  JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id)
+  JOIN
+  (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' 
r_type) t3
+  ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2'))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@table_27
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-6 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-6
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$hdt$_1-subquery1:_dummy_table 
+          Fetch Operator
+            limit: -1
+        $hdt$_1:$hdt$_1-subquery2:_dummy_table 
+          Fetch Operator
+            limit: -1
+        $hdt$_2:t1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$hdt$_1-subquery1:_dummy_table 
+          TableScan
+            alias: _dummy_table
+            Row Limit Per Split: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  HashTable Sink Operator
+                    keys:
+                      0 
+                      1 
+        $hdt$_1:$hdt$_1-subquery2:_dummy_table 
+          TableScan
+            alias: _dummy_table
+            Row Limit Per Split: 1
+            Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column 
stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  HashTable Sink Operator
+                    keys:
+                      0 
+                      1 
+        $hdt$_2:t1 
+          TableScan
+            alias: t1
+            filterExpr: ((type = 'TEST') and t_id is not null and f_id is not 
null) (type: boolean)
+            Statistics: Num rows: 10 Data size: 5520 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: ((type = 'TEST') and f_id is not null and t_id is not 
null) (type: boolean)
+              Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: t_id (type: string), f_id (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE 
Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: string), _col1 (type: string)
+                    1 _col0 (type: string), _col1 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t2
+            filterExpr: (t_id is not null and f_id is not null) (type: boolean)
+            Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE 
Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 
2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
+              predicate: (f_id is not null and t_id is not null) (type: 
boolean)
+              Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: t_id (type: string), f_id (type: string)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE 
Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 
+                    1 
+                  Map Join Vectorization:
+                      bigTableValueExpressions: col 0:string, col 1:string
+                      className: VectorMapJoinOperator
+                      native: false
+                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 20 Data size: 9180 Basic stats: 
COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col0 (type: string), _col1 (type: string)
+                      1 _col0 (type: string), _col1 (type: string)
+                    Map Join Vectorization:
+                        bigTableKeyExpressions: col 0:string, col 1:string
+                        className: VectorMapJoinOperator
+                        native: false
+                        nativeConditionsMet: hive.mapjoin.optimized.hashtable 
IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
+                    outputColumnNames: _col3
+                    Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col3 (type: string), null (type: void)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                          selectExpressions: ConstantVectorExpression(val 
null) -> 1:void
+                      Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 22 Data size: 10098 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 3
+              includeColumns: [0, 1]
+              dataColumns: t_id:string, f_id:string, type:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

[hive] branch master updated: HIVE-21837: MapJoin is throwing exception when selected column is having completely null values (Naresh P R via Zoltan Haindrich)

Reply via email to