This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 1455f6201b0 HIVE-27943: NPE in VectorMapJoinCommonOperator.setUpHashTable when running query with join on date (Stamatis Zampetakis reviewed by Attila Turoczy, Krisztian Kasa)
1455f6201b0 is described below

commit 1455f6201b0f7b061361bc9acc23cb810ff02483
Author: Stamatis Zampetakis <zabe...@gmail.com>
AuthorDate: Thu Dec 7 16:20:17 2023 +0100

    HIVE-27943: NPE in VectorMapJoinCommonOperator.setUpHashTable when running query with join on date (Stamatis Zampetakis reviewed by Attila Turoczy, Krisztian Kasa)

    Close apache/hive#4929
---
 .../VectorMapJoinOptimizedCreateHashTable.java     |   1 +
 ql/src/test/queries/clientpositive/mapjoin_date.q  |   8 +
 .../results/clientpositive/llap/mapjoin_date.q.out | 186 +++++++++++++++++++++
 3 files changed, 195 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
index 4171f2038c2..dff1f51da60 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
@@ -55,6 +55,7 @@ public class VectorMapJoinOptimizedCreateHashTable {
     case SHORT:
     case INT:
     case LONG:
+    case DATE:
       switch (hashTableKind) {
       case HASH_MAP:
         hashTable = new VectorMapJoinOptimizedLongHashMap(
diff --git a/ql/src/test/queries/clientpositive/mapjoin_date.q b/ql/src/test/queries/clientpositive/mapjoin_date.q
new file mode 100644
index 00000000000..8126c017a90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin_date.q
@@ -0,0 +1,8 @@
+set hive.auto.convert.join=true;
+
+CREATE TABLE person (fname string, birthDate date);
+INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28');
+
+EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate;
+
+SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate;
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out
new file mode 100644
index 00000000000..c5dfc75a5f3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: CREATE TABLE person (fname string, birthDate date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@person
+POSTHOOK: query: CREATE TABLE person (fname string, birthDate date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@person
+PREHOOK: query: INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@person
+POSTHOOK: query: INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@person
+POSTHOOK: Lineage: person.birthdate SCRIPT []
+POSTHOOK: Lineage: person.fname SCRIPT []
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@person
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@person
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: p1
+                  filterExpr: birthdate is not null (type: boolean)
+                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0
+                  Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:fname:string, 1:birthdate:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1:date)
+                    predicate: birthdate is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: fname (type: string), birthdate (type: date)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col1 (type: date)
+                          1 _col1 (type: date)
+                        Map Join Vectorization:
+                            bigTableKeyColumns: 1:date
+                            bigTableRetainColumnNums: [0, 1]
+                            bigTableValueColumns: 0:string, 1:date
+                            className: VectorMapJoinInnerLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            nonOuterSmallTableKeyMapping: []
+                            projectedOutput: 0:string, 1:date, 4:string, 1:date
+                            smallTableValueMapping: 4:string
+                            hashTableImplementationType: OPTIMIZED
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          File Sink Vectorization:
+                              className: VectorFileSinkOperator
+                              native: false
+                          Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: fname:string, birthdate:date
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: p2
+                  filterExpr: birthdate is not null (type: boolean)
+                  Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:fname:string, 1:birthdate:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 1:date)
+                    predicate: birthdate is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: fname (type: string), birthdate (type: date)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: date)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: date)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 1:date
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 0:string
+                        Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: fname:string, birthdate:date
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@person
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@person
+#### A masked pattern was here ####
+Victor 2023-11-27 Victor 2023-11-27
+Alexandre 2023-11-28 Alexandre 2023-11-28