This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 2058c2e0dee HIVE-27573: Backport of HIVE-21799: NullPointerException 
in DynamicPartitionPruningOptimization, when join key is on aggregation column 
to branch-3
2058c2e0dee is described below

commit 2058c2e0dee430f0a8bc9f4a3cadd75d5e087091
Author: Shefali Singh <31477542+shefali...@users.noreply.github.com>
AuthorDate: Thu Sep 28 17:13:31 2023 +0530

    HIVE-27573: Backport of HIVE-21799: NullPointerException in 
DynamicPartitionPruningOptimization, when join key is on aggregation column to 
branch-3
    
    Signed-off-by: Sankar Hariappan <sank...@apache.org>
    Closes (#4556)
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../DynamicPartitionPruningOptimization.java       |  13 +-
 .../dynamic_semijoin_reduction_on_aggcol.q         |  17 +++
 .../dynamic_semijoin_reduction_on_aggcol.q.out     | 149 +++++++++++++++++++++
 4 files changed, 171 insertions(+), 9 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index a5bce33d74f..144a5a8ad48 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -518,6 +518,7 @@ minillaplocal.query.files=\
   dynamic_semijoin_reduction_2.q,\
   dynamic_semijoin_reduction_3.q,\
   dynamic_semijoin_reduction_4.q,\
+  dynamic_semijoin_reduction_on_aggcol.q,\
   dynamic_semijoin_reduction_sw.q,\
   dynpart_sort_opt_vectorization.q,\
   dynpart_sort_optimization.q,\
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index a1401aac72c..d84f10b4c38 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -576,16 +576,11 @@ public class DynamicPartitionPruningOptimization 
implements NodeProcessor {
     // Create the column expr map
     Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
     ExprNodeDesc exprNode = null;
-    if ( parentOfRS.getColumnExprMap() != null) {
-      exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone();
-    } else {
-      exprNode = new ExprNodeColumnDesc(columnInfo);
-    }
-
-    if (exprNode instanceof ExprNodeColumnDesc) {
-      ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode;
-      encd.setColumn(internalColName);
+    if (columnInfo == null) {
+      LOG.debug("No ColumnInfo found in {} for {}", 
parentOfRS.getOperatorId(), internalColName);
+      return false;
     }
+    exprNode = new ExprNodeColumnDesc(columnInfo);
     colExprMap.put(internalColName, exprNode);
 
     // Create the Select Operator
diff --git 
a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q 
b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q
new file mode 100644
index 00000000000..e7c8db3e778
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_on_aggcol.q
@@ -0,0 +1,17 @@
+--! qt:dataset:src
+set hive.explain.user=false;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+
+create table dynamic_semijoin_reduction_on_aggcol(id int, outcome string, 
eventid int) stored as orc;
+insert into dynamic_semijoin_reduction_on_aggcol select key, value, key from 
src;
+
+explain select a.id, b.outcome from (select id, max(eventid) as event_id_max 
from dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid;
+
+select a.id, b.outcome from (select id, max(eventid) as event_id_max from 
dynamic_semijoin_reduction_on_aggcol where id = 0 group by id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid;
diff --git 
a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out
 
b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out
new file mode 100644
index 00000000000..4d29456df26
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_on_aggcol.q.out
@@ -0,0 +1,149 @@
+PREHOOK: query: create table dynamic_semijoin_reduction_on_aggcol(id int, 
outcome string, eventid int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol
+POSTHOOK: query: create table dynamic_semijoin_reduction_on_aggcol(id int, 
outcome string, eventid int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol
+PREHOOK: query: insert into dynamic_semijoin_reduction_on_aggcol select key, 
value, key from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol
+POSTHOOK: query: insert into dynamic_semijoin_reduction_on_aggcol select key, 
value, key from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dynamic_semijoin_reduction_on_aggcol
+POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.eventid EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.id EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dynamic_semijoin_reduction_on_aggcol.outcome SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) 
as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by 
id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a.id, b.outcome from (select id, max(eventid) 
as event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by 
id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dynamic_semijoin_reduction_on_aggcol
+                  Statistics: Num rows: 500 Data size: 4000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (id = 0) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: eventid (type: int)
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: max(_col1)
+                        keys: 0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: outcome (type: string), eventid (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: 0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 95 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.id, b.outcome from (select id, max(eventid) as 
event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by 
id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol
+#### A masked pattern was here ####
+POSTHOOK: query: select a.id, b.outcome from (select id, max(eventid) as 
event_id_max from dynamic_semijoin_reduction_on_aggcol where id = 0 group by 
id) a 
+LEFT OUTER JOIN dynamic_semijoin_reduction_on_aggcol b 
+on a.event_id_max = b.eventid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dynamic_semijoin_reduction_on_aggcol
+#### A masked pattern was here ####
+0      val_0
+0      val_0
+0      val_0

Reply via email to