This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new f74b93e461d HIVE-27254 : Backport of HIVE-22136 and HIVE-22227 : Turn on tez.bucket.pruning (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
f74b93e461d is described below

commit f74b93e461da9db50c462dedf7323feaea7f43a7
Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com>
AuthorDate: Fri Jul 7 10:27:05 2023 +0530

    HIVE-27254 : Backport of HIVE-22136 and HIVE-22227 : Turn on tez.bucket.pruning (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Sankar Hariappan <sank...@apache.org>
    Closes (#4468)
---
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java          | 2 +-
 .../org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java   | 7 +++++++
 ql/src/test/queries/clientpositive/mergejoin.q                     | 2 ++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c35a0a0fba1..96f44fae490 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3753,7 +3753,7 @@ public class HiveConf extends Configuration {
         "When auto reducer parallelism is enabled this factor will be used to 
put a lower limit to the number\n" +
         "of reducers that tez specifies."),
     TEZ_OPTIMIZE_BUCKET_PRUNING(
-        "hive.tez.bucket.pruning", false,
+        "hive.tez.bucket.pruning", true,
          "When pruning is enabled, filters on bucket columns will be processed 
by \n" +
          "filtering the splits against a bitset of included buckets. This 
needs predicates \n"+
             "produced by hive.optimize.ppd and hive.optimize.index.filters."),
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 247f9b0d304..cbcbc5f8b8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -749,6 +749,12 @@ public class SharedWorkOptimizer extends Transform {
     if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
       return false;
     }
+
+    if(!Objects.equals(tsOp1.getConf().getIncludedBuckets(),
+            tsOp2.getConf().getIncludedBuckets())) {
+      return false;
+    }
+
     // If is a DPP, check if actually it refers to same target, column, etc.
     // Further, the DPP value needs to be generated from same subtree
     List<Operator<?>> dppsOp1 = new 
ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
@@ -1155,6 +1161,7 @@ public class SharedWorkOptimizer extends Transform {
           && pctx.getPrunedPartitions(tsOp1).getPartitions().equals(
               pctx.getPrunedPartitions(tsOp2).getPartitions())
           && op1Conf.getRowLimit() == op2Conf.getRowLimit()
+          && Objects.equals(op1Conf.getIncludedBuckets(), 
op2Conf.getIncludedBuckets())
           && Objects.equals(op1Conf.getOpProps(), op2Conf.getOpProps())) {
         return true;
       } else {
diff --git a/ql/src/test/queries/clientpositive/mergejoin.q 
b/ql/src/test/queries/clientpositive/mergejoin.q
index 8636f1320ea..0da7eee61c0 100644
--- a/ql/src/test/queries/clientpositive/mergejoin.q
+++ b/ql/src/test/queries/clientpositive/mergejoin.q
@@ -17,6 +17,8 @@ set hive.vectorized.execution.enabled=true;
 set hive.tez.min.bloom.filter.entries=1;
 set hive.tez.bigtable.minsize.semijoin.reduction=1;
 
+set hive.tez.bucket.pruning=true;
+
 -- SORT_QUERY_RESULTS
 
 explain vectorization detail

Reply via email to