This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-3 by this push: new f74b93e461d HIVE-27254 : Backport of HIVE-22136 and HIVE-22227 : Turn on tez.bucket.pruning (Vineet Garg, reviewed by Jesus Camacho Rodriguez) f74b93e461d is described below commit f74b93e461da9db50c462dedf7323feaea7f43a7 Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Fri Jul 7 10:27:05 2023 +0530 HIVE-27254 : Backport of HIVE-22136 and HIVE-22227 : Turn on tez.bucket.pruning (Vineet Garg, reviewed by Jesus Camacho Rodriguez) Signed-off-by: Sankar Hariappan <sank...@apache.org> Closes (#4468) --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java | 7 +++++++ ql/src/test/queries/clientpositive/mergejoin.q | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index c35a0a0fba1..96f44fae490 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3753,7 +3753,7 @@ public class HiveConf extends Configuration { "When auto reducer parallelism is enabled this factor will be used to put a lower limit to the number\n" + "of reducers that tez specifies."), TEZ_OPTIMIZE_BUCKET_PRUNING( - "hive.tez.bucket.pruning", false, + "hive.tez.bucket.pruning", true, "When pruning is enabled, filters on bucket columns will be processed by \n" + "filtering the splits against a bitset of included buckets. 
This needs predicates \n"+ "produced by hive.optimize.ppd and hive.optimize.index.filters."), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java index 247f9b0d304..cbcbc5f8b8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java @@ -749,6 +749,12 @@ public class SharedWorkOptimizer extends Transform { if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) { return false; } + + if(!Objects.equals(tsOp1.getConf().getIncludedBuckets(), + tsOp2.getConf().getIncludedBuckets())) { + return false; + } + // If is a DPP, check if actually it refers to same target, column, etc. // Further, the DPP value needs to be generated from same subtree List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1)); @@ -1155,6 +1161,7 @@ public class SharedWorkOptimizer extends Transform { && pctx.getPrunedPartitions(tsOp1).getPartitions().equals( pctx.getPrunedPartitions(tsOp2).getPartitions()) && op1Conf.getRowLimit() == op2Conf.getRowLimit() + && Objects.equals(op1Conf.getIncludedBuckets(), op2Conf.getIncludedBuckets()) && Objects.equals(op1Conf.getOpProps(), op2Conf.getOpProps())) { return true; } else { diff --git a/ql/src/test/queries/clientpositive/mergejoin.q b/ql/src/test/queries/clientpositive/mergejoin.q index 8636f1320ea..0da7eee61c0 100644 --- a/ql/src/test/queries/clientpositive/mergejoin.q +++ b/ql/src/test/queries/clientpositive/mergejoin.q @@ -17,6 +17,8 @@ set hive.vectorized.execution.enabled=true; set hive.tez.min.bloom.filter.entries=1; set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.bucket.pruning=true; + -- SORT_QUERY_RESULTS explain vectorization detail