[hive] branch master updated: HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f29cb2245c9 HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich) f29cb2245c9 is described below commit f29cb2245c97102975ea0dd73783049eaa0947a0 Author: Stamatis Zampetakis AuthorDate: Tue May 17 15:20:06 2022 +0200 HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich) 1. Decouple the sort filter optimization from digest normalization by refactoring HiveFilterSortPredicates into a (DFS) visitor. We cannot use the planner or rules because they make use of the digest. Performing this optimization using a visitor also slightly simplifies the code: there is no need to have a registry, since we are not going to visit the same node twice. 2. Move the optimization after all post-join transformations to avoid having other optimizations cancel the benefit of the sort filter predicates. 
Closes #3299 --- .../calcite/rules/HiveFilterSortPredicates.java| 47 +++--- .../hadoop/hive/ql/parse/CalcitePlanner.java | 8 ++-- .../clientpositive/llap/external_jdbc_table2.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_ext_query1.q.out| 4 +- .../perf/tpcds30tb/tez/cbo_query1.q.out| 2 +- .../perf/tpcds30tb/tez/cbo_query11.q.out | 8 ++-- .../perf/tpcds30tb/tez/cbo_query31.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query33.q.out | 4 +- .../perf/tpcds30tb/tez/cbo_query34.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query38.q.out | 4 +- .../perf/tpcds30tb/tez/cbo_query4.q.out| 12 +++--- .../perf/tpcds30tb/tez/cbo_query54.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query56.q.out | 4 +- .../perf/tpcds30tb/tez/cbo_query6.q.out| 2 +- .../perf/tpcds30tb/tez/cbo_query60.q.out | 4 +- .../perf/tpcds30tb/tez/cbo_query65.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query73.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query78.q.out | 2 +- .../perf/tpcds30tb/tez/cbo_query81.q.out | 2 +- .../perf/tpcds30tb/tez/query11.q.out | 4 +- .../clientpositive/perf/tpcds30tb/tez/query4.q.out | 6 +-- 21 files changed, 52 insertions(+), 73 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java index 780481f2fd5..6ecf94b5f63 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java @@ -20,8 +20,7 @@ import java.util.Comparator; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.metadata.RelMetadataQuery; @@ -42,49 
+41,34 @@ import org.slf4j.LoggerFactory; /** - * Rule that sorts conditions in a filter predicate to accelerate query processing + * Sorts conditions in a filter predicate to accelerate query processing * based on selectivity and compute cost. Currently it is not applied recursively, * i.e., it is only applied to top predicates in the condition. */ -public class HiveFilterSortPredicates extends RelOptRule { +public class HiveFilterSortPredicates extends RelHomogeneousShuttle { private static final Logger LOG = LoggerFactory.getLogger(HiveFilterSortPredicates.class); private final AtomicInteger noColsMissingStats; public HiveFilterSortPredicates(AtomicInteger noColsMissingStats) { -super( -operand(Filter.class, -operand(RelNode.class, any(; this.noColsMissingStats = noColsMissingStats; } @Override - public boolean matches(RelOptRuleCall call) { -final Filter filter = call.rel(0); - -HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - -// If this operator has been visited already by the rule, -// we do not need to apply the optimization -if (registry != null && registry.getVisited(this).contains(filter)) { - return false; + public RelNode visit(RelNode other) { +RelNode visitedNode = super.visit(other); +if (visitedNode instanceof Filter) { + return rewriteFilter((Filter) visitedNode); } -return
[hive] branch master updated: HIVE-26301: Fix ACID tables bootstrap during reverse replication in unplanned failover (Haymant Mangla reviewed by Peter Vary) (#3352)
This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new fe0f1a648b1 HIVE-26301: Fix ACID tables bootstrap during reverse replication in unplanned failover (Haymant Mangla reviewed by Peter Vary) (#3352) fe0f1a648b1 is described below commit fe0f1a648b14cdf27edcf7a5d323cbd060104ebf Author: Haymant Mangla <79496857+hmangl...@users.noreply.github.com> AuthorDate: Fri Jun 10 16:06:58 2022 +0530 HIVE-26301: Fix ACID tables bootstrap during reverse replication in unplanned failover (Haymant Mangla reviewed by Peter Vary) (#3352) --- .../parse/TestReplicationOptimisedBootstrap.java | 360 - .../TestReplicationScenariosExclusiveReplica.java | 292 - .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 5 +- 3 files changed, 349 insertions(+), 308 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java index 5bd6ac3d362..673e41b3065 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java @@ -23,14 +23,11 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.QuotaUsage; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore; -import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; -import org.apache.hadoop.hive.metastore.api.NotificationEvent; -import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import 
org.apache.hadoop.hive.metastore.api.AbortTxnsRequest; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.messaging.event.filters.DatabaseAndTableFilter; import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; +import org.apache.hadoop.hive.metastore.txn.TxnStore; +import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.security.UserGroupInformation; @@ -71,7 +68,7 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -public class TestReplicationOptimisedBootstrap extends BaseReplicationAcrossInstances { +public class TestReplicationOptimisedBootstrap extends BaseReplicationScenariosAcidTables { String extraPrimaryDb; @@ -84,8 +81,9 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationAcrossInst overrides.put(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname, "true"); overrides.put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, UserGroupInformation.getCurrentUser().getUserName()); overrides.put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "true"); - -internalBeforeClassSetupExclusiveReplica(overrides, overrides, TestReplicationOptimisedBootstrap.class); +overrides.put("hive.repl.bootstrap.dump.open.txn.timeout", "1s"); +overrides.put("hive.in.repl.test", "true"); +internalBeforeClassSetup(overrides, TestReplicationOptimisedBootstrap.class); } @Before @@ -112,7 +110,8 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationAcrossInst .run("create external table t2 (place string) partitioned by (country string)") .run("insert into table t2 partition(country='india') values ('chennai')") .run("insert into table t2 partition(country='us') values ('new york')") -.run("create table t1_managed (id int)") +.run("create 
table t1_managed (id int) clustered by(id) into 3 buckets stored as orc " + +"tblproperties (\"transactional\"=\"true\")") .run("insert into table t1_managed values (10)") .run("insert into table t1_managed values (20),(31),(42)") .run("create table t2_managed (place string) partitioned by (country string)") @@ -125,14 +124,8 @@ public class TestReplicationOptimisedBootstrap extends BaseReplicationAcrossInst .run("repl status " + replicatedDbName) .verifyResult(tuple.lastReplicationId) .run("use " + replicatedDbName) -.run("show tables like 't1'") -.verifyResult("t1") -.run("show tables like 't2'") -.verifyResult("t2") -.run("show tables like 't1_managed'") -.verifyResult("t1_managed") -.run("show tables like 't2_managed'