[hive] branch master updated: HIVE-26238: Decouple sort filter predicates optimization from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)

2022-06-10 Thread zabetak
This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f29cb2245c9 HIVE-26238: Decouple sort filter predicates optimization 
from digest normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan 
Haindrich)
f29cb2245c9 is described below

commit f29cb2245c97102975ea0dd73783049eaa0947a0
Author: Stamatis Zampetakis 
AuthorDate: Tue May 17 15:20:06 2022 +0200

HIVE-26238: Decouple sort filter predicates optimization from digest 
normalization in CBO (Stamatis Zampetakis, reviewed by Zoltan Haindrich)

1. Decouple the sort filter optimization from digest normalization by
refactoring HiveFilterSortPredicates into a (DFS) visitor. We cannot
use the planner or rules because they make use of the digest. Performing
this optimization with a visitor also slightly simplifies the code: no
registry is needed, since we are not going to visit the same node
twice.

2. Move the optimization after all post-join transformations to avoid
having other optimizations cancel the benefit of the sort filter
predicates.

Closes #3299
---
 .../calcite/rules/HiveFilterSortPredicates.java| 47 +++---
 .../hadoop/hive/ql/parse/CalcitePlanner.java   |  8 ++--
 .../clientpositive/llap/external_jdbc_table2.q.out |  2 +-
 .../perf/tpcds30tb/tez/cbo_ext_query1.q.out|  4 +-
 .../perf/tpcds30tb/tez/cbo_query1.q.out|  2 +-
 .../perf/tpcds30tb/tez/cbo_query11.q.out   |  8 ++--
 .../perf/tpcds30tb/tez/cbo_query31.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query33.q.out   |  4 +-
 .../perf/tpcds30tb/tez/cbo_query34.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query38.q.out   |  4 +-
 .../perf/tpcds30tb/tez/cbo_query4.q.out| 12 +++---
 .../perf/tpcds30tb/tez/cbo_query54.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query56.q.out   |  4 +-
 .../perf/tpcds30tb/tez/cbo_query6.q.out|  2 +-
 .../perf/tpcds30tb/tez/cbo_query60.q.out   |  4 +-
 .../perf/tpcds30tb/tez/cbo_query65.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query73.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query78.q.out   |  2 +-
 .../perf/tpcds30tb/tez/cbo_query81.q.out   |  2 +-
 .../perf/tpcds30tb/tez/query11.q.out   |  4 +-
 .../clientpositive/perf/tpcds30tb/tez/query4.q.out |  6 +--
 21 files changed, 52 insertions(+), 73 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
index 780481f2fd5..6ecf94b5f63 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterSortPredicates.java
@@ -20,8 +20,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelHomogeneousShuttle;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -42,49 +41,34 @@ import org.slf4j.LoggerFactory;
 
 
 /**
- * Rule that sorts conditions in a filter predicate to accelerate query 
processing
+ * Sorts conditions in a filter predicate to accelerate query processing
  * based on selectivity and compute cost. Currently it is not applied 
recursively,
  * i.e., it is only applied to top predicates in the condition.
  */
-public class HiveFilterSortPredicates extends RelOptRule {
+public class HiveFilterSortPredicates extends RelHomogeneousShuttle {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(HiveFilterSortPredicates.class);
 
   private final AtomicInteger noColsMissingStats;
 
   public HiveFilterSortPredicates(AtomicInteger noColsMissingStats) {
-super(
-operand(Filter.class,
-operand(RelNode.class, any())));
 this.noColsMissingStats = noColsMissingStats;
   }
 
   @Override
-  public boolean matches(RelOptRuleCall call) {
-final Filter filter = call.rel(0);
-
-HiveRulesRegistry registry = 
call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
-
-// If this operator has been visited already by the rule,
-// we do not need to apply the optimization
-if (registry != null && registry.getVisited(this).contains(filter)) {
-  return false;
+  public RelNode visit(RelNode other) {
+RelNode visitedNode = super.visit(other);
+if (visitedNode instanceof Filter) {
+  return rewriteFilter((Filter) visitedNode);
 }
 
-return

[hive] branch master updated: HIVE-26301: Fix ACID tables bootstrap during reverse replication in unplanned failover (Haymant Mangla reviewed by Peter Vary) (#3352)

2022-06-10 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new fe0f1a648b1 HIVE-26301: Fix ACID tables bootstrap during reverse 
replication in unplanned failover (Haymant Mangla reviewed by Peter Vary) 
(#3352)
fe0f1a648b1 is described below

commit fe0f1a648b14cdf27edcf7a5d323cbd060104ebf
Author: Haymant Mangla <79496857+hmangl...@users.noreply.github.com>
AuthorDate: Fri Jun 10 16:06:58 2022 +0530

HIVE-26301: Fix ACID tables bootstrap during reverse replication in 
unplanned failover (Haymant Mangla reviewed by Peter Vary) (#3352)
---
 .../parse/TestReplicationOptimisedBootstrap.java   | 360 -
 .../TestReplicationScenariosExclusiveReplica.java  | 292 -
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java |   5 +-
 3 files changed, 349 insertions(+), 308 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
index 5bd6ac3d362..673e41b3065 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOptimisedBootstrap.java
@@ -23,14 +23,11 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.QuotaUsage;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore;
-import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId;
-import org.apache.hadoop.hive.metastore.api.NotificationEvent;
-import org.apache.hadoop.hive.metastore.api.NotificationEventResponse;
+import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
-import 
org.apache.hadoop.hive.metastore.messaging.event.filters.DatabaseAndTableFilter;
 import 
org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -71,7 +68,7 @@ import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
-public class TestReplicationOptimisedBootstrap extends 
BaseReplicationAcrossInstances {
+public class TestReplicationOptimisedBootstrap extends 
BaseReplicationScenariosAcidTables {
 
   String extraPrimaryDb;
 
@@ -84,8 +81,9 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationAcrossInst
 overrides.put(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname, 
"true");
 overrides.put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, 
UserGroupInformation.getCurrentUser().getUserName());
 
overrides.put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, 
"true");
-
-internalBeforeClassSetupExclusiveReplica(overrides, overrides, 
TestReplicationOptimisedBootstrap.class);
+overrides.put("hive.repl.bootstrap.dump.open.txn.timeout", "1s");
+overrides.put("hive.in.repl.test", "true");
+internalBeforeClassSetup(overrides, 
TestReplicationOptimisedBootstrap.class);
   }
 
   @Before
@@ -112,7 +110,8 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationAcrossInst
 .run("create external table t2 (place string) partitioned by (country 
string)")
 .run("insert into table t2 partition(country='india') values 
('chennai')")
 .run("insert into table t2 partition(country='us') values ('new 
york')")
-.run("create table t1_managed (id int)")
+.run("create table t1_managed (id int) clustered by(id) into 3 buckets 
stored as orc " +
+"tblproperties (\"transactional\"=\"true\")")
 .run("insert into table t1_managed values (10)")
 .run("insert into table t1_managed values (20),(31),(42)")
 .run("create table t2_managed (place string) partitioned by (country 
string)")
@@ -125,14 +124,8 @@ public class TestReplicationOptimisedBootstrap extends 
BaseReplicationAcrossInst
 .run("repl status " + replicatedDbName)
 .verifyResult(tuple.lastReplicationId)
 .run("use " + replicatedDbName)
-.run("show tables like 't1'")
-.verifyResult("t1")
-.run("show tables like 't2'")
-.verifyResult("t2")
-.run("show tables like 't1_managed'")
-.verifyResult("t1_managed")
-.run("show tables like 't2_managed'