This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a06923  HIVE-20967 : Handle alter events when replicate to cluster 
with hive.strict.managed.tables enabled. (Ashutosh Bapat reviewed by Mahesh 
Kumar Behera)
9a06923 is described below

commit 9a0692330104d610e8102f509f061a2cc8498e18
Author: Ashutosh Bapat <aba...@cloudera.com>
AuthorDate: Thu May 9 11:09:11 2019 +0530

    HIVE-20967 : Handle alter events when replicate to cluster with 
hive.strict.managed.tables enabled. (Ashutosh Bapat reviewed by Mahesh Kumar 
Behera)
    
    Signed-off-by: Mahesh Kumar Behera <mah...@apache.org>
---
 .../parse/BaseReplicationScenariosAcidTables.java  |  6 +-
 .../TestReplicationScenariosExternalTables.java    | 17 +++++
 .../parse/TestReplicationWithTableMigrationEx.java | 42 +++++++++++
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java    |  4 +-
 .../clientnegative/strict_managed_tables6.q        | 10 +++
 .../clientnegative/strict_managed_tables6.q.out    | 30 ++++++++
 .../apache/hadoop/hive/metastore/HiveMetaHook.java |  2 +
 .../hadoop/hive/metastore/HiveAlterHandler.java    | 85 +++++++++++++++++++---
 8 files changed, 179 insertions(+), 17 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
index bfc4abd..e543695 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java
@@ -140,10 +140,8 @@ public class BaseReplicationScenariosAcidTables {
                     "\"transactional_properties\"=\"insert_only\")")
             .run("insert into t3 values(11)")
             .run("insert into t3 values(22)")
-            .run("create table t5 (id int) stored as orc ")
-            .run("insert into t5 values(1111), (2222)")
-            .run("alter table t5 set tblproperties 
(\"transactional\"=\"true\")")
-            .run("insert into t5 values(3333)");
+            .run("create table t5 (id int) stored as orc tblproperties 
(\"transactional\"=\"true\")")
+            .run("insert into t5 values(1111), (2222), (3333)");
     acidTableNames.add("t1");
     acidTableNames.add("t2");
     acidTableNames.add("t3");
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
index 0b7b82e..74854db 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
@@ -364,6 +364,23 @@ public class TestReplicationScenariosExternalTables 
extends BaseReplicationAcros
         .run("use " + replicatedDbName)
         .run("select place from t2 where country='france'")
         .verifyResults(new String[] {});
+
+    // Changing location of one of the partitions shouldn't result in changing 
location of other
+    // partitions as well as that of the table.
+    assertTablePartitionLocation(primaryDbName + ".t2", replicatedDbName + 
".t2");
+
+    // Changing location of the external table, should result in changes to 
the location of
+    // partition residing within the table location and not the partitions 
located outside.
+    String tmpLocation2 = "/tmp/" + System.nanoTime() + "_2";
+    primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation2), new 
FsPermission("777"));
+
+    tuple = primary.run("use " + primaryDbName)
+            .run("insert into table t2 partition(country='france') values 
('lyon')")
+            .run("alter table t2 set location '" + tmpLocation2 + "'")
+            .dump(primaryDbName, tuple.lastReplicationId);
+
+    replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause);
+    assertTablePartitionLocation(primaryDbName + ".t2", replicatedDbName + 
".t2");
   }
 
   @Test
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java
index 5f59a2a..7619718 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -415,4 +417,44 @@ public class TestReplicationWithTableMigrationEx {
     replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation);
     verifyUserName("hive");
   }
+
+  @Test
+  public void dynamicallyConvertNonAcidToAcidTable() throws Throwable {
+    // Non-acid table converted to an ACID table should be prohibited on 
source cluster with
+    // strict managed false.
+    primary.run("use " + primaryDbName)
+            .run("create table t1 (id int) stored as orc")
+            .run("insert into table t1 values (1)")
+            .run("create table t2 (place string) partitioned by (country 
string) stored as orc")
+            .run("insert into table t2 partition(country='india') values 
('bangalore')")
+            .runFailure("alter table t1 set 
tblproperties('transactional'='true')")
+            .runFailure("alter table t2 set 
tblproperties('transactional'='true')")
+            .runFailure("alter table t1 set 
tblproperties('transactional'='true', " +
+                    "'transactional_properties'='insert_only')")
+            .runFailure("alter table t2 set 
tblproperties('transactional'='true', " +
+                    "'transactional_properties'='insert_only')");
+
+  }
+
+  @Test
+  public void prohibitManagedTableLocationChangeOnReplSource() throws 
Throwable {
+    String tmpLocation = "/tmp/" + System.nanoTime();
+    primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation), new 
FsPermission("777"));
+
+    // For managed tables at source, the table location shouldn't be changed 
for the given
+    // non-partitioned table and partition location shouldn't be changed for 
partitioned table as
+    // alter event doesn't capture the new files list. So, it may cause data 
inconsistency. So,
+    // if database is enabled for replication at source, then alter location 
on managed tables
+    // should be blocked.
+    primary.run("use " + primaryDbName)
+            .run("create table t1 (id int) clustered by(id) into 3 buckets 
stored as orc ")
+            .run("insert into t1 values(1)")
+            .run("create table t2 (place string) partitioned by (country 
string) stored as orc")
+            .run("insert into table t2 partition(country='india') values 
('bangalore')")
+            .runFailure("alter table t1 set location '" + tmpLocation + "'")
+            .runFailure("alter table t2 partition(country='india') set 
location '" + tmpLocation + "'")
+            .runFailure("alter table t2 set location '" + tmpLocation + "'");
+
+    primary.miniDFSCluster.getFileSystem().delete(new Path(tmpLocation), true);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 3d4ba01..529b9c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -1641,8 +1641,8 @@ public class DDLTask extends Task<DDLWork> implements 
Serializable {
     return (part == null ? tbl.getTTable().getSd() : 
part.getTPartition().getSd());
   }
 
-  private List<Task<?>> alterTableOrSinglePartition(
-      AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException 
{
+  private List<Task<?>> alterTableOrSinglePartition(AlterTableDesc alterTbl, 
Table tbl,
+                                                    Partition part) throws 
HiveException {
     EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
     if (environmentContext == null) {
       environmentContext = new EnvironmentContext();
diff --git a/ql/src/test/queries/clientnegative/strict_managed_tables6.q 
b/ql/src/test/queries/clientnegative/strict_managed_tables6.q
new file mode 100644
index 0000000..f88a080
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/strict_managed_tables6.q
@@ -0,0 +1,10 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.managed.tables=true;
+
+drop database if exists smt6;
+create database smt6 with dbproperties ('repl.source.for'='1,2,3');
+use smt6;
+create table strict_managed_tables1_tab1 (c1 string, c2 string) stored as orc 
tblproperties ('transactional'='true');
+-- changing location of a managed table in a database which is source of 
replication is not allowed
+alter table strict_managed_tables1_tab1 set location '/tmp/new_smt1_tab';
diff --git a/ql/src/test/results/clientnegative/strict_managed_tables6.q.out 
b/ql/src/test/results/clientnegative/strict_managed_tables6.q.out
new file mode 100644
index 0000000..b6d23d1
--- /dev/null
+++ b/ql/src/test/results/clientnegative/strict_managed_tables6.q.out
@@ -0,0 +1,30 @@
+PREHOOK: query: drop database if exists smt6
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: drop database if exists smt6
+POSTHOOK: type: DROPDATABASE
+PREHOOK: query: create database smt6 with dbproperties 
('repl.source.for'='1,2,3')
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:smt6
+POSTHOOK: query: create database smt6 with dbproperties 
('repl.source.for'='1,2,3')
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:smt6
+PREHOOK: query: use smt6
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:smt6
+POSTHOOK: query: use smt6
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:smt6
+PREHOOK: query: create table strict_managed_tables1_tab1 (c1 string, c2 
string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:smt6
+PREHOOK: Output: smt6@strict_managed_tables1_tab1
+POSTHOOK: query: create table strict_managed_tables1_tab1 (c1 string, c2 
string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:smt6
+POSTHOOK: Output: smt6@strict_managed_tables1_tab1
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_LOCATION
+PREHOOK: Input: smt6@strict_managed_tables1_tab1
+#### A masked pattern was here ####
+PREHOOK: Output: smt6@strict_managed_tables1_tab1
+FAILED: Execution Error, return code 1 from 
org.apache.hadoop.hive.ql.exec.DDLTask. Unable to alter table. Cannot change 
location of a managed table hive.smt6.strict_managed_tables1_tab1 as it is 
enabled for replication.
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index 76c97d5..5ef356d 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -44,7 +44,9 @@ public interface HiveMetaHook {
 
   public String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType";
 
+  // These should remain in sync with AlterTableDesc::AlterTableType enum
   public List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", 
"DROPPROPS");
+  String ALTERLOCATION = "ALTERLOCATION";
 
   /**
    * Called before a new table definition is added to the metastore
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index d4aaa5c..03f136b 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -63,6 +63,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import static org.apache.hadoop.hive.metastore.HiveMetaHook.ALTERLOCATION;
+import static 
org.apache.hadoop.hive.metastore.HiveMetaHook.ALTER_TABLE_OPERATION_TYPE;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
 import static 
org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
 import static 
org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier;
@@ -169,7 +171,7 @@ public class HiveAlterHandler implements AlterHandler {
             TableName.getQualified(catName, dbname, name) + " doesn't exist");
       }
 
-      checkTableTypeConversion(olddb, oldt, newt);
+      validateTableChangesOnReplSource(olddb, oldt, newt, environmentContext);
 
       if (oldt.getPartitionKeysSize() != 0) {
         isPartitionedTable = true;
@@ -745,6 +747,10 @@ public class HiveAlterHandler implements AlterHandler {
         throw new InvalidObjectException(
             "Unable to alter partitions because table or database does not 
exist.");
       }
+
+      blockPartitionLocationChangesOnReplSource(msdb.getDatabase(catName, 
dbname), tbl,
+                                                environmentContext);
+
       for (Partition tmpPart: new_parts) {
         // Set DDL time to now if not specified
         if (tmpPart.getParameters() == null ||
@@ -805,22 +811,79 @@ public class HiveAlterHandler implements AlterHandler {
     return oldParts;
   }
 
-  private void checkTableTypeConversion(Database db, Table oldTbl, Table 
newTbl)
+  // Validate changes to partition's location to protect against errors on 
migration during
+  // replication
+  private void blockPartitionLocationChangesOnReplSource(Database db, Table 
tbl,
+                                                         EnvironmentContext ec)
+          throws InvalidOperationException {
+    // If the database is not replication source, nothing to do
+    if (!ReplChangeManager.isSourceOfReplication(db)) {
+      return;
+    }
+
+    // Do not allow changing location of a managed table as alter event 
doesn't capture the
+    // new files list. So, it may cause data inconsistency.
+    if (ec.isSetProperties()) {
+      String alterType = ec.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
+      if (alterType != null && alterType.equalsIgnoreCase(ALTERLOCATION) &&
+              
tbl.getTableType().equalsIgnoreCase(TableType.MANAGED_TABLE.name())) {
+        throw new InvalidOperationException("Cannot change location of a 
managed table " +
+                TableName.getQualified(tbl.getCatName(),
+                        tbl.getDbName(), tbl.getTableName()) + " as it is 
enabled for replication.");
+      }
+    }
+  }
+
+  // Validate changes to a table to protect against errors on migration during 
replication.
+  private void validateTableChangesOnReplSource(Database db, Table oldTbl, 
Table newTbl,
+                                                EnvironmentContext ec)
           throws InvalidOperationException {
-    // If the given DB is enabled for replication and strict managed is false, 
then table type cannot be changed.
-    // This is to avoid migration scenarios which causes Managed ACID table to 
be converted to external at replica.
-    // As ACID tables cannot be converted to external table and vice versa, we 
need to restrict this conversion at
-    // primary as well.
-    // Currently, table type conversion is allowed only between managed and 
external table types.
-    // But, to be future proof, any table type conversion is restricted on a 
replication enabled DB.
-    if 
(!conf.getBoolean(MetastoreConf.ConfVars.STRICT_MANAGED_TABLES.getHiveName(), 
false)
-        && !oldTbl.getTableType().equalsIgnoreCase(newTbl.getTableType())
-        && ReplChangeManager.isSourceOfReplication(db)) {
+    // If the database is not replication source, nothing to do
+    if (!ReplChangeManager.isSourceOfReplication(db)) {
+      return;
+    }
+
+    // Do not allow changing location of a managed table as alter event 
doesn't capture the
+    // new files list. So, it may cause data inconsistency. We do this whether 
or not strict
+    // managed is true on the source cluster.
+    if (ec.isSetProperties()) {
+        String alterType = ec.getProperties().get(ALTER_TABLE_OPERATION_TYPE);
+        if (alterType != null && alterType.equalsIgnoreCase(ALTERLOCATION) &&
+            
oldTbl.getTableType().equalsIgnoreCase(TableType.MANAGED_TABLE.name())) {
+          throw new InvalidOperationException("Cannot change location of a 
managed table " +
+                  TableName.getQualified(oldTbl.getCatName(),
+                          oldTbl.getDbName(), oldTbl.getTableName()) + " as it 
is enabled for replication.");
+        }
+    }
+
+    // Rest of the changes need validation only when strict managed tables is 
false. That's
+    // when there's scope for migration during replication, at least for now.
+    if 
(conf.getBoolean(MetastoreConf.ConfVars.STRICT_MANAGED_TABLES.getHiveName(), 
false)) {
+      return;
+    }
+
+    // Do not allow changing the type of table. This is to avoid migration 
scenarios which causes
+    // Managed ACID table to be converted to external at replica. As ACID 
tables cannot be
+    // converted to external table and vice versa, we need to restrict this 
conversion at primary
+    // as well. Currently, table type conversion is allowed only between 
managed and external
+    // table types. But, to be future proof, any table type conversion is 
restricted on a
+    // replication enabled DB.
+    if (!oldTbl.getTableType().equalsIgnoreCase(newTbl.getTableType())) {
       throw new InvalidOperationException("Table type cannot be changed from " 
+ oldTbl.getTableType()
               + " to " + newTbl.getTableType() + " for the table " +
               TableName.getQualified(oldTbl.getCatName(), oldTbl.getDbName(), 
oldTbl.getTableName())
               + " as it is enabled for replication.");
     }
+
+    // Also we do not allow changing a non-Acid managed table to acid table on 
source with strict
+    // managed false. After replicating a non-acid managed table to a target 
with strict managed
+    // true the table will be converted to acid or external table. So changing 
the transactional
+    // property of table on source can conflict with resultant change in the 
target.
+    if (!TxnUtils.isTransactionalTable(oldTbl) && 
TxnUtils.isTransactionalTable(newTbl)) {
+      throw new InvalidOperationException("A non-Acid table cannot be 
converted to an Acid " +
+              "table for the table " + 
TableName.getQualified(oldTbl.getCatName(),
+              oldTbl.getDbName(), oldTbl.getTableName()) + " as it is enabled 
for replication.");
+    }
   }
 
   private boolean checkPartialPartKeysEqual(List<FieldSchema> oldPartKeys,

Reply via email to