This is an automated email from the ASF dual-hosted git repository. mahesh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 9a06923 HIVE-20967 : Handle alter events when replicate to cluster with hive.strict.managed.tables enabled. (Ashutosh Bapat reviewed by Mahesh Kumar Behera) 9a06923 is described below commit 9a0692330104d610e8102f509f061a2cc8498e18 Author: Ashutosh Bapat <aba...@cloudera.com> AuthorDate: Thu May 9 11:09:11 2019 +0530 HIVE-20967 : Handle alter events when replicate to cluster with hive.strict.managed.tables enabled. (Ashutosh Bapat reviewed by Mahesh Kumar Behera) Signed-off-by: Mahesh Kumar Behera <mah...@apache.org> --- .../parse/BaseReplicationScenariosAcidTables.java | 6 +- .../TestReplicationScenariosExternalTables.java | 17 +++++ .../parse/TestReplicationWithTableMigrationEx.java | 42 +++++++++++ .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 4 +- .../clientnegative/strict_managed_tables6.q | 10 +++ .../clientnegative/strict_managed_tables6.q.out | 30 ++++++++ .../apache/hadoop/hive/metastore/HiveMetaHook.java | 2 + .../hadoop/hive/metastore/HiveAlterHandler.java | 85 +++++++++++++++++++--- 8 files changed, 179 insertions(+), 17 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java index bfc4abd..e543695 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java @@ -140,10 +140,8 @@ public class BaseReplicationScenariosAcidTables { "\"transactional_properties\"=\"insert_only\")") .run("insert into t3 values(11)") .run("insert into t3 values(22)") - .run("create table t5 (id int) stored as orc ") - .run("insert into t5 values(1111), (2222)") - .run("alter table t5 set tblproperties (\"transactional\"=\"true\")") - .run("insert into t5 values(3333)"); + .run("create table t5 (id int) stored as orc tblproperties (\"transactional\"=\"true\")") + .run("insert into t5 values(1111), (2222), (3333)"); acidTableNames.add("t1"); acidTableNames.add("t2"); acidTableNames.add("t3"); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 0b7b82e..74854db 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -364,6 +364,23 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros .run("use " + replicatedDbName) .run("select place from t2 where country='france'") .verifyResults(new String[] {}); + + // Changing location of one of the partitions shouldn't result in changing location of other + // partitions as well as that of the table. + assertTablePartitionLocation(primaryDbName + ".t2", replicatedDbName + ".t2"); + + // Changing location of the external table, should result in changes to the location of + // partition residing within the table location and not the partitions located outside. + String tmpLocation2 = "/tmp/" + System.nanoTime() + "_2"; + primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation2), new FsPermission("777")); + + tuple = primary.run("use " + primaryDbName) + .run("insert into table t2 partition(country='france') values ('lyon')") + .run("alter table t2 set location '" + tmpLocation2 + "'") + .dump(primaryDbName, tuple.lastReplicationId); + + replica.load(replicatedDbName, tuple.dumpLocation, loadWithClause); + assertTablePartitionLocation(primaryDbName + ".t2", replicatedDbName + ".t2"); } @Test diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java index 5f59a2a..7619718 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationWithTableMigrationEx.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.parse; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; @@ -415,4 +417,44 @@ public class TestReplicationWithTableMigrationEx { replica.loadWithoutExplain(replicatedDbName, tuple.dumpLocation); verifyUserName("hive"); } + + @Test + public void dynamicallyConvertNonAcidToAcidTable() throws Throwable { + // Non-acid table converted to an ACID table should be prohibited on source cluster with + // strict managed false. + primary.run("use " + primaryDbName) + .run("create table t1 (id int) stored as orc") + .run("insert into table t1 values (1)") + .run("create table t2 (place string) partitioned by (country string) stored as orc") + .run("insert into table t2 partition(country='india') values ('bangalore')") + .runFailure("alter table t1 set tblproperties('transactional'='true')") + .runFailure("alter table t2 set tblproperties('transactional'='true')") + .runFailure("alter table t1 set tblproperties('transactional'='true', " + + "'transactional_properties'='insert_only')") + .runFailure("alter table t2 set tblproperties('transactional'='true', " + + "'transactional_properties'='insert_only')"); + + } + + @Test + public void prohibitManagedTableLocationChangeOnReplSource() throws Throwable { + String tmpLocation = "/tmp/" + System.nanoTime(); + primary.miniDFSCluster.getFileSystem().mkdirs(new Path(tmpLocation), new FsPermission("777")); + + // For managed tables at source, the table location shouldn't be changed for the given + // non-partitioned table and partition location shouldn't be changed for partitioned table as + // alter event doesn't capture the new files list. So, it may cause data inconsistsency. So, + // if database is enabled for replication at source, then alter location on managed tables + // should be blocked. + primary.run("use " + primaryDbName) + .run("create table t1 (id int) clustered by(id) into 3 buckets stored as orc ") + .run("insert into t1 values(1)") + .run("create table t2 (place string) partitioned by (country string) stored as orc") + .run("insert into table t2 partition(country='india') values ('bangalore')") + .runFailure("alter table t1 set location '" + tmpLocation + "'") + .runFailure("alter table t2 partition(country='india') set location '" + tmpLocation + "'") + .runFailure("alter table t2 set location '" + tmpLocation + "'"); + + primary.miniDFSCluster.getFileSystem().delete(new Path(tmpLocation), true); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 3d4ba01..529b9c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -1641,8 +1641,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable { return (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd()); } - private List<Task<?>> alterTableOrSinglePartition( - AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException { + private List<Task<?>> alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, + Partition part) throws HiveException { EnvironmentContext environmentContext = alterTbl.getEnvironmentContext(); if (environmentContext == null) { environmentContext = new EnvironmentContext(); diff --git a/ql/src/test/queries/clientnegative/strict_managed_tables6.q b/ql/src/test/queries/clientnegative/strict_managed_tables6.q new file mode 100644 index 0000000..f88a080 --- /dev/null +++ b/ql/src/test/queries/clientnegative/strict_managed_tables6.q @@ -0,0 +1,10 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.managed.tables=true; + +drop database if exists smt6; +create database smt6 with dbproperties ('repl.source.for'='1,2,3'); +use smt6; +create table strict_managed_tables1_tab1 (c1 string, c2 string) stored as orc tblproperties ('transactional'='true'); +-- changing location of a managed table in a database which is source of replication is not allowed +alter table strict_managed_tables1_tab1 set location '/tmp/new_smt1_tab'; diff --git a/ql/src/test/results/clientnegative/strict_managed_tables6.q.out b/ql/src/test/results/clientnegative/strict_managed_tables6.q.out new file mode 100644 index 0000000..b6d23d1 --- /dev/null +++ b/ql/src/test/results/clientnegative/strict_managed_tables6.q.out @@ -0,0 +1,30 @@ +PREHOOK: query: drop database if exists smt6 +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists smt6 +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database smt6 with dbproperties ('repl.source.for'='1,2,3') +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:smt6 +POSTHOOK: query: create database smt6 with dbproperties ('repl.source.for'='1,2,3') +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:smt6 +PREHOOK: query: use smt6 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:smt6 +POSTHOOK: query: use smt6 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:smt6 +PREHOOK: query: create table strict_managed_tables1_tab1 (c1 string, c2 string) stored as orc tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:smt6 +PREHOOK: Output: smt6@strict_managed_tables1_tab1 +POSTHOOK: query: create table strict_managed_tables1_tab1 (c1 string, c2 string) stored as orc tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:smt6 +POSTHOOK: Output: smt6@strict_managed_tables1_tab1 +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_LOCATION +PREHOOK: Input: smt6@strict_managed_tables1_tab1 +#### A masked pattern was here #### +PREHOOK: Output: smt6@strict_managed_tables1_tab1 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unable to alter table. Cannot change location of a managed table hive.smt6.strict_managed_tables1_tab1 as it is enabled for replication. diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java index 76c97d5..5ef356d 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java @@ -44,7 +44,9 @@ public interface HiveMetaHook { public String ALTER_TABLE_OPERATION_TYPE = "alterTableOpType"; + // These should remain in sync with AlterTableDesc::AlterTableType enum public List<String> allowedAlterTypes = ImmutableList.of("ADDPROPS", "DROPPROPS"); + String ALTERLOCATION = "ALTERLOCATION"; /** * Called before a new table definition is added to the metastore diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index d4aaa5c..03f136b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -63,6 +63,8 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import static org.apache.hadoop.hive.metastore.HiveMetaHook.ALTERLOCATION; +import static org.apache.hadoop.hive.metastore.HiveMetaHook.ALTER_TABLE_OPERATION_TYPE; import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; @@ -169,7 +171,7 @@ public class HiveAlterHandler implements AlterHandler { TableName.getQualified(catName, dbname, name) + " doesn't exist"); } - checkTableTypeConversion(olddb, oldt, newt); + validateTableChangesOnReplSource(olddb, oldt, newt, environmentContext); if (oldt.getPartitionKeysSize() != 0) { isPartitionedTable = true; @@ -745,6 +747,10 @@ public class HiveAlterHandler implements AlterHandler { throw new InvalidObjectException( "Unable to alter partitions because table or database does not exist."); } + + blockPartitionLocationChangesOnReplSource(msdb.getDatabase(catName, dbname), tbl, + environmentContext); + for (Partition tmpPart: new_parts) { // Set DDL time to now if not specified if (tmpPart.getParameters() == null || @@ -805,22 +811,79 @@ public class HiveAlterHandler implements AlterHandler { return oldParts; } - private void checkTableTypeConversion(Database db, Table oldTbl, Table newTbl) + // Validate changes to partition's location to protect against errors on migration during + // replication + private void blockPartitionLocationChangesOnReplSource(Database db, Table tbl, + EnvironmentContext ec) + throws InvalidOperationException { + // If the database is not replication source, nothing to do + if (!ReplChangeManager.isSourceOfReplication(db)) { + return; + } + + // Do not allow changing location of a managed table as alter event doesn't capture the + // new files list. So, it may cause data inconsistency. + if (ec.isSetProperties()) { + String alterType = ec.getProperties().get(ALTER_TABLE_OPERATION_TYPE); + if (alterType != null && alterType.equalsIgnoreCase(ALTERLOCATION) && + tbl.getTableType().equalsIgnoreCase(TableType.MANAGED_TABLE.name())) { + throw new InvalidOperationException("Cannot change location of a managed table " + + TableName.getQualified(tbl.getCatName(), + tbl.getDbName(), tbl.getTableName()) + " as it is enabled for replication."); + } + } + } + + // Validate changes to a table to protect against errors on migration during replication. + private void validateTableChangesOnReplSource(Database db, Table oldTbl, Table newTbl, + EnvironmentContext ec) throws InvalidOperationException { - // If the given DB is enabled for replication and strict managed is false, then table type cannot be changed. - // This is to avoid migration scenarios which causes Managed ACID table to be converted to external at replica. - // As ACID tables cannot be converted to external table and vice versa, we need to restrict this conversion at - // primary as well. - // Currently, table type conversion is allowed only between managed and external table types. - // But, to be future proof, any table type conversion is restricted on a replication enabled DB. - if (!conf.getBoolean(MetastoreConf.ConfVars.STRICT_MANAGED_TABLES.getHiveName(), false) - && !oldTbl.getTableType().equalsIgnoreCase(newTbl.getTableType()) - && ReplChangeManager.isSourceOfReplication(db)) { + // If the database is not replication source, nothing to do + if (!ReplChangeManager.isSourceOfReplication(db)) { + return; + } + + // Do not allow changing location of a managed table as alter event doesn't capture the + // new files list. So, it may cause data inconsistency. We do this whether or not strict + // managed is true on the source cluster. + if (ec.isSetProperties()) { + String alterType = ec.getProperties().get(ALTER_TABLE_OPERATION_TYPE); + if (alterType != null && alterType.equalsIgnoreCase(ALTERLOCATION) && + oldTbl.getTableType().equalsIgnoreCase(TableType.MANAGED_TABLE.name())) { + throw new InvalidOperationException("Cannot change location of a managed table " + + TableName.getQualified(oldTbl.getCatName(), + oldTbl.getDbName(), oldTbl.getTableName()) + " as it is enabled for replication."); + } + } + + // Rest of the changes need validation only when strict managed tables is false. That's + // when there's scope for migration during replication, at least for now. + if (conf.getBoolean(MetastoreConf.ConfVars.STRICT_MANAGED_TABLES.getHiveName(), false)) { + return; + } + + // Do not allow changing the type of table. This is to avoid migration scenarios which causes + // Managed ACID table to be converted to external at replica. As ACID tables cannot be + // converted to external table and vice versa, we need to restrict this conversion at primary + // as well. Currently, table type conversion is allowed only between managed and external + // table types. But, to be future proof, any table type conversion is restricted on a + // replication enabled DB. + if (!oldTbl.getTableType().equalsIgnoreCase(newTbl.getTableType())) { throw new InvalidOperationException("Table type cannot be changed from " + oldTbl.getTableType() + " to " + newTbl.getTableType() + " for the table " + TableName.getQualified(oldTbl.getCatName(), oldTbl.getDbName(), oldTbl.getTableName()) + " as it is enabled for replication."); } + + // Also we do not allow changing a non-Acid managed table to acid table on source with strict + // managed false. After replicating a non-acid managed table to a target with strict managed + // true the table will be converted to acid or external table. So changing the transactional + // property of table on source can conflict with resultant change in the target. + if (!TxnUtils.isTransactionalTable(oldTbl) && TxnUtils.isTransactionalTable(newTbl)) { + throw new InvalidOperationException("A non-Acid table cannot be converted to an Acid " + + "table for the table " + TableName.getQualified(oldTbl.getCatName(), + oldTbl.getDbName(), oldTbl.getTableName()) + " as it is enabled for replication."); + } } private boolean checkPartialPartKeysEqual(List<FieldSchema> oldPartKeys,