Repository: hive Updated Branches: refs/heads/branch-3 1ad644c60 -> 812ae7388
HIVE-18570 - ACID IOW implemented using base may delete too much data (Eugene Koifman, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/812ae738 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/812ae738 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/812ae738 Branch: refs/heads/branch-3 Commit: 812ae7388936d3a8861304a66eb1f448b126e408 Parents: 1ad644c Author: Eugene Koifman <ekoif...@apache.org> Authored: Fri May 4 13:21:00 2018 -0700 Committer: Eugene Koifman <ekoif...@apache.org> Committed: Fri May 4 13:21:00 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../hadoop/hive/ql/hooks/WriteEntity.java | 13 +++-- .../hadoop/hive/ql/lockmgr/DbTxnManager.java | 6 +- .../apache/hadoop/hive/ql/metadata/Table.java | 12 ++-- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- .../hive/ql/lockmgr/TestDbTxnManager2.java | 58 +++++++++++++++++--- .../alter_rename_partition_failure3.q.out | 2 +- .../alter_table_add_partition.q.out | 2 +- .../clientnegative/alter_view_failure5.q.out | 2 +- .../clientnegative/alter_view_failure7.q.out | 2 +- .../test/results/clientnegative/analyze1.q.out | 2 +- .../clientnegative/archive_partspec1.q.out | 2 +- .../test/results/clientnegative/dyn_part1.q.out | 2 +- ql/src/test/results/clientnegative/touch2.q.out | 2 +- .../truncate_partition_column.q.out | 2 +- 15 files changed, 84 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9a0d8a9..8c6cb61 100644 --- 
a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2436,6 +2436,10 @@ public class HiveConf extends Configuration { "In nonstrict mode, for non-ACID resources, INSERT will only acquire shared lock, which\n" + "allows two concurrent writes to the same partition but still lets lock manager prevent\n" + "DROP TABLE etc. when the table is being written to"), + TXN_OVERWRITE_X_LOCK("hive.txn.xlock.iow", true, + "Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive locks for\n" + + "transactional tables. This ensures that inserts (w/o overwrite) running concurrently\n" + + "are not hidden by the INSERT OVERWRITE."), /** * @deprecated Use MetastoreConf.TXN_TIMEOUT */ http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java index c465adf..f1cf113 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/WriteEntity.java @@ -63,7 +63,7 @@ public class WriteEntity extends Entity implements Serializable { public WriteEntity(Database database, WriteType type) { super(database, true); - writeType = type; + setWriteTypeInternal(type); } /** @@ -74,12 +74,12 @@ public class WriteEntity extends Entity implements Serializable { */ public WriteEntity(Table t, WriteType type) { super(t, true); - writeType = type; + setWriteTypeInternal(type); } public WriteEntity(Table t, WriteType type, boolean complete) { super(t, complete); - writeType = type; + setWriteTypeInternal(type); } /** @@ -104,12 +104,12 @@ public class WriteEntity extends Entity implements Serializable { */ public WriteEntity(Partition p, WriteType type) { super(p, true); - writeType = type;
+ setWriteTypeInternal(type); } public WriteEntity(DummyPartition p, WriteType type, boolean complete) { super(p, complete); - writeType = type; + setWriteTypeInternal(type); } /** @@ -161,6 +161,9 @@ public class WriteEntity extends Entity implements Serializable { * @param type new operation type */ public void setWriteType(WriteType type) { + setWriteTypeInternal(type); + } + private void setWriteTypeInternal(WriteType type) { writeType = type; } http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index 94f6b00..2486321 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -510,7 +510,11 @@ public final class DbTxnManager extends HiveTxnManagerImpl { case INSERT_OVERWRITE: t = getTable(output); if (AcidUtils.isTransactionalTable(t)) { - compBuilder.setSemiShared(); + if(conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK)) { + compBuilder.setExclusive(); + } else { + compBuilder.setSemiShared(); + } compBuilder.setOperationType(DataOperationType.UPDATE); } else { compBuilder.setExclusive(); http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index e11965e..879b422 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -369,14 +369,17 @@ public class Table implements Serializable { final public void validatePartColumnNames( Map<String, String> 
spec, boolean shouldBeFull) throws SemanticException { List<FieldSchema> partCols = tTable.getPartitionKeys(); + final String tableName = Warehouse.getQualifiedName(tTable); if (partCols == null || (partCols.size() == 0)) { if (spec != null) { - throw new ValidationFailureSemanticException("table is not partitioned but partition spec exists: " + spec); + throw new ValidationFailureSemanticException(tableName + + " table is not partitioned but partition spec exists: " + spec); } return; } else if (spec == null) { if (shouldBeFull) { - throw new ValidationFailureSemanticException("table is partitioned but partition spec is not specified"); + throw new ValidationFailureSemanticException(tableName + + " table is partitioned but partition spec is not specified"); } return; } @@ -390,10 +393,11 @@ public class Table implements Serializable { } } if (columnsFound < spec.size()) { - throw new ValidationFailureSemanticException("Partition spec " + spec + " contains non-partition columns"); + throw new ValidationFailureSemanticException(tableName + ": Partition spec " + spec + + " contains non-partition columns"); } if (shouldBeFull && (spec.size() != partCols.size())) { - throw new ValidationFailureSemanticException("partition spec " + spec + throw new ValidationFailureSemanticException(tableName + ": partition spec " + spec + " doesn't contain all (" + partCols.size() + ") partition columns"); } } http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3f4c0ff..6eb67f0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -14383,7 +14383,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer { if (isNonNativeTable) { return WriteEntity.WriteType.INSERT_OVERWRITE; } else { - return ((ltd.getLoadFileType() == LoadFileType.REPLACE_ALL) + return ((ltd.getLoadFileType() == LoadFileType.REPLACE_ALL || ltd.isInsertOverwrite()) ? WriteEntity.WriteType.INSERT_OVERWRITE : getWriteType(dest)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java index 4c7ec16..1d79242 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.txn.AcidWriteSetService; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; @@ -197,10 +198,20 @@ public class TestDbTxnManager2 { } @Test public void insertOverwriteCreate() throws Exception { + insertOverwriteCreate(false); + } + @Test + public void insertOverwriteCreateAcid() throws Exception { + insertOverwriteCreate(true); + } + private void insertOverwriteCreate(boolean isTransactional) throws Exception { + if(isTransactional) { + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true); + } dropTable(new String[] {"T2", "T3"}); CommandProcessorResponse cpr = driver.run("create table if not exists T2(a int)"); checkCmdOnDriver(cpr); - cpr = driver.run("create table if not exists 
T3(a int)"); + cpr = driver.run("create table T3(a int) stored as ORC"); checkCmdOnDriver(cpr); cpr = driver.compileAndRespond("insert overwrite table T3 select a from T2", true); checkCmdOnDriver(cpr); @@ -218,12 +229,24 @@ public class TestDbTxnManager2 { } @Test public void insertOverwritePartitionedCreate() throws Exception { - dropTable(new String[] {"T4"}); - CommandProcessorResponse cpr = driver.run("create table if not exists T4 (name string, gpa double) partitioned by (age int)"); + insertOverwritePartitionedCreate(false); + } + @Test + public void insertOverwritePartitionedCreateAcid() throws Exception { + insertOverwritePartitionedCreate(true); + } + private void insertOverwritePartitionedCreate(boolean isTransactional) throws Exception { + dropTable(new String[] {"T4", "T5"}); + if(isTransactional) { + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true); + } + CommandProcessorResponse cpr = driver.run("create table T4" + + "(name string, gpa double) partitioned by (age int) stored as ORC"); checkCmdOnDriver(cpr); - cpr = driver.run("create table if not exists T5(name string, age int, gpa double)"); + cpr = driver.run("create table T5(name string, age int, gpa double)"); checkCmdOnDriver(cpr); - cpr = driver.compileAndRespond("INSERT OVERWRITE TABLE T4 PARTITION (age) SELECT name, age, gpa FROM T5", true); + cpr = driver.compileAndRespond("INSERT OVERWRITE TABLE T4 PARTITION (age) SELECT " + + "name, age, gpa FROM T5", true); checkCmdOnDriver(cpr); txnMgr.acquireLocks(driver.getPlan(), ctx, "Fifer"); List<ShowLocksResponseElement> locks = getLocks(); @@ -2180,9 +2203,9 @@ public class TestDbTxnManager2 { */ @Test public void testShowTablesLock() throws Exception { - dropTable(new String[] {"T, T2"}); + dropTable(new String[] {"T", "T2"}); CommandProcessorResponse cpr = driver.run( - "create table if not exists T (a int, b int)"); + "create table T (a int, b int)"); checkCmdOnDriver(cpr); long txnid1 = txnMgr.openTxn(ctx,
"Fifer"); @@ -2211,7 +2234,7 @@ public class TestDbTxnManager2 { swapTxnManager(txnMgr); cpr = driver.run( - "create table if not exists T2 (a int, b int) partitioned by (p int) clustered by (a) " + + "create table T2 (a int, b int) partitioned by (p int) clustered by (a) " + "into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')"); checkCmdOnDriver(cpr); @@ -2454,7 +2477,24 @@ public class TestDbTxnManager2 { Assert.assertEquals("Unexpected lock count", 1, locks.size()); checkLock(LockType.EXCLUSIVE, LockState.ACQUIRED, "default", "T", null, locks); } - + @Test + public void testLoadData() throws Exception { + dropTable(new String[] {"T2"}); + CommandProcessorResponse cpr = driver.run("create table T2(a int) " + + "stored as ORC TBLPROPERTIES ('transactional'='true')"); + checkCmdOnDriver(cpr); + checkCmdOnDriver(driver.run("insert into T2 values(1)")); + String exportLoc = exportFolder.newFolder("1").toString(); + checkCmdOnDriver(driver.run("export table T2 to '" + exportLoc + "/2'")); + cpr = driver.compileAndRespond( + "load data inpath '" + exportLoc + "/2/data' overwrite into table T2"); + checkCmdOnDriver(cpr); + txnMgr.acquireLocks(driver.getPlan(), ctx, "Fifer"); + List<ShowLocksResponseElement> locks = getLocks(); + Assert.assertEquals("Unexpected lock count", 1, locks.size()); + checkLock(LockType.EXCLUSIVE, LockState.ACQUIRED, "default", "T2", null, locks); + txnMgr.commitTxn(); + } @Test public void testMmConversionLocks() throws Exception { dropTable(new String[] {"T"}); http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/alter_rename_partition_failure3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/alter_rename_partition_failure3.q.out b/ql/src/test/results/clientnegative/alter_rename_partition_failure3.q.out index 078ec37..e3538e3 100644 --- a/ql/src/test/results/clientnegative/alter_rename_partition_failure3.q.out 
+++ b/ql/src/test/results/clientnegative/alter_rename_partition_failure3.q.out @@ -31,4 +31,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_rename_partition_src POSTHOOK: Output: default@alter_rename_partition@pcol1=old_part1%3A/pcol2=old_part2%3A POSTHOOK: Lineage: alter_rename_partition PARTITION(pcol1=old_part1:,pcol2=old_part2:).col1 SIMPLE [(alter_rename_partition_src)alter_rename_partition_src.FieldSchema(name:col1, type:string, comment:null), ] -FAILED: ValidationFailureSemanticException Partition spec {pcol1=old_part1:, pcol2=old_part2:, pcol3=old_part3:} contains non-partition columns +FAILED: ValidationFailureSemanticException default.alter_rename_partition: Partition spec {pcol1=old_part1:, pcol2=old_part2:, pcol3=old_part3:} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/alter_table_add_partition.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/alter_table_add_partition.q.out b/ql/src/test/results/clientnegative/alter_table_add_partition.q.out index 2cc2953..4fdbde7 100644 --- a/ql/src/test/results/clientnegative/alter_table_add_partition.q.out +++ b/ql/src/test/results/clientnegative/alter_table_add_partition.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: create table mp (a int) partitioned by (b int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@mp -FAILED: ValidationFailureSemanticException Partition spec {b=1, c=1} contains non-partition columns +FAILED: ValidationFailureSemanticException default.mp: Partition spec {b=1, c=1} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/alter_view_failure5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/alter_view_failure5.q.out 
b/ql/src/test/results/clientnegative/alter_view_failure5.q.out index 6629df4..caf3ded 100644 --- a/ql/src/test/results/clientnegative/alter_view_failure5.q.out +++ b/ql/src/test/results/clientnegative/alter_view_failure5.q.out @@ -19,4 +19,4 @@ POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@xxx6 POSTHOOK: Lineage: xxx6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -FAILED: ValidationFailureSemanticException Partition spec {v=val_86} contains non-partition columns +FAILED: ValidationFailureSemanticException default.xxx6: Partition spec {v=val_86} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/alter_view_failure7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/alter_view_failure7.q.out b/ql/src/test/results/clientnegative/alter_view_failure7.q.out index f96f3cc..8096835 100644 --- a/ql/src/test/results/clientnegative/alter_view_failure7.q.out +++ b/ql/src/test/results/clientnegative/alter_view_failure7.q.out @@ -19,4 +19,4 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Output: database:default POSTHOOK: Output: default@xxx8 POSTHOOK: Lineage: xxx8.key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -FAILED: ValidationFailureSemanticException partition spec {ds=2011-01-01} doesn't contain all (2) partition columns +FAILED: ValidationFailureSemanticException default.xxx8: partition spec {ds=2011-01-01} doesn't contain all (2) partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/analyze1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/analyze1.q.out b/ql/src/test/results/clientnegative/analyze1.q.out index 3230b02..c87283d 100644 --- a/ql/src/test/results/clientnegative/analyze1.q.out 
+++ b/ql/src/test/results/clientnegative/analyze1.q.out @@ -1 +1 @@ -FAILED: ValidationFailureSemanticException Partition spec {key=null} contains non-partition columns +FAILED: ValidationFailureSemanticException default.srcpart: Partition spec {key=null} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/archive_partspec1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/archive_partspec1.q.out b/ql/src/test/results/clientnegative/archive_partspec1.q.out index 2b44eb3..609a2ca 100644 --- a/ql/src/test/results/clientnegative/archive_partspec1.q.out +++ b/ql/src/test/results/clientnegative/archive_partspec1.q.out @@ -20,4 +20,4 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@srcpart_archived@ds=2008-04-08/hr=12 POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_archived PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -FAILED: ValidationFailureSemanticException Partition spec {ds=2008-04-08, nonexistingpart=12} contains non-partition columns +FAILED: ValidationFailureSemanticException default.srcpart_archived: Partition spec {ds=2008-04-08, nonexistingpart=12} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/dyn_part1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/dyn_part1.q.out b/ql/src/test/results/clientnegative/dyn_part1.q.out index 4c8e171..8847b8b 100644 --- a/ql/src/test/results/clientnegative/dyn_part1.q.out +++ b/ql/src/test/results/clientnegative/dyn_part1.q.out @@ -6,4 +6,4 @@ POSTHOOK: query: create table dynamic_partition (key 
string) partitioned by (val POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dynamic_partition -FAILED: ValidationFailureSemanticException Partition spec {hr=null} contains non-partition columns +FAILED: ValidationFailureSemanticException default.dynamic_partition: Partition spec {hr=null} contains non-partition columns http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/touch2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/touch2.q.out b/ql/src/test/results/clientnegative/touch2.q.out index 52e4f01..fe34886 100644 --- a/ql/src/test/results/clientnegative/touch2.q.out +++ b/ql/src/test/results/clientnegative/touch2.q.out @@ -1 +1 @@ -FAILED: ValidationFailureSemanticException table is not partitioned but partition spec exists: {ds=2008-04-08, hr=12} +FAILED: ValidationFailureSemanticException default.src table is not partitioned but partition spec exists: {ds=2008-04-08, hr=12} http://git-wip-us.apache.org/repos/asf/hive/blob/812ae738/ql/src/test/results/clientnegative/truncate_partition_column.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientnegative/truncate_partition_column.q.out b/ql/src/test/results/clientnegative/truncate_partition_column.q.out index a93ece3..e38dd4c 100644 --- a/ql/src/test/results/clientnegative/truncate_partition_column.q.out +++ b/ql/src/test/results/clientnegative/truncate_partition_column.q.out @@ -16,4 +16,4 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@test_tab@part=1 POSTHOOK: Lineage: test_tab PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_tab PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException 
org.apache.hadoop.hive.ql.metadata.Table$ValidationFailureSemanticException: table is partitioned but partition spec is not specified +FAILED: SemanticException org.apache.hadoop.hive.ql.metadata.Table$ValidationFailureSemanticException: default.test_tab table is partitioned but partition spec is not specified