Repository: hive Updated Branches: refs/heads/branch-3 665198c4c -> 40fe6c397
HIVE-19908 Block Insert Overwrite with Union All on full CRUD ACID tables using HIVE_UNION_SUBDIR_ (Eugene Koifman, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/40fe6c39 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/40fe6c39 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/40fe6c39 Branch: refs/heads/branch-3 Commit: 40fe6c397ecaee0793db70187bd7ce84b871d4fe Parents: 665198c Author: Eugene Koifman <ekoif...@apache.org> Authored: Wed Jun 20 09:07:45 2018 -0700 Committer: Eugene Koifman <ekoif...@apache.org> Committed: Wed Jun 20 09:07:45 2018 -0700 ---------------------------------------------------------------------- ql/src/java/org/apache/hadoop/hive/ql/Driver.java | 15 +++++++++++++-- .../apache/hadoop/hive/ql/plan/FileSinkDesc.java | 9 +++++++++ .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 18 ++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/40fe6c39/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index e43d1af..4a7131a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.cache.results.CacheUsage; import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache; import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache.CacheEntry; +import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.DagUtils; import org.apache.hadoop.hive.ql.exec.ExplainTask; @@ -1578,9 +1579,19 @@ public class Driver implements IDriver { Utilities.getTableName(tableInfo.getTableName())); desc.setTableWriteId(writeId); - //it's possible to have > 1 FileSink writing to the same table/partition - //e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes + /** + * it's possible to have > 1 FileSink writing to the same table/partition + * e.g. Merge stmt, multi-insert stmt when mixing DP and SP writes + * Insert ... Select ... Union All Select ... using + * {@link org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator#UNION_SUDBIR_PREFIX} + */ desc.setStatementId(queryTxnMgr.getStmtIdAndIncrement()); + String unionAllSubdir = "/" + AbstractFileMergeOperator.UNION_SUDBIR_PREFIX; + if(desc.getInsertOverwrite() && desc.getDirName().toString().contains(unionAllSubdir) && + desc.isFullAcidTable()) { + throw new UnsupportedOperationException("QueryId=" + plan.getQueryId() + + " is not supported due to OVERWRITE and UNION ALL. Please use truncate + insert"); + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/40fe6c39/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 1d05468..42b8f40 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -300,6 +300,15 @@ public class FileSinkDesc extends AbstractOperatorDesc implements IStatsGatherDe return AcidUtils.isInsertOnlyTable(getTableInfo().getProperties()); } } + public boolean isFullAcidTable() { + if(getTable() != null) { + return AcidUtils.isFullAcidTable(table); + } + else { + return AcidUtils.isTablePropertyTransactional(getTableInfo().getProperties()) && + !AcidUtils.isInsertOnlyTable(getTableInfo().getProperties()); + } + } public boolean isMaterialization() { return materialization; http://git-wip-us.apache.org/repos/asf/hive/blob/40fe6c39/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index f071531..7ab76b3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -275,6 +275,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver }; checkExpected(rs, expected, "Unexpected row count after ctas"); } + @Test + public void testInsertOverwriteToAcidWithUnionRemove() throws Exception { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE, true); + hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "none"); + d.close(); + d = new Driver(hiveConf); + int[][] values = {{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}}; + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("create table T (a int, b int) stored as ORC TBLPROPERTIES ('transactional'='true')"); + + CommandProcessorResponse cpr = runStatementOnDriverNegative( + "insert overwrite table T select a, b from " + TxnCommandsBaseForTests.Table.ACIDTBL + + " where a between 1 and 3 group by a, b union all select a, b from " + + TxnCommandsBaseForTests.Table.ACIDTBL + + " where a between 5 and 7 union all select a, b from " + + TxnCommandsBaseForTests.Table.ACIDTBL + " where a >= 9"); + Assert.assertTrue("", cpr.getErrorMessage().contains("not supported due to OVERWRITE and UNION ALL")); + } /** * The idea here is to create a non acid table that was written by multiple writers, i.e. * unbucketed table that has 000000_0 & 000001_0, for example.