This is an automated email from the ASF dual-hosted git repository. lpinter pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 29d6967 HIVE-23109: Query-based compaction omits database (Karen Coppage, reviewed by Laszlo Pinter) 29d6967 is described below commit 29d696742d7f5097aa2aa3a085471d118e5e4fb7 Author: Karen Coppage <karen.copp...@cloudera.com> AuthorDate: Thu Apr 2 19:20:12 2020 +0200 HIVE-23109: Query-based compaction omits database (Karen Coppage, reviewed by Laszlo Pinter) --- .../hive/ql/txn/compactor/CompactorOnTezTest.java | 58 ++++++++++++++++++--- .../ql/txn/compactor/TestCrudCompactorOnTez.java | 60 ++++++++++++++++++++++ .../ql/txn/compactor/TestMmCompactorOnTez.java | 43 ++++++++++++++++ .../ql/txn/compactor/CompactionQueryBuilder.java | 21 +++++--- .../hive/ql/txn/compactor/MajorQueryCompactor.java | 1 - .../hive/ql/txn/compactor/MinorQueryCompactor.java | 2 +- .../ql/txn/compactor/MmMajorQueryCompactor.java | 1 - .../ql/txn/compactor/MmMinorQueryCompactor.java | 2 +- 8 files changed, 170 insertions(+), 18 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java index 78174f3..05d72ba 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java @@ -110,7 +110,12 @@ public class CompactorOnTezTest { void createFullAcidTable(String tblName, boolean isPartitioned, boolean isBucketed) throws Exception { - createTable(tblName, isPartitioned, isBucketed, false, "orc"); + createFullAcidTable(null, tblName, isPartitioned, isBucketed); + } + + void createFullAcidTable(String dbName, String tblName, boolean isPartitioned, boolean isBucketed) + throws Exception { + createTable(dbName, tblName, isPartitioned, isBucketed, false, "orc"); } void createMmTable(String tblName, boolean isPartitioned, boolean isBucketed) @@ -120,12 +125,19 @@ public class CompactorOnTezTest { void createMmTable(String tblName, boolean isPartitioned, boolean isBucketed, String fileFormat) throws Exception { - createTable(tblName, isPartitioned, isBucketed, true, fileFormat); + createMmTable(null, tblName, isPartitioned, isBucketed, fileFormat); } - private void createTable(String tblName, boolean isPartitioned, boolean isBucketed, - boolean insertOnly, String fileFormat) throws Exception { + void createMmTable(String dbName, String tblName, boolean isPartitioned, boolean isBucketed, String fileFormat) + throws Exception { + createTable(dbName, tblName, isPartitioned, isBucketed, true, fileFormat); + } + private void createTable(String dbName, String tblName, boolean isPartitioned, boolean isBucketed, + boolean insertOnly, String fileFormat) throws Exception { + if (dbName != null) { + tblName = dbName + "." + tblName; + } executeStatementOnDriver("drop table if exists " + tblName, driver); StringBuilder query = new StringBuilder(); query.append("create table ").append(tblName).append(" (a string, b int)"); @@ -145,6 +157,11 @@ public class CompactorOnTezTest { executeStatementOnDriver(query.toString(), driver); } + void createDb(String dbName) throws Exception { + executeStatementOnDriver("drop database if exists " + dbName + " cascade", driver); + executeStatementOnDriver("create database " + dbName, driver); + } + /** * 5 txns. */ @@ -178,7 +195,17 @@ public class CompactorOnTezTest { /** * 5 txns. */ - protected void insertTestData(String tblName) throws Exception { + void insertTestData(String tblName) throws Exception { + insertTestData(null, tblName); + } + + /** + * 5 txns. + */ + void insertTestData(String dbName, String tblName) throws Exception { + if (dbName != null) { + tblName = dbName + "." + tblName; + } executeStatementOnDriver("insert into " + tblName + " values('1',2),('1',3),('1',4),('2',2),('2',3),('2',4)", driver); executeStatementOnDriver("insert into " + tblName + " values('3',2),('3',3),('3',4),('4',2),('4',3),('4',4)", @@ -190,9 +217,19 @@ public class CompactorOnTezTest { } /** + * 5 txns. + */ + void insertMmTestData(String tblName) throws Exception { + insertMmTestData(null, tblName); + } + + /** * 3 txns. */ - protected void insertMmTestData(String tblName) throws Exception { + void insertMmTestData(String dbName, String tblName) throws Exception { + if (dbName != null) { + tblName = dbName + "." + tblName; + } executeStatementOnDriver("insert into " + tblName + " values('1',2),('1',3),('1',4),('2',2),('2',3),('2',4)", driver); executeStatementOnDriver("insert into " + tblName + " values('3',2),('3',3),('3',4),('4',2),('4',3),('4',4)", @@ -222,7 +259,14 @@ public class CompactorOnTezTest { } } - protected List<String> getAllData(String tblName) throws Exception { + List<String> getAllData(String tblName) throws Exception { + return getAllData(null, tblName); + } + + List<String> getAllData(String dbName, String tblName) throws Exception { + if (dbName != null) { + tblName = dbName + "." + tblName; + } List<String> result = executeStatementOnDriverAndReturnResults("select * from " + tblName, driver); Collections.sort(result); return result; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java index 9659a3f..89920cc 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java @@ -27,12 +27,14 @@ import com.google.common.collect.Lists; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.ShowCompactRequest; import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement; @@ -920,6 +922,64 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { executeStatementOnDriver("drop table " + tblName, driver); } + @Test public void testMajorCompactionDb() throws Exception { + testCompactionDb(CompactionType.MAJOR, "base_0000005_v0000011"); + } + + @Test public void testMinorCompactionDb() throws Exception { + testCompactionDb(CompactionType.MINOR, "delta_0000001_0000005_v0000011"); + } + + /** + * Make sure db is specified in compaction queries. + */ + private void testCompactionDb(CompactionType compactionType, String resultDirName) + throws Exception { + String dbName = "myDb"; + String tableName = "testCompactionDb"; + // Create test table + TestDataProvider dataProvider = new TestDataProvider(); + dataProvider.createDb(dbName); + dataProvider.createFullAcidTable(dbName, tableName, false, false); + // Find the location of the table + IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf); + Table table = metaStoreClient.getTable(dbName, tableName); + FileSystem fs = FileSystem.get(conf); + // Insert test data into test table + dataProvider.insertTestData(dbName, tableName); + // Get all data before compaction is run + List<String> expectedData = dataProvider.getAllData(dbName, tableName); + Collections.sort(expectedData); + // Run a compaction + CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true); + CompactorTestUtil.runCleaner(conf); + verifySuccessulTxn(1); + // Verify directories after compaction + PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : + AcidUtils.deltaFileFilter; + Assert.assertEquals("Result directory does not match after " + compactionType.name() + + " compaction", Collections.singletonList(resultDirName), + CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null)); + // Verify all contents + List<String> actualData = dataProvider.getAllData(dbName, tableName); + Assert.assertEquals(expectedData, actualData); + } + + /** + * Verify that the expected number of transactions have run, and their state is "succeeded". + * + * @param expectedCompleteCompacts number of compactions already run + * @throws MetaException + */ + private void verifySuccessulTxn(int expectedCompleteCompacts) throws MetaException { + List<ShowCompactResponseElement> compacts = + TxnUtils.getTxnStore(conf).showCompact(new ShowCompactRequest()).getCompacts(); + Assert.assertEquals("Completed compaction queue must contain one element", + expectedCompleteCompacts, compacts.size()); + compacts.forEach( + c -> Assert.assertEquals("Compaction state is not succeeded", "succeeded", c.getState())); + } + /** * Tests whether hive.llap.io.etl.skip.format config is handled properly whenever QueryCompactor#runCompactionQueries * is invoked. diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestMmCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestMmCompactorOnTez.java index 43a216b..2c717b7 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestMmCompactorOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestMmCompactorOnTez.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -527,6 +528,48 @@ public class TestMmCompactorOnTez extends CompactorOnTezTest { verifyAllContents(tableName, testDataProvider, expectedData); } + @Test public void testMmMajorCompactionDb() throws Exception { + testMmCompactionDb(CompactionType.MAJOR, "base_0000003_v0000009"); + } + + @Test public void testMmMinorCompactionDb() throws Exception { + testMmCompactionDb(CompactionType.MINOR, "delta_0000001_0000003_v0000009"); + } + + /** + * Make sure db is specified in compaction queries. + */ + private void testMmCompactionDb(CompactionType compactionType, String resultDirName) throws Exception { + String dbName = "myDb"; + String tableName = "testMmCompactionDb"; + // Create test table + TestDataProvider dataProvider = new TestDataProvider(); + dataProvider.createDb(dbName); + dataProvider.createMmTable(dbName, tableName, false, false, "orc"); + // Find the location of the table + IMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf); + Table table = metaStoreClient.getTable(dbName, tableName); + FileSystem fs = FileSystem.get(conf); + // Insert test data into test table + dataProvider.insertMmTestData(dbName, tableName); + // Get all data before compaction is run + List<String> expectedData = dataProvider.getAllData(dbName, tableName); + Collections.sort(expectedData); + // Run a compaction + CompactorTestUtil.runCompaction(conf, dbName, tableName, compactionType, true); + CompactorTestUtil.runCleaner(conf); + verifySuccessulTxn(1); + // Verify directories after compaction + PathFilter pathFilter = compactionType == CompactionType.MAJOR ? AcidUtils.baseFileFilter : + AcidUtils.deltaFileFilter; + Assert.assertEquals("Result directories does not match after " + compactionType.name() + + " compaction", Collections.singletonList(resultDirName), + CompactorTestUtil.getBaseOrDeltaNames(fs, pathFilter, table, null)); + List<String> actualData = dataProvider.getAllData(dbName, tableName); + Collections.sort(actualData); + Assert.assertEquals(expectedData, actualData); + } + /** * Verify that the expected number of transactions have run, and their state is "succeeded". * diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java index 6b3a4db..193f832 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionQueryBuilder.java @@ -46,7 +46,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -68,7 +67,7 @@ class CompactionQueryBuilder { private ValidWriteIdList validWriteIdList; // for Alter/Insert in minor and CRUD private AcidUtils.Directory dir; // for Alter in minor private Partition sourcePartition; // for Insert in major and insert-only minor - private String fromTableName; // for Insert + private String sourceTabForInsert; // for Insert // settable booleans private boolean isPartitioned; // for Create @@ -158,10 +157,10 @@ class CompactionQueryBuilder { * Set table to select from. * Required for Insert operations. * - * @param fromTableName name of table to select from, not null + * @param sourceTabForInsert name of table to select from, not null */ - CompactionQueryBuilder setFromTableName(String fromTableName) { - this.fromTableName = fromTableName; + CompactionQueryBuilder setSourceTabForInsert(String sourceTabForInsert) { + this.sourceTabForInsert = sourceTabForInsert; return this; } @@ -247,8 +246,8 @@ class CompactionQueryBuilder { case INSERT: query.append(" select "); buildSelectClauseForInsert(query); - query.append(" from ") - .append(fromTableName); + query.append(" from "); + getSourceForInsert(query); buildWhereClauseForInsert(query); break; case DROP: @@ -326,6 +325,14 @@ class CompactionQueryBuilder { } } + private void getSourceForInsert(StringBuilder query) { + if (sourceTabForInsert != null) { + query.append(sourceTabForInsert); + } else { + query.append(sourceTab.getDbName()).append(".").append(sourceTab.getTableName()); + } + } + private void buildWhereClauseForInsert(StringBuilder query) { if (major && sourcePartition != null && sourceTab != null) { List<String> vals = sourcePartition.getValues(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MajorQueryCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MajorQueryCompactor.java index f47c23a..c70d4f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MajorQueryCompactor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MajorQueryCompactor.java @@ -98,7 +98,6 @@ final class MajorQueryCompactor extends QueryCompactor { CompactionQueryBuilder.CompactionType.MAJOR_CRUD, CompactionQueryBuilder.Operation.INSERT, tmpName) - .setFromTableName(t.getTableName()) .setSourceTab(t) .setSourcePartition(p) .build()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MinorQueryCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MinorQueryCompactor.java index 1bf0bee..4d0e5f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MinorQueryCompactor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MinorQueryCompactor.java @@ -221,7 +221,7 @@ final class MinorQueryCompactor extends QueryCompactor { CompactionQueryBuilder.CompactionType.MINOR_CRUD, CompactionQueryBuilder.Operation.INSERT, resultTableName) - .setFromTableName(sourceTableName) + .setSourceTabForInsert(sourceTableName) .setSourceTab(table) .setValidWriteIdList(validWriteIdList) .build(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMajorQueryCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMajorQueryCompactor.java index 114b6f7..724a437 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMajorQueryCompactor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMajorQueryCompactor.java @@ -124,7 +124,6 @@ final class MmMajorQueryCompactor extends QueryCompactor { CompactionQueryBuilder.Operation.INSERT, tmpName) .setSourceTab(t) - .setFromTableName(t.getTableName()) .setSourcePartition(p) .build() ); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMinorQueryCompactor.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMinorQueryCompactor.java index 383891b..1cd95f80 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMinorQueryCompactor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MmMinorQueryCompactor.java @@ -182,7 +182,7 @@ final class MmMinorQueryCompactor extends QueryCompactor { CompactionQueryBuilder.CompactionType.MINOR_INSERT_ONLY, CompactionQueryBuilder.Operation.INSERT, resultTmpTableName) - .setFromTableName(sourceTmpTableName) + .setSourceTabForInsert(sourceTmpTableName) .setSourceTab(sourceTable) .build() );