[hive] branch master updated: HIVE-26177: Create a new connection pool for compaction (DataNucleus) (Antal Sinkovits, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 915a17317d HIVE-26177: Create a new connection pool for compaction (DataNucleus) (Antal Sinkovits, reviewed by Denys Kuzmenko) 915a17317d is described below commit 915a17317d13f7b2eb235b0f9fdd4fb0aaf0beb9 Author: Antal Sinkovits AuthorDate: Mon May 9 09:49:45 2022 +0200 HIVE-26177: Create a new connection pool for compaction (DataNucleus) (Antal Sinkovits, reviewed by Denys Kuzmenko) Closes #3265 --- .../ql/txn/compactor/MetaStoreCompactorThread.java | 7 +++ .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 +++ .../apache/hadoop/hive/metastore/ObjectStore.java | 4 +++- .../hive/metastore/PersistenceManagerProvider.java | 22 +- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java index 6a451d5138..4f0b324230 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java @@ -19,11 +19,13 @@ package org.apache.hadoop.hive.ql.txn.compactor; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.hive.metastore.MetaStoreThread; +import org.apache.hadoop.hive.metastore.RawStore; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.metrics.Metrics; import 
org.apache.hadoop.hive.metastore.txn.CompactionInfo; import org.apache.hadoop.hive.metastore.txn.TxnStore; @@ -39,6 +41,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import static org.apache.hadoop.hive.metastore.HMSHandler.getMSForConf; +import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.COMPACTOR_USE_CUSTOM_POOL; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; /** @@ -56,6 +59,10 @@ public class MetaStoreCompactorThread extends CompactorThread implements MetaSto // Get our own instance of the transaction handler txnHandler = TxnUtils.getTxnStore(conf); +// Initialize the RawStore, with the flag marked as true. Since its stored as a ThreadLocal variable in the +// HMSHandlerContext, it will use the compactor related pool. +MetastoreConf.setBoolVar(conf, COMPACTOR_USE_CUSTOM_POOL, true); +getMSForConf(conf); } @Override Table resolveTable(CompactionInfo ci) throws MetaException { diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index a2f0e17a75..04f94212a8 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -438,6 +438,9 @@ public class MetastoreConf { COMPACTOR_RUN_AS_USER("metastore.compactor.run.as.user", "hive.compactor.run.as.user", "", "Specify the user to run compactor Initiator and Worker as. If empty string, defaults to table/partition " + "directory owner."), +COMPACTOR_USE_CUSTOM_POOL("metastore.compactor.use.custom.pool", "hive.compactor.use.custom.pool", +false, "internal usage only -- use custom connection pool specific to compactor components." 
+), COMPACTOR_OLDEST_REPLICATION_OPENTXN_THRESHOLD_WARNING( "metastore.compactor.oldest.replication.open.txn.threshold.warning", "hive.compactor.oldest.replication.open.txn.threshold.warning", diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 9b7d28bfd0..9b5ef82fdc 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -19,6 +19,7 @@ package org.apac
[hive] branch master updated: HIVE-26176: Create a new connection pool for compaction (CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter Vary)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1db1da8042 HIVE-26176: Create a new connection pool for compaction (CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter Vary) 1db1da8042 is described below commit 1db1da804271f65d4a63572cdd48f24425c1615c Author: Antal Sinkovits AuthorDate: Thu Apr 28 12:33:24 2022 +0200 HIVE-26176: Create a new connection pool for compaction (CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter Vary) Closes #3223 --- .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 + .../metastore/datasource/DataSourceProvider.java | 12 +++- .../datasource/DbCPDataSourceProvider.java | 5 +- .../datasource/HikariCPDataSourceProvider.java | 11 ++-- .../hive/metastore/txn/CompactionTxnHandler.java | 67 +- .../hadoop/hive/metastore/txn/TxnHandler.java | 11 ++-- 6 files changed, 65 insertions(+), 44 deletions(-) diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 6b337aab94..a2f0e17a75 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -625,6 +625,9 @@ public class MetastoreConf { "hive.compactor.cleaner.retry.retentionTime", 300, TimeUnit.SECONDS, new TimeValidator(TimeUnit.SECONDS), "Initial value of the cleaner retry retention time. 
The delay has a backoff, and calculated the following way: " + "pow(2, number_of_failed_attempts) * HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME."), + HIVE_COMPACTOR_CONNECTION_POOLING_MAX_CONNECTIONS("metastore.compactor.connectionPool.maxPoolSize", +"hive.compactor.connectionPool.maxPoolSize", 10, +"Specify the maximum number of connections in the connection pool used by the compactor."), CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", "Driver class name for a JDBC metastore"), diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java index 29633b500d..4e5803ead0 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java @@ -32,7 +32,17 @@ public interface DataSourceProvider { * @param hdpConfig * @return the new connection pool */ - DataSource create(Configuration hdpConfig) throws SQLException; + default DataSource create(Configuration hdpConfig) throws SQLException { +int maxPoolSize = MetastoreConf.getIntVar(hdpConfig, MetastoreConf.ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS); +return create(hdpConfig, maxPoolSize); + } + + /** + * @param hdpConfig + * @param maxPoolSize the maximum size of the connection pool + * @return the new connection pool + */ + DataSource create(Configuration hdpConfig, int maxPoolSize) throws SQLException; /** * Get the declared pooling type string. 
This is used to check against the constant in diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java index 4069a9350c..476a3d846f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java @@ -57,7 +57,7 @@ public class DbCPDataSourceProvider implements DataSourceProvider { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override - public DataSource create(Configuration hdpConfig) throws SQLException { + public DataSource create(Configuration hdpConfig, int maxPoolSize) throws SQLException { LOG.debug("Creating dbcp connection pool for the MetaStore"); String driverUrl = DataSourceP
[hive] branch master updated: HIVE-25492: Major query-based compaction is skipped if partition is empty (Antal Sinkovits, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 93c186e675 HIVE-25492: Major query-based compaction is skipped if partition is empty (Antal Sinkovits, reviewed by Denys Kuzmenko) 93c186e675 is described below commit 93c186e675bdeecd89322f58f15f9c62d4935ed3 Author: Antal Sinkovits AuthorDate: Mon Apr 11 12:12:54 2022 +0200 HIVE-25492: Major query-based compaction is skipped if partition is empty (Antal Sinkovits, reviewed by Denys Kuzmenko) Closes #3157 --- .../ql/txn/compactor/TestCrudCompactorOnTez.java | 97 - .../org/apache/hadoop/hive/ql/io/AcidUtils.java| 68 -- .../hadoop/hive/ql/txn/compactor/Cleaner.java | 2 +- .../hive/ql/txn/compactor/MajorQueryCompactor.java | 2 +- .../apache/hadoop/hive/ql/TestTxnCommands2.java| 230 - 5 files changed, 372 insertions(+), 27 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java index 8e75a793a0..f409ec2efe 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java @@ -28,9 +28,12 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; 
@@ -48,7 +51,6 @@ import org.apache.hadoop.hive.metastore.txn.CompactionInfo; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.DriverFactory; -import org.apache.hadoop.hive.ql.TxnCommandsBaseForTests; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook; @@ -73,7 +75,6 @@ import org.apache.tez.dag.history.logging.proto.ProtoMessageReader; import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; -import org.mockito.Mockito; import org.mockito.internal.util.reflection.FieldSetter; import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker; @@ -2209,4 +2210,96 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { qc.runCompactionQueries(hiveConf, null, sdMock, null, ciMock, null, emptyQueries, emptyQueries, emptyQueries); Assert.assertEquals("none", hiveConf.getVar(HiveConf.ConfVars.LLAP_IO_ETL_SKIP_FORMAT)); } + + @Test + public void testIfEmptyBaseIsPresentAfterCompaction() throws Exception { +String dbName = "default"; +String tblName = "empty_table"; + +// Setup of LOAD INPATH scenario. 
+executeStatementOnDriver("drop table if exists " + tblName, driver); +executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " + +"TBLPROPERTIES ('transactional'='true')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('a')", driver); +executeStatementOnDriver("delete from " + tblName + " where a='a'", driver); + +// Run a query-based MAJOR compaction +CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true); +// Clean up resources +CompactorTestUtil.runCleaner(conf); + +IMetaStoreClient hmsClient = new HiveMetaStoreClient(conf); +Table table = hmsClient.getTable(dbName, tblName); +FileSystem fs = FileSystem.get(conf); + +FileStatus[] fileStatuses = fs.listStatus(new Path(table.getSd().getLocation())); +// There should be only dir +Assert.assertEquals(1, fileStatuses.length); +Path basePath = fileStatuses[0].getPath(); +// And it's a base +Assert.assertTrue(AcidUtils.baseFileFilter.accept(basePath)); +RemoteIterator filesInBase = fs.listFiles(basePath, true); +// It has no files in it +Assert.assertFalse(filesInBase.hasNext()); + } + + @Test + public void testNonAcidToAcidConversionWithNestedTableWithUnionSubdir() throws Exception { +String dbName = "default"; + +// Helper table for the union all insert +String helperTblName = "
[hive] branch master updated: HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 1662a90 HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko) 1662a90 is described below commit 1662a902c98404946582709acba42de3252c4ac0 Author: Antal Sinkovits AuthorDate: Sat Mar 26 14:53:38 2022 +0100 HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko) Closes #3128 --- .../hive/ql/txn/compactor/TestCompactor.java | 78 ++ .../org/apache/hadoop/hive/ql/io/AcidUtils.java| 17 - .../org/apache/hadoop/hive/ql/metadata/Hive.java | 2 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java index f2da316..16b5957 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java @@ -2517,6 +2517,84 @@ public class TestCompactor { Assert.assertEquals("2\t55\t66", valuesReadFromHiveDriver.get(1)); } + @Test + public void testAcidDirCacheOnDropTable() throws Exception { +int cacheDurationInMinutes = 10; +AcidUtils.initDirCache(cacheDurationInMinutes); +HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); +String dbName = "default"; +String tblName = "adc_table"; + +// First phase, populate the cache +executeStatementOnDriver("drop table if exists " + tblName, driver); +executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " + +"TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver); +executeStatementOnDriver("insert into " + tblName 
+ " values ('a')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('b')", driver); +runMajorCompaction(dbName, tblName); +runCleaner(conf); + +HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes); +executeStatementOnDriver("select * from " + tblName + " order by a", driver); + +// Second phase, the previous data should be cleaned +executeStatementOnDriver("drop table if exists " + tblName, driver); +executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " + +"TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('c')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('d')", driver); +runMajorCompaction(dbName, tblName); +runCleaner(conf); + +HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes); +List rs = execSelectAndDumpData("select * from " + tblName + " order by a", driver, "select"); +Assert.assertEquals(2, rs.size()); +Assert.assertEquals("c", rs.get(0)); +Assert.assertEquals("d", rs.get(1)); + } + + @Test + public void testAcidDirCacheOnDropPartitionedTable() throws Exception { +int cacheDurationInMinutes = 10; +AcidUtils.initDirCache(cacheDurationInMinutes); +HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); +String dbName = "default"; +String tblName = "adc_part_table"; + +// First phase, populate the cache +executeStatementOnDriver("drop table if exists " + tblName, driver); +executeStatementOnDriver("create table " + tblName + " (a string) PARTITIONED BY (p string) stored as orc " + +"TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('a', 'p1')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('b', 'p1')", driver); 
+executeStatementOnDriver("insert into " + tblName + " values ('a', 'p2')", driver); +executeStatementOnDriver("insert into " + tblName + " values ('b', 'p2')", driver); +runMajorCompaction(dbName, tblName, "p=p1", "p=p2"); +runCleaner(conf); + +HiveConf.setIntVar(driver.getConf(), ConfVars.
[hive] branch master updated: HIVE-26048: Missing quotation mark in findReadyToClean query (Antal Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 79679a9 HIVE-26048: Missing quotation mark in findReadyToClean query (Antal Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko) 79679a9 is described below commit 79679a96349dbd0255dd126b588350436ec778ff Author: Antal Sinkovits AuthorDate: Mon Mar 21 13:20:41 2022 +0100 HIVE-26048: Missing quotation mark in findReadyToClean query (Antal Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko) Closes #3118 --- .../java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java index 439795a..8585379 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java @@ -353,7 +353,7 @@ class CompactionTxnHandler extends TxnHandler { if (minOpenTxnWaterMark > 0) { whereClause += " AND (\"CQ_NEXT_TXN_ID\" <= " + minOpenTxnWaterMark + " OR \"CQ_NEXT_TXN_ID\" IS NULL)"; } -whereClause += " AND (\"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) + " - CQ_RETRY_RETENTION - " + retentionTime + ") OR \"CQ_COMMIT_TIME\" IS NULL)"; +whereClause += " AND (\"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) + " - \"CQ_RETRY_RETENTION\" - " + retentionTime + ") OR \"CQ_COMMIT_TIME\" IS NULL)"; String s = "SELECT \"CQ_ID\", \"cq1\".\"CQ_DATABASE\", \"cq1\".\"CQ_TABLE\", \"cq1\".\"CQ_PARTITION\"," + " \"CQ_TYPE\", \"CQ_RUN_AS\", \"CQ_HIGHEST_WRITE_ID\", 
\"CQ_TBLPROPERTIES\", \"CQ_RETRY_RETENTION\" " + " FROM \"COMPACTION_QUEUE\" \"cq1\" " +
[hive] branch master updated (f15e67b -> 655006a)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from f15e67b HIVE-24949: Fail to rename a partition with customized catalog (#2910) (Zhihua Deng reviewed by Zoltan Haindrich) add 655006a HIVE-25986: Statement id is incorrect in case of load in path to MM table (Antal Sinkovits, reviewed by Peter Vary) No new revisions were added by this update. Summary of changes: .../apache/hadoop/hive/ql/TestLocationQueries.java | 49 ++ .../java/org/apache/hadoop/hive/ql/QueryPlan.java | 8 +++- .../org/apache/hadoop/hive/ql/exec/MoveTask.java | 4 +- .../test/queries/clientpositive/acid_load_data.q | 6 +++ .../clientpositive/llap/acid_load_data.q.out | 16 +++ 5 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/acid_load_data.q create mode 100644 ql/src/test/results/clientpositive/llap/acid_load_data.q.out
[hive] branch master updated: HIVE-24805: Compactor: Initiator shouldn't fetch table details again and again for partitioned tables (Antal Sinkovits, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 718e19e HIVE-24805: Compactor: Initiator shouldn't fetch table details again and again for partitioned tables (Antal Sinkovits, reviewed by Denys Kuzmenko) 718e19e is described below commit 718e19ecabf09c4e11b1a4addd9ab78f12187961 Author: Antal Sinkovits AuthorDate: Wed Jan 19 16:21:10 2022 +0100 HIVE-24805: Compactor: Initiator shouldn't fetch table details again and again for partitioned tables (Antal Sinkovits, reviewed by Denys Kuzmenko) Closes #2906 --- .../hadoop/hive/ql/txn/compactor/Initiator.java| 29 -- .../hive/ql/txn/compactor/TestInitiator.java | 46 ++ .../hadoop/hive/metastore/conf/MetastoreConf.java | 3 ++ 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java index 5b31d97..880e5e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.txn.compactor; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import com.google.common.collect.Sets; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -68,7 +70,9 @@ import java.util.HashMap; import java.util.List; import java.util.LongSummaryStatistics; import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; @@ -90,6 +94,7 @@ public 
class Initiator extends MetaStoreCompactorThread { private long checkInterval; private ExecutorService compactionExecutor; + private Optional<Cache<String, Table>> tableCache = Optional.empty(); @Override public void run() { @@ -142,6 +147,9 @@ public class Initiator extends MetaStoreCompactorThread { final ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest()); + // Currently we invalidate all entries after each cycle, because the bootstrap replication is marked via + // table property hive.repl.first.inc.pending which would be cached. + tableCache.ifPresent(c -> c.invalidateAll()); Set<String> skipDBs = Sets.newConcurrentHashSet(); Set<String> skipTables = Sets.newConcurrentHashSet(); @@ -164,7 +172,7 @@ public class Initiator extends MetaStoreCompactorThread { for (CompactionInfo ci : potentials) { try { - Table t = resolveTable(ci); + Table t = resolveTableAndCache(ci); Partition p = resolvePartition(ci); if (p == null && ci.partName != null) { LOG.info("Can't find partition " + ci.getFullPartitionName() + @@ -242,6 +250,17 @@ public class Initiator extends MetaStoreCompactorThread { } } + private Table resolveTableAndCache(CompactionInfo ci) throws MetaException { +if (tableCache.isPresent()) { + try { +return tableCache.get().get(ci.getFullTableName(), () -> resolveTable(ci)); + } catch (ExecutionException e) { +throw (MetaException) e.getCause(); + } +} +return resolveTable(ci); + } + private ValidWriteIdList resolveValidWriteIds(Table t) throws NoSuchTxnException, MetaException { ValidTxnList validTxnList = new ValidReadTxnList(conf.get(ValidTxnList.VALID_TXNS_KEY)); // The response will have one entry per table and hence we get only one ValidWriteIdList @@ -275,6 +294,11 @@ public class Initiator extends MetaStoreCompactorThread { compactionExecutor = CompactorUtil.createExecutorWithThreadFactory( conf.getIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_REQUEST_QUEUE), COMPACTOR_INTIATOR_THREAD_NAME_FORMAT); +boolean tableCacheOn = MetastoreConf.getBoolVar(conf, 
+MetastoreConf.ConfVars.COMPACTOR_INITIATOR_TABLECACHE_ON); +if (tableCacheOn) { + this.tableCache = Optional.of(CacheBuilder.newBuilder().softValues().build()); +} } private void recoverFailedCompactions(boolean remoteOnly) throws MetaException { @@ -524,8 +548,7 @@ public class Initiator extends MetaStoreCompactorThread { return false; } - //TODO: avoid repeated HMS lookup for same table (e.g partitions within table) - Table t = resolveTable(ci); + Table t = resolveTableAndCache(ci); if (t == null) {
[hive] branch master updated (5b112aa -> eabe8ae)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 5b112aa HIVE-24893: Add UploadData and DownloadData to TCLIService.thrift (#2878) add eabe8ae HIVE-25862: Persist the time of last run of the initiator (Antal Sinkovits, reviewed by Denys Kuzmenko) No new revisions were added by this update. Summary of changes: .../hadoop/hive/ql/txn/compactor/Initiator.java| 8 ++-- .../hadoop/hive/metastore/txn/TxnHandler.java | 45 -- .../apache/hadoop/hive/metastore/txn/TxnStore.java | 10 + 3 files changed, 48 insertions(+), 15 deletions(-)
[hive] branch master updated: HIVE-25716: Fix of flaky test TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by Denys Kuzmenko and Antal Sinkovits)
This is an automated email from the ASF dual-hosted git repository. asinkovits pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 4566f07 HIVE-25716: Fix of flaky test TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by Denys Kuzmenko and Antal Sinkovits) 4566f07 is described below commit 4566f076a2b3d6d87b258570a373dd60f7152786 Author: Viktor Csomor AuthorDate: Thu Dec 9 11:15:45 2021 +0100 HIVE-25716: Fix of flaky test TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by Denys Kuzmenko and Antal Sinkovits) Closes #2837 --- .../hive/ql/txn/compactor/CompactorTest.java | 10 ++-- .../ql/txn/compactor/TestCompactionMetrics.java| 60 ++ 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java index 5dc01f9..c2b78ad 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java @@ -292,7 +292,8 @@ public abstract class CompactorTest { burnThroughTransactions(dbName, tblName, num, open, aborted, null); } - protected void burnThroughTransactions(String dbName, String tblName, int num, Set open, Set aborted, LockRequest lockReq) + protected void burnThroughTransactions(String dbName, String tblName, int num, Set open, Set aborted, + LockRequest lockReq) throws MetaException, NoSuchTxnException, TxnAbortedException { OpenTxnsResponse rsp = txnHandler.openTxns(new OpenTxnRequest(num, "me", "localhost")); AllocateTableWriteIdsRequest awiRqst = new AllocateTableWriteIdsRequest(dbName, tblName); @@ -300,14 +301,15 @@ public abstract class CompactorTest { AllocateTableWriteIdsResponse awiResp = txnHandler.allocateTableWriteIds(awiRqst); int i = 0; for (long tid : 
rsp.getTxn_ids()) { - assert(awiResp.getTxnToWriteIds().get(i++).getTxnId() == tid); - if(lockReq != null) { + assert (awiResp.getTxnToWriteIds().get(i).getTxnId() == tid); + ++i; + if (lockReq != null) { lockReq.setTxnid(tid); txnHandler.lock(lockReq); } if (aborted != null && aborted.contains(tid)) { txnHandler.abortTxn(new AbortTxnRequest(tid)); - } else if (open == null || (open != null && !open.contains(tid))) { + } else if (open == null || !open.contains(tid)) { txnHandler.commitTxn(new CommitTxnRequest(tid)); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java index 75c722b..eda09c1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java @@ -179,41 +179,49 @@ public class TestCompactionMetrics extends CompactorTest { } @Test - @org.junit.Ignore("HIVE-25716") public void testOldestReadyForCleaningAge() throws Exception { conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 1); -long oldStart = System.currentTimeMillis(); -Table old = newTable("default", "old_rfc", true); -Partition oldP = newPartition(old, "part"); +final String DB_NAME = "default"; +final String OLD_TABLE_NAME = "old_rfc"; +final String OLD_PARTITION_NAME = "part"; +final String YOUNG_TABLE_NAME = "young_rfc"; +final String YOUNG_PARTITION_NAME = "part"; + +long oldTableStart = System.currentTimeMillis(); +Table old = newTable(DB_NAME, OLD_TABLE_NAME, true); +Partition oldP = newPartition(old, OLD_PARTITION_NAME); addBaseFile(old, oldP, 20L, 20); addDeltaFile(old, oldP, 21L, 22L, 2); addDeltaFile(old, oldP, 23L, 24L, 2); -burnThroughTransactions("default", "old_rfc", 25); -CompactionRequest rqst = new CompactionRequest("default", "old_rfc", CompactionType.MINOR); -rqst.setPartitionname("ds=part"); -txnHandler.compact(rqst); -startWorker(); 
+burnThroughTransactions(DB_NAME, OLD_TABLE_NAME, 25); +doCompaction(DB_NAME, OLD_TABLE_NAME, OLD_PARTITION_NAME, CompactionType.MINOR); +long oldTableEnd = System.currentTimeMillis(); -long youngStart = System.currentTimeMillis(); -Table young = newTable("default", "young_rfc", true); -Partition youngP = newPartition(young, "part"); +Table young = newTable(DB_NAME, YOUNG_TABLE_NAME, true); +Partition youngP = newPartition(young, YOUNG_PA