[hive] branch master updated: HIVE-26177: Create a new connection pool for compaction (DataNucleus) (Antal Sinkovits, reviewed by Denys Kuzmenko)

2022-05-09 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 915a17317d HIVE-26177: Create a new connection pool for compaction 
(DataNucleus) (Antal Sinkovits, reviewed by Denys Kuzmenko)
915a17317d is described below

commit 915a17317d13f7b2eb235b0f9fdd4fb0aaf0beb9
Author: Antal Sinkovits 
AuthorDate: Mon May 9 09:49:45 2022 +0200

HIVE-26177: Create a new connection pool for compaction (DataNucleus) 
(Antal Sinkovits, reviewed by Denys Kuzmenko)

Closes #3265
---
 .../ql/txn/compactor/MetaStoreCompactorThread.java |  7 +++
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  3 +++
 .../apache/hadoop/hive/metastore/ObjectStore.java  |  4 +++-
 .../hive/metastore/PersistenceManagerProvider.java | 22 +-
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java
index 6a451d5138..4f0b324230 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/MetaStoreCompactorThread.java
@@ -19,11 +19,13 @@ package org.apache.hadoop.hive.ql.txn.compactor;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.hive.metastore.MetaStoreThread;
+import org.apache.hadoop.hive.metastore.RawStore;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.metrics.Metrics;
 import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
@@ -39,6 +41,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.apache.hadoop.hive.metastore.HMSHandler.getMSForConf;
+import static 
org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.COMPACTOR_USE_CUSTOM_POOL;
 import static 
org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
 
 /**
@@ -56,6 +59,10 @@ public class MetaStoreCompactorThread extends 
CompactorThread implements MetaSto
 
 // Get our own instance of the transaction handler
 txnHandler = TxnUtils.getTxnStore(conf);
+// Initialize the RawStore, with the flag marked as true. Since it's stored 
as a ThreadLocal variable in the
+// HMSHandlerContext, it will use the compactor related pool.
+MetastoreConf.setBoolVar(conf, COMPACTOR_USE_CUSTOM_POOL, true);
+getMSForConf(conf);
   }
 
   @Override Table resolveTable(CompactionInfo ci) throws MetaException {
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index a2f0e17a75..04f94212a8 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -438,6 +438,9 @@ public class MetastoreConf {
 COMPACTOR_RUN_AS_USER("metastore.compactor.run.as.user", 
"hive.compactor.run.as.user", "",
 "Specify the user to run compactor Initiator and Worker as. If empty 
string, defaults to table/partition " +
 "directory owner."),
+COMPACTOR_USE_CUSTOM_POOL("metastore.compactor.use.custom.pool", 
"hive.compactor.use.custom.pool",
+false, "internal usage only -- use custom connection pool specific 
to compactor components."
+),
 COMPACTOR_OLDEST_REPLICATION_OPENTXN_THRESHOLD_WARNING(
 "metastore.compactor.oldest.replication.open.txn.threshold.warning",
 "hive.compactor.oldest.replication.open.txn.threshold.warning",
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 9b7d28bfd0..9b5ef82fdc 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -19,6 +19,7 @@
 package org.apac

[hive] branch master updated: HIVE-26176: Create a new connection pool for compaction (CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter Vary)

2022-04-28 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1db1da8042 HIVE-26176: Create a new connection pool for compaction 
(CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter 
Vary)
1db1da8042 is described below

commit 1db1da804271f65d4a63572cdd48f24425c1615c
Author: Antal Sinkovits 
AuthorDate: Thu Apr 28 12:33:24 2022 +0200

HIVE-26176: Create a new connection pool for compaction 
(CompactionTxnHandler) (Antal Sinkovits, reviewed by Denys Kuzmenko and Peter 
Vary)

Closes #3223
---
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  3 +
 .../metastore/datasource/DataSourceProvider.java   | 12 +++-
 .../datasource/DbCPDataSourceProvider.java |  5 +-
 .../datasource/HikariCPDataSourceProvider.java | 11 ++--
 .../hive/metastore/txn/CompactionTxnHandler.java   | 67 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java  | 11 ++--
 6 files changed, 65 insertions(+), 44 deletions(-)

diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 6b337aab94..a2f0e17a75 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -625,6 +625,9 @@ public class MetastoreConf {
 "hive.compactor.cleaner.retry.retentionTime", 300, 
TimeUnit.SECONDS, new TimeValidator(TimeUnit.SECONDS),
 "Initial value of the cleaner retry retention time. The delay has 
a backoff, and calculated the following way: " +
 "pow(2, number_of_failed_attempts) * 
HIVE_COMPACTOR_CLEANER_RETRY_RETENTION_TIME."),
+
HIVE_COMPACTOR_CONNECTION_POOLING_MAX_CONNECTIONS("metastore.compactor.connectionPool.maxPoolSize",
+"hive.compactor.connectionPool.maxPoolSize", 10,
+"Specify the maximum number of connections in the connection pool 
used by the compactor."),
 CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName",
 "javax.jdo.option.ConnectionDriverName", 
"org.apache.derby.jdbc.EmbeddedDriver",
 "Driver class name for a JDBC metastore"),
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java
index 29633b500d..4e5803ead0 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DataSourceProvider.java
@@ -32,7 +32,17 @@ public interface DataSourceProvider {
* @param hdpConfig
* @return the new connection pool
*/
-  DataSource create(Configuration hdpConfig) throws SQLException;
+  default DataSource create(Configuration hdpConfig) throws SQLException {
+int maxPoolSize = MetastoreConf.getIntVar(hdpConfig, 
MetastoreConf.ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS);
+return create(hdpConfig, maxPoolSize);
+  }
+
+  /**
+   * @param hdpConfig
+   * @param maxPoolSize the maximum size of the connection pool
+   * @return the new connection pool
+   */
+  DataSource create(Configuration hdpConfig, int maxPoolSize) throws 
SQLException;
 
   /**
* Get the declared pooling type string. This is used to check against the 
constant in
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
index 4069a9350c..476a3d846f 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/datasource/DbCPDataSourceProvider.java
@@ -57,7 +57,7 @@ public class DbCPDataSourceProvider implements 
DataSourceProvider {
 
   @SuppressWarnings({ "rawtypes", "unchecked" })
   @Override
-  public DataSource create(Configuration hdpConfig) throws SQLException {
+  public DataSource create(Configuration hdpConfig, int maxPoolSize) throws 
SQLException {
 LOG.debug("Creating dbcp connection pool for the MetaStore");
 
 String driverUrl = DataSourceP

[hive] branch master updated: HIVE-25492: Major query-based compaction is skipped if partition is empty (Antal Sinkovits, reviewed by Denys Kuzmenko)

2022-04-11 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 93c186e675 HIVE-25492: Major query-based compaction is skipped if 
partition is empty  (Antal Sinkovits, reviewed by Denys Kuzmenko)
93c186e675 is described below

commit 93c186e675bdeecd89322f58f15f9c62d4935ed3
Author: Antal Sinkovits 
AuthorDate: Mon Apr 11 12:12:54 2022 +0200

HIVE-25492: Major query-based compaction is skipped if partition is empty  
(Antal Sinkovits, reviewed by Denys Kuzmenko)

Closes #3157
---
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   |  97 -
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java|  68 --
 .../hadoop/hive/ql/txn/compactor/Cleaner.java  |   2 +-
 .../hive/ql/txn/compactor/MajorQueryCompactor.java |   2 +-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java| 230 -
 5 files changed, 372 insertions(+), 27 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index 8e75a793a0..f409ec2efe 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -28,9 +28,12 @@ import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.collect.Lists;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -48,7 +51,6 @@ import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.DriverFactory;
-import org.apache.hadoop.hive.ql.TxnCommandsBaseForTests;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook;
@@ -73,7 +75,6 @@ import 
org.apache.tez.dag.history.logging.proto.ProtoMessageReader;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
-import org.mockito.Mockito;
 import org.mockito.internal.util.reflection.FieldSetter;
 
 import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker;
@@ -2209,4 +2210,96 @@ public class TestCrudCompactorOnTez extends 
CompactorOnTezTest {
 qc.runCompactionQueries(hiveConf, null, sdMock, null, ciMock, null, 
emptyQueries, emptyQueries, emptyQueries);
 Assert.assertEquals("none", 
hiveConf.getVar(HiveConf.ConfVars.LLAP_IO_ETL_SKIP_FORMAT));
   }
+
+  @Test
+  public void testIfEmptyBaseIsPresentAfterCompaction() throws Exception {
+String dbName = "default";
+String tblName = "empty_table";
+
+// Setup of LOAD INPATH scenario.
+executeStatementOnDriver("drop table if exists " + tblName, driver);
+executeStatementOnDriver("create table " + tblName + " (a string) stored 
as orc " +
+"TBLPROPERTIES ('transactional'='true')", driver);
+executeStatementOnDriver("insert into " + tblName + " values ('a')", 
driver);
+executeStatementOnDriver("delete from " + tblName + " where a='a'", 
driver);
+
+// Run a query-based MAJOR compaction
+CompactorTestUtil.runCompaction(conf, dbName, tblName, 
CompactionType.MAJOR, true);
+// Clean up resources
+CompactorTestUtil.runCleaner(conf);
+
+IMetaStoreClient hmsClient = new HiveMetaStoreClient(conf);
+Table table = hmsClient.getTable(dbName, tblName);
+FileSystem fs = FileSystem.get(conf);
+
+FileStatus[] fileStatuses = fs.listStatus(new 
Path(table.getSd().getLocation()));
+// There should be only dir
+Assert.assertEquals(1, fileStatuses.length);
+Path basePath = fileStatuses[0].getPath();
+// And it's a base
+Assert.assertTrue(AcidUtils.baseFileFilter.accept(basePath));
+RemoteIterator<LocatedFileStatus> filesInBase = fs.listFiles(basePath, 
true);
+// It has no files in it
+Assert.assertFalse(filesInBase.hasNext());
+  }
+
+  @Test
+  public void testNonAcidToAcidConversionWithNestedTableWithUnionSubdir() 
throws Exception {
+String dbName = "default";
+
+// Helper table for the union all insert
+String helperTblName = "

[hive] branch master updated: HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)

2022-03-26 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 1662a90  HIVE-26060: Invalidate acid table directory cache on drop 
table (Antal Sinkovits, reviewed by Denys Kuzmenko)
1662a90 is described below

commit 1662a902c98404946582709acba42de3252c4ac0
Author: Antal Sinkovits 
AuthorDate: Sat Mar 26 14:53:38 2022 +0100

HIVE-26060: Invalidate acid table directory cache on drop table (Antal 
Sinkovits, reviewed by Denys Kuzmenko)

Closes #3128
---
 .../hive/ql/txn/compactor/TestCompactor.java   | 78 ++
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java| 17 -
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |  2 +
 3 files changed, 96 insertions(+), 1 deletion(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index f2da316..16b5957 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -2517,6 +2517,84 @@ public class TestCompactor {
 Assert.assertEquals("2\t55\t66", valuesReadFromHiveDriver.get(1));
   }
 
+  @Test
+  public void testAcidDirCacheOnDropTable() throws Exception {
+int cacheDurationInMinutes = 10;
+AcidUtils.initDirCache(cacheDurationInMinutes);
+HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+String dbName = "default";
+String tblName = "adc_table";
+
+// First phase, populate the cache
+executeStatementOnDriver("drop table if exists " + tblName, driver);
+executeStatementOnDriver("create table " + tblName + " (a string) stored 
as orc " +
+"TBLPROPERTIES ('transactional'='true', 
'hive.exec.orc.split.strategy'='BI')", driver);
+executeStatementOnDriver("insert into " + tblName + " values ('a')", 
driver);
+executeStatementOnDriver("insert into " + tblName + " values ('b')", 
driver);
+runMajorCompaction(dbName, tblName);
+runCleaner(conf);
+
+HiveConf.setIntVar(driver.getConf(), 
ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+executeStatementOnDriver("select * from " + tblName + " order by a", 
driver);
+
+// Second phase, the previous data should be cleaned
+executeStatementOnDriver("drop table if exists " + tblName, driver);
+executeStatementOnDriver("create table " + tblName + " (a string) stored 
as orc " +
+"TBLPROPERTIES ('transactional'='true', 
'hive.exec.orc.split.strategy'='BI')", driver);
+executeStatementOnDriver("insert into " + tblName + " values ('c')", 
driver);
+executeStatementOnDriver("insert into " + tblName + " values ('d')", 
driver);
+runMajorCompaction(dbName, tblName);
+runCleaner(conf);
+
+HiveConf.setIntVar(driver.getConf(), 
ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+List<String> rs = execSelectAndDumpData("select * from " + tblName + " 
order by a", driver, "select");
+Assert.assertEquals(2, rs.size());
+Assert.assertEquals("c", rs.get(0));
+Assert.assertEquals("d", rs.get(1));
+  }
+
+  @Test
+  public void testAcidDirCacheOnDropPartitionedTable() throws Exception {
+int cacheDurationInMinutes = 10;
+AcidUtils.initDirCache(cacheDurationInMinutes);
+HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+String dbName = "default";
+String tblName = "adc_part_table";
+
+// First phase, populate the cache
+executeStatementOnDriver("drop table if exists " + tblName, driver);
+executeStatementOnDriver("create table " + tblName + " (a string) 
PARTITIONED BY (p string) stored as orc " +
+"TBLPROPERTIES ('transactional'='true', 
'hive.exec.orc.split.strategy'='BI')", driver);
+executeStatementOnDriver("insert into " + tblName + " values ('a', 'p1')", 
driver);
+executeStatementOnDriver("insert into " + tblName + " values ('b', 'p1')", 
driver);
+executeStatementOnDriver("insert into " + tblName + " values ('a', 'p2')", 
driver);
+executeStatementOnDriver("insert into " + tblName + " values ('b', 'p2')", 
driver);
+runMajorCompaction(dbName, tblName, "p=p1", "p=p2");
+runCleaner(conf);
+
+HiveConf.setIntVar(driver.getConf(), 
ConfVars.

[hive] branch master updated: HIVE-26048: Missing quotation mark in findReadyToClean query (Antal Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko)

2022-03-21 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 79679a9  HIVE-26048: Missing quotation mark in findReadyToClean query 
(Antal Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko)
79679a9 is described below

commit 79679a96349dbd0255dd126b588350436ec778ff
Author: Antal Sinkovits 
AuthorDate: Mon Mar 21 13:20:41 2022 +0100

HIVE-26048: Missing quotation mark in findReadyToClean query (Antal 
Sinkovits, reviewed by Karen Coppage and Denys Kuzmenko)

Closes #3118
---
 .../java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
index 439795a..8585379 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java
@@ -353,7 +353,7 @@ class CompactionTxnHandler extends TxnHandler {
 if (minOpenTxnWaterMark > 0) {
   whereClause += " AND (\"CQ_NEXT_TXN_ID\" <= " + minOpenTxnWaterMark 
+ " OR \"CQ_NEXT_TXN_ID\" IS NULL)";
 }
-whereClause += " AND (\"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) 
+ " - CQ_RETRY_RETENTION - " + retentionTime + ") OR \"CQ_COMMIT_TIME\" IS 
NULL)";
+whereClause += " AND (\"CQ_COMMIT_TIME\" < (" + getEpochFn(dbProduct) 
+ " - \"CQ_RETRY_RETENTION\" - " + retentionTime + ") OR \"CQ_COMMIT_TIME\" IS 
NULL)";
 String s = "SELECT \"CQ_ID\", \"cq1\".\"CQ_DATABASE\", 
\"cq1\".\"CQ_TABLE\", \"cq1\".\"CQ_PARTITION\"," +
 "   \"CQ_TYPE\", \"CQ_RUN_AS\", \"CQ_HIGHEST_WRITE_ID\", 
\"CQ_TBLPROPERTIES\", \"CQ_RETRY_RETENTION\" " +
 "  FROM \"COMPACTION_QUEUE\" \"cq1\" " +


[hive] branch master updated (f15e67b -> 655006a)

2022-03-02 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from f15e67b  HIVE-24949: Fail to rename a partition with customized 
catalog (#2910) (Zhihua Deng reviewed by Zoltan Haindrich)
 add 655006a  HIVE-25986: Statement id is incorrect in case of load in path 
to MM table (Antal Sinkovits, reviewed by Peter Vary)

No new revisions were added by this update.

Summary of changes:
 .../apache/hadoop/hive/ql/TestLocationQueries.java | 49 ++
 .../java/org/apache/hadoop/hive/ql/QueryPlan.java  |  8 +++-
 .../org/apache/hadoop/hive/ql/exec/MoveTask.java   |  4 +-
 .../test/queries/clientpositive/acid_load_data.q   |  6 +++
 .../clientpositive/llap/acid_load_data.q.out   | 16 +++
 5 files changed, 79 insertions(+), 4 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/acid_load_data.q
 create mode 100644 ql/src/test/results/clientpositive/llap/acid_load_data.q.out


[hive] branch master updated: HIVE-24805: Compactor: Initiator shouldn't fetch table details again and again for partitioned tables (Antal Sinkovits, reviewed by Denys Kuzmenko)

2022-01-19 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 718e19e  HIVE-24805: Compactor: Initiator shouldn't fetch table 
details again and again for partitioned tables (Antal Sinkovits, reviewed by 
Denys Kuzmenko)
718e19e is described below

commit 718e19ecabf09c4e11b1a4addd9ab78f12187961
Author: Antal Sinkovits 
AuthorDate: Wed Jan 19 16:21:10 2022 +0100

HIVE-24805: Compactor: Initiator shouldn't fetch table details again and 
again for partitioned tables (Antal Sinkovits, reviewed by Denys Kuzmenko)

Closes  #2906
---
 .../hadoop/hive/ql/txn/compactor/Initiator.java| 29 --
 .../hive/ql/txn/compactor/TestInitiator.java   | 46 ++
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  3 ++
 3 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
index 5b31d97..880e5e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.txn.compactor;
 
 import com.codahale.metrics.Counter;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
 import com.google.common.collect.Sets;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -68,7 +70,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.LongSummaryStatistics;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.TimeUnit;
@@ -90,6 +94,7 @@ public class Initiator extends MetaStoreCompactorThread {
 
   private long checkInterval;
   private ExecutorService compactionExecutor;
  private Optional<Cache<String, Table>> tableCache = Optional.empty();
 
   @Override
   public void run() {
@@ -142,6 +147,9 @@ public class Initiator extends MetaStoreCompactorThread {
 
   final ShowCompactResponse currentCompactions = 
txnHandler.showCompact(new ShowCompactRequest());
 
+  // Currently we invalidate all entries after each cycle, because the 
bootstrap replication is marked via
+  // table property hive.repl.first.inc.pending which would be cached.
+  tableCache.ifPresent(c -> c.invalidateAll());
   Set skipDBs = Sets.newConcurrentHashSet();
   Set skipTables = Sets.newConcurrentHashSet();
 
@@ -164,7 +172,7 @@ public class Initiator extends MetaStoreCompactorThread {
 
   for (CompactionInfo ci : potentials) {
 try {
-  Table t = resolveTable(ci);
+  Table t = resolveTableAndCache(ci);
   Partition p = resolvePartition(ci);
   if (p == null && ci.partName != null) {
 LOG.info("Can't find partition " + ci.getFullPartitionName() +
@@ -242,6 +250,17 @@ public class Initiator extends MetaStoreCompactorThread {
 }
   }
 
+  private Table resolveTableAndCache(CompactionInfo ci) throws MetaException {
+if (tableCache.isPresent()) {
+  try {
+return tableCache.get().get(ci.getFullTableName(), () -> 
resolveTable(ci));
+  } catch (ExecutionException e) {
+throw (MetaException) e.getCause();
+  }
+}
+return resolveTable(ci);
+  }
+
   private ValidWriteIdList resolveValidWriteIds(Table t) throws 
NoSuchTxnException, MetaException {
 ValidTxnList validTxnList = new 
ValidReadTxnList(conf.get(ValidTxnList.VALID_TXNS_KEY));
 // The response will have one entry per table and hence we get only one 
ValidWriteIdList
@@ -275,6 +294,11 @@ public class Initiator extends MetaStoreCompactorThread {
 compactionExecutor = CompactorUtil.createExecutorWithThreadFactory(
 conf.getIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_REQUEST_QUEUE),
 COMPACTOR_INTIATOR_THREAD_NAME_FORMAT);
+boolean tableCacheOn = MetastoreConf.getBoolVar(conf,
+MetastoreConf.ConfVars.COMPACTOR_INITIATOR_TABLECACHE_ON);
+if (tableCacheOn) {
+  this.tableCache = 
Optional.of(CacheBuilder.newBuilder().softValues().build());
+}
   }
 
   private void recoverFailedCompactions(boolean remoteOnly) throws 
MetaException {
@@ -524,8 +548,7 @@ public class Initiator extends MetaStoreCompactorThread {
 return false;
   }
 
-  //TODO: avoid repeated HMS lookup for same table (e.g partitions within 
table)
-  Table t = resolveTable(ci);
+  Table t = resolveTableAndCache(ci);
   if (t == null) {

[hive] branch master updated (5b112aa -> eabe8ae)

2022-01-17 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


from 5b112aa  HIVE-24893: Add UploadData and DownloadData to 
TCLIService.thrift (#2878)
 add eabe8ae  HIVE-25862: Persist the time of last run of the initiator 
(Antal Sinkovits, reviewed by Denys Kuzmenko)

No new revisions were added by this update.

Summary of changes:
 .../hadoop/hive/ql/txn/compactor/Initiator.java|  8 ++--
 .../hadoop/hive/metastore/txn/TxnHandler.java  | 45 --
 .../apache/hadoop/hive/metastore/txn/TxnStore.java | 10 +
 3 files changed, 48 insertions(+), 15 deletions(-)


[hive] branch master updated: HIVE-25716: Fix of flaky test TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by Denys Kuzmenko and Antal Sinkovits)

2021-12-09 Thread asinkovits
This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4566f07  HIVE-25716: Fix of flaky test 
TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by 
Denys Kuzmenko and Antal Sinkovits)
4566f07 is described below

commit 4566f076a2b3d6d87b258570a373dd60f7152786
Author: Viktor Csomor 
AuthorDate: Thu Dec 9 11:15:45 2021 +0100

HIVE-25716: Fix of flaky test 
TestCompactionMetrics#testOldestReadyForCleaningAge (Viktor Csomor, reviewed by 
Denys Kuzmenko and Antal Sinkovits)

Closes #2837
---
 .../hive/ql/txn/compactor/CompactorTest.java   | 10 ++--
 .../ql/txn/compactor/TestCompactionMetrics.java| 60 ++
 2 files changed, 44 insertions(+), 26 deletions(-)

diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java 
b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 5dc01f9..c2b78ad 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -292,7 +292,8 @@ public abstract class CompactorTest {
 burnThroughTransactions(dbName, tblName, num, open, aborted, null);
   }
 
-  protected void burnThroughTransactions(String dbName, String tblName, int 
num, Set<Long> open, Set<Long> aborted, LockRequest lockReq)
+  protected void burnThroughTransactions(String dbName, String tblName, int 
num, Set<Long> open, Set<Long> aborted,
+  LockRequest lockReq)
   throws MetaException, NoSuchTxnException, TxnAbortedException {
 OpenTxnsResponse rsp = txnHandler.openTxns(new OpenTxnRequest(num, "me", 
"localhost"));
 AllocateTableWriteIdsRequest awiRqst = new 
AllocateTableWriteIdsRequest(dbName, tblName);
@@ -300,14 +301,15 @@ public abstract class CompactorTest {
 AllocateTableWriteIdsResponse awiResp = 
txnHandler.allocateTableWriteIds(awiRqst);
 int i = 0;
 for (long tid : rsp.getTxn_ids()) {
-  assert(awiResp.getTxnToWriteIds().get(i++).getTxnId() == tid);
-  if(lockReq != null) {
+  assert (awiResp.getTxnToWriteIds().get(i).getTxnId() == tid);
+  ++i;
+  if (lockReq != null) {
 lockReq.setTxnid(tid);
 txnHandler.lock(lockReq);
   }
   if (aborted != null && aborted.contains(tid)) {
 txnHandler.abortTxn(new AbortTxnRequest(tid));
-  } else if (open == null || (open != null && !open.contains(tid))) {
+  } else if (open == null || !open.contains(tid)) {
 txnHandler.commitTxn(new CommitTxnRequest(tid));
   }
 }
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java
index 75c722b..eda09c1 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetrics.java
@@ -179,41 +179,49 @@ public class TestCompactionMetrics  extends CompactorTest 
{
   }
 
   @Test
-  @org.junit.Ignore("HIVE-25716")
   public void testOldestReadyForCleaningAge() throws Exception {
 conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 1);
 
-long oldStart = System.currentTimeMillis();
-Table old = newTable("default", "old_rfc", true);
-Partition oldP = newPartition(old, "part");
+final String DB_NAME = "default";
+final String OLD_TABLE_NAME = "old_rfc";
+final String OLD_PARTITION_NAME = "part";
+final String YOUNG_TABLE_NAME = "young_rfc";
+final String YOUNG_PARTITION_NAME = "part";
+
+long oldTableStart = System.currentTimeMillis();
+Table old = newTable(DB_NAME, OLD_TABLE_NAME, true);
+Partition oldP = newPartition(old, OLD_PARTITION_NAME);
 addBaseFile(old, oldP, 20L, 20);
 addDeltaFile(old, oldP, 21L, 22L, 2);
 addDeltaFile(old, oldP, 23L, 24L, 2);
-burnThroughTransactions("default", "old_rfc", 25);
-CompactionRequest rqst = new CompactionRequest("default", "old_rfc", 
CompactionType.MINOR);
-rqst.setPartitionname("ds=part");
-txnHandler.compact(rqst);
-startWorker();
+burnThroughTransactions(DB_NAME, OLD_TABLE_NAME, 25);
+doCompaction(DB_NAME, OLD_TABLE_NAME, OLD_PARTITION_NAME, 
CompactionType.MINOR);
+long oldTableEnd = System.currentTimeMillis();
 
-long youngStart = System.currentTimeMillis();
-Table young = newTable("default", "young_rfc", true);
-Partition youngP = newPartition(young, "part");
+Table young = newTable(DB_NAME, YOUNG_TABLE_NAME, true);
+Partition youngP = newPartition(young, YOUNG_PA