[hive] branch master updated: HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko)

2022-06-29 Thread dkuzmenko
This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new bc35507757 HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko)
bc35507757 is described below

commit bc35507757c23a6da612a2dc4b840105aed2515c
Author: veghlaci05 <90267982+veghlac...@users.noreply.github.com>
AuthorDate: Wed Jun 29 13:16:59 2022 +0200

HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko)

Closes #3303
---
 .../txn/compactor/CompactionHeartbeatService.java  | 217 +
 .../ql/txn/compactor/IMetaStoreClientFactory.java  |  64 ++
 .../hadoop/hive/ql/txn/compactor/Worker.java   |  84 ++--
 .../compactor/TestCompactionHeartbeatService.java  | 154 +++
 4 files changed, 446 insertions(+), 73 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java
new file mode 100644
index 0000000000..788955e35c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn.compactor;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.commons.pool2.ObjectPool;
+import org.apache.commons.pool2.impl.GenericObjectPool;
+import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hive.common.util.ShutdownHookManager;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.NoSuchElementException;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hive.common.util.HiveStringUtils.SHUTDOWN_HOOK_PRIORITY;
+
+/**
+ * Singleton service responsible for heartbeating the compaction transactions.
+ */
+class CompactionHeartbeatService {
+
+
+  private static final Logger LOG = LoggerFactory.getLogger(CompactionHeartbeatService.class);
+
+  private static volatile CompactionHeartbeatService instance;
+
+  /**
+   * Return the singleton instance of this class.
+   * @param conf The {@link HiveConf} used to create the service. Used only during the first call
+   * @return Returns the singleton {@link CompactionHeartbeatService}
+   * @throws IllegalStateException Thrown when the service has already been destroyed.
+   */
+  static CompactionHeartbeatService getInstance(HiveConf conf) {
+    if (instance == null) {
+      synchronized (CompactionHeartbeatService.class) {
+        if (instance == null) {
+          LOG.debug("Initializing compaction txn heartbeater service.");
+          instance = new CompactionHeartbeatService(conf);
+          ShutdownHookManager.addShutdownHook(() -> instance.shutdown(), SHUTDOWN_HOOK_PRIORITY);
+        }
+      }
+    }
+    if (instance.shuttingDown) {
+      throw new IllegalStateException("CompactionHeartbeatService is already destroyed!");
+    }
+    return instance;
+  }
+
+  private final ObjectPool<IMetaStoreClient> clientPool;
+  private volatile boolean shuttingDown = false;
+  private final long initialDelay;
+  private final long period;
+  private final ConcurrentHashMap tasks = new ConcurrentHashMap<>(30);
+
+  /**
+   * Starts the heartbeat for the given transaction
+   * @param txnId The id of the compaction txn
+   * @param lockId The id of the lock associated with the txn
+   * @param tableName Required for logging only
+   * @throws IllegalStateException Thrown when the heartbeat for the given txn has already been started.
+   */
+  void startHeartbeat(long txnId, long lockId, String tableName) {
+if (shutti
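
The archived message is cut off above, but the getInstance() method is shown in
full: a lazy, thread-safe singleton built on double-checked locking over a
volatile field, with a shutdown hook registered on first creation. Below is a
minimal, self-contained sketch of that pattern; the class name and the use of
Runtime.addShutdownHook are placeholders (the real service registers its hook
through Hive's ShutdownHookManager and also manages a metastore client pool).

// Sketch only -- "HeartbeatLikeService" is an illustrative name, not from the commit.
final class HeartbeatLikeService {

  // volatile ensures a fully constructed instance is visible to all threads
  private static volatile HeartbeatLikeService instance;

  private volatile boolean shuttingDown = false;

  private HeartbeatLikeService() {
  }

  static HeartbeatLikeService getInstance() {
    if (instance == null) {                     // first check, lock-free fast path
      synchronized (HeartbeatLikeService.class) {
        if (instance == null) {                 // second check, under the lock
          HeartbeatLikeService created = new HeartbeatLikeService();
          Runtime.getRuntime().addShutdownHook(new Thread(created::shutdown));
          instance = created;
        }
      }
    }
    if (instance.shuttingDown) {                // mirrors the guard in the commit
      throw new IllegalStateException("Service is already destroyed!");
    }
    return instance;
  }

  void shutdown() {
    shuttingDown = true;
  }
}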

[hive] branch master updated: HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa)

2022-06-29 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new f6906f2af1 HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa)
f6906f2af1 is described below

commit f6906f2af1e1feefc6f5b6a392307d0fb655bb1c
Author: veghlaci05 <90267982+veghlac...@users.noreply.github.com>
AuthorDate: Wed Jun 29 11:52:36 2022 +0200

HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   4 +
 .../mapreduce/TestHCatMultiOutputFormat.java   |   1 +
 .../iceberg/mr/hive/TestHiveIcebergTimeTravel.java |  25 ++--
 .../hive/minikdc/JdbcWithMiniKdcSQLAuthTest.java   |   1 +
 .../hive/minikdc/TestHs2HooksWithMiniKdc.java  |   1 +
 .../hive/minikdc/TestJdbcWithMiniKdcCookie.java|   2 +
 .../apache/hive/minikdc/TestSSLWithMiniKdc.java|   1 +
 .../parse/TestReplicationOnHDFSEncryptedZones.java |   7 +-
 .../hive/ql/txn/compactor/TestCompactor.java   |  40 -
 ql/src/java/org/apache/hadoop/hive/ql/Driver.java  |  26 ++--
 .../org/apache/hadoop/hive/ql/exec/FetchTask.java  | 166 +
 .../hive/ql/optimizer/SimpleFetchOptimizer.java|  34 +++--
 .../queries/clientpositive/nonmr_fetch_threshold.q |   1 +
 .../clientpositive/nonmr_fetch_threshold2.q|  16 ++
 .../clientnegative/cluster_tasklog_retrieval.q.out |   2 +-
 .../results/clientnegative/udf_assert_true.q.out   |   2 +-
 .../results/clientnegative/udf_assert_true2.q.out  |   2 +-
 .../results/clientnegative/udf_reflect_neg.q.out   |   2 +-
 .../results/clientnegative/udf_test_error.q.out|   2 +-
 .../llap/nonmr_fetch_threshold2.q.out  |  69 +
 20 files changed, 294 insertions(+), 110 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 67cfef75a3..50cfb85ba9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3503,6 +3503,10 @@ public class HiveConf extends Configuration {
         "1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only\n" +
         "2. more: SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)"
     ),
+    HIVEFETCHTASKCACHING("hive.fetch.task.caching", true,
+        "Enabling the caching of the result of fetch tasks eliminates the chance of running into a failing read." +
+        " On the other hand, if enabled, the hive.fetch.task.conversion.threshold must be adjusted accordingly. That" +
+        " is 1GB by default which must be lowered in case of enabled caching to prevent the consumption of too much memory."),
     HIVEFETCHTASKCONVERSIONTHRESHOLD("hive.fetch.task.conversion.threshold", 1073741824L,
         "Input threshold for applying hive.fetch.task.conversion. If target table is native, input length\n" +
         "is calculated by summation of file lengths. If it's not native, storage handler for the table\n" +
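
The new flag trades memory for read safety: a cached fetch result cannot be
broken by the Cleaner removing files underneath it, but the whole result is
held in memory, so the 1GB default of hive.fetch.task.conversion.threshold
becomes risky. A hedged sketch of tuning the two settings together, using only
the ConfVars enum names visible in this hunk (the 256 MB figure is an
arbitrary illustration, not a value recommended by the commit):

import org.apache.hadoop.hive.conf.HiveConf;

public class FetchTaskCachingExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // enable caching of fetch-task results (the behavior added by this commit)
    conf.setBoolVar(HiveConf.ConfVars.HIVEFETCHTASKCACHING, true);
    // lower the conversion threshold from its 1GB default, since cached
    // results are kept in memory
    conf.setLongVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD, 256L * 1024 * 1024);
  }
}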
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
index c62d356ddd..93d1418afa 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java
@@ -388,6 +388,7 @@ public class TestHCatMultiOutputFormat {
     conf.set("_hive.hdfs.session.path", "path");
     conf.set("_hive.local.session.path", "path");
     task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
+    task.execute();
     task.fetch(temp);
     for (String str : temp) {
       results.add(str.replace("\t", ","));
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java
index 233f87a857..13cfd975f4 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java
@@ -21,7 +21,6 @@ package org.apache.iceberg.mr.hive;
 
 import java.io.IOException;
 import java.util.List;
-import org.apache.iceberg.AssertHelpers;
 import org.apache.iceberg.HistoryEntry;
 import org.apache.iceberg.Table;
 import org.junit.Assert;
@@ -50,10 +49,14 @@ public class TestHiveIcebergTimeTravel extends HiveIcebergStorageHandlerWithEngineBase
 
 Assert.assertEq

[hive] branch master updated: Disable flaky test

2022-06-29 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 56c336268e Disable flaky test
56c336268e is described below

commit 56c336268ea8c281d23c22d89271af37cb7e2572
Author: Peter Vary 
AuthorDate: Wed Jun 29 09:32:45 2022 +0200

Disable flaky test
---
 ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java
index 5d5f68700c..8ce58bb45c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java
@@ -1260,6 +1260,7 @@ public class TestWorkloadManager {
 assertEquals("B", sessionA4.getPoolName());
   }
 
+  @org.junit.Ignore("HIVE-26364")
   @Test(timeout=1)
   public void testAsyncSessionInitFailures() throws Exception {
 final HiveConf conf = createConf();



[hive] branch master updated: HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408)

2022-06-29 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new ba87754a94 HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408)
ba87754a94 is described below

commit ba87754a942912928d9d59fb94db307ef85808b2
Author: pvary 
AuthorDate: Wed Jun 29 09:16:52 2022 +0200

HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408)
---
 .../java/org/apache/iceberg/mr/hive/IcebergTableUtil.java  |  4 
 .../org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java | 14 ++
 .../java/org/apache/hadoop/hive/ql/stats/StatsUtils.java   |  3 ++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 6e471f7be3..3fe2eee39d 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -77,8 +77,12 @@ public class IcebergTableUtil {
   static Table getTable(Configuration configuration, Properties properties) {
     String metaTable = properties.getProperty("metaTable");
     String tableName = properties.getProperty(Catalogs.NAME);
+    String location = properties.getProperty(Catalogs.LOCATION);
     if (metaTable != null) {
+      // HiveCatalog, HadoopCatalog uses NAME to identify the metadata table
       properties.setProperty(Catalogs.NAME, tableName + "." + metaTable);
+      // HadoopTable uses LOCATION to identify the metadata table
+      properties.setProperty(Catalogs.LOCATION, location + "#" + metaTable);
     }
 
     String tableIdentifier = properties.getProperty(Catalogs.NAME);
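
The patch sets both identifiers because the catalog implementations resolve
metadata tables differently: name-based catalogs (HiveCatalog, HadoopCatalog)
expect the metadata table appended to NAME with a dot, while HadoopTables
expects it appended to LOCATION after a '#'. A small illustration of the
resulting property values; the table name, location, and the choice of the
"history" metadata table are made-up examples:

import java.util.Properties;
import org.apache.iceberg.mr.Catalogs;

class MetaTablePropertiesExample {
  // Illustration only: mirrors the rewrite done by the patched getTable().
  static Properties propsForHistoryMetaTable() {
    Properties props = new Properties();
    props.setProperty(Catalogs.NAME, "default.source");
    props.setProperty(Catalogs.LOCATION, "/warehouse/default/source");
    props.setProperty("metaTable", "history");
    // After the patched block runs, both identifiers carry the suffix:
    //   Catalogs.NAME     -> "default.source.history"            (HiveCatalog, HadoopCatalog)
    //   Catalogs.LOCATION -> "/warehouse/default/source#history" (HadoopTables)
    return props;
  }
}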
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
index a9c692d12e..6ab6e3e4ff 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.stream.Collectors;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.mr.InputFormatConfig;
@@ -263,4 +264,17 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineBase
     Assert.assertEquals(2, result.size());

   }
+
+  @Test
+  public void testHistory() throws IOException, InterruptedException {
+    TableIdentifier identifier = TableIdentifier.of("default", "source");
+    Table table = testTables.createTableWithVersions(shell, identifier.name(),
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 1);
+    List<Object[]> history = shell.executeStatement("SELECT snapshot_id FROM default.source.history");
+    Assert.assertEquals(table.history().size(), history.size());
+    for (int i = 0; i < table.history().size(); ++i) {
+      Assert.assertEquals(table.history().get(i).snapshotId(), history.get(i)[0]);
+    }
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 56b3843c00..f493bfebc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -262,6 +262,7 @@ public class StatsUtils {
     boolean fetchColStats =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
     boolean estimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS);
+    boolean metaTable = table.getMetaTable() != null;
 
     if (!table.isPartitioned()) {
 
@@ -285,7 +286,7 @@
 
       long numErasureCodedFiles = getErasureCodedFiles(table);
 
-      if (needColStats) {
+      if (needColStats && !metaTable) {
         colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats);
         if (estimateStats) {
           estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema);