[hive] branch master updated: HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bc35507757 HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko) bc35507757 is described below commit bc35507757c23a6da612a2dc4b840105aed2515c Author: veghlaci05 <90267982+veghlac...@users.noreply.github.com> AuthorDate: Wed Jun 29 13:16:59 2022 +0200 HIVE-26242: Compaction heartbeater improvements (Laszlo Vegh, reviewed by Karen Coppage, Denys Kuzmenko) Closes #3303 --- .../txn/compactor/CompactionHeartbeatService.java | 217 + .../ql/txn/compactor/IMetaStoreClientFactory.java | 64 ++ .../hadoop/hive/ql/txn/compactor/Worker.java | 84 ++-- .../compactor/TestCompactionHeartbeatService.java | 154 +++ 4 files changed, 446 insertions(+), 73 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java new file mode 100644 index 00..788955e35c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactionHeartbeatService.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.pool2.ObjectPool; +import org.apache.commons.pool2.impl.GenericObjectPool; +import org.apache.commons.pool2.impl.GenericObjectPoolConfig; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hive.common.util.ShutdownHookManager; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import static org.apache.hive.common.util.HiveStringUtils.SHUTDOWN_HOOK_PRIORITY; + +/** + * Singleton service responsible for heartbeating the compaction transactions. + */ +class CompactionHeartbeatService { + + + private static final Logger LOG = LoggerFactory.getLogger(CompactionHeartbeatService.class); + + private static volatile CompactionHeartbeatService instance; + + /** + * Return the singleton instance of this class. + * @param conf The {@link HiveConf} used to create the service. Used only during the first call + * @return Returns the singleton {@link CompactionHeartbeatService} + * @throws IllegalStateException Thrown when the service has already been destroyed. 
+ */ + static CompactionHeartbeatService getInstance(HiveConf conf) { +if (instance == null) { + synchronized (CompactionHeartbeatService.class) { +if (instance == null) { + LOG.debug("Initializing compaction txn heartbeater service."); + instance = new CompactionHeartbeatService(conf); + ShutdownHookManager.addShutdownHook(() -> instance.shutdown(), SHUTDOWN_HOOK_PRIORITY); +} + } +} +if (instance.shuttingDown) { + throw new IllegalStateException("CompactionHeartbeatService is already destroyed!"); +} +return instance; + } + + private final ObjectPool clientPool; + private volatile boolean shuttingDown = false; + private final long initialDelay; + private final long period; + private final ConcurrentHashMap tasks = new ConcurrentHashMap<>(30); + + /** + * Starts the heartbeat for the given transaction + * @param txnId The id of the compaction txn + * @param lockId The id of the lock associated with the txn + * @param tableName Required for logging only + * @throws IllegalStateException Thrown when the heartbeat for the given txn has already been started. + */ + void startHeartbeat(long txnId, long lockId, String tableName) { +if (shutti
[hive] branch master updated: HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new f6906f2af1 HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa) f6906f2af1 is described below commit f6906f2af1e1feefc6f5b6a392307d0fb655bb1c Author: veghlaci05 <90267982+veghlac...@users.noreply.github.com> AuthorDate: Wed Jun 29 11:52:36 2022 +0200 HIVE-25976: Cleaner may remove files being accessed from a fetch-task-converted reader (Laszlo Vegh, reviewed by Peter Vary, Krisztian Kasa) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../mapreduce/TestHCatMultiOutputFormat.java | 1 + .../iceberg/mr/hive/TestHiveIcebergTimeTravel.java | 25 ++-- .../hive/minikdc/JdbcWithMiniKdcSQLAuthTest.java | 1 + .../hive/minikdc/TestHs2HooksWithMiniKdc.java | 1 + .../hive/minikdc/TestJdbcWithMiniKdcCookie.java| 2 + .../apache/hive/minikdc/TestSSLWithMiniKdc.java| 1 + .../parse/TestReplicationOnHDFSEncryptedZones.java | 7 +- .../hive/ql/txn/compactor/TestCompactor.java | 40 - ql/src/java/org/apache/hadoop/hive/ql/Driver.java | 26 ++-- .../org/apache/hadoop/hive/ql/exec/FetchTask.java | 166 + .../hive/ql/optimizer/SimpleFetchOptimizer.java| 34 +++-- .../queries/clientpositive/nonmr_fetch_threshold.q | 1 + .../clientpositive/nonmr_fetch_threshold2.q| 16 ++ .../clientnegative/cluster_tasklog_retrieval.q.out | 2 +- .../results/clientnegative/udf_assert_true.q.out | 2 +- .../results/clientnegative/udf_assert_true2.q.out | 2 +- .../results/clientnegative/udf_reflect_neg.q.out | 2 +- .../results/clientnegative/udf_test_error.q.out| 2 +- .../llap/nonmr_fetch_threshold2.q.out | 69 + 20 files changed, 294 insertions(+), 110 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 67cfef75a3..50cfb85ba9 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3503,6 +3503,10 @@ public class HiveConf extends Configuration { "1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only\n" + "2. more: SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)" ), +HIVEFETCHTASKCACHING("hive.fetch.task.caching", true, +"Enabling the caching of the result of fetch tasks eliminates the chance of running into a failing read." + +" On the other hand, if enabled, the hive.fetch.task.conversion.threshold must be adjusted accordingly. That" + +" is 1GB by default which must be lowered in case of enabled caching to prevent the consumption of too much memory."), HIVEFETCHTASKCONVERSIONTHRESHOLD("hive.fetch.task.conversion.threshold", 1073741824L, "Input threshold for applying hive.fetch.task.conversion. If target table is native, input length\n" + "is calculated by summation of file lengths. 
If it's not native, storage handler for the table\n" + diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index c62d356ddd..93d1418afa 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -388,6 +388,7 @@ public class TestHCatMultiOutputFormat { conf.set("_hive.hdfs.session.path", "path"); conf.set("_hive.local.session.path", "path"); task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf)); +task.execute(); task.fetch(temp); for (String str : temp) { results.add(str.replace("\t", ",")); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java index 233f87a857..13cfd975f4 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergTimeTravel.java @@ -21,7 +21,6 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; import java.util.List; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HistoryEntry; import org.apache.iceberg.Table; import org.junit.Assert; @@ -50,10 +49,14 @@ public class TestHiveIcebergTimeTravel extends HiveIcebergStorageHandlerWithEngi Assert.assertEq
[hive] branch master updated: Disable flaky test
This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 56c336268e Disable flaky test 56c336268e is described below commit 56c336268ea8c281d23c22d89271af37cb7e2572 Author: Peter Vary AuthorDate: Wed Jun 29 09:32:45 2022 +0200 Disable flaky test --- ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java index 5d5f68700c..8ce58bb45c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestWorkloadManager.java @@ -1260,6 +1260,7 @@ public class TestWorkloadManager { assertEquals("B", sessionA4.getPoolName()); } + @org.junit.Ignore("HIVE-26364") @Test(timeout=1) public void testAsyncSessionInitFailures() throws Exception { final HiveConf conf = createConf();
[hive] branch master updated: HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408)
This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new ba87754a94 HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408) ba87754a94 is described below commit ba87754a942912928d9d59fb94db307ef85808b2 Author: pvary AuthorDate: Wed Jun 29 09:16:52 2022 +0200 HIVE-26358: Querying metadata tables does not work for Iceberg tables using HADOOP_TABLE (Peter Vary reviewed by Laszlo Pinter) (#3408) --- .../java/org/apache/iceberg/mr/hive/IcebergTableUtil.java | 4 .../org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java | 14 ++ .../java/org/apache/hadoop/hive/ql/stats/StatsUtils.java | 3 ++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java index 6e471f7be3..3fe2eee39d 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java @@ -77,8 +77,12 @@ public class IcebergTableUtil { static Table getTable(Configuration configuration, Properties properties) { String metaTable = properties.getProperty("metaTable"); String tableName = properties.getProperty(Catalogs.NAME); +String location = properties.getProperty(Catalogs.LOCATION); if (metaTable != null) { + // HiveCatalog, HadoopCatalog uses NAME to identify the metadata table properties.setProperty(Catalogs.NAME, tableName + "." 
+ metaTable); + // HadoopTable uses LOCATION to identify the metadata table + properties.setProperty(Catalogs.LOCATION, location + "#" + metaTable); } String tableIdentifier = properties.getProperty(Catalogs.NAME); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java index a9c692d12e..6ab6e3e4ff 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.data.Record; import org.apache.iceberg.mr.InputFormatConfig; @@ -263,4 +264,17 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB Assert.assertEquals(2, result.size()); } + + @Test + public void testHistory() throws IOException, InterruptedException { +TableIdentifier identifier = TableIdentifier.of("default", "source"); +Table table = testTables.createTableWithVersions(shell, identifier.name(), +HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, +HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 1); +List history = shell.executeStatement("SELECT snapshot_id FROM default.source.history"); +Assert.assertEquals(table.history().size(), history.size()); +for (int i = 0; i < table.history().size(); ++i) { + Assert.assertEquals(table.history().get(i).snapshotId(), history.get(i)[0]); +} + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 56b3843c00..f493bfebc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -262,6 +262,7 @@ public class StatsUtils { boolean fetchColStats = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS); boolean estimateStats = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_ESTIMATE_STATS); +boolean metaTable = table.getMetaTable() != null; if (!table.isPartitioned()) { @@ -285,7 +286,7 @@ public class StatsUtils { long numErasureCodedFiles = getErasureCodedFiles(table); - if (needColStats) { + if (needColStats && !metaTable) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats); if (estimateStats) { estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema);