This is an automated email from the ASF dual-hosted git repository. szita pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 7b600fe HIVE-25563: Iceberg table operations hang a long time if metadata is missing/corrupted (Adam Szita, reviewed by Marton Bod) 7b600fe is described below commit 7b600fe38f03b9790b193171a65e57f6a6970820 Author: Adam Szita <40628386+sz...@users.noreply.github.com> AuthorDate: Mon Oct 4 10:21:08 2021 +0200 HIVE-25563: Iceberg table operations hang a long time if metadata is missing/corrupted (Adam Szita, reviewed by Marton Bod) --- .../src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++++ .../org/apache/iceberg/hive/HiveTableOperations.java | 4 +++- .../org/apache/iceberg/hive/HiveTableBaseTest.java | 2 +- .../java/org/apache/iceberg/hive/HiveTableTest.java | 18 ++++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 903a803..cf96fff 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -5593,6 +5593,10 @@ public class HiveConf extends Configuration { HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS("hive.server2.iceberg.metadata.generator.threads", 10, "Number of threads used to scan partition directories for data files and update/generate iceberg metadata"), + HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES("hive.iceberg.metadata.refresh.max.retries", 2, + "Max retry count for trying to access the metadata location in order to refresh metadata during " + + "Iceberg table load."), + /* BLOBSTORE section */ HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n", diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index d7533e0..386d9ff 100644 --- 
a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; @@ -180,7 +181,8 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { throw new RuntimeException("Interrupted during refresh", e); } - refreshFromMetadataLocation(metadataLocation); + refreshFromMetadataLocation(metadataLocation, HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES)); } @SuppressWarnings("checkstyle:CyclomaticComplexity") diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java index 76f2192..b584b9e 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableBaseTest.java @@ -79,7 +79,7 @@ public class HiveTableBaseTest extends HiveMetastoreTest { return getTableLocationPath(tableName).toString(); } - private static String metadataLocation(String tableName) { + protected static String metadataLocation(String tableName) { return Paths.get(getTableBasePath(tableName), "metadata").toString(); } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java index fa67e5d..6804fc8 100644 --- 
a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.SerDeInfo; @@ -48,6 +49,7 @@ import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -434,6 +436,22 @@ public class HiveTableTest extends HiveTableBaseTest { assertHiveEnabled(hmsTable, false); } + @Test(timeout = 60000, expected = NotFoundException.class) + public void testMissingMetadataWontCauseHang() throws Exception { + catalog.loadTable(TABLE_IDENTIFIER); + HiveConf.setIntVar(catalog.getConf(), HiveConf.ConfVars.HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES, 3); + + File realLocation = new File(metadataLocation(TABLE_NAME)); + File fakeLocation = new File(metadataLocation(TABLE_NAME) + "_dummy"); + realLocation.renameTo(fakeLocation); + + try { + catalog.loadTable(TABLE_IDENTIFIER); + } finally { + fakeLocation.renameTo(realLocation); + } + } + private void assertHiveEnabled(org.apache.hadoop.hive.metastore.api.Table hmsTable, boolean expected) { if (expected) { Assert.assertEquals("org.apache.iceberg.mr.hive.HiveIcebergStorageHandler",