This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new e7c034fb6df HIVE-28421: Iceberg: mvn test can not run UTs in iceberg-catalog (Butao Zhang, reviewed by Denys Kuzmenko) e7c034fb6df is described below commit e7c034fb6df3b84bd3cb489740c821a980b1b478 Author: Butao Zhang <zhangbu...@cmss.chinamobile.com> AuthorDate: Sun Aug 25 16:12:16 2024 +0800 HIVE-28421: Iceberg: mvn test can not run UTs in iceberg-catalog (Butao Zhang, reviewed by Denys Kuzmenko) Closes #5376 --- iceberg/iceberg-catalog/pom.xml | 2 +- .../apache/iceberg/hive/HiveOperationsBase.java | 9 ---- .../apache/iceberg/hive/HiveTableOperations.java | 20 ++++---- .../test/java/org/apache/iceberg/TestHelpers.java | 55 +++++++++++++++------- .../iceberg/hive/TestHiveTableConcurrency.java | 2 +- .../iceberg/mr/hive/HiveIcebergMetaHook.java | 12 ++++- .../iceberg/mr/hive/TestConflictingDataFiles.java | 23 +++++++-- iceberg/pom.xml | 11 ----- pom.xml | 2 +- 9 files changed, 80 insertions(+), 56 deletions(-) diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index 02f617407b3..dd6848d43c3 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -84,7 +84,7 @@ </dependency> <dependency> <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-api</artifactId> + <artifactId>junit-jupiter-engine</artifactId> <scope>test</scope> </dependency> <dependency> diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java index 976c6bac2c5..a160449da2c 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveOperationsBase.java @@ -101,15 +101,6 @@ public interface HiveOperationsBase { "Not an iceberg table: %s (type=%s)", fullName, tableType); } - static boolean isHiveIcebergStorageHandler(String storageHandler) { - try { - Class<?> storageHandlerClass = Class.forName(storageHandler); - return Class.forName(HIVE_ICEBERG_STORAGE_HANDLER).isAssignableFrom(storageHandlerClass); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Error checking storage handler class", e); - } - } - default void persistTable(Table hmsTable, boolean updateHiveTable, String metadataLocation) throws TException, InterruptedException { if (updateHiveTable) { diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 1249b11ed60..bceded8081a 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -338,16 +338,7 @@ public class HiveTableOperations extends BaseMetastoreTableOperations parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation()); } - // If needed set the 'storage_handler' property to enable query from Hive - if (hiveEngineEnabled) { - String storageHandler = parameters.get(hive_metastoreConstants.META_TABLE_STORAGE); - // Check if META_TABLE_STORAGE is not present or is not an instance of ICEBERG_STORAGE_HANDLER - if (storageHandler == null || !HiveOperationsBase.isHiveIcebergStorageHandler(storageHandler)) { - parameters.put(hive_metastoreConstants.META_TABLE_STORAGE, HIVE_ICEBERG_STORAGE_HANDLER); - } - } else { - parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE); - } + setStorageHandler(parameters, hiveEngineEnabled); // Set the basic statistics if (summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP) != null) { @@ -368,6 +359,15 @@ public class HiveTableOperations extends BaseMetastoreTableOperations tbl.setParameters(parameters); } + private static void setStorageHandler(Map<String, String> parameters, boolean hiveEngineEnabled) { + // If needed set the 'storage_handler' property to enable query from Hive + if (hiveEngineEnabled) { + parameters.put(hive_metastoreConstants.META_TABLE_STORAGE, HiveOperationsBase.HIVE_ICEBERG_STORAGE_HANDLER); + } else { + parameters.remove(hive_metastoreConstants.META_TABLE_STORAGE); + } + } + @VisibleForTesting void setSnapshotStats(TableMetadata metadata, Map<String, String> parameters) { parameters.remove(TableProperties.CURRENT_SNAPSHOT_ID); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java index 085c95b7ce2..d47b0c013b9 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java @@ -28,11 +28,13 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.stream.IntStream; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundSetPredicate; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionVisitors; import org.apache.iceberg.expressions.UnboundPredicate; +import org.assertj.core.api.Assertions; import org.junit.Assert; public class TestHelpers { @@ -84,23 +86,22 @@ public class TestHelpers { } } - // commented out because the schemaId() method was only added to org.apache.iceberg.Schema after the 0.11.0 release -// public static void assertSameSchemaList(List<Schema> list1, List<Schema> list2) { -// if (list1.size() != list2.size()) { -// Assert.fail("Should have same number of schemas in both lists"); -// } -// -// IntStream.range(0, list1.size()).forEach( -// index -> { -// Schema schema1 = list1.get(index); -// Schema schema2 = list2.get(index); -// Assert.assertEquals("Should have matching schema id", -// schema1.schemaId(), schema2.schemaId()); -// Assert.assertEquals("Should have matching schema struct", -// schema1.asStruct(), schema2.asStruct()); -// } -// ); -// } + public static void assertSameSchemaList(List<Schema> list1, List<Schema> list2) { + Assertions.assertThat(list1) + .as("Should have same number of schemas in both lists") + .hasSameSizeAs(list2); + + IntStream.range(0, list1.size()) + .forEach( + index -> { + Schema schema1 = list1.get(index); + Schema schema2 = list2.get(index); + Assert.assertEquals( + "Should have matching schema id", schema1.schemaId(), schema2.schemaId()); + Assert.assertEquals( + "Should have matching schema struct", schema1.asStruct(), schema2.asStruct()); + }); + } public static void assertSerializedMetadata(Table expected, Table actual) { Assert.assertEquals("Name must match", expected.name(), actual.name()); @@ -120,6 +121,26 @@ public class TestHelpers { Assert.assertEquals("History must match", expected.history(), actual.history()); } + public static void assertSameSchemaMap(Map<Integer, Schema> map1, Map<Integer, Schema> map2) { + Assertions.assertThat(map1) + .as("Should have same number of schemas in both maps") + .hasSameSizeAs(map2); + + map1.forEach( + (schemaId, schema1) -> { + Schema schema2 = map2.get(schemaId); + Assert.assertNotNull( + String.format("Schema ID %s does not exist in map: %s", schemaId, map2), schema2); + + Assert.assertEquals( + "Should have matching schema id", schema1.schemaId(), schema2.schemaId()); + Assert.assertTrue( + String.format( + "Should be the same schema. Schema 1: %s, schema 2: %s", schema1, schema2), + schema1.sameSchema(schema2)); + }); + } + private static class CheckReferencesBound extends ExpressionVisitors.ExpressionVisitor<Void> { private final String message; diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveTableConcurrency.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveTableConcurrency.java index 5b0fd740f00..b2e41bfd446 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveTableConcurrency.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveTableConcurrency.java @@ -103,6 +103,6 @@ public class TestHiveTableConcurrency extends HiveTableBaseTest { executorService.shutdown(); assertThat(executorService.awaitTermination(3, TimeUnit.MINUTES)).as("Timeout").isTrue(); - assertThat(icebergTable.currentSnapshot().allManifests(icebergTable.io())).hasSize(20); + assertThat(icebergTable.snapshots()).hasSize(7); } } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index 36e26c9459c..36905ffb94f 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -105,7 +105,6 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.hive.CachedClientPool; -import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.HiveTableOperations; import org.apache.iceberg.hive.MetastoreLock; @@ -1075,7 +1074,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook { hmsTable.getSd().getSerdeInfo().setSerializationLib(HiveIcebergSerDe.class.getName()); String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE); // Check if META_TABLE_STORAGE is not present or is not an instance of ICEBERG_STORAGE_HANDLER - if (storageHandler == null || !HiveOperationsBase.isHiveIcebergStorageHandler(storageHandler)) { + if (storageHandler == null || !isHiveIcebergStorageHandler(storageHandler)) { hmsTable.getParameters() .put(hive_metastoreConstants.META_TABLE_STORAGE, HiveTableOperations.HIVE_ICEBERG_STORAGE_HANDLER); } @@ -1085,6 +1084,15 @@ public class HiveIcebergMetaHook implements HiveMetaHook { } } + private static boolean isHiveIcebergStorageHandler(String storageHandler) { + try { + Class<?> storageHandlerClass = Class.forName(storageHandler); + return Class.forName(HIVE_ICEBERG_STORAGE_HANDLER).isAssignableFrom(storageHandlerClass); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Error checking storage handler class", e); + } + } + @Override public void preDropPartitions(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context, diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java index 1ac1a74eb3f..b107041105d 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestConflictingDataFiles.java @@ -20,8 +20,10 @@ package org.apache.iceberg.mr.hive; +import java.lang.reflect.Method; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.concurrent.Executors; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.iceberg.FileFormat; @@ -29,6 +31,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.data.Record; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.hive.HiveTableOperations; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.base.Throwables; import org.apache.iceberg.util.Tasks; @@ -37,22 +40,34 @@ import org.junit.Assert; import org.junit.Assume; import org.junit.Before; import org.junit.Test; +import org.mockito.MockedStatic; import static org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerTestUtils.init; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.CALLS_REAL_METHODS; +import static org.mockito.Mockito.mockStatic; public class TestConflictingDataFiles extends HiveIcebergStorageHandlerWithEngineBase { private final String storageHandlerStub = "'org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerStub'"; @Before - public void setUpTables() { + public void setUpTables() throws NoSuchMethodException { PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name") .bucket("customer_id", 16).build(); - // create and insert an initial batch of records - testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat, - HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2, Collections.emptyMap(), storageHandlerStub); + Method method = HiveTableOperations.class.getDeclaredMethod("setStorageHandler", Map.class, Boolean.TYPE); + method.setAccessible(true); + + try (MockedStatic<HiveTableOperations> tableOps = mockStatic(HiveTableOperations.class, CALLS_REAL_METHODS)) { + tableOps.when(() -> method.invoke(null, anyMap(), eq(true))) + .thenAnswer(invocation -> null); + // create and insert an initial batch of records + testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat, + HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2, Collections.emptyMap(), storageHandlerStub); + } // insert one more batch so that we have multiple data files within the same partition shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, TableIdentifier.of("default", "customers"), false)); diff --git a/iceberg/pom.xml b/iceberg/pom.xml index eedbd7e5692..ec002613d78 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -35,7 +35,6 @@ <google.errorprone.javac.version>9+181-r4173-1</google.errorprone.javac.version> <google.errorprone.version>2.5.1</google.errorprone.version> <assertj.version>3.24.2</assertj.version> - <junit.jupiter.version>5.10.0</junit.jupiter.version> <awaitility.version>4.2.1</awaitility.version> <immutables.value.version>2.10.0</immutables.value.version> <validate.skip>false</validate.skip> @@ -203,16 +202,6 @@ <artifactId>assertj-core</artifactId> <version>${assertj.version}</version> </dependency> - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-api</artifactId> - <version>${junit.jupiter.version}</version> - </dependency> - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-params</artifactId> - <version>${junit.jupiter.version}</version> - </dependency> <dependency> <groupId>org.awaitility</groupId> <artifactId>awaitility</artifactId> diff --git a/pom.xml b/pom.xml index 53e82d50d14..cebbd83a4c6 100644 --- a/pom.xml +++ b/pom.xml @@ -165,7 +165,7 @@ <jodd.version>6.0.0</jodd.version> <json.version>1.8</json.version> <junit.version>4.13.2</junit.version> - <junit.jupiter.version>5.6.2</junit.jupiter.version> + <junit.jupiter.version>5.10.0</junit.jupiter.version> <junit.vintage.version>5.6.3</junit.vintage.version> <kafka.version>2.5.0</kafka.version> <kryo.version>5.5.0</kryo.version>