This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new faa3f62923 HIVE-26203: Implement alter iceberg table metadata location (#3270) (Laszlo Pinter, reviewed by Peter Vary)
faa3f62923 is described below

commit faa3f62923ac0b5a68a2b1e16736f1a32d38bb8e
Author: László Pintér <47777102+lcspin...@users.noreply.github.com>
AuthorDate: Wed May 11 13:57:08 2022 +0200

    HIVE-26203: Implement alter iceberg table metadata location (#3270) (Laszlo Pinter, reviewed by Peter Vary)
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 25 ++++++++
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 70 ++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index a27078581e..13142ce144 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -336,6 +336,31 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       // that users can change data types or reorder columns too with this alter op type, so its name is misleading..)
       assertNotMigratedTable(hmsTable.getParameters(), "CHANGE COLUMN");
       handleChangeColumn(hmsTable);
+    } else if (AlterTableType.ADDPROPS.equals(currentAlterTableOp)) {
+      assertNotCrossTableMetadataLocationChange(hmsTable.getParameters());
+    }
+  }
+
+  /**
+   * Perform a check on the current iceberg table whether a metadata change can be performed. A table is eligible if
+   * the current metadata uuid and the new metadata uuid matches.
+   * @param tblParams hms table properties, must be non-null
+   */
+  private void assertNotCrossTableMetadataLocationChange(Map<String, String> tblParams) {
+    if (tblParams.containsKey(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)) {
+      Preconditions.checkArgument(icebergTable != null,
+          "Cannot perform table migration to Iceberg and setting the snapshot location in one step. " +
" + + "Please migrate the table first"); + String newMetadataLocation = tblParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP); + FileIO io = ((BaseTable) icebergTable).operations().io(); + TableMetadata newMetadata = TableMetadataParser.read(io, newMetadataLocation); + TableMetadata currentMetadata = ((BaseTable) icebergTable).operations().current(); + if (!currentMetadata.uuid().equals(newMetadata.uuid())) { + throw new UnsupportedOperationException( + String.format("Cannot change iceberg table %s metadata location pointing to another table's metadata %s", + icebergTable.name(), newMetadataLocation) + ); + } } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 31b2800d81..0984cc9d10 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -1456,6 +1456,76 @@ public class TestHiveIcebergStorageHandlerNoScan { HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.stream()).collect(Collectors.toList()), records, 0); } + @Test + public void testAlterTableWithMetadataLocation() throws IOException { + Assume.assumeTrue("Alter table with metadata location is only supported for Hive Catalog tables", + testTableType.equals(TestTables.TestTableType.HIVE_CATALOG)); + TableIdentifier tableIdentifier = TableIdentifier.of("default", "source"); + // create a test table with some dummy data + Table table = + testTables.createTable(shell, tableIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, + PartitionSpec.unpartitioned(), FileFormat.PARQUET, Collections.emptyList(), 1, Collections.emptyMap()); + testTables.appendIcebergTable(shell.getHiveConf(), table, FileFormat.PARQUET, null, + HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + String firstMetadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + testTables.appendIcebergTable(shell.getHiveConf(), table, FileFormat.PARQUET, null, + HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + table.refresh(); + String secondMetadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + Assert.assertNotEquals(firstMetadataLocation, secondMetadataLocation); + shell.executeStatement("ALTER TABLE " + tableIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" + + firstMetadataLocation + "')"); + // during alter operation a new metadata file is created but reflecting the old metadata state + List<Object[]> rows = shell.executeStatement("SELECT * FROM " + tableIdentifier.name()); + List<Record> records = HiveIcebergTestUtils.valueForRow(table.schema(), rows); + HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, records, 0); + // add another batch of data to the table to make sure data manipulation is working + testTables.appendIcebergTable(shell.getHiveConf(), table, FileFormat.PARQUET, null, + HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS); + rows = shell.executeStatement("SELECT * FROM " + tableIdentifier.name()); + records = HiveIcebergTestUtils.valueForRow(table.schema(), rows); + HiveIcebergTestUtils.validateData( + Stream.concat(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.stream(), + 
+            HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.stream()).collect(Collectors.toList()), records, 0);
+  }
+
+  @Test
+  public void testAlterTableWithMetadataLocationFromAnotherTable() throws IOException {
+    TableIdentifier sourceIdentifier = TableIdentifier.of("default", "source");
+    Table sourceTable =
+        testTables.createTable(shell, sourceIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+            PartitionSpec.unpartitioned(), FileFormat.PARQUET, Collections.emptyList(), 1,
+            ImmutableMap.<String, String>builder().put(InputFormatConfig.EXTERNAL_TABLE_PURGE, "FALSE").build());
+    testTables.appendIcebergTable(shell.getHiveConf(), sourceTable, FileFormat.PARQUET, null,
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
+    String metadataLocation = ((BaseTable) sourceTable).operations().current().metadataFileLocation();
+    shell.executeStatement("DROP TABLE " + sourceIdentifier.name());
+    TableIdentifier targetIdentifier = TableIdentifier.of("default", "target");
+    testTables.createTable(shell, targetIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+        PartitionSpec.unpartitioned(), FileFormat.PARQUET, Collections.emptyList(), 1, Collections.emptyMap());
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Cannot change iceberg table",
+        () -> {
+          shell.executeStatement("ALTER TABLE " + targetIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" +
+              metadataLocation + "')");
+        });
+  }
+
+  @Test
+  public void testAlterTableToIcebergAndMetadataLocation() throws IOException {
+    String tableName = "tbl";
+    String createQuery = "CREATE EXTERNAL TABLE " + tableName + " (a int) STORED AS PARQUET " +
+        testTables.locationForCreateTableSQL(TableIdentifier.of("default", tableName)) +
+        testTables.propertiesForCreateTableSQL(ImmutableMap.of());
+    shell.executeStatement(createQuery);
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Cannot perform table migration to Iceberg and setting the snapshot location in one step.",
+        () -> {
+          shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" +
+              "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')");
+        });
+  }
+
   /**
    * Checks that the new schema has newintcol and newstring col columns on both HMS and Iceberg sides
    * @throws Exception - any test error
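
Note: the capability added by this patch can be exercised from HiveQL roughly as in the sketch below; the table name and metadata file path are illustrative placeholders, not values from this commit. The statement is accepted only when the supplied metadata file belongs to the same Iceberg table (matching table UUID), as enforced by assertNotCrossTableMetadataLocationChange above, and is only supported for Hive Catalog tables.

    -- roll the table back to one of its own earlier metadata files (placeholder path)
    ALTER TABLE default.customers
    SET TBLPROPERTIES ('metadata_location'='/warehouse/default/customers/metadata/00001-example.metadata.json');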