This is an automated email from the ASF dual-hosted git repository. szita pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new d22eca1 HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora) d22eca1 is described below commit d22eca1877bc79b5f3e17972684b0e985469fac7 Author: Adam Szita <40628386+sz...@users.noreply.github.com> AuthorDate: Wed Aug 11 11:42:45 2021 +0200 HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora) --- .../iceberg/mr/hive/HiveIcebergMetaHook.java | 35 ++++++++++++++++++---- .../hive/TestHiveIcebergStorageHandlerNoScan.java | 30 +++++++++++++++++++ .../hive/ql/parse/PartitionTransformSpec.java | 2 +- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index 6301f93..8d773b9 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -60,6 +60,8 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.Transaction; +import org.apache.iceberg.UpdatePartitionSpec; import org.apache.iceberg.UpdateProperties; import org.apache.iceberg.UpdateSchema; import org.apache.iceberg.catalog.TableIdentifier; @@ -103,6 +105,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook { private boolean canMigrateHiveTable; private PreAlterTableProperties preAlterTableProperties; private UpdateSchema updateSchema; + private UpdatePartitionSpec updatePartitionSpec; + private Transaction transaction; private AlterTableType currentAlterTableOp; public HiveIcebergMetaHook(Configuration conf) { @@ -293,8 +297,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook { case REPLACE_COLUMNS: case RENAME_COLUMN: case ADDCOLS: - if (updateSchema != null) { - updateSchema.commit(); + if (transaction != null) { + transaction.commitTransaction(); } break; case ADDPROPS: @@ -497,12 +501,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook { HiveSchemaUtil.getSchemaDiff(hmsTable.getSd().getCols(), HiveSchemaUtil.convert(icebergTable.schema()), false) .getMissingFromSecond(); if (!addedCols.isEmpty()) { - updateSchema = icebergTable.updateSchema(); + transaction = icebergTable.newTransaction(); + updateSchema = transaction.updateSchema(); } for (FieldSchema addedCol : addedCols) { updateSchema.addColumn(addedCol.getName(), HiveSchemaUtil.convert(TypeInfoUtils.getTypeInfoFromTypeString(addedCol.getType())), addedCol.getComment()); } + updateSchema.commit(); } private void handleReplaceColumns(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException { @@ -531,12 +537,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook { "mismatches between HMS and Iceberg, please consider the UPDATE COLUMNS command."); } - updateSchema = icebergTable.updateSchema(); + transaction = icebergTable.newTransaction(); + updateSchema = transaction.updateSchema(); LOG.info("handleReplaceColumns: Dropping the following columns for Iceberg table {}, cols: {}", hmsTable.getTableName(), schemaDifference.getMissingFromFirst()); for (FieldSchema droppedCol : schemaDifference.getMissingFromFirst()) { updateSchema.deleteColumn(droppedCol.getName()); } + updateSchema.commit(); } private void handleChangeColumn(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException { @@ -555,7 +563,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook { renameMapping); if (!schemaDifference.isEmpty() || outOfOrder != null) { - updateSchema = icebergTable.updateSchema(); + transaction = icebergTable.newTransaction(); + updateSchema = transaction.updateSchema(); } else { // we should get here if the user didn't change anything about the column // i.e. no changes to the name, type, comment or order @@ -597,6 +606,22 @@ public class HiveIcebergMetaHook implements HiveMetaHook { updateSchema.moveFirst(outOfOrder.first()); } } + updateSchema.commit(); + + handlePartitionRename(schemaDifference); + } + + private void handlePartitionRename(HiveSchemaUtil.SchemaDifference schemaDifference) { + // in case a partition column has been renamed, spec needs to be adjusted too + if (!schemaDifference.getMissingFromSecond().isEmpty()) { + FieldSchema oldField = schemaDifference.getMissingFromFirst().get(0); + FieldSchema updatedField = schemaDifference.getMissingFromSecond().get(0); + if (icebergTable.spec().fields().stream().anyMatch(pf -> pf.name().equals(oldField.getName()))) { + updatePartitionSpec = transaction.updateSpec(); + updatePartitionSpec.renameField(oldField.getName(), updatedField.getName()); + updatePartitionSpec.commit(); + } + } } private Type.PrimitiveType getPrimitiveTypeOrThrow(FieldSchema field) throws MetaException { diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 4d01c5d..be04719 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -40,6 +40,7 @@ import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.BaseTable; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.PartitionField; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; @@ -949,6 +950,35 @@ public class TestHiveIcebergStorageHandlerNoScan { } @Test + public void testAlterTableRenamePartitionColumn() throws Exception { + TableIdentifier identifier = TableIdentifier.of("default", "customers"); + + testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC, + FileFormat.PARQUET, ImmutableList.of()); + shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (last_name)"); + + // Renaming (and reordering) a partition column + shell.executeStatement("ALTER TABLE default.customers CHANGE last_name family_name string FIRST"); + List<PartitionField> partitionFields = testTables.loadTable(identifier).spec().fields(); + Assert.assertEquals(1, partitionFields.size()); + Assert.assertEquals("family_name", partitionFields.get(0).name()); + + // Addign new columns, assigning them as partition columns then removing 1 partition column + shell.executeStatement("ALTER TABLE default.customers ADD COLUMNS (p1 string, p2 string)"); + shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (family_name, p1, p2)"); + + shell.executeStatement("ALTER TABLE default.customers CHANGE p1 region string"); + shell.executeStatement("ALTER TABLE default.customers CHANGE p2 city string"); + + shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (region, city)"); + + List<Object[]> result = shell.executeStatement("DESCRIBE default.customers"); + Assert.assertArrayEquals(new String[] {"family_name", "VOID", null}, result.get(8)); + Assert.assertArrayEquals(new String[] {"region", "IDENTITY", null}, result.get(9)); + Assert.assertArrayEquals(new String[] {"city", "IDENTITY", null}, result.get(10)); + } + + @Test public void testAlterTableReplaceColumns() throws TException, InterruptedException { TableIdentifier identifier = TableIdentifier.of("default", "customers"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java index 108a006..268660f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java @@ -22,7 +22,7 @@ import java.util.Optional; public class PartitionTransformSpec { public enum TransformType { - IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET + IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET, VOID } private String columnName;