This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 970c41c  HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
970c41c is described below

commit 970c41c302719cdca9bb9d004c6beca19451188b
Author: pvary <pv...@cloudera.com>
AuthorDate: Thu Feb 17 07:28:08 2022 +0100

    HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
---
 .../apache/iceberg/mr/hive/IcebergTableUtil.java   |  73 ++++++-------
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 111 +++++++++++++++++++++
 .../hadoop/hive/ql/parse/PartitionTransform.java   |   5 +-
 3 files changed, 143 insertions(+), 46 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 9a1f316..63ddfc3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -21,14 +21,11 @@ package org.apache.iceberg.mr.hive;
 
 import java.util.List;
 import java.util.Properties;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
-import org.apache.iceberg.PartitionField;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
@@ -146,51 +143,39 @@ public class IcebergTableUtil {
       return;
     }
 
-    List<String> newPartitionNames =
-        newPartitionSpec.fields().stream().map(PartitionField::name).collect(Collectors.toList());
-    List<String> currentPartitionNames = table.spec().fields().stream().map(PartitionField::name)
-        .collect(Collectors.toList());
-    List<String> intersectingPartitionNames =
-        currentPartitionNames.stream().filter(newPartitionNames::contains).collect(Collectors.toList());
+    // delete every field from the old partition spec
+    UpdatePartitionSpec updatePartitionSpec = table.updateSpec().caseSensitive(false);
+    table.spec().fields().forEach(field -> updatePartitionSpec.removeField(field.name()));
 
-    // delete those partitions which are not present among the new partion spec
-    UpdatePartitionSpec updatePartitionSpec = table.updateSpec();
-    currentPartitionNames.stream().filter(p -> !intersectingPartitionNames.contains(p))
-        .forEach(updatePartitionSpec::removeField);
-    updatePartitionSpec.apply();
-
-    // add new partitions which are not yet present
     List<PartitionTransformSpec> partitionTransformSpecList = SessionStateUtil
         .getResource(configuration, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC)
        .map(o -> (List<PartitionTransformSpec>) o).orElseGet(() -> null);
-    IntStream.range(0, partitionTransformSpecList.size())
-        .filter(i -> !intersectingPartitionNames.contains(newPartitionSpec.fields().get(i).name()))
-        .forEach(i -> {
-          PartitionTransformSpec spec = partitionTransformSpecList.get(i);
-          switch (spec.getTransformType()) {
-            case IDENTITY:
-              updatePartitionSpec.addField(spec.getColumnName());
-              break;
-            case YEAR:
-              updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
-              break;
-            case MONTH:
-              updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
-              break;
-            case DAY:
-              updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
-              break;
-            case HOUR:
-              updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
-              break;
-            case TRUNCATE:
-              updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
-              break;
-            case BUCKET:
-              updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
-              break;
-          }
-        });
+
+    partitionTransformSpecList.forEach(spec -> {
+      switch (spec.getTransformType()) {
+        case IDENTITY:
+          updatePartitionSpec.addField(spec.getColumnName());
+          break;
+        case YEAR:
+          updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
+          break;
+        case MONTH:
+          updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
+          break;
+        case DAY:
+          updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
+          break;
+        case HOUR:
+          updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
+          break;
+        case TRUNCATE:
+          updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
+          break;
+        case BUCKET:
+          updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
+          break;
+      }
+    });
 
     updatePartitionSpec.commit();
   }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index fc90722..136ad4f 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -286,6 +286,117 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
+  public void testSetPartitionTransformSameField() {
+    Schema schema = new Schema(
+        optional(1, "id", Types.LongType.get()),
+        optional(2, "truncate_field", Types.StringType.get()),
+        optional(3, "bucket_field", Types.StringType.get())
+    );
+
+    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+        " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+        " STORED BY ICEBERG " +
+        testTables.locationForCreateTableSQL(identifier) +
+        "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+        SchemaParser.toJson(schema) + "', " +
+        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+    PartitionSpec spec = PartitionSpec.builderFor(schema)
+        .truncate("truncate_field", 2)
+        .bucket("bucket_field", 2)
+        .build();
+
+    Table table = testTables.loadTable(identifier);
+    Assert.assertEquals(spec, table.spec());
+
+    // Change one, keep one
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(1)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .bucket("bucket_field", 2)
+        .truncate("truncate_field", 3, "truncate_field_trunc_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+
+    // Change one again, keep the other one
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(2)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .bucket("bucket_field", 2)
+        .alwaysNull("truncate_field", "truncate_field_trunc_3")
+        .truncate("truncate_field", 4, "truncate_field_trunc_4")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+
+    // Keep the already changed, change the other one (change the order of clauses in the spec)
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(3)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .alwaysNull("bucket_field", "bucket_field_bucket")
+        .alwaysNull("truncate_field", "truncate_field_trunc_3")
+        .truncate("truncate_field", 4, "truncate_field_trunc_4")
+        .bucket("bucket_field", 3, "bucket_field_bucket_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+  }
+
+  @Test
+  public void testSetPartitionTransformCaseSensitive() {
+    Schema schema = new Schema(
+        optional(1, "id", Types.LongType.get()),
+        optional(2, "truncate_field", Types.StringType.get()),
+        optional(3, "bucket_field", Types.StringType.get())
+    );
+
+    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+        " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+        " STORED BY ICEBERG " +
+        testTables.locationForCreateTableSQL(identifier) +
+        "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+        SchemaParser.toJson(schema) + "', " +
+        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+    PartitionSpec spec = PartitionSpec.builderFor(schema)
+        .truncate("truncate_field", 2)
+        .bucket("bucket_field", 2)
+        .build();
+
+    Table table = testTables.loadTable(identifier);
+    Assert.assertEquals(spec, table.spec());
+
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncaTe(3, truncate_Field), buCket(3, bUckeT_field))");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(1)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .alwaysNull("bucket_field", "bucket_field_bucket")
+        .truncate("truncate_field", 3, "truncate_field_trunc_3")
+        .bucket("bucket_field", 3, "bucket_field_bucket_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+  }
+
+  @Test
   public void testCreateDropTable() throws TException, IOException, InterruptedException {
     TableIdentifier identifier = TableIdentifier.of("default", "customers");
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
index 8013ca0..50a6371 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec.TransformType;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -68,14 +69,14 @@ public class PartitionTransform {
         case HiveParser.TOK_MONTH:
         case HiveParser.TOK_DAY:
         case HiveParser.TOK_HOUR:
-          spec.setColumnName(grandChild.getChild(0).getText());
+          spec.setColumnName(grandChild.getChild(0).getText().toLowerCase());
           spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
           break;
         case HiveParser.TOK_TRUNCATE:
         case HiveParser.TOK_BUCKET:
           spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
           spec.setTransformParam(Optional.ofNullable(Integer.valueOf(grandChild.getChild(0).getText())));
-          spec.setColumnName(grandChild.getChild(1).getText());
+          spec.setColumnName(grandChild.getChild(1).getText().toLowerCase());
           break;
       }
     }
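A note on the core of the fix, for readers skimming the diff: the removed code compared the old and new specs by generated partition field name, but when only a transform parameter changes (e.g. truncate(2, col) to truncate(3, col)) the default generated name (col_trunc) does not encode the parameter, so the alteration was silently dropped. The patched code instead removes every field of the current spec and re-adds the full requested spec in one case-insensitive update. Below is a minimal sketch of that pattern against the public Iceberg API; the class name, helper name, and hard-coded transforms are illustrative only, not part of the patch:

    import org.apache.iceberg.Table;
    import org.apache.iceberg.UpdatePartitionSpec;
    import org.apache.iceberg.expressions.Expressions;

    public final class PartitionSpecRebuildSketch {

      // Illustrative helper (not the committed code): rebuilds the partition spec
      // of an already-loaded Iceberg table the way the patched
      // IcebergTableUtil.updateSpec() does.
      static void rebuildSpec(Table table) {
        // Case-insensitive, matching Hive's treatment of column names.
        UpdatePartitionSpec update = table.updateSpec().caseSensitive(false);

        // Drop every field of the current spec ...
        table.spec().fields().forEach(field -> update.removeField(field.name()));

        // ... then re-add the requested transforms, changed or not (hard-coded
        // here; the real code walks the PARTITION_TRANSFORM_SPEC list taken from
        // the session state).
        update.addField(Expressions.truncate("truncate_field", 3));
        update.addField(Expressions.bucket("bucket_field", 2));

        // A single commit produces one new spec with a bumped spec id.
        update.commit();
      }
    }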
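Two details the new tests make explicit: removing a partition field from a v1 Iceberg table does not erase it; the field lives on in the new spec as a void transform, which is why the expected specs above accumulate alwaysNull(...) entries and bump withSpecId(n) on every ALTER. And since PartitionTransform now lowercases the parsed column names, a clause like truncaTe(3, truncate_Field) in SET PARTITION SPEC resolves to the truncate_field column, which is what testSetPartitionTransformCaseSensitive verifies.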