This is an automated email from the ASF dual-hosted git repository. vhs pushed a commit to branch phase-18-HoodieAvroUtils-removal in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 3bd409f28d01659e6537e967b1263867f7cbcb3a Author: voon <[email protected]> AuthorDate: Tue Dec 30 11:30:39 2025 +0800 Address comments --- .../bootstrap/HoodieBootstrapSchemaProvider.java | 3 +- .../org/apache/hudi/index/HoodieIndexUtils.java | 33 +++++++++------------- ...ConcurrentSchemaEvolutionTableSchemaGetter.java | 22 +++++++-------- .../apache/hudi/index/TestHoodieIndexUtils.java | 26 ++++++++++------- .../HoodieSchemaComparatorForSchemaEvolution.java | 10 ++----- 5 files changed, 45 insertions(+), 49 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java index a459e6b9f67f..34848df31591 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java @@ -21,7 +21,6 @@ package org.apache.hudi.client.bootstrap; import org.apache.hudi.avro.model.HoodieFileStatus; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.schema.HoodieSchema; -import org.apache.hudi.common.schema.HoodieSchemaType; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; @@ -48,7 +47,7 @@ public abstract class HoodieBootstrapSchemaProvider { if (writeConfig.getSchema() != null) { // Use schema specified by user if set HoodieSchema userSchema = HoodieSchema.parse(writeConfig.getSchema()); - if (!HoodieSchema.create(HoodieSchemaType.NULL).equals(userSchema)) { + if (!HoodieSchema.NULL_SCHEMA.equals(userSchema)) { return userSchema; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java index 46c04c1024a7..86f141bd6319 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java @@ -138,11 +138,9 @@ public class HoodieIndexUtils { */ static boolean validateDataTypeForSecondaryIndex(List<String> sourceFields, HoodieSchema tableSchema) { return sourceFields.stream().allMatch(fieldToIndex -> { - Option<Pair<String, HoodieSchemaField>> schema = HoodieSchemaUtils.getNestedField(tableSchema, fieldToIndex); - if (schema.isEmpty()) { - throw new HoodieException("Failed to get schema. Not a valid field name: " + fieldToIndex); - } - return isSecondaryIndexSupportedType(schema.get().getRight().schema()); + Pair<String, HoodieSchemaField> schema = HoodieSchemaUtils.getNestedField(tableSchema, fieldToIndex) + .orElseThrow(() -> new HoodieException("Failed to get schema. Not a valid field name: " + fieldToIndex)); + return isSecondaryIndexSupportedType(schema.getRight().schema()); }); } @@ -155,12 +153,12 @@ public class HoodieIndexUtils { */ public static boolean validateDataTypeForSecondaryOrExpressionIndex(List<String> sourceFields, HoodieSchema tableSchema) { return sourceFields.stream().anyMatch(fieldToIndex -> { - Option<Pair<String, HoodieSchemaField>> nestedFieldOpt = HoodieSchemaUtils.getNestedField(tableSchema, fieldToIndex); - if (nestedFieldOpt.isEmpty()) { - throw new HoodieException("Failed to get schema. Not a valid field name: " + fieldToIndex); - } - HoodieSchema fieldSchema = nestedFieldOpt.get().getRight().schema(); - return fieldSchema.getType() != HoodieSchemaType.RECORD && fieldSchema.getType() != HoodieSchemaType.ARRAY && fieldSchema.getType() != HoodieSchemaType.MAP; + Pair<String, HoodieSchemaField> nestedField = HoodieSchemaUtils.getNestedField(tableSchema, fieldToIndex) + .orElseThrow(() -> new HoodieException("Failed to get schema. Not a valid field name: " + fieldToIndex)); + HoodieSchema fieldSchema = nestedField.getRight().schema(); + return fieldSchema.getType() != HoodieSchemaType.RECORD + && fieldSchema.getType() != HoodieSchemaType.ARRAY + && fieldSchema.getType() != HoodieSchemaType.MAP; }); } @@ -182,6 +180,7 @@ public class HoodieIndexUtils { case INT: case LONG: case DOUBLE: + case FLOAT: case DATE: case TIME: return true; @@ -282,7 +281,7 @@ public class HoodieIndexUtils { HoodieStorage storage) throws HoodieIndexException { checkArgument(FSUtils.isBaseFile(filePath)); List<Pair<String, Long>> foundRecordKeys = new ArrayList<>(); - log.info(String.format("Going to filter %d keys from file %s", candidateRecordKeys.size(), filePath)); + log.info("Going to filter {} keys from file {}", candidateRecordKeys.size(), filePath); try (HoodieFileReader fileReader = HoodieIOFactory.getIOFactory(storage) .getReaderFactory(HoodieRecordType.AVRO) .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, filePath)) { @@ -720,16 +719,12 @@ public class HoodieIndexUtils { String columnName = sourceFields.get(0); // We know there's only one column from the check above // First check if the field exists - Option<Pair<String, HoodieSchemaField>> fieldSchemaOpt = HoodieSchemaUtils.getNestedField(tableSchema, columnName); - if (fieldSchemaOpt.isEmpty()) { - throw new HoodieMetadataIndexException(String.format( + Pair<String, HoodieSchemaField> fieldSchema = HoodieSchemaUtils.getNestedField(tableSchema, columnName) + .orElseThrow(() -> new HoodieMetadataIndexException(String.format( "Cannot create %s index '%s': Column '%s' does not exist in the table schema. " + "Please verify the column name and ensure it exists in the table.", indexType.equals(PARTITION_NAME_SECONDARY_INDEX) ? "secondary" : "expression", - userIndexName, columnName)); - } - - Pair<String, HoodieSchemaField> fieldSchema = fieldSchemaOpt.get(); + userIndexName, columnName))); // Check for complex types (RECORD, ARRAY, MAP) - not supported for any index type if (!validateDataTypeForSecondaryOrExpressionIndex(sourceFields, tableSchema)) { diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestConcurrentSchemaEvolutionTableSchemaGetter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestConcurrentSchemaEvolutionTableSchemaGetter.java index 0e1242a1531c..909ca863bfec 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestConcurrentSchemaEvolutionTableSchemaGetter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestConcurrentSchemaEvolutionTableSchemaGetter.java @@ -122,10 +122,10 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon + " {\"name\":\"partitionColumn\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}" + " ]\n" + "}"; - private static HoodieSchema SCHEMA_WITHOUT_METADATA2 = HoodieSchema.parse(SCHEMA_WITHOUT_METADATA_STR2); - private static HoodieSchema SCHEMA_WITHOUT_METADATA = HoodieSchema.parse(SCHEMA_WITHOUT_METADATA_STR); - private static HoodieSchema SCHEMA_WITH_METADATA = HoodieSchemaUtils.addMetadataFields(SCHEMA_WITHOUT_METADATA); - private static HoodieSchema SCHEMA_WITH_PARTITION_COLUMN = HoodieSchema.parse(SCHEMA_WITH_PARTITION_COLUMN_STR); + private static final HoodieSchema SCHEMA_WITHOUT_METADATA2 = HoodieSchema.parse(SCHEMA_WITHOUT_METADATA_STR2); + private static final HoodieSchema SCHEMA_WITHOUT_METADATA = HoodieSchema.parse(SCHEMA_WITHOUT_METADATA_STR); + private static final HoodieSchema SCHEMA_WITH_METADATA = HoodieSchemaUtils.addMetadataFields(SCHEMA_WITHOUT_METADATA); + private static final HoodieSchema SCHEMA_WITH_PARTITION_COLUMN = HoodieSchema.parse(SCHEMA_WITH_PARTITION_COLUMN_STR); @BeforeEach public void setUp() throws Exception { @@ -269,7 +269,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon @ParameterizedTest @MethodSource("commonTableConfigTestDimension") - void testGetTableAvroSchemaInternalNoSchemaFoundEmptyTimeline(HoodieTableType tableType) throws IOException { + void testGetTableSchemaInternalNoSchemaFoundEmptyTimeline(HoodieTableType tableType) throws IOException { // Don't set any schema in commit metadata or table config initMetaClient(false, tableType); testTable = HoodieTestTable.of(metaClient); @@ -285,7 +285,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon // we will only use that and ignore the other instants. @ParameterizedTest @MethodSource("commonTableConfigTestDimension") - void testGetTableAvroSchemaInternalNoSchemaFoundDisqualifiedInstant(HoodieTableType tableType) throws Exception { + void testGetTableSchemaInternalNoSchemaFoundDisqualifiedInstant(HoodieTableType tableType) throws Exception { // Don't set any schema in commit metadata or table config initMetaClient(false, tableType); testTable = HoodieTestTable.of(metaClient); @@ -383,7 +383,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon @ParameterizedTest @MethodSource("schemaTestParams") - void testGetTableAvroSchema(HoodieSchema inputSchema, boolean includeMetadataFields, HoodieSchema expectedSchema) throws Exception { + void testGetTableSchema(HoodieSchema inputSchema, boolean includeMetadataFields, HoodieSchema expectedSchema) throws Exception { metaClient = HoodieTestUtils.getMetaClientBuilder(HoodieTableType.COPY_ON_WRITE, new Properties(),"") .setTableCreateSchema(SCHEMA_WITH_METADATA.toString()) .initTable(getDefaultStorageConf(), basePath); @@ -442,7 +442,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon @ParameterizedTest @MethodSource("createSchemaTestParam") - void testGetTableCreateAvroSchema(boolean includeMetadataFields, HoodieSchema expectedSchema) throws Exception { + void testGetTableCreateSchema(boolean includeMetadataFields, HoodieSchema expectedSchema) throws Exception { metaClient = HoodieTestUtils.getMetaClientBuilder(HoodieTableType.COPY_ON_WRITE, new Properties(),"") .setTableCreateSchema(SCHEMA_WITH_METADATA.toString()) .initTable(getDefaultStorageConf(), basePath); @@ -456,7 +456,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon } @Test - public void testGetTableAvroSchemaInternalWithPartitionFields() throws IOException { + public void testGetTableSchemaInternalWithPartitionFields() throws IOException { initMetaClient(false, HoodieTableType.COPY_ON_WRITE); testTable = HoodieTestTable.of(metaClient); // Setup table config with partition fields @@ -477,7 +477,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon @ParameterizedTest @MethodSource("commonTableConfigTestDimension") - void testGetTableAvroSchemaInternalWithSpecificInstant(HoodieTableType tableType) throws Exception { + void testGetTableSchemaInternalWithSpecificInstant(HoodieTableType tableType) throws Exception { initMetaClient(false, tableType); testTable = HoodieTestTable.of(metaClient); @@ -542,7 +542,7 @@ public class TestConcurrentSchemaEvolutionTableSchemaGetter extends HoodieCommon } @Test - void testTableAvroSchemaFromTimelineCachingBehavior() throws Exception { + void testTableSchemaFromTimelineCachingBehavior() throws Exception { // Initialize with COW table type initMetaClient(false, HoodieTableType.COPY_ON_WRITE); testTable = HoodieTestTable.of(metaClient); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java index 70a871089fea..a56ae00e5590 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java @@ -47,6 +47,7 @@ import static org.apache.hudi.index.HoodieIndexUtils.validateDataTypeForSecondar import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_SECONDARY_INDEX; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.when; @@ -225,20 +226,25 @@ public class TestHoodieIndexUtils { HoodieSchema timeMicros = HoodieSchema.createTimeMicros(); // Unsupported logical types - HoodieSchema decimal = HoodieSchema.createDecimal(10, 2); + HoodieSchema decimalBytesBackedField = HoodieSchema.createDecimal(10, 2); + HoodieSchema decimalFixedBackedField = HoodieSchema.createDecimal("decimal_fixed", null, null, 10, 2, 16); + assertInstanceOf(HoodieSchema.Decimal.class, decimalBytesBackedField); + assertInstanceOf(HoodieSchema.Decimal.class, decimalFixedBackedField); + assertFalse(((HoodieSchema.Decimal) decimalBytesBackedField).isFixed()); + assertTrue(((HoodieSchema.Decimal) decimalFixedBackedField).isFixed()); + HoodieSchema uuid = HoodieSchema.createUUID(); HoodieSchema localTimestampMillis = HoodieSchema.createLocalTimestampMillis(); HoodieSchema localTimestampMicros = HoodieSchema.createLocalTimestampMicros(); HoodieSchema schemaWithLogicalTypes = HoodieSchema.createRecord("TestRecord", null, null, Arrays.asList( - // Supported logical types HoodieSchemaField.of("timestampMillisField", timestampMillis), HoodieSchemaField.of("timestampMicrosField", timestampMicros), HoodieSchemaField.of("dateField", date), HoodieSchemaField.of("timeMillisField", timeMillis), HoodieSchemaField.of("timeMicrosField", timeMicros), - // Unsupported logical types - HoodieSchemaField.of("decimalField", decimal), + HoodieSchemaField.of("decimalBytesBackedField", decimalBytesBackedField), + HoodieSchemaField.of("decimalFixedBackedField", decimalFixedBackedField), HoodieSchemaField.of("uuidField", uuid), HoodieSchemaField.of("localTimestampMillisField", localTimestampMillis), HoodieSchemaField.of("localTimestampMicrosField", localTimestampMicros) @@ -250,15 +256,16 @@ public class TestHoodieIndexUtils { assertTrue(validateDataTypeForSecondaryIndex(Collections.singletonList("dateField"), schemaWithLogicalTypes)); assertTrue(validateDataTypeForSecondaryIndex(Collections.singletonList("timeMillisField"), schemaWithLogicalTypes)); assertTrue(validateDataTypeForSecondaryIndex(Collections.singletonList("timeMicrosField"), schemaWithLogicalTypes)); - + // Test unsupported logical types - assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("decimalField"), schemaWithLogicalTypes)); + assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("decimalBytesBackedField"), schemaWithLogicalTypes)); + assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("decimalFixedBackedField"), schemaWithLogicalTypes)); assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("uuidField"), schemaWithLogicalTypes)); assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("localTimestampMillisField"), schemaWithLogicalTypes)); assertFalse(validateDataTypeForSecondaryIndex(Collections.singletonList("localTimestampMicrosField"), schemaWithLogicalTypes)); - + // Test mix of supported and unsupported logical types - assertFalse(validateDataTypeForSecondaryIndex(Arrays.asList("timestampMillisField", "decimalField"), schemaWithLogicalTypes)); + assertFalse(validateDataTypeForSecondaryIndex(Arrays.asList("timestampMillisField", "decimalBytesBackedField"), schemaWithLogicalTypes)); } /** @@ -299,8 +306,7 @@ public class TestHoodieIndexUtils { // When: Checking eligibility for secondary index // Then: Should not throw exception because float is now supported for secondary index - assertThrows(HoodieMetadataIndexException.class, - () -> HoodieIndexUtils.validateEligibilityForSecondaryOrExpressionIndex( + assertDoesNotThrow(() -> HoodieIndexUtils.validateEligibilityForSecondaryOrExpressionIndex( mockMetaClient, PARTITION_NAME_SECONDARY_INDEX, options, columns, "test_index")); // Test case 2: Supported double field (now supported) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaComparatorForSchemaEvolution.java b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaComparatorForSchemaEvolution.java index fe72089e7ef4..9f2ea6e94ca6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaComparatorForSchemaEvolution.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaComparatorForSchemaEvolution.java @@ -248,7 +248,7 @@ public class HoodieSchemaComparatorForSchemaEvolution { List<String> symbols1 = s1.getEnumSymbols(); List<String> symbols2 = s2.getEnumSymbols(); - // Quick size check before creating sets + // Quick size check before creating lists if (symbols1.size() != symbols2.size()) { return false; } @@ -280,12 +280,8 @@ public class HoodieSchemaComparatorForSchemaEvolution { return schemaEqualsInternal(s1.getValueType(), s2.getValueType()); } - protected boolean validateFixed(HoodieSchema s1, HoodieSchema s2) { - return s1.getName().equals(s2.getName()) && s1.getFixedSize() == s2.getFixedSize(); - } - private boolean fixedSchemaEquals(HoodieSchema s1, HoodieSchema s2) { - return validateFixed(s1, s2); + return s1.getName().equals(s2.getName()) && s1.getFixedSize() == s2.getFixedSize(); } private static boolean decimalSchemaEquals(HoodieSchema s1, HoodieSchema s2) { @@ -343,4 +339,4 @@ public class HoodieSchemaComparatorForSchemaEvolution { return schema.getType().hashCode(); } } -} \ No newline at end of file +}
