This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-1.2.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit f8431eb6cbb0f309f7a46ef1189b5fe952abaa7f Author: voonhous <[email protected]> AuthorDate: Thu May 14 02:20:22 2026 +0800 fix(metadata): Exclude Variant/Blob/Vector from V1 column stats (#18695) V2 already filters all three types; V1 (used by bloom filters unconditionally and by column/partition stats on table v8) was missing BLOB/VECTOR in the AVRO branch and VECTOR in the SPARK branch, letting indexes silently include columns whose stats are meaningless. Also clarifies the expression-index error message to list VARIANT/BLOB/VECTOR alongside RECORD/ARRAY/MAP. --- .../org/apache/hudi/index/HoodieIndexUtils.java | 2 +- .../hudi/metadata/HoodieTableMetadataUtil.java | 4 +++- .../hudi/metadata/TestHoodieTableMetadataUtil.java | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java index b2b5a84fef09..b3086eb6a361 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java @@ -724,7 +724,7 @@ public class HoodieIndexUtils { if (fieldSchema.getNonNullType().getType().isComplex()) { throw new HoodieMetadataIndexException(String.format( "Cannot create expression index '%s': Column '%s' has unsupported data type '%s'. " - + "Complex types (RECORD, ARRAY, MAP) are not supported for indexing. " + + "Complex types (RECORD, ARRAY, MAP, VARIANT, BLOB, VECTOR) are not supported for indexing. " + "Please choose a column with a primitive data type.", userIndexName, columnName, fieldSchema.getType())); } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index bfc16dbf5529..2a272a3f2cc1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -2045,7 +2045,8 @@ public class HoodieTableMetadataUtil { // if record type is set and if its AVRO, MAP, ARRAY, RECORD and ENUM types are unsupported. if (recordType.isPresent() && recordType.get() == HoodieRecordType.AVRO) { return (type != HoodieSchemaType.RECORD && type != HoodieSchemaType.ARRAY && type != HoodieSchemaType.MAP - && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.VARIANT); + && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.VARIANT + && type != HoodieSchemaType.BLOB && type != HoodieSchemaType.VECTOR); } // if record Type is not set or if recordType is SPARK then we cannot support AVRO, MAP, ARRAY, RECORD, ENUM and FIXED and BYTES type as well. // HUDI-8585 will add support for BYTES and FIXED @@ -2053,6 +2054,7 @@ public class HoodieTableMetadataUtil { && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.BYTES && type != HoodieSchemaType.FIXED && type != HoodieSchemaType.DECIMAL // DECIMAL's underlying type is BYTES && type != HoodieSchemaType.BLOB + && type != HoodieSchemaType.VECTOR && type != HoodieSchemaType.VARIANT; } diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index b0093315306d..95023eebe695 100644 --- a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -21,6 +21,7 @@ package org.apache.hudi.metadata; import org.apache.hudi.common.function.SerializableBiFunction; import org.apache.hudi.common.model.HoodieIndexDefinition; import org.apache.hudi.common.model.HoodieIndexMetadata; +import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.schema.HoodieSchemaField; import org.apache.hudi.common.schema.HoodieSchemaType; @@ -331,4 +332,24 @@ class TestHoodieTableMetadataUtil { assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema, Option.empty(), HoodieIndexVersion.V2)); assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema, Option.empty(), HoodieIndexVersion.V2)); } + + @Test + void testVariantBlobVectorColumnsAreNotSupportedForV1ColumnStats() { + HoodieSchema variantSchema = HoodieSchema.createNullable(HoodieSchema.createVariant()); + HoodieSchema blobSchema = HoodieSchema.createNullable(HoodieSchema.createBlob()); + HoodieSchema vectorSchema = HoodieSchema.createNullable(HoodieSchema.createVector(128)); + HoodieSchema stringSchema = HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING)); + + for (HoodieRecordType recordType : new HoodieRecordType[] {HoodieRecordType.AVRO, HoodieRecordType.SPARK}) { + Option<HoodieRecordType> rt = Option.of(recordType); + assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(variantSchema, rt, HoodieIndexVersion.V1), + "VARIANT must be excluded from V1 column stats for record type " + recordType); + assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(blobSchema, rt, HoodieIndexVersion.V1), + "BLOB must be excluded from V1 column stats for record type " + recordType); + assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema, rt, HoodieIndexVersion.V1), + "VECTOR must be excluded from V1 column stats for record type " + recordType); + assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema, rt, HoodieIndexVersion.V1), + "STRING should remain supported for record type " + recordType); + } + } }
