This is an automated email from the ASF dual-hosted git repository.
bvaradar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new f2fdca26fef4 fix(metadata): Exclude Variant/Blob/Vector from V1 column
stats (#18695)
f2fdca26fef4 is described below
commit f2fdca26fef48d2d68a51d4ee98b03b2cbfbc93a
Author: voonhous <[email protected]>
AuthorDate: Thu May 14 02:20:22 2026 +0800
fix(metadata): Exclude Variant/Blob/Vector from V1 column stats (#18695)
The V2 index-version path already filters all three types; the V1 path
(used by bloom filters unconditionally and by column/partition stats
on table v8) was missing BLOB/VECTOR in the AVRO branch and VECTOR in
the SPARK branch, so indexes could silently include columns whose
stats are meaningless. Also clarifies the expression-index error
message to list VARIANT/BLOB/VECTOR alongside RECORD/ARRAY/MAP.
---
.../org/apache/hudi/index/HoodieIndexUtils.java | 2 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 4 +++-
.../hudi/metadata/TestHoodieTableMetadataUtil.java | 21 +++++++++++++++++++++
3 files changed, 25 insertions(+), 2 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
index b2b5a84fef09..b3086eb6a361 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
@@ -724,7 +724,7 @@ public class HoodieIndexUtils {
if (fieldSchema.getNonNullType().getType().isComplex()) {
throw new HoodieMetadataIndexException(String.format(
"Cannot create expression index '%s': Column '%s' has unsupported
data type '%s'. "
- + "Complex types (RECORD, ARRAY, MAP) are not supported for
indexing. "
+ + "Complex types (RECORD, ARRAY, MAP, VARIANT, BLOB, VECTOR)
are not supported for indexing. "
+ "Please choose a column with a primitive data type.",
userIndexName, columnName, fieldSchema.getType()));
}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index bfc16dbf5529..2a272a3f2cc1 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -2045,7 +2045,8 @@ public class HoodieTableMetadataUtil {
// if record type is set and if its AVRO, MAP, ARRAY, RECORD and ENUM
types are unsupported.
if (recordType.isPresent() && recordType.get() == HoodieRecordType.AVRO) {
return (type != HoodieSchemaType.RECORD && type !=
HoodieSchemaType.ARRAY && type != HoodieSchemaType.MAP
- && type != HoodieSchemaType.ENUM && type !=
HoodieSchemaType.VARIANT);
+ && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.VARIANT
+ && type != HoodieSchemaType.BLOB && type != HoodieSchemaType.VECTOR);
}
// if record Type is not set or if recordType is SPARK then we cannot
support AVRO, MAP, ARRAY, RECORD, ENUM and FIXED and BYTES type as well.
// HUDI-8585 will add support for BYTES and FIXED
@@ -2053,6 +2054,7 @@ public class HoodieTableMetadataUtil {
&& type != HoodieSchemaType.ENUM && type != HoodieSchemaType.BYTES &&
type != HoodieSchemaType.FIXED
&& type != HoodieSchemaType.DECIMAL // DECIMAL's underlying type is
BYTES
&& type != HoodieSchemaType.BLOB
+ && type != HoodieSchemaType.VECTOR
&& type != HoodieSchemaType.VARIANT;
}
diff --git
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
index b0093315306d..95023eebe695 100644
---
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
+++
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
@@ -21,6 +21,7 @@ package org.apache.hudi.metadata;
import org.apache.hudi.common.function.SerializableBiFunction;
import org.apache.hudi.common.model.HoodieIndexDefinition;
import org.apache.hudi.common.model.HoodieIndexMetadata;
+import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.schema.HoodieSchemaField;
import org.apache.hudi.common.schema.HoodieSchemaType;
@@ -331,4 +332,24 @@ class TestHoodieTableMetadataUtil {
assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema,
Option.empty(), HoodieIndexVersion.V2));
assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema,
Option.empty(), HoodieIndexVersion.V2));
}
+
+ @Test
+ void testVariantBlobVectorColumnsAreNotSupportedForV1ColumnStats() {
+ HoodieSchema variantSchema =
HoodieSchema.createNullable(HoodieSchema.createVariant());
+ HoodieSchema blobSchema =
HoodieSchema.createNullable(HoodieSchema.createBlob());
+ HoodieSchema vectorSchema =
HoodieSchema.createNullable(HoodieSchema.createVector(128));
+ HoodieSchema stringSchema =
HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING));
+
+ for (HoodieRecordType recordType : new HoodieRecordType[]
{HoodieRecordType.AVRO, HoodieRecordType.SPARK}) {
+ Option<HoodieRecordType> rt = Option.of(recordType);
+ assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(variantSchema,
rt, HoodieIndexVersion.V1),
+ "VARIANT must be excluded from V1 column stats for record type " +
recordType);
+ assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(blobSchema,
rt, HoodieIndexVersion.V1),
+ "BLOB must be excluded from V1 column stats for record type " +
recordType);
+ assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema,
rt, HoodieIndexVersion.V1),
+ "VECTOR must be excluded from V1 column stats for record type " +
recordType);
+ assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema,
rt, HoodieIndexVersion.V1),
+ "STRING should remain supported for record type " + recordType);
+ }
+ }
}