This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.2.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit f8431eb6cbb0f309f7a46ef1189b5fe952abaa7f
Author: voonhous <[email protected]>
AuthorDate: Thu May 14 02:20:22 2026 +0800

    fix(metadata): Exclude Variant/Blob/Vector from V1 column stats (#18695)
    
    V2 already filters all three types; V1 (used by bloom filters
    unconditionally and by column/partition stats on table v8) was
    missing BLOB/VECTOR in the AVRO branch and VECTOR in the SPARK
    branch, letting indexes silently include columns whose stats
    are meaningless. Also clarifies the expression-index error
    message to list VARIANT/BLOB/VECTOR alongside RECORD/ARRAY/MAP.
---
 .../org/apache/hudi/index/HoodieIndexUtils.java     |  2 +-
 .../hudi/metadata/HoodieTableMetadataUtil.java      |  4 +++-
 .../hudi/metadata/TestHoodieTableMetadataUtil.java  | 21 +++++++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
index b2b5a84fef09..b3086eb6a361 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java
@@ -724,7 +724,7 @@ public class HoodieIndexUtils {
       if (fieldSchema.getNonNullType().getType().isComplex()) {
         throw new HoodieMetadataIndexException(String.format(
             "Cannot create expression index '%s': Column '%s' has unsupported data type '%s'. "
-                + "Complex types (RECORD, ARRAY, MAP) are not supported for indexing. "
+                + "Complex types (RECORD, ARRAY, MAP, VARIANT, BLOB, VECTOR) are not supported for indexing. "
                 + "Please choose a column with a primitive data type.",
             userIndexName, columnName, fieldSchema.getType()));
       }
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index bfc16dbf5529..2a272a3f2cc1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -2045,7 +2045,8 @@ public class HoodieTableMetadataUtil {
     // if record type is set and if its AVRO, MAP, ARRAY, RECORD and ENUM types are unsupported.
     if (recordType.isPresent() && recordType.get() == HoodieRecordType.AVRO) {
       return (type != HoodieSchemaType.RECORD && type != HoodieSchemaType.ARRAY && type != HoodieSchemaType.MAP
-          && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.VARIANT);
+          && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.VARIANT
+          && type != HoodieSchemaType.BLOB && type != HoodieSchemaType.VECTOR);
     }
     // if record Type is not set or if recordType is SPARK then we cannot support AVRO, MAP, ARRAY, RECORD, ENUM and FIXED and BYTES type as well.
     // HUDI-8585 will add support for BYTES and FIXED
@@ -2053,6 +2054,7 @@ public class HoodieTableMetadataUtil {
         && type != HoodieSchemaType.ENUM && type != HoodieSchemaType.BYTES && type != HoodieSchemaType.FIXED
         && type != HoodieSchemaType.DECIMAL // DECIMAL's underlying type is BYTES
         && type != HoodieSchemaType.BLOB
+        && type != HoodieSchemaType.VECTOR
         && type != HoodieSchemaType.VARIANT;
   }
 
diff --git a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
index b0093315306d..95023eebe695 100644
--- a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
+++ b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
@@ -21,6 +21,7 @@ package org.apache.hudi.metadata;
 import org.apache.hudi.common.function.SerializableBiFunction;
 import org.apache.hudi.common.model.HoodieIndexDefinition;
 import org.apache.hudi.common.model.HoodieIndexMetadata;
+import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
 import org.apache.hudi.common.schema.HoodieSchema;
 import org.apache.hudi.common.schema.HoodieSchemaField;
 import org.apache.hudi.common.schema.HoodieSchemaType;
@@ -331,4 +332,24 @@ class TestHoodieTableMetadataUtil {
     assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema, Option.empty(), HoodieIndexVersion.V2));
     assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema, Option.empty(), HoodieIndexVersion.V2));
   }
+
+  @Test
+  void testVariantBlobVectorColumnsAreNotSupportedForV1ColumnStats() {
+    HoodieSchema variantSchema = HoodieSchema.createNullable(HoodieSchema.createVariant());
+    HoodieSchema blobSchema = HoodieSchema.createNullable(HoodieSchema.createBlob());
+    HoodieSchema vectorSchema = HoodieSchema.createNullable(HoodieSchema.createVector(128));
+    HoodieSchema stringSchema = HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING));
+
+    for (HoodieRecordType recordType : new HoodieRecordType[] {HoodieRecordType.AVRO, HoodieRecordType.SPARK}) {
+      Option<HoodieRecordType> rt = Option.of(recordType);
+      assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(variantSchema, rt, HoodieIndexVersion.V1),
+          "VARIANT must be excluded from V1 column stats for record type " + recordType);
+      assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(blobSchema, rt, HoodieIndexVersion.V1),
+          "BLOB must be excluded from V1 column stats for record type " + recordType);
+      assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(vectorSchema, rt, HoodieIndexVersion.V1),
+          "VECTOR must be excluded from V1 column stats for record type " + recordType);
+      assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(stringSchema, rt, HoodieIndexVersion.V1),
+          "STRING should remain supported for record type " + recordType);
+    }
+  }
 }

Reply via email to