This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b2cee800994 feat(vector): add VECTOR type to HoodieSchema (#18146)
1b2cee800994 is described below

commit 1b2cee800994a5d878f045bd5e4e95618d6123a2
Author: Rahil C <[email protected]>
AuthorDate: Mon Mar 2 00:47:13 2026 -0800

    feat(vector): add VECTOR type to HoodieSchema (#18146)
    
    * add VECTOR type to HoodieSchema
    
    * keep fixed bytes only
    
    * address elementType
    
    * fixes
    
    * add all tests
    
    * move metadata to schema level instead of as fields
    
    * fix ci
    
    * use enum for vector element type
    
    * remove nesting for fixed bytes
    
    * checkstyle fixes
    
    * remove fixed requirement
    
    * address comments
    
    * fix nested structures test
    
    * check style
    
    * minor fix
    
    * address feedback
    
    * address vinoth comments
    
    * address vinoth and tim comments
    
    * address voon comments
---
 .../apache/hudi/common/schema/HoodieSchema.java    | 352 +++++++++++++++++++++
 .../hudi/common/schema/HoodieSchemaType.java       |   5 +
 .../hudi/common/schema/TestHoodieSchema.java       | 317 +++++++++++++++++++
 .../hudi/common/schema/TestHoodieSchemaType.java   |  96 ++++++
 4 files changed, 770 insertions(+)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java 
b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java
index e807576fba99..f35c77c099f5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java
@@ -123,6 +123,7 @@ public class HoodieSchema implements Serializable {
   static {
     LogicalTypes.register(VariantLogicalType.VARIANT_LOGICAL_TYPE_NAME, new 
VariantLogicalTypeFactory());
     LogicalTypes.register(BlobLogicalType.BLOB_LOGICAL_TYPE_NAME, new 
BlobLogicalTypeFactory());
+    LogicalTypes.register(VectorLogicalType.VECTOR_LOGICAL_TYPE_NAME, new 
VectorLogicalTypeFactory());
   }
 
   /**
@@ -173,6 +174,8 @@ public class HoodieSchema implements Serializable {
         return new HoodieSchema.Variant(avroSchema);
       } else if (logicalType == BlobLogicalType.blob()) {
         return new HoodieSchema.Blob(avroSchema);
+      } else if (logicalType instanceof VectorLogicalType) {
+        return new HoodieSchema.Vector(avroSchema);
       }
     }
     return new HoodieSchema(avroSchema);
@@ -645,6 +648,62 @@ public class HoodieSchema implements Serializable {
     return new HoodieSchema.Blob(Blob.DEFAULT_NAME);
   }
 
+  /**
+   * Creates Vector schema with default name and specified dimension.
+   * Defaults to {@link Vector.VectorElementType#FLOAT} element type.
+   *
+   * <p>The generated FIXED type name encodes dimension and element type 
(e.g., {@code vector_float_128})
+   * to avoid Avro name collisions when multiple vector columns exist in the 
same record.</p>
+   *
+   * @param dimension vector dimension (must be > 0)
+   * @return new HoodieSchema.Vector
+   */
+  public static HoodieSchema.Vector createVector(int dimension) {
+    return createVector(dimension, Vector.VectorElementType.FLOAT);
+  }
+
+  /**
+   * Creates Vector schema with custom name and dimension.
+   * Defaults to {@link Vector.VectorElementType#FLOAT} element type.
+   *
+   * @param name FIXED type name (must not be null or empty)
+   * @param dimension vector dimension (must be > 0)
+   * @return new HoodieSchema.Vector
+   */
+  public static HoodieSchema.Vector createVector(String name, int dimension) {
+    return createVector(name, dimension, Vector.VectorElementType.FLOAT);
+  }
+
+  /**
+   * Creates Vector schema with custom dimension and element type.
+   *
+   * <p>The generated FIXED type name encodes dimension and element type (e.g., {@code vector_double_256})
+   * to avoid Avro name collisions when multiple vector columns exist in the same record.</p>
+   *
+   * @param dimension vector dimension (must be > 0)
+   * @param elementType element type ({@link Vector.VectorElementType#FLOAT}, {@link Vector.VectorElementType#DOUBLE}
+   *                    or {@link Vector.VectorElementType#INT8}); {@code null} defaults to FLOAT
+   * @return new HoodieSchema.Vector
+   */
+  public static HoodieSchema.Vector createVector(int dimension, Vector.VectorElementType elementType) {
+    // Resolve the default before building the name so a null element type yields a FLOAT vector,
+    // matching the named overload, instead of failing with a NullPointerException.
+    Vector.VectorElementType resolvedElementType =
+        elementType != null ? elementType : Vector.VectorElementType.FLOAT;
+    // Locale.ROOT keeps the generated Avro name locale-independent: with a Turkish default locale,
+    // "INT8".toLowerCase() would produce a dotless "ı", which is not a legal Avro name character.
+    String vectorName = Vector.DEFAULT_NAME + "_"
+        + resolvedElementType.name().toLowerCase(java.util.Locale.ROOT) + "_" + dimension;
+    return createVector(vectorName, dimension, resolvedElementType);
+  }
+
+  /**
+   * Creates Vector schema with custom name, dimension, and element type.
+   *
+   * @param name FIXED type name (must not be null or empty)
+   * @param dimension vector dimension (must be > 0)
+   * @param elementType element type ({@link Vector.VectorElementType#FLOAT}, {@link Vector.VectorElementType#DOUBLE}
+   *                    or {@link Vector.VectorElementType#INT8}); defaults to FLOAT if null
+   * @return new HoodieSchema.Vector
+   */
+  public static HoodieSchema.Vector createVector(String name, int dimension, 
Vector.VectorElementType elementType) {
+    ValidationUtils.checkArgument(name != null && !name.isEmpty(),
+        () -> "Vector name must not be null or empty");
+    Schema vectorSchema = Vector.createSchema(name, dimension, elementType);
+    return new HoodieSchema.Vector(vectorSchema);
+  }
+
   /**
    * Returns the Hudi schema version information.
    *
@@ -1551,6 +1610,215 @@ public class HoodieSchema implements Serializable {
     }
   }
 
+  public static class Vector extends HoodieSchema {
+    private static final String DEFAULT_NAME = "vector";
+
+    /**
+     * Enum representing vector element data types.
+     */
+    public enum VectorElementType {
+      FLOAT(4),
+      DOUBLE(8),
+      INT8(1);
+
+      private final int elementSize;
+
+      VectorElementType(int elementSize) {
+        this.elementSize = elementSize;
+      }
+
+      /**
+       * Returns the byte size of a single element.
+       *
+       * @return number of bytes per element
+       */
+      public int getElementSize() {
+        return elementSize;
+      }
+
+      /**
+       * Converts a string to VectorElementType enum.
+       *
+       * @param name the element type name (e.g., "FLOAT", "DOUBLE", "INT8")
+       * @return the corresponding enum value
+       * @throws IllegalArgumentException if name is unknown
+       */
+      public static VectorElementType fromString(String name) {
+        for (VectorElementType type : values()) {
+          if (type.name().equalsIgnoreCase(name)) {
+            return type;
+          }
+        }
+        throw new IllegalArgumentException("Unknown element type: " + name);
+      }
+    }
+
+    /**
+     * Enum representing the physical storage format backing a vector.
+     */
+    public enum StorageBacking {
+      FIXED_BYTES;
+
+      /**
+       * Converts a string to StorageBacking enum.
+       *
+       * @param name the storage backing name (e.g., "FIXED_BYTES")
+       * @return the corresponding enum value
+       * @throws IllegalArgumentException if name is unknown
+       */
+      public static StorageBacking fromString(String name) {
+        for (StorageBacking b : values()) {
+          if (b.name().equalsIgnoreCase(name)) {
+            return b;
+          }
+        }
+        throw new IllegalArgumentException("Unknown storage backing: " + name);
+      }
+    }
+
+    private final int dimension;
+    private final VectorElementType elementType;
+    private final StorageBacking storageBacking;
+
+    /**
+     * Creates Vector from pre-built schema (used by factory methods).
+     *
+     * @param avroSchema the Avro schema to wrap, must be a valid Vector schema
+     * @throws IllegalArgumentException if avroSchema is null or not a valid 
Vector schema
+     */
+    Vector(Schema avroSchema) {
+      super(avroSchema);
+
+      // Extract properties from LogicalType
+      LogicalType logicalType = avroSchema.getLogicalType();
+      if (!(logicalType instanceof VectorLogicalType)) {
+        throw new IllegalArgumentException(
+          "Schema must have VectorLogicalType, got: " + logicalType);
+      }
+
+      VectorLogicalType vectorLogicalType = (VectorLogicalType) logicalType;
+      this.dimension = vectorLogicalType.getDimension();
+      this.elementType = 
VectorElementType.fromString(vectorLogicalType.getElementType());
+      this.storageBacking = 
StorageBacking.fromString(vectorLogicalType.getStorageBacking());
+
+      // Validate schema structure
+      validateVectorSchema(avroSchema);
+    }
+
+    @Override
+    public String getName() {
+      return VectorLogicalType.VECTOR_LOGICAL_TYPE_NAME;
+    }
+
+    @Override
+    public HoodieSchemaType getType() {
+      return HoodieSchemaType.VECTOR;
+    }
+
+    /**
+     * Creates the FIXED Avro schema backing a vector and attaches the vector logical type to it.
+     *
+     * @param name fixed type name (not null)
+     * @param dimension vector dimension (must be > 0)
+     * @param elementType element type (defaults to FLOAT if null)
+     * @return new Vector schema
+     * @throws IllegalArgumentException if dimension is not positive, or if
+     *         dimension × element size does not fit in an {@code int}
+     */
+    private static Schema createSchema(String name, int dimension, VectorElementType elementType) {
+      ValidationUtils.checkArgument(dimension > 0,
+          () -> "Vector dimension must be positive: " + dimension);
+
+      // Fall back to FLOAT when no element type is supplied
+      VectorElementType resolvedElementType = elementType != null ? elementType : VectorElementType.FLOAT;
+
+      // Calculate fixed size (dimension × element size in bytes) in long arithmetic so that an
+      // oversized dimension is rejected instead of silently wrapping around to a wrong size.
+      long fixedSizeInBytes = (long) dimension * resolvedElementType.getElementSize();
+      ValidationUtils.checkArgument(fixedSizeInBytes <= Integer.MAX_VALUE,
+          () -> "Vector FIXED size too large: dimension=" + dimension + " × elementSize="
+                + resolvedElementType.getElementSize() + " = " + fixedSizeInBytes + " bytes");
+      int fixedSize = (int) fixedSizeInBytes;
+
+      // Create fixed Schema
+      Schema vectorSchema = Schema.createFixed(name, null, null, fixedSize);
+
+      // Apply logical type with properties directly to FIXED
+      VectorLogicalType vectorLogicalType =
+          new VectorLogicalType(dimension, resolvedElementType.name(), StorageBacking.FIXED_BYTES.name());
+      vectorLogicalType.addToSchema(vectorSchema);
+
+      return vectorSchema;
+    }
+
+    /**
+     * Validates that the given Avro schema conforms to Vector specification:
+     * it must be a FIXED schema whose size equals dimension × element size.
+     *
+     * @param avroSchema the schema to validate
+     * @throws IllegalArgumentException if schema is invalid
+     */
+    private void validateVectorSchema(Schema avroSchema) {
+      // getFixedSize() is only defined for FIXED schemas; check the type first so a wrongly
+      // typed schema is reported as an IllegalArgumentException like every other violation.
+      ValidationUtils.checkArgument(avroSchema.getType() == Schema.Type.FIXED,
+          () -> "Vector schema must be of FIXED type, got: " + avroSchema.getType());
+
+      // Verify FIXED size matches: dimension × elementSize. Computed in long so a huge
+      // dimension cannot overflow into a value that accidentally matches the actual size.
+      long expectedSize = (long) dimension * elementType.getElementSize();
+      int actualSize = avroSchema.getFixedSize();
+      ValidationUtils.checkArgument(actualSize == expectedSize,
+          () -> "Vector FIXED size mismatch: expected " + expectedSize
+                + " bytes (dimension=" + dimension + " × elementSize="
+                + elementType.getElementSize() + "), got " + actualSize);
+    }
+
+    /**
+     * Returns the dimension of this vector.
+     *
+     * @return vector dimension (always > 0)
+     */
+    public int getDimension() {
+      return dimension;
+    }
+
+    /**
+     * Returns the element type of this vector.
+     *
+     * @return element type enum (e.g., {@link VectorElementType#FLOAT}, 
{@link VectorElementType#DOUBLE}, {@link VectorElementType#INT8})
+     */
+    public VectorElementType getVectorElementType() {
+      return elementType;
+    }
+
+    /**
+     * Returns the storage backing type.
+     *
+     * @return storage backing enum value
+     */
+    public StorageBacking getStorageBacking() {
+      return storageBacking;
+    }
+
+    /**
+     * Returns the size of the fixed bytes backing this vector.
+     *
+     * @return size in bytes (dimension × elementSize)
+     */
+    public int getFixedSize() {
+      return getAvroSchema().getFixedSize();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o == null || getClass() != o.getClass()) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+      Vector vector = (Vector) o;
+      return dimension == vector.dimension
+          && Objects.equals(elementType, vector.elementType)
+          && Objects.equals(storageBacking, vector.storageBacking);
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(super.hashCode(), dimension, elementType, 
storageBacking);
+    }
+  }
+
   public static class Timestamp extends HoodieSchema {
     private final boolean isUtcAdjusted;
     private final TimePrecision precision;
@@ -1719,6 +1987,90 @@ public class HoodieSchema implements Serializable {
     }
   }
 
+  /**
+   * Avro logical type named {@code "vector"} that carries a vector's dimension, element type
+   * name and storage backing name, and writes them onto the schema as properties.
+   */
+  static class VectorLogicalType extends LogicalType {
+    private static final String VECTOR_LOGICAL_TYPE_NAME = "vector";
+    // Schema property keys written by addToSchema
+    private static final String PROP_DIMENSION = "dimension";
+    private static final String PROP_ELEMENT_TYPE = "elementType";
+    private static final String PROP_STORAGE_BACKING = "storageBacking";
+
+    private final int dimension;
+    private final String elementType;
+    private final String storageBacking;
+
+    /**
+     * Creates the logical type after validating its arguments.
+     *
+     * @param dimension vector dimension (must be > 0)
+     * @param elementType element type name (must not be null or empty)
+     * @param storageBacking storage backing name (must not be null or empty)
+     * @throws IllegalArgumentException if any argument violates the constraints above
+     */
+    public VectorLogicalType(int dimension, String elementType, String 
storageBacking) {
+      super(VectorLogicalType.VECTOR_LOGICAL_TYPE_NAME);
+      ValidationUtils.checkArgument(dimension > 0,
+          () -> "Vector dimension must be positive: " + dimension);
+      ValidationUtils.checkArgument(elementType != null && 
!elementType.isEmpty(),
+          () -> "Element type cannot be null or empty");
+      ValidationUtils.checkArgument(storageBacking != null && 
!storageBacking.isEmpty(),
+          () -> "Storage backing cannot be null or empty");
+
+      this.dimension = dimension;
+      this.elementType = elementType;
+      this.storageBacking = storageBacking;
+    }
+
+    /**
+     * @return the vector dimension passed at construction
+     */
+    public int getDimension() {
+      return dimension;
+    }
+
+    /**
+     * @return the element type name passed at construction
+     */
+    public String getElementType() {
+      return elementType;
+    }
+
+    /**
+     * @return the storage backing name passed at construction
+     */
+    public String getStorageBacking() {
+      return storageBacking;
+    }
+
+    /**
+     * Attaches this logical type to the schema, then adds the dimension, element type
+     * and storage backing as schema properties.
+     *
+     * @param schema the schema to annotate
+     * @return the same schema instance
+     */
+    @Override
+    public Schema addToSchema(Schema schema) {
+      super.addToSchema(schema);
+      schema.addProp(PROP_DIMENSION, dimension);
+      schema.addProp(PROP_ELEMENT_TYPE, elementType);
+      schema.addProp(PROP_STORAGE_BACKING, storageBacking);
+      return schema;
+    }
+  }
+
+  /**
+   * Factory for creating VectorLogicalType instances.
+   *
+   * <p>Reads the {@code dimension}, {@code elementType} and {@code storageBacking} schema properties.
+   * A missing element type defaults to FLOAT and a missing storage backing defaults to FIXED_BYTES;
+   * the dimension is mandatory and must be a positive integer.</p>
+   */
+  private static class VectorLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory {
+    /**
+     * Builds a VectorLogicalType from the properties stored on the schema.
+     *
+     * @param schema the schema carrying the vector properties
+     * @return the reconstructed logical type
+     * @throws IllegalArgumentException if the dimension property is missing, non-numeric or not positive
+     */
+    @Override
+    public LogicalType fromSchema(Schema schema) {
+      // Extract properties from schema, defensively handling string-serialized values
+      Object dimObj = schema.getObjectProp(VectorLogicalType.PROP_DIMENSION);
+      int dimension = 0;
+      if (dimObj != null) {
+        try {
+          dimension = Integer.parseInt(String.valueOf(dimObj));
+        } catch (NumberFormatException e) {
+          // Keep the original exception as the cause so the parse failure stays diagnosable
+          throw new IllegalArgumentException("Invalid vector dimension property: " + dimObj, e);
+        }
+      }
+      ValidationUtils.checkArgument(dimension > 0,
+          () -> "Missing or invalid 'dimension' property in vector schema");
+
+      String elementType = schema.getProp(VectorLogicalType.PROP_ELEMENT_TYPE);
+      if (elementType == null) {
+        elementType = Vector.VectorElementType.FLOAT.name(); // default
+      }
+
+      String storageBacking = schema.getProp(VectorLogicalType.PROP_STORAGE_BACKING);
+      if (storageBacking == null) {
+        storageBacking = Vector.StorageBacking.FIXED_BYTES.name(); // default
+      }
+
+      return new VectorLogicalType(dimension, elementType, storageBacking);
+    }
+
+    /**
+     * @return the logical type name this factory is registered under
+     */
+    @Override
+    public String getTypeName() {
+      return VectorLogicalType.VECTOR_LOGICAL_TYPE_NAME;
+    }
+  }
+
   /**
    * Factory for creating VariantLogicalType instances.
    */
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaType.java 
b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaType.java
index 199d144a07d6..2b236d465334 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaType.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaType.java
@@ -124,6 +124,8 @@ public enum HoodieSchemaType {
    */
   BLOB(Schema.Type.RECORD),
 
+  VECTOR(Schema.Type.FIXED),
+
   /**
    * Null type - represents the absence of a value
    */
@@ -163,6 +165,8 @@ public enum HoodieSchemaType {
         return VARIANT;
       } else if (logicalType == HoodieSchema.BlobLogicalType.blob()) {
         return BLOB;
+      } else if (logicalType instanceof HoodieSchema.VectorLogicalType) {
+        return VECTOR;
       }
     }
     switch (avroSchema.getType()) {
@@ -231,6 +235,7 @@ public enum HoodieSchemaType {
       case UNION:
       case VARIANT:
       case BLOB:
+      case VECTOR:
         return true;
       default:
         return false;
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java 
b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java
index 286e739e52a9..4851d33d1ee6 100644
--- 
a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java
+++ 
b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java
@@ -19,6 +19,7 @@
 package org.apache.hudi.common.schema;
 
 import org.apache.hudi.common.schema.HoodieSchema.VariantLogicalType;
+import org.apache.hudi.common.schema.HoodieSchema.VectorLogicalType;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.exception.HoodieAvroSchemaException;
 
@@ -876,6 +877,322 @@ public class TestHoodieSchema {
     assertEquals(5, decimalFixedSchema.getFixedSize());
   }
 
+  @Test
+  void testCreateVectorWithDimension() {
+    // Create vector with dimension only (defaults to FLOAT)
+    HoodieSchema schema = HoodieSchema.createVector(1536);
+
+    HoodieSchema.Vector vectorSchema = assertVector(schema, 1536, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+    assertEquals(HoodieSchemaType.VECTOR, schema.getType());
+
+    assertTrue(schema.getAvroSchema().getLogicalType() instanceof 
VectorLogicalType);
+
+    // Verify properties are at schema level
+    assertVectorAvroProperties(vectorSchema, 1536, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Verify Vector is FIXED type (not RECORD)
+    Schema avroSchema = vectorSchema.getAvroSchema();
+    assertEquals(Schema.Type.FIXED, avroSchema.getType());
+    assertFalse(vectorSchema.hasFields());
+
+    // Verify FIXED size = dimension × elementSize (1536 × 4 bytes for FLOAT)
+    assertEquals(1536 * 4, avroSchema.getFixedSize());
+  }
+
+  @Test
+  void testCreateVectorWithNameAndDimension() {
+    // Create vector with custom name and dimension
+    HoodieSchema schema = HoodieSchema.createVector("embeddings", 768);
+    HoodieSchema.Vector vectorSchema = assertVector(schema, 768, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+    assertEquals(HoodieSchemaType.VECTOR, schema.getType());
+    assertEquals("embeddings", vectorSchema.getAvroSchema().getName());
+  }
+
+  @Test
+  void testCreateVectorWithDimensionAndElementType() {
+    // Create vector with DOUBLE element type
+    HoodieSchema schemaDouble = HoodieSchema.createVector(1536, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertVector(schemaDouble, 1536, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+
+    // Create vector with FLOAT element type
+    HoodieSchema schemaFloat = HoodieSchema.createVector(512, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+    assertVector(schemaFloat, 512, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Create vector with INT8 element type
+    HoodieSchema.Vector schemaInt = HoodieSchema.createVector(256, 
HoodieSchema.Vector.VectorElementType.INT8);
+    assertVector(schemaInt, 256, HoodieSchema.Vector.VectorElementType.INT8);
+  }
+
+  @Test
+  void testCreateVectorWithAllParameters() {
+    // Create vector with all parameters: custom name, dimension, and element 
type
+    HoodieSchema schema = HoodieSchema.createVector("precise_vectors", 512, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+    HoodieSchema.Vector vectorSchema = assertVector(schema, 512, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertEquals("precise_vectors", vectorSchema.getAvroSchema().getName());
+    assertEquals(HoodieSchemaType.VECTOR, vectorSchema.getType());
+  }
+
+  @Test
+  void testVectorInvalidDimension() {
+    // Test zero dimension
+    IllegalArgumentException ex1 = assertThrows(
+        IllegalArgumentException.class,
+        () -> HoodieSchema.createVector(0)
+    );
+    assertTrue(ex1.getMessage().contains("must be positive"));
+
+    // Test negative dimension
+    IllegalArgumentException ex2 = assertThrows(
+        IllegalArgumentException.class,
+        () -> HoodieSchema.createVector(-1)
+    );
+    assertTrue(ex2.getMessage().contains("must be positive"));
+  }
+
+  @Test
+  void testVectorLogicalTypeDetection() {
+    // Create vector schema
+    HoodieSchema schema = HoodieSchema.createVector(1536);
+    assertTrue(schema.getAvroSchema().getLogicalType() instanceof 
VectorLogicalType);
+    assertEquals(HoodieSchemaType.VECTOR, schema.getType());
+  }
+
+  @Test
+  void testVectorSchemaValidation() {
+    // Create vector and verify FIXED structure
+    HoodieSchema.Vector vectorSchema = HoodieSchema.createVector(768);
+    Schema avroSchema = vectorSchema.getAvroSchema();
+
+    // Verify Vector is FIXED type
+    assertEquals(Schema.Type.FIXED, avroSchema.getType());
+    assertFalse(vectorSchema.hasFields());
+
+    // Verify dimension, elementType, storageBacking are schema properties
+    assertVectorAvroProperties(vectorSchema, 768, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Verify FIXED size = dimension × elementSize (768 × 4 bytes for FLOAT)
+    assertEquals(768 * 4, avroSchema.getFixedSize());
+    assertEquals(768 * 4, vectorSchema.getFixedSize());
+  }
+
+  @Test
+  void testVectorFieldAccess() {
+    // Create vector with FLOAT
+    HoodieSchema.Vector vectorFloat = HoodieSchema.createVector(1536);
+    assertVector(vectorFloat, 1536, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    HoodieSchema.Vector vectorDouble = HoodieSchema.createVector(768, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertVector(vectorDouble, 768, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+
+    // Verify dimension/elementType/storageBacking are accessible via 
properties
+    assertVectorAvroProperties(vectorFloat, 1536, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Verify FIXED size access
+    assertEquals(1536 * 4, vectorFloat.getFixedSize()); // FLOAT is 4 bytes
+    assertEquals(768 * 8, vectorDouble.getFixedSize()); // DOUBLE is 8 bytes
+  }
+  
+  @Test
+  void testVectorEquality() {
+    HoodieSchema.Vector v1 = HoodieSchema.createVector(1536);
+    HoodieSchema.Vector v2 = HoodieSchema.createVector(1536);
+    HoodieSchema.Vector v3 = HoodieSchema.createVector(768);
+    HoodieSchema.Vector v4 = HoodieSchema.createVector(1536, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+
+    // Same dimension and element type -> equal
+    assertEquals(v1, v2);
+    assertEquals(v1.hashCode(), v2.hashCode());
+
+    // Different dimension -> not equal
+    assertNotEquals(v1, v3);
+
+    // Different element type -> not equal
+    assertNotEquals(v1, v4);
+
+    // Reflexivity
+    assertEquals(v1, v1);
+
+    // Null check
+    assertNotEquals(v1, null);
+
+    // Different class
+    assertNotEquals(v1, "string");
+  }
+  
+  @Test
+  void testVectorSerialization() throws Exception {
+    // Create vector with DOUBLE element type
+    HoodieSchema.Vector original = HoodieSchema.createVector(768, HoodieSchema.Vector.VectorElementType.DOUBLE);
+
+    // Java serialize; try-with-resources closes (and thereby flushes) the stream even on failure
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
+    try (ObjectOutputStream out = new ObjectOutputStream(byteOut)) {
+      out.writeObject(original);
+    }
+
+    // Java deserialize
+    HoodieSchema deserialized;
+    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(byteOut.toByteArray()))) {
+      deserialized = (HoodieSchema) in.readObject();
+    }
+
+    // Verify
+    assertVector(deserialized, 768, HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertEquals(original, deserialized);
+  }
+
+  @Test
+  void testVectorInNestedStructures() throws Exception {
+    // Create vector schema
+    HoodieSchema.Vector vectorSchema = HoodieSchema.createVector(128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Test vector in record - verify it can be used as a field
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("id", HoodieSchema.create(HoodieSchemaType.INT)),
+        HoodieSchemaField.of("embedding", vectorSchema)
+    );
+    HoodieSchema recordSchema = HoodieSchema.createRecord("TestRecord", null, 
null, fields);
+    assertEquals(HoodieSchemaType.RECORD, recordSchema.getType());
+
+    // Verify vector field is preserved in the Avro schema
+    Schema.Field embeddingField = 
recordSchema.getAvroSchema().getField("embedding");
+    assertNotNull(embeddingField);
+    HoodieSchema embeddingSchema = 
HoodieSchema.fromAvroSchema(embeddingField.schema());
+    assertVector(embeddingSchema, 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Round-trip record with vector field through JSON
+    String recordJson = recordSchema.toString();
+    HoodieSchema parsedRecord = HoodieSchema.parse(recordJson);
+    assertEquals(recordSchema, parsedRecord);
+    Schema.Field parsedEmbeddingField = 
parsedRecord.getAvroSchema().getField("embedding");
+    assertNotNull(parsedEmbeddingField);
+    HoodieSchema parsedEmbedding = 
HoodieSchema.fromAvroSchema(parsedEmbeddingField.schema());
+    assertVector(parsedEmbedding, 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Test vector in array
+    HoodieSchema arraySchema = HoodieSchema.createArray(vectorSchema);
+    assertEquals(HoodieSchemaType.ARRAY, arraySchema.getType());
+    assertVector(arraySchema.getElementType(), 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Round-trip array of vectors through JSON
+    String arrayJson = arraySchema.toString();
+    HoodieSchema parsedArray = HoodieSchema.parse(arrayJson);
+    assertEquals(arraySchema, parsedArray);
+    assertVector(parsedArray.getElementType(), 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Test vector in map
+    HoodieSchema mapSchema = HoodieSchema.createMap(vectorSchema);
+    assertEquals(HoodieSchemaType.MAP, mapSchema.getType());
+    assertVector(mapSchema.getValueType(), 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+
+    // Round-trip map with vector values through JSON
+    String mapJson = mapSchema.toString();
+    HoodieSchema parsedMap = HoodieSchema.parse(mapJson);
+    assertEquals(mapSchema, parsedMap);
+    assertVector(parsedMap.getValueType(), 128, 
HoodieSchema.Vector.VectorElementType.FLOAT);
+  }
+
+  @Test
+  void testVectorWithDefaultName() {
+    // createVector(dimension) generates a dimension-aware name to avoid Avro 
collisions
+    HoodieSchema.Vector v1 = HoodieSchema.createVector(1536);
+    assertEquals("vector_float_1536", v1.getAvroSchema().getName());
+
+    HoodieSchema.Vector v2 = HoodieSchema.createVector(768, 
HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertEquals("vector_double_768", v2.getAvroSchema().getName());
+
+    // Null or empty name is rejected
+    assertThrows(IllegalArgumentException.class, () -> 
HoodieSchema.createVector(null, 128));
+    assertThrows(IllegalArgumentException.class, () -> 
HoodieSchema.createVector("", 128));
+  }
+
+  @Test
+  void testMultipleVectorColumnsWithSameDimensionAndType() {
+    // Two vectors with identical dimension and element type share the same 
FIXED type name,
+    // which Avro allows since the definitions are identical.
+    HoodieSchema.Vector v1 = HoodieSchema.createVector(128);
+    HoodieSchema.Vector v2 = HoodieSchema.createVector(128);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("id", HoodieSchema.create(HoodieSchemaType.INT)),
+        HoodieSchemaField.of("title_embedding", v1),
+        HoodieSchemaField.of("content_embedding", v2)
+    );
+
+    HoodieSchema record = HoodieSchema.createRecord("TestRecord", null, null, 
fields);
+    assertNotNull(record);
+
+    // Verify both fields survive a JSON round-trip
+    String json = record.toString();
+    HoodieSchema parsed = HoodieSchema.parse(json);
+    assertNotNull(parsed.getAvroSchema().getField("title_embedding"));
+    assertNotNull(parsed.getAvroSchema().getField("content_embedding"));
+    
assertVector(HoodieSchema.fromAvroSchema(parsed.getAvroSchema().getField("title_embedding").schema()),
+        128, HoodieSchema.Vector.VectorElementType.FLOAT);
+    
assertVector(HoodieSchema.fromAvroSchema(parsed.getAvroSchema().getField("content_embedding").schema()),
+        128, HoodieSchema.Vector.VectorElementType.FLOAT);
+  }
+
+  @Test
+  void testMultipleVectorColumnsWithDifferentDimensions() {
+    // Two vectors with different dimensions use dimension-aware names to 
avoid Avro collisions
+    HoodieSchema.Vector v128 = HoodieSchema.createVector(128);
+    HoodieSchema.Vector v256 = HoodieSchema.createVector(256);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("id", HoodieSchema.create(HoodieSchemaType.INT)),
+        HoodieSchemaField.of("embedding_small", v128),
+        HoodieSchemaField.of("embedding_large", v256)
+    );
+
+    // A table with two vector columns of different dimensions is a valid use 
case
+    HoodieSchema record = HoodieSchema.createRecord("TestRecord", null, null, 
fields);
+    assertNotNull(record);
+
+    // Verify both fields survive a JSON round-trip (schema 
serialization/parsing)
+    String json = record.toString();
+    HoodieSchema parsed = HoodieSchema.parse(json);
+    assertNotNull(parsed.getAvroSchema().getField("embedding_small"));
+    assertNotNull(parsed.getAvroSchema().getField("embedding_large"));
+    
assertVector(HoodieSchema.fromAvroSchema(parsed.getAvroSchema().getField("embedding_small").schema()),
+        128, HoodieSchema.Vector.VectorElementType.FLOAT);
+    
assertVector(HoodieSchema.fromAvroSchema(parsed.getAvroSchema().getField("embedding_large").schema()),
+        256, HoodieSchema.Vector.VectorElementType.FLOAT);
+  }
+
+  @Test
+  void testVectorFromAvroSchema() {
+    // Create vector via factory
+    HoodieSchema.Vector original = HoodieSchema.createVector("embeddings", 
512, HoodieSchema.Vector.VectorElementType.DOUBLE);
+
+    // Get Avro schema
+    Schema avroSchema = original.getAvroSchema();
+
+    // Re-wrap via fromAvroSchema
+    HoodieSchema rewrapped = HoodieSchema.fromAvroSchema(avroSchema);
+
+    // Verify returns Vector instance with preserved dimension and elementType
+    assertVector(rewrapped, 512, HoodieSchema.Vector.VectorElementType.DOUBLE);
+    assertEquals(original, rewrapped);
+  }
+
+  private HoodieSchema.Vector assertVector(HoodieSchema schema, int 
expectedDimension,
+                                           
HoodieSchema.Vector.VectorElementType expectedElementType) {
+    assertTrue(schema instanceof HoodieSchema.Vector);
+    HoodieSchema.Vector vector = (HoodieSchema.Vector) schema;
+    assertEquals(expectedDimension, vector.getDimension());
+    assertEquals(expectedElementType, vector.getVectorElementType());
+    return vector;
+  }
+
+  private void assertVectorAvroProperties(HoodieSchema.Vector vector, int 
expectedDimension,
+                                          
HoodieSchema.Vector.VectorElementType expectedElementType) {
+    Schema avroSchema = vector.getAvroSchema();
+    assertEquals(expectedDimension, ((Number) 
avroSchema.getObjectProp("dimension")).intValue());
+    assertEquals(expectedElementType.name(), 
avroSchema.getProp("elementType"));
+    assertEquals(HoodieSchema.Vector.StorageBacking.FIXED_BYTES.name(), 
avroSchema.getProp("storageBacking"));
+  }
+
   @Test
   void testCreateTimestampMillis() {
     HoodieSchema timestampSchema = HoodieSchema.createTimestampMillis();
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaType.java
 
b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaType.java
index a14435b2e7d8..012e4bb36b35 100644
--- 
a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaType.java
+++ 
b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaType.java
@@ -86,6 +86,7 @@ public class TestHoodieSchemaType {
     assertTrue(HoodieSchemaType.UNION.isComplex(), "UNION should be complex");
     assertTrue(HoodieSchemaType.VARIANT.isComplex(), "VARIANT should be 
complex");
     assertTrue(HoodieSchemaType.BLOB.isComplex(), "BLOB should be complex");
+    assertTrue(HoodieSchemaType.VECTOR.isComplex(), "VECTOR should be 
complex");
 
     assertFalse(HoodieSchemaType.STRING.isComplex(), "STRING should not be 
complex");
     assertFalse(HoodieSchemaType.INT.isComplex(), "INT should not be complex");
@@ -118,6 +119,7 @@ public class TestHoodieSchemaType {
     assertFalse(HoodieSchemaType.UNION.isNumeric(), "UNION should not be 
numeric");
     assertFalse(HoodieSchemaType.VARIANT.isNumeric(), "VARIANT should not be 
numeric");
     assertFalse(HoodieSchemaType.BLOB.isNumeric(), "BLOB should not be 
numeric");
+    assertFalse(HoodieSchemaType.VECTOR.isNumeric(), "VECTOR should not be 
numeric");
   }
 
   @Test
@@ -207,6 +209,7 @@ public class TestHoodieSchemaType {
         LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)));
     map.put(HoodieSchemaType.VARIANT, createVariantSchemaForTest());
     map.put(HoodieSchemaType.BLOB, HoodieSchema.createBlob().toAvroSchema());
+    map.put(HoodieSchemaType.VECTOR, createVectorSchemaForTest());
     return map;
   }
 
@@ -224,4 +227,97 @@ public class TestHoodieSchemaType {
     HoodieSchema.VariantLogicalType.variant().addToSchema(variantRecord);
     return variantRecord;
   }
+
+  /**
+   * A vector schema whose 'dimension' property is written as a JSON string must still be
+   * converted to a {@link HoodieSchema.Vector}, with the dimension read back as a number.
+   */
+  @Test
+  void testVectorFromSchemaWithStringProperties() {
+    // Hand-written JSON: 'dimension' is the string "128" rather than the integer 128
+    String vectorJson = "{"
+        + "\"type\":\"fixed\","
+        + "\"name\":\"vector_float_128\","
+        + "\"size\":512,"
+        + "\"logicalType\":\"vector\","
+        + "\"dimension\":\"128\","
+        + "\"elementType\":\"FLOAT\","
+        + "\"storageBacking\":\"FIXED_BYTES\""
+        + "}";
+
+    Schema parsed = new Schema.Parser().parse(vectorJson);
+    HoodieSchema converted = HoodieSchema.fromAvroSchema(parsed);
+    assertTrue(converted instanceof HoodieSchema.Vector);
+
+    HoodieSchema.Vector vector = (HoodieSchema.Vector) converted;
+    // The string "128" must come back as the integer 128
+    assertEquals(128, vector.getDimension());
+    assertEquals(HoodieSchema.Vector.VectorElementType.FLOAT, vector.getVectorElementType());
+  }
+
+  /**
+   * A FIXED schema whose declared size disagrees with the vector's dimension and element
+   * type must be rejected with an {@link IllegalArgumentException}.
+   */
+  @Test
+  void testVectorSizeMismatchValidation() {
+    // 10 FLOAT elements at 4 bytes each need a 40-byte FIXED; the schema declares 42.
+    // It is built through JSON parsing so the VectorLogicalTypeFactory is properly invoked.
+    String mismatchedJson = "{"
+        + "\"type\":\"fixed\","
+        + "\"name\":\"bad_vector\","
+        + "\"size\":42,"
+        + "\"logicalType\":\"vector\","
+        + "\"dimension\":10,"
+        + "\"elementType\":\"FLOAT\","
+        + "\"storageBacking\":\"FIXED_BYTES\""
+        + "}";
+    Schema parsed = new Schema.Parser().parse(mismatchedJson);
+
+    IllegalArgumentException thrown = assertThrows(
+        IllegalArgumentException.class, () -> HoodieSchema.fromAvroSchema(parsed));
+
+    String message = thrown.getMessage();
+    assertTrue(message.contains("FIXED size mismatch"),
+        "Should throw size mismatch error, got: " + message);
+  }
+
+  /**
+   * A vector schema naming an element type that is not a known vector element type must be
+   * rejected with an {@link IllegalArgumentException}.
+   */
+  @Test
+  void testVectorUnknownElementType() {
+    // Create a FIXED schema with an invalid element type via JSON parsing
+    // so the VectorLogicalTypeFactory is properly invoked.
+    String jsonSchema = "{"
+        + "\"type\":\"fixed\","
+        + "\"name\":\"bad_vector\","
+        + "\"size\":40,"
+        + "\"logicalType\":\"vector\","
+        + "\"dimension\":10,"
+        + "\"elementType\":\"VARCHAR\","
+        + "\"storageBacking\":\"FIXED_BYTES\""
+        + "}";
+
+    Schema avroSchema = new Schema.Parser().parse(jsonSchema);
+
+    IllegalArgumentException ex = assertThrows(IllegalArgumentException.class,
+        () -> HoodieSchema.fromAvroSchema(avroSchema));
+
+    // Surface the actual message on failure, matching the size-mismatch test's diagnostics.
+    assertTrue(ex.getMessage().contains("Unknown element type: VARCHAR"),
+        "Should reject unknown element types, got: " + ex.getMessage());
+  }
+
+  /**
+   * Builds a vector schema by hand with plain Avro APIs: a FIXED schema named "vector"
+   * carrying VectorLogicalType metadata for 128 FLOAT elements with FIXED_BYTES backing.
+   *
+   * @return the FIXED schema with the vector logical type applied
+   */
+  private static Schema createVectorSchemaForTest() {
+    final int dimension = 128;
+
+    // FIXED payload: 4 bytes (Float.BYTES) per element
+    Schema fixedSchema = Schema.createFixed("vector", null, null, dimension * Float.BYTES);
+
+    // Attach the vector metadata to the FIXED schema
+    HoodieSchema.VectorLogicalType logicalType = new HoodieSchema.VectorLogicalType(
+        dimension,
+        HoodieSchema.Vector.VectorElementType.FLOAT.name(),
+        HoodieSchema.Vector.StorageBacking.FIXED_BYTES.name());
+    logicalType.addToSchema(fixedSchema);
+
+    return fixedSchema;
+  }
 }


Reply via email to