rahil-c commented on code in PR #18190:
URL: https://github.com/apache/hudi/pull/18190#discussion_r2880037929


##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java:
##########
@@ -84,18 +86,118 @@
  */
 public class HoodieSchema implements Serializable {
   private static final long serialVersionUID = 1L;
+
   /**
    * Constant representing a null JSON value, equivalent to 
JsonProperties.NULL_VALUE.
    * This provides compatibility with Avro's JsonProperties while maintaining 
Hudi's API.
    */
   public static final Object NULL_VALUE = JsonProperties.NULL_VALUE;
   public static final HoodieSchema NULL_SCHEMA = 
HoodieSchema.create(HoodieSchemaType.NULL);
-
   /**
    * Constant to use when attaching type metadata to external schema systems 
like Spark's StructType.
+   * Stores a parameterized type string for custom Hudi logical types such as 
VECTOR and BLOB.
+   * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB".
    */
   public static final String TYPE_METADATA_FIELD = "hudi_type";
 
+  /**
+   * Converts a HoodieSchema to its parameterized type string for custom Hudi 
logical types
+   * such as VECTOR and BLOB. Only supports custom logical types — throws for 
standard types.
+   * Parameterized types include positional parameters: "VECTOR(128)", 
"VECTOR(128, DOUBLE)".
+   * Default parameters are omitted: VECTOR(dim) implies elementType=FLOAT.
+   */
+  public String toTypeString() {
+    HoodieSchemaType type = getType();
+    switch (type) {
+      case VECTOR:
+        Vector v = (Vector) this;
+        if (v.getVectorElementType() == Vector.VectorElementType.FLOAT) {
+          return "VECTOR(" + v.getDimension() + ")";
+        }
+        return "VECTOR(" + v.getDimension() + ", " + 
v.getVectorElementType().getDataType() + ")";
+      case BLOB:
+        return "BLOB";
+      default:
+        throw new IllegalArgumentException(
+            "toTypeString only supports custom logical types, got: " + type);
+    }
+  }
+
+  /**
+   * Parses a parameterized type string for custom Hudi logical types such as 
VECTOR and BLOB.
+   * Examples: "VECTOR(128)" or "VECTOR(512, DOUBLE)".
+   * Throws for non-custom logical type names.
+   */
+  public static HoodieSchema parseTypeString(String descriptor) {
+    Pair<HoodieSchemaType, List<String>> parsedDescriptor = 
parseTypeDescriptor(descriptor);
+    HoodieSchemaType type = parsedDescriptor.getLeft();
+    List<String> params = parsedDescriptor.getRight();
+    switch (type) {
+      case VECTOR:
+        if (params.isEmpty()) {
+          throw new IllegalArgumentException("VECTOR type descriptor must 
include a dimension parameter");
+        }
+        if (params.size() > 2) {
+          throw new IllegalArgumentException(
+              "VECTOR type descriptor supports at most 2 parameters: dimension 
and optional element type");
+        }
+        int dimension;
+        try {
+          dimension = Integer.parseInt(params.get(0));
+        } catch (NumberFormatException e) {
+          throw new IllegalArgumentException("Invalid VECTOR dimension: " + 
params.get(0), e);
+        }
+        Vector.VectorElementType elementType = params.size() > 1
+            ? Vector.VectorElementType.fromString(params.get(1))
+            : Vector.VectorElementType.FLOAT;
+        return createVector(dimension, elementType);
+      case BLOB:
+        if (!params.isEmpty()) {
+          throw new IllegalArgumentException(
+              "BLOB type descriptor does not support parameters, got: " + 
params);
+        }
+        return createBlob();
+      default:
+        throw new IllegalArgumentException(
+            "parseTypeString only supports custom logical types, got: " + 
type);
+    }
+  }
+
+  private static Pair<HoodieSchemaType, List<String>> 
parseTypeDescriptor(String descriptor) {
+    ValidationUtils.checkArgument(descriptor != null && 
!descriptor.trim().isEmpty(),
+        "Type descriptor cannot be null or empty");
+    int parenStart = descriptor.indexOf('(');
+    String typeName;
+    List<String> params;
+    if (parenStart == -1) {
+      typeName = descriptor.trim();

Review Comment:
   Note that an invalid `typeName` will be caught in `parseTypeString` if it's
not a valid custom logical type.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to