rahil-c commented on code in PR #18190:
URL: https://github.com/apache/hudi/pull/18190#discussion_r2880037929
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java:
##########
@@ -84,18 +86,118 @@
*/
public class HoodieSchema implements Serializable {
private static final long serialVersionUID = 1L;
+
/**
* Constant representing a null JSON value, equivalent to
JsonProperties.NULL_VALUE.
* This provides compatibility with Avro's JsonProperties while maintaining
Hudi's API.
*/
public static final Object NULL_VALUE = JsonProperties.NULL_VALUE;
public static final HoodieSchema NULL_SCHEMA =
HoodieSchema.create(HoodieSchemaType.NULL);
-
/**
* Constant to use when attaching type metadata to external schema systems
like Spark's StructType.
+ * Stores a parameterized type string for custom Hudi logical types such as
VECTOR and BLOB.
+ * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB".
*/
public static final String TYPE_METADATA_FIELD = "hudi_type";
+ /**
+ * Converts a HoodieSchema to its parameterized type string for custom Hudi
logical types
+ * such as VECTOR and BLOB. Only supports custom logical types — throws for
standard types.
+ * Parameterized types include positional parameters: "VECTOR(128)",
"VECTOR(128, DOUBLE)".
+ * Default parameters are omitted: VECTOR(dim) implies elementType=FLOAT.
+ */
+ public String toTypeString() {
+ HoodieSchemaType type = getType();
+ switch (type) {
+ case VECTOR:
+ Vector v = (Vector) this;
+ if (v.getVectorElementType() == Vector.VectorElementType.FLOAT) {
+ return "VECTOR(" + v.getDimension() + ")";
+ }
+ return "VECTOR(" + v.getDimension() + ", " +
v.getVectorElementType().getDataType() + ")";
+ case BLOB:
+ return "BLOB";
+ default:
+ throw new IllegalArgumentException(
+ "toTypeString only supports custom logical types, got: " + type);
+ }
+ }
+
+ /**
+ * Parses a parameterized type string for custom Hudi logical types such as
VECTOR and BLOB.
+ * Examples: "VECTOR(128)" or "VECTOR(512, DOUBLE)".
+ * Throws for non-custom logical type names.
+ */
+ public static HoodieSchema parseTypeString(String descriptor) {
+ Pair<HoodieSchemaType, List<String>> parsedDescriptor =
parseTypeDescriptor(descriptor);
+ HoodieSchemaType type = parsedDescriptor.getLeft();
+ List<String> params = parsedDescriptor.getRight();
+ switch (type) {
+ case VECTOR:
+ if (params.isEmpty()) {
+ throw new IllegalArgumentException("VECTOR type descriptor must
include a dimension parameter");
+ }
+ if (params.size() > 2) {
+ throw new IllegalArgumentException(
+ "VECTOR type descriptor supports at most 2 parameters: dimension
and optional element type");
+ }
+ int dimension;
+ try {
+ dimension = Integer.parseInt(params.get(0));
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("Invalid VECTOR dimension: " +
params.get(0), e);
+ }
+ Vector.VectorElementType elementType = params.size() > 1
+ ? Vector.VectorElementType.fromString(params.get(1))
+ : Vector.VectorElementType.FLOAT;
+ return createVector(dimension, elementType);
+ case BLOB:
+ if (!params.isEmpty()) {
+ throw new IllegalArgumentException(
+ "BLOB type descriptor does not support parameters, got: " +
params);
+ }
+ return createBlob();
+ default:
+ throw new IllegalArgumentException(
+ "parseTypeString only supports custom logical types, got: " +
type);
+ }
+ }
+
+ private static Pair<HoodieSchemaType, List<String>>
parseTypeDescriptor(String descriptor) {
+ ValidationUtils.checkArgument(descriptor != null &&
!descriptor.trim().isEmpty(),
+ "Type descriptor cannot be null or empty");
+ int parenStart = descriptor.indexOf('(');
+ String typeName;
+ List<String> params;
+ if (parenStart == -1) {
+ typeName = descriptor.trim();
Review Comment:
Note that `typeName` will be rejected in `parseTypeString` if it's not a
valid custom logical type.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]