voonhous commented on code in PR #18190:
URL: https://github.com/apache/hudi/pull/18190#discussion_r2867250297
##########
hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java:
##########
@@ -1914,4 +2174,64 @@ public void testIsBlobField() {
assertFalse(HoodieSchema.createArray(createRecordWithBlob()).isBlobField());
assertFalse(HoodieSchema.createMap(createRecordWithBlob()).isBlobField());
}
+
+ @Test
+ public void testToTypeStringVectorDefaultElementType() {
+ HoodieSchema.Vector vector = HoodieSchema.createVector(128,
HoodieSchema.Vector.VectorElementType.FLOAT);
+ assertEquals("VECTOR(128)", HoodieSchema.toTypeString(vector));
+ }
+
+ @Test
+ public void testToTypeStringVectorNonDefaultElementType() {
+ HoodieSchema.Vector vector = HoodieSchema.createVector(512,
HoodieSchema.Vector.VectorElementType.DOUBLE);
+ assertEquals("VECTOR(512, DOUBLE)", HoodieSchema.toTypeString(vector));
+ }
+
+ @Test
+ public void testToTypeStringThrowsForNonCustomType() {
+ HoodieSchema intSchema = HoodieSchema.create(HoodieSchemaType.INT);
+ assertThrows(IllegalArgumentException.class, () ->
HoodieSchema.toTypeString(intSchema));
+ }
+
+ @Test
+ public void testParseTypeStringVector() {
+ HoodieSchema.TypeDescriptor td =
HoodieSchema.parseTypeString("VECTOR(128)");
+ assertEquals(HoodieSchemaType.VECTOR, td.getType());
+ assertEquals(1, td.getParams().size());
+ assertEquals("128", td.getParam(0));
+ }
+
+ @Test
+ public void testParseTypeStringVectorWithElementType() {
+ HoodieSchema.TypeDescriptor td = HoodieSchema.parseTypeString("VECTOR(512,
DOUBLE)");
+ assertEquals(HoodieSchemaType.VECTOR, td.getType());
+ assertEquals(2, td.getParams().size());
+ assertEquals("512", td.getParam(0));
+ assertEquals("DOUBLE", td.getParam(1));
+ }
+
+ @Test
+ public void testParseTypeStringThrowsForNonCustomType() {
+ assertThrows(IllegalArgumentException.class, () ->
HoodieSchema.parseTypeString("INT"));
+ }
+
+ @Test
+ public void testTypeStringRoundTrip() {
+ HoodieSchema.Vector vector = HoodieSchema.createVector(256,
HoodieSchema.Vector.VectorElementType.FLOAT);
+ String typeString = HoodieSchema.toTypeString(vector);
+ HoodieSchema.TypeDescriptor td = HoodieSchema.parseTypeString(typeString);
+
+ assertEquals(HoodieSchemaType.VECTOR, td.getType());
+ assertEquals("256", td.getParam(0));
+ assertEquals(1, td.getParams().size());
+
+ // Non-default element type round-trip
+ HoodieSchema.Vector vectorDouble = HoodieSchema.createVector(64,
HoodieSchema.Vector.VectorElementType.DOUBLE);
+ String typeStringDouble = HoodieSchema.toTypeString(vectorDouble);
+ HoodieSchema.TypeDescriptor tdDouble =
HoodieSchema.parseTypeString(typeStringDouble);
+
+ assertEquals(HoodieSchemaType.VECTOR, tdDouble.getType());
+ assertEquals("64", tdDouble.getParam(0));
+ assertEquals("DOUBLE", tdDouble.getParam(1));
+ }
Review Comment:
Since we are relying on string parsing from an external source (Spark
DataFrame metadata), you should add tests to ensure malformed data throws the
correct exceptions rather than crashing the executor with a
`NullPointerException` or `IndexOutOfBoundsException`.
Maybe tests such as `testParseTypeStringMalformed` and
`testParseTypeStringUnknownCustomType` can be added to verify this behaviour.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]