This is an automated email from the ASF dual-hosted git repository.
ostinru pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git
The following commit(s) were added to refs/heads/main by this push:
new df8568f9 fix: add Parquet UUID type support for read/write (#71)
df8568f9 is described below
commit df8568f91326df1ce0b8bf97dbf630e5d7490a8f
Author: liuxiaoyu <[email protected]>
AuthorDate: Thu Mar 26 17:10:22 2026 +0800
fix: add Parquet UUID type support for read/write (#71)
* fix: add Parquet UUID type support for read/write
- Add UUID (OID 2950) handling by mapping UUID to FIXED_LEN_BYTE_ARRAY
primitive type.
- UUIDARRAY is also supported.
- Add BYTEA fallback for unknown FIXED_LEN_BYTE_ARRAY logical types.
- Add uuid_types.parquet test fixture and corresponding unit tests.
---
.../pxf/plugins/hdfs/ParquetFileAccessor.java | 5 +
.../pxf/plugins/hdfs/ParquetResolver.java | 19 ++-
.../plugins/hdfs/parquet/ParquetTypeConverter.java | 69 +++++++++-
.../pxf/plugins/hdfs/ParquetFileAccessorTest.java | 47 +++++++
.../pxf/plugins/hdfs/ParquetResolverTest.java | 146 +++++++++++++++++++++
.../hdfs/parquet/ParquetTypeConverterTest.java | 51 +++++++
.../plugins/hdfs/parquet/ParquetUtilitiesTest.java | 9 ++
.../src/test/resources/parquet/uuid_types.parquet | Bin 0 -> 954 bytes
8 files changed, 338 insertions(+), 8 deletions(-)
diff --git
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
index c73b1a71..a6a315f3 100644
---
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
+++
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
@@ -581,6 +581,11 @@ public class ParquetFileAccessor extends BasePlugin
implements Accessor {
primitiveTypeName = PrimitiveTypeName.INT32;
logicalTypeAnnotation = LogicalTypeAnnotation.dateType();
break;
+ case UUID:
+ primitiveTypeName = PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+ logicalTypeAnnotation = LogicalTypeAnnotation.uuidType();
+ length = 16;
+ break;
case TIME:
case VARCHAR:
case BPCHAR:
diff --git
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
index 2abde97f..19f5475a 100644
---
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
+++
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
@@ -54,6 +54,7 @@ import static
org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnn
import static
org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import static
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
public class ParquetResolver extends BasePlugin implements Resolver {
@@ -241,11 +242,21 @@ public class ParquetResolver extends BasePlugin
implements Resolver {
group.add(columnIndex, (Float) fieldValue);
break;
case FIXED_LEN_BYTE_ARRAY:
- byte[] fixedLenByteArray = getFixedLenByteArray((String)
fieldValue, primitiveType, columnDescriptors.get(columnIndex).columnName());
- if (fixedLenByteArray == null) {
- return;
+ if (logicalTypeAnnotation instanceof
UUIDLogicalTypeAnnotation) {
+ byte[] uuidBytes =
ParquetTypeConverter.uuidToBytes((String) fieldValue);
+ group.add(columnIndex,
Binary.fromReusedByteArray(uuidBytes));
+ } else if (logicalTypeAnnotation instanceof
DecimalLogicalTypeAnnotation) {
+ byte[] fixedLenByteArray = getFixedLenByteArray((String)
fieldValue,
+ primitiveType,
columnDescriptors.get(columnIndex).columnName());
+ if (fixedLenByteArray == null) {
+ return;
+ }
+ group.add(columnIndex,
Binary.fromReusedByteArray(fixedLenByteArray));
+ } else {
+ throw new UnsupportedTypeException(
+ "Writing FIXED_LEN_BYTE_ARRAY with logical type "
+ logicalTypeAnnotation
+ + " is not supported. Supported: UUID,
DECIMAL");
}
- group.add(columnIndex,
Binary.fromReusedByteArray(fixedLenByteArray));
break;
case INT96: // SQL standard timestamp string value with or
without time zone literals:
https://www.postgresql.org/docs/9.4/datatype-datetime.html
String timestamp = (String) fieldValue;
diff --git
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
index c704d050..7ecaf223 100644
---
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
+++
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
@@ -28,11 +28,13 @@ import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.Base64;
+import java.util.UUID;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
import static
org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
+import static
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
/**
* Converter for Parquet types and values into PXF data types and values.
@@ -205,18 +207,54 @@ public enum ParquetTypeConverter {
FIXED_LEN_BYTE_ARRAY {
@Override
public DataType getDataType(Type type) {
- return DataType.NUMERIC;
+ LogicalTypeAnnotation logicalType =
type.getLogicalTypeAnnotation();
+ if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+ return DataType.UUID;
+ } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+ return DataType.NUMERIC;
+ }
+ // fallback: treat unknown/null logical types as raw bytes
+ LOG.warn("FIXED_LEN_BYTE_ARRAY with logical type {} will be read
as BYTEA", logicalType);
+ return DataType.BYTEA;
}
@Override
public Object getValue(Group group, int columnIndex, int repeatIndex,
Type type) {
- int scale = ((DecimalLogicalTypeAnnotation)
type.getLogicalTypeAnnotation()).getScale();
- return new BigDecimal(new BigInteger(group.getBinary(columnIndex,
repeatIndex).getBytes()), scale);
+ LogicalTypeAnnotation logicalType =
type.getLogicalTypeAnnotation();
+ if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+ byte[] bytes = group.getBinary(columnIndex,
repeatIndex).getBytes();
+ return uuidFromBytes(bytes);
+ } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+ int scale = ((DecimalLogicalTypeAnnotation)
logicalType).getScale();
+ return new BigDecimal(new BigInteger(
+ group.getBinary(columnIndex, repeatIndex).getBytes()),
scale);
+ }
+ return group.getBinary(columnIndex, repeatIndex).getBytes();
}
@Override
public void addValueToJsonArray(Group group, int columnIndex, int
repeatIndex, Type type, ArrayNode jsonNode) {
- jsonNode.add((BigDecimal) getValue(group, columnIndex,
repeatIndex, type));
+ LogicalTypeAnnotation logicalType =
type.getLogicalTypeAnnotation();
+ if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+ jsonNode.add((String) getValue(group, columnIndex,
repeatIndex, type));
+ } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+ jsonNode.add((BigDecimal) getValue(group, columnIndex,
repeatIndex, type));
+ } else {
+ jsonNode.add(group.getBinary(columnIndex,
repeatIndex).getBytes());
+ }
+ }
+
+ @Override
+ public String getValueFromList(Group group, int columnIndex, int
repeatIndex, PrimitiveType primitiveType) {
+ Object value = getValue(group, columnIndex, repeatIndex,
primitiveType);
+ LogicalTypeAnnotation logicalType =
primitiveType.getLogicalTypeAnnotation();
+ if (logicalType == null) {
+ // BYTEA fallback: hex-encode raw bytes, same as BINARY does
+ ByteBuffer byteBuffer = ByteBuffer.wrap((byte[]) value);
+ return pgUtilities.encodeByteaHex(byteBuffer);
+ } else {
+ return String.valueOf(value);
+ }
}
},
@@ -385,6 +423,29 @@ public enum ParquetTypeConverter {
return new BigDecimal(BigInteger.valueOf(value),
decimalType.getScale());
}
+ /**
+ * Convert 16 bytes (big-endian) to a UUID string.
+ */
+ public static String uuidFromBytes(byte[] bytes) {
+ if (bytes.length != 16) {
+ throw new PxfRuntimeException(
+ String.format("Expected 16 bytes for UUID, but got %d",
bytes.length));
+ }
+ ByteBuffer bb = ByteBuffer.wrap(bytes);
+ return new UUID(bb.getLong(), bb.getLong()).toString();
+ }
+
+ /**
+ * Convert a UUID string to 16 bytes (big-endian).
+ */
+ public static byte[] uuidToBytes(String uuidString) {
+ UUID uuid = UUID.fromString(uuidString);
+ ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ return bb.array();
+ }
+
/**
* Validate whether the element type in Parquet List type is supported by
pxf
*
diff --git
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
index 17ca9df3..90afb9a2 100644
---
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
+++
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
@@ -1,11 +1,22 @@
package org.apache.cloudberry.pxf.plugins.hdfs;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.cloudberry.pxf.api.io.DataType;
import org.apache.cloudberry.pxf.api.model.RequestContext;
+import org.apache.cloudberry.pxf.api.utilities.ColumnDescriptor;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
public class ParquetFileAccessorTest {
ParquetFileAccessor accessor;
@@ -26,4 +37,40 @@ public class ParquetFileAccessorTest {
accessor.setRequestContext(context);
assertNull(context.getMetadata());
}
+
+ @Test
+ public void testGetTypeForColumnDescriptor_UUID() throws Exception {
+ ColumnDescriptor uuidColumn = new ColumnDescriptor("id",
DataType.UUID.getOID(), 0, "uuid", new Integer[]{});
+
+ Method method =
ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor",
ColumnDescriptor.class);
+ method.setAccessible(true);
+ Type result = (Type) method.invoke(accessor, uuidColumn);
+
+ assertEquals("id", result.getName());
+ assertTrue(result.isPrimitive());
+ PrimitiveType primitiveType = result.asPrimitiveType();
+ assertEquals(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
primitiveType.getPrimitiveTypeName());
+ assertEquals(LogicalTypeAnnotation.uuidType(),
primitiveType.getLogicalTypeAnnotation());
+ assertEquals(16, primitiveType.getTypeLength());
+ }
+
+ @Test
+ public void testGetTypeForColumnDescriptor_UUIDArray() throws Exception {
+ ColumnDescriptor uuidArrayColumn = new ColumnDescriptor("ids",
DataType.UUIDARRAY.getOID(), 0, "uuid[]", new Integer[]{});
+
+ Method method =
ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor",
ColumnDescriptor.class);
+ method.setAccessible(true);
+ Type result = (Type) method.invoke(accessor, uuidArrayColumn);
+
+ assertEquals("ids", result.getName());
+ // array types are wrapped in a list group
+
assertTrue(result.asGroupType().isRepetition(Type.Repetition.OPTIONAL));
+
+ Type elementType =
result.asGroupType().getType(0).asGroupType().getType(0);
+ assertTrue(elementType.isPrimitive());
+ PrimitiveType elementPrimitive = elementType.asPrimitiveType();
+ assertEquals(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
elementPrimitive.getPrimitiveTypeName());
+ assertEquals(LogicalTypeAnnotation.uuidType(),
elementPrimitive.getLogicalTypeAnnotation());
+ assertEquals(16, elementPrimitive.getTypeLength());
+ }
}
diff --git
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
index 936978ff..44798224 100644
---
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
+++
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
@@ -31,6 +31,7 @@ import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.math.BigDecimal;
+import java.nio.ByteBuffer;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneId;
@@ -40,8 +41,11 @@ import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Objects;
+import java.util.UUID;
import java.util.stream.Collectors;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
+
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -193,6 +197,148 @@ public class ParquetResolverTest {
testSetFields_RightTrimCharHelper(" abcd ", " abc ", " abc");
}
+ @Test
+ public void testSetFields_UUID_FixedLenByteArray() throws IOException {
+ List<Type> typeFields = new ArrayList<>();
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+ .length(16).as(uuidType()).named("id"));
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+ schema = new MessageType("hive_schema", typeFields);
+ context.setMetadata(schema);
+
+ List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+ columnDescriptors.add(new ColumnDescriptor("id",
DataType.UUID.getOID(), 0, "uuid", null));
+ columnDescriptors.add(new ColumnDescriptor("value",
DataType.VARCHAR.getOID(), 1, "varchar", null));
+ context.setTupleDescription(columnDescriptors);
+
+ resolver.setRequestContext(context);
+ resolver.afterPropertiesSet();
+
+ String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+ List<OneField> fields = new ArrayList<>();
+ fields.add(new OneField(DataType.TEXT.getOID(), uuidValue));
+ fields.add(new OneField(DataType.TEXT.getOID(), "test"));
+
+ OneRow row = resolver.setFields(fields);
+ assertNotNull(row);
+ Object data = row.getData();
+ assertNotNull(data);
+ assertTrue(data instanceof Group);
+ Group group = (Group) data;
+
+ // assert UUID value is stored as 16-byte FIXED_LEN_BYTE_ARRAY
(big-endian)
+ byte[] storedBytes = group.getBinary(0, 0).getBytes();
+ assertEquals(16, storedBytes.length);
+ UUID expectedUuid = UUID.fromString(uuidValue);
+ ByteBuffer bb = ByteBuffer.wrap(storedBytes);
+ assertEquals(expectedUuid.getMostSignificantBits(), bb.getLong());
+ assertEquals(expectedUuid.getLeastSignificantBits(), bb.getLong());
+ assertEquals("test", group.getString(1, 0));
+
+ // assert value repetition count
+ for (int i = 0; i < 2; i++) {
+ assertEquals(1, group.getFieldRepetitionCount(i));
+ }
+ }
+
+ @Test
+ public void testSetFields_UUID_FixedLenByteArray_Null() throws IOException
{
+ List<Type> typeFields = new ArrayList<>();
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+ .length(16).as(uuidType()).named("id"));
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+ schema = new MessageType("hive_schema", typeFields);
+ context.setMetadata(schema);
+
+ List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+ columnDescriptors.add(new ColumnDescriptor("id",
DataType.UUID.getOID(), 0, "uuid", null));
+ columnDescriptors.add(new ColumnDescriptor("value",
DataType.VARCHAR.getOID(), 1, "varchar", null));
+ context.setTupleDescription(columnDescriptors);
+
+ resolver.setRequestContext(context);
+ resolver.afterPropertiesSet();
+
+ List<OneField> fields = new ArrayList<>();
+ fields.add(new OneField(DataType.TEXT.getOID(), null));
+ fields.add(new OneField(DataType.TEXT.getOID(), "test"));
+
+ OneRow row = resolver.setFields(fields);
+ assertNotNull(row);
+ Object data = row.getData();
+ assertNotNull(data);
+ assertTrue(data instanceof Group);
+ Group group = (Group) data;
+
+ // assert null UUID is not written (repetition count 0)
+ assertEquals(0, group.getFieldRepetitionCount(0));
+ assertEquals(1, group.getFieldRepetitionCount(1));
+ }
+
+ @Test
+ public void testGetFields_UUID_FixedLenByteArray() throws IOException {
+ List<Type> typeFields = new ArrayList<>();
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+ .length(16).as(uuidType()).named("id"));
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+ schema = new MessageType("hive_schema", typeFields);
+ context.setMetadata(schema);
+ context.setTupleDescription(getColumnDescriptorsFromSchema(schema));
+ resolver.setRequestContext(context);
+ resolver.afterPropertiesSet();
+
+ List<Group> groups = readParquetFile("uuid_types.parquet", 3, schema);
+ assertEquals(3, groups.size());
+
+ // row 0
+ List<OneField> fields = assertRow(groups, 0, 2);
+ assertField(fields, 0, "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
DataType.UUID);
+ assertField(fields, 1, "test1", DataType.TEXT);
+
+ // row 1
+ fields = assertRow(groups, 1, 2);
+ assertField(fields, 0, "b1ffcd00-0d1c-5f09-cc7e-7ccace491b22",
DataType.UUID);
+ assertField(fields, 1, "test2", DataType.TEXT);
+
+ // row 2 (null UUID)
+ fields = assertRow(groups, 2, 2);
+ assertField(fields, 0, null, DataType.UUID);
+ assertField(fields, 1, "test3", DataType.TEXT);
+ }
+
+ @Test
+ public void testRoundTrip_UUID_FixedLenByteArray() throws IOException {
+ List<Type> typeFields = new ArrayList<>();
+
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+ .length(16).as(uuidType()).named("id"));
+ schema = new MessageType("hive_schema", typeFields);
+ context.setMetadata(schema);
+
+ List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+ columnDescriptors.add(new ColumnDescriptor("id",
DataType.UUID.getOID(), 0, "uuid", null));
+ context.setTupleDescription(columnDescriptors);
+
+ resolver.setRequestContext(context);
+ resolver.afterPropertiesSet();
+
+ // Write
+ String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+ List<OneField> fields = new ArrayList<>();
+ fields.add(new OneField(DataType.TEXT.getOID(), uuidValue));
+
+ OneRow row = resolver.setFields(fields);
+ assertNotNull(row);
+ Group group = (Group) row.getData();
+
+ // Read back
+ List<Group> groups = new ArrayList<>();
+ groups.add(group);
+ List<OneField> readFields = assertRow(groups, 0, 1);
+ assertField(readFields, 0, uuidValue, DataType.UUID);
+ }
+
@Test
public void testSetFields_Primitive_Nulls() throws IOException {
schema = getParquetSchemaForPrimitiveTypes(Type.Repetition.OPTIONAL,
false);
diff --git
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
index 5230a94e..e686da39 100644
---
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
+++
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
@@ -1,15 +1,23 @@
package org.apache.cloudberry.pxf.plugins.hdfs.parquet;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
import org.apache.cloudberry.pxf.api.GreenplumDateTime;
+import org.apache.cloudberry.pxf.api.error.UnsupportedTypeException;
+import org.apache.cloudberry.pxf.api.io.DataType;
import org.junit.jupiter.api.Test;
+import java.nio.ByteBuffer;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
+import java.util.UUID;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -94,6 +102,49 @@ public class ParquetTypeConverterTest {
assertEquals(expectedTimestampInSystemTimeZone2, convertedTimestamp2);
}
+ @Test
+ public void testUuidBytesRoundTrip() {
+ String uuidString = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+ byte[] bytes = ParquetTypeConverter.uuidToBytes(uuidString);
+ assertEquals(16, bytes.length);
+ String result = ParquetTypeConverter.uuidFromBytes(bytes);
+ assertEquals(uuidString, result);
+ }
+
+ @Test
+ public void testUuidFromKnownBytes() {
+ UUID uuid = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+ ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ String result = ParquetTypeConverter.uuidFromBytes(bb.array());
+ assertEquals("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", result);
+ }
+
+ @Test
+ public void testUuidToKnownBytes() {
+ byte[] bytes =
ParquetTypeConverter.uuidToBytes("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+ UUID uuid = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+ ByteBuffer bb = ByteBuffer.wrap(bytes);
+ assertEquals(uuid.getMostSignificantBits(), bb.getLong());
+ assertEquals(uuid.getLeastSignificantBits(), bb.getLong());
+ }
+
+ @Test
+ public void testFixedLenByteArray_NullLogicalType_FallbackToBytea() {
+ // FIXED_LEN_BYTE_ARRAY with no logical type should fallback to BYTEA
+ Type type =
Types.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+ .length(16).named("unknown");
+ assertEquals(DataType.BYTEA,
ParquetTypeConverter.FIXED_LEN_BYTE_ARRAY.getDataType(type));
+ }
+
+ @Test
+ public void testFixedLenByteArray_UUIDLogicalType_ReturnsUUID() {
+ Type type =
Types.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+
.length(16).as(LogicalTypeAnnotation.uuidType()).named("uuid_col");
+ assertEquals(DataType.UUID,
ParquetTypeConverter.FIXED_LEN_BYTE_ARRAY.getDataType(type));
+ }
+
// Helper function
private String convertUTCToCurrentSystemTimeZone(String expectedUTC) {
// convert expectedUTC string to ZonedDateTime zdt
diff --git
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
index c590b4ca..548faf9d 100644
---
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
+++
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
@@ -67,6 +67,15 @@ public class ParquetUtilitiesTest {
assertIterableEquals(Arrays.asList("fizz", "buzz", "fizzbuzz"),
result);
}
+ @Test
+ public void testParsePostgresArrayUuidArray() {
+ // GPDB UUID is a parquet FIXED_LEN_BYTE_ARRAY(16) with UUID logical
type annotation
+ String value =
"{a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,b1ffcd00-0d1c-5f09-cc7e-7ccace491b22}";
+
+ List<Object> result = parquetUtilities.parsePostgresArray(value,
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY,
LogicalTypeAnnotation.uuidType());
+
assertIterableEquals(Arrays.asList("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11",
"b1ffcd00-0d1c-5f09-cc7e-7ccace491b22"), result);
+ }
+
@Test
public void testParsePostgresArrayDateArray() {
// GPDB Date is an parquet INT64 primitive type with String annotation
diff --git a/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet
b/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet
new file mode 100644
index 00000000..712dea34
Binary files /dev/null and
b/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]