This is an automated email from the ASF dual-hosted git repository.

ostinru pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git


The following commit(s) were added to refs/heads/main by this push:
     new df8568f9 fix: add Parquet UUID type support for read/write (#71)
df8568f9 is described below

commit df8568f91326df1ce0b8bf97dbf630e5d7490a8f
Author: liuxiaoyu <[email protected]>
AuthorDate: Thu Mar 26 17:10:22 2026 +0800

    fix: add Parquet UUID type support for read/write (#71)
    
    * fix: add Parquet UUID type support for read/write
    
      - Add UUID (OID 2950) handling by mapping UUID to FIXED_LEN_BYTE_ARRAY 
primitive type.
      - UUIDARRAY is also supported.
      - Add BYTEA fallback for unknown FIXED_LEN_BYTE_ARRAY logical types.
      - Add uuid_types.parquet test fixture and corresponding unit tests.
---
 .../pxf/plugins/hdfs/ParquetFileAccessor.java      |   5 +
 .../pxf/plugins/hdfs/ParquetResolver.java          |  19 ++-
 .../plugins/hdfs/parquet/ParquetTypeConverter.java |  69 +++++++++-
 .../pxf/plugins/hdfs/ParquetFileAccessorTest.java  |  47 +++++++
 .../pxf/plugins/hdfs/ParquetResolverTest.java      | 146 +++++++++++++++++++++
 .../hdfs/parquet/ParquetTypeConverterTest.java     |  51 +++++++
 .../plugins/hdfs/parquet/ParquetUtilitiesTest.java |   9 ++
 .../src/test/resources/parquet/uuid_types.parquet  | Bin 0 -> 954 bytes
 8 files changed, 338 insertions(+), 8 deletions(-)

diff --git 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
index c73b1a71..a6a315f3 100644
--- 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
+++ 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessor.java
@@ -581,6 +581,11 @@ public class ParquetFileAccessor extends BasePlugin 
implements Accessor {
                 primitiveTypeName = PrimitiveTypeName.INT32;
                 logicalTypeAnnotation = LogicalTypeAnnotation.dateType();
                 break;
+            case UUID:
+                primitiveTypeName = PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+                logicalTypeAnnotation = LogicalTypeAnnotation.uuidType();
+                length = 16;
+                break;
             case TIME:
             case VARCHAR:
             case BPCHAR:
diff --git 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
index 2abde97f..19f5475a 100644
--- 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
+++ 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolver.java
@@ -54,6 +54,7 @@ import static 
org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnn
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import static 
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 
 public class ParquetResolver extends BasePlugin implements Resolver {
@@ -241,11 +242,21 @@ public class ParquetResolver extends BasePlugin 
implements Resolver {
                 group.add(columnIndex, (Float) fieldValue);
                 break;
             case FIXED_LEN_BYTE_ARRAY:
-                byte[] fixedLenByteArray = getFixedLenByteArray((String) 
fieldValue, primitiveType, columnDescriptors.get(columnIndex).columnName());
-                if (fixedLenByteArray == null) {
-                    return;
+                if (logicalTypeAnnotation instanceof 
UUIDLogicalTypeAnnotation) {
+                    byte[] uuidBytes = 
ParquetTypeConverter.uuidToBytes((String) fieldValue);
+                    group.add(columnIndex, 
Binary.fromReusedByteArray(uuidBytes));
+                } else if (logicalTypeAnnotation instanceof 
DecimalLogicalTypeAnnotation) {
+                    byte[] fixedLenByteArray = getFixedLenByteArray((String) 
fieldValue,
+                            primitiveType, 
columnDescriptors.get(columnIndex).columnName());
+                    if (fixedLenByteArray == null) {
+                        return;
+                    }
+                    group.add(columnIndex, 
Binary.fromReusedByteArray(fixedLenByteArray));
+                } else {
+                    throw new UnsupportedTypeException(
+                            "Writing FIXED_LEN_BYTE_ARRAY with logical type " 
+ logicalTypeAnnotation
+                                    + " is not supported. Supported: UUID, 
DECIMAL");
                 }
-                group.add(columnIndex, 
Binary.fromReusedByteArray(fixedLenByteArray));
                 break;
             case INT96:  // SQL standard timestamp string value with or 
without time zone literals: 
https://www.postgresql.org/docs/9.4/datatype-datetime.html
                 String timestamp = (String) fieldValue;
diff --git 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
index c704d050..7ecaf223 100644
--- 
a/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
+++ 
b/server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverter.java
@@ -28,11 +28,13 @@ import java.time.OffsetDateTime;
 import java.time.ZoneId;
 import java.time.ZonedDateTime;
 import java.util.Base64;
+import java.util.UUID;
 
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
 import static 
org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
+import static 
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
 
 /**
  * Converter for Parquet types and values into PXF data types and values.
@@ -205,18 +207,54 @@ public enum ParquetTypeConverter {
     FIXED_LEN_BYTE_ARRAY {
         @Override
         public DataType getDataType(Type type) {
-            return DataType.NUMERIC;
+            LogicalTypeAnnotation logicalType = 
type.getLogicalTypeAnnotation();
+            if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+                return DataType.UUID;
+            } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+                return DataType.NUMERIC;
+            }
+            // fallback: treat unknown/null logical types as raw bytes
+            LOG.warn("FIXED_LEN_BYTE_ARRAY with logical type {} will be read 
as BYTEA", logicalType);
+            return DataType.BYTEA;
         }
 
         @Override
         public Object getValue(Group group, int columnIndex, int repeatIndex, 
Type type) {
-            int scale = ((DecimalLogicalTypeAnnotation) 
type.getLogicalTypeAnnotation()).getScale();
-            return new BigDecimal(new BigInteger(group.getBinary(columnIndex, 
repeatIndex).getBytes()), scale);
+            LogicalTypeAnnotation logicalType = 
type.getLogicalTypeAnnotation();
+            if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+                byte[] bytes = group.getBinary(columnIndex, 
repeatIndex).getBytes();
+                return uuidFromBytes(bytes);
+            } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+                int scale = ((DecimalLogicalTypeAnnotation) 
logicalType).getScale();
+                return new BigDecimal(new BigInteger(
+                        group.getBinary(columnIndex, repeatIndex).getBytes()), 
scale);
+            }
+            return group.getBinary(columnIndex, repeatIndex).getBytes();
         }
 
         @Override
         public void addValueToJsonArray(Group group, int columnIndex, int 
repeatIndex, Type type, ArrayNode jsonNode) {
-            jsonNode.add((BigDecimal) getValue(group, columnIndex, 
repeatIndex, type));
+            LogicalTypeAnnotation logicalType = 
type.getLogicalTypeAnnotation();
+            if (logicalType instanceof UUIDLogicalTypeAnnotation) {
+                jsonNode.add((String) getValue(group, columnIndex, 
repeatIndex, type));
+            } else if (logicalType instanceof DecimalLogicalTypeAnnotation) {
+                jsonNode.add((BigDecimal) getValue(group, columnIndex, 
repeatIndex, type));
+            } else {
+                jsonNode.add(group.getBinary(columnIndex, 
repeatIndex).getBytes());
+            }
+        }
+
+        @Override
+        public String getValueFromList(Group group, int columnIndex, int 
repeatIndex, PrimitiveType primitiveType) {
+            Object value = getValue(group, columnIndex, repeatIndex, 
primitiveType);
+            LogicalTypeAnnotation logicalType = 
primitiveType.getLogicalTypeAnnotation();
+            if (logicalType == null) {
+                // BYTEA fallback: hex-encode raw bytes, same as BINARY does
+                ByteBuffer byteBuffer = ByteBuffer.wrap((byte[]) value);
+                return pgUtilities.encodeByteaHex(byteBuffer);
+            } else {
+                return String.valueOf(value);
+            }
         }
     },
 
@@ -385,6 +423,29 @@ public enum ParquetTypeConverter {
         return new BigDecimal(BigInteger.valueOf(value), 
decimalType.getScale());
     }
 
+    /**
+     * Convert 16 bytes (big-endian) to a UUID string.
+     */
+    public static String uuidFromBytes(byte[] bytes) {
+        if (bytes.length != 16) {
+            throw new PxfRuntimeException(
+                    String.format("Expected 16 bytes for UUID, but got %d", 
bytes.length));
+        }
+        ByteBuffer bb = ByteBuffer.wrap(bytes);
+        return new UUID(bb.getLong(), bb.getLong()).toString();
+    }
+
+    /**
+     * Convert a UUID string to 16 bytes (big-endian).
+     */
+    public static byte[] uuidToBytes(String uuidString) {
+        UUID uuid = UUID.fromString(uuidString);
+        ByteBuffer bb = ByteBuffer.allocate(16);
+        bb.putLong(uuid.getMostSignificantBits());
+        bb.putLong(uuid.getLeastSignificantBits());
+        return bb.array();
+    }
+
     /**
      * Validate whether the element type in Parquet List type is supported by 
pxf
      *
diff --git 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
index 17ca9df3..90afb9a2 100644
--- 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
+++ 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetFileAccessorTest.java
@@ -1,11 +1,22 @@
 package org.apache.cloudberry.pxf.plugins.hdfs;
 
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.cloudberry.pxf.api.io.DataType;
 import org.apache.cloudberry.pxf.api.model.RequestContext;
+import org.apache.cloudberry.pxf.api.utilities.ColumnDescriptor;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ParquetFileAccessorTest {
     ParquetFileAccessor accessor;
@@ -26,4 +37,40 @@ public class ParquetFileAccessorTest {
         accessor.setRequestContext(context);
         assertNull(context.getMetadata());
     }
+
+    @Test
+    public void testGetTypeForColumnDescriptor_UUID() throws Exception {
+        ColumnDescriptor uuidColumn = new ColumnDescriptor("id", 
DataType.UUID.getOID(), 0, "uuid", new Integer[]{});
+
+        Method method = 
ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor", 
ColumnDescriptor.class);
+        method.setAccessible(true);
+        Type result = (Type) method.invoke(accessor, uuidColumn);
+
+        assertEquals("id", result.getName());
+        assertTrue(result.isPrimitive());
+        PrimitiveType primitiveType = result.asPrimitiveType();
+        assertEquals(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 
primitiveType.getPrimitiveTypeName());
+        assertEquals(LogicalTypeAnnotation.uuidType(), 
primitiveType.getLogicalTypeAnnotation());
+        assertEquals(16, primitiveType.getTypeLength());
+    }
+
+    @Test
+    public void testGetTypeForColumnDescriptor_UUIDArray() throws Exception {
+        ColumnDescriptor uuidArrayColumn = new ColumnDescriptor("ids", 
DataType.UUIDARRAY.getOID(), 0, "uuid[]", new Integer[]{});
+
+        Method method = 
ParquetFileAccessor.class.getDeclaredMethod("getTypeForColumnDescriptor", 
ColumnDescriptor.class);
+        method.setAccessible(true);
+        Type result = (Type) method.invoke(accessor, uuidArrayColumn);
+
+        assertEquals("ids", result.getName());
+        // array types are wrapped in a list group
+        
assertTrue(result.asGroupType().isRepetition(Type.Repetition.OPTIONAL));
+
+        Type elementType = 
result.asGroupType().getType(0).asGroupType().getType(0);
+        assertTrue(elementType.isPrimitive());
+        PrimitiveType elementPrimitive = elementType.asPrimitiveType();
+        assertEquals(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 
elementPrimitive.getPrimitiveTypeName());
+        assertEquals(LogicalTypeAnnotation.uuidType(), 
elementPrimitive.getLogicalTypeAnnotation());
+        assertEquals(16, elementPrimitive.getTypeLength());
+    }
 }
diff --git 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
index 936978ff..44798224 100644
--- 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
+++ 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/ParquetResolverTest.java
@@ -31,6 +31,7 @@ import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.math.BigDecimal;
+import java.nio.ByteBuffer;
 import java.time.Instant;
 import java.time.OffsetDateTime;
 import java.time.ZoneId;
@@ -40,8 +41,11 @@ import java.util.ArrayList;
 import java.util.Base64;
 import java.util.List;
 import java.util.Objects;
+import java.util.UUID;
 import java.util.stream.Collectors;
 
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
+
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -193,6 +197,148 @@ public class ParquetResolverTest {
         testSetFields_RightTrimCharHelper("  abcd  ", "  abc   ", "  abc");
     }
 
+    @Test
+    public void testSetFields_UUID_FixedLenByteArray() throws IOException {
+        List<Type> typeFields = new ArrayList<>();
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                .length(16).as(uuidType()).named("id"));
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+                
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+        schema = new MessageType("hive_schema", typeFields);
+        context.setMetadata(schema);
+
+        List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+        columnDescriptors.add(new ColumnDescriptor("id", 
DataType.UUID.getOID(), 0, "uuid", null));
+        columnDescriptors.add(new ColumnDescriptor("value", 
DataType.VARCHAR.getOID(), 1, "varchar", null));
+        context.setTupleDescription(columnDescriptors);
+
+        resolver.setRequestContext(context);
+        resolver.afterPropertiesSet();
+
+        String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+        List<OneField> fields = new ArrayList<>();
+        fields.add(new OneField(DataType.TEXT.getOID(), uuidValue));
+        fields.add(new OneField(DataType.TEXT.getOID(), "test"));
+
+        OneRow row = resolver.setFields(fields);
+        assertNotNull(row);
+        Object data = row.getData();
+        assertNotNull(data);
+        assertTrue(data instanceof Group);
+        Group group = (Group) data;
+
+        // assert UUID value is stored as 16-byte FIXED_LEN_BYTE_ARRAY 
(big-endian)
+        byte[] storedBytes = group.getBinary(0, 0).getBytes();
+        assertEquals(16, storedBytes.length);
+        UUID expectedUuid = UUID.fromString(uuidValue);
+        ByteBuffer bb = ByteBuffer.wrap(storedBytes);
+        assertEquals(expectedUuid.getMostSignificantBits(), bb.getLong());
+        assertEquals(expectedUuid.getLeastSignificantBits(), bb.getLong());
+        assertEquals("test", group.getString(1, 0));
+
+        // assert value repetition count
+        for (int i = 0; i < 2; i++) {
+            assertEquals(1, group.getFieldRepetitionCount(i));
+        }
+    }
+
+    @Test
+    public void testSetFields_UUID_FixedLenByteArray_Null() throws IOException 
{
+        List<Type> typeFields = new ArrayList<>();
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                .length(16).as(uuidType()).named("id"));
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+                
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+        schema = new MessageType("hive_schema", typeFields);
+        context.setMetadata(schema);
+
+        List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+        columnDescriptors.add(new ColumnDescriptor("id", 
DataType.UUID.getOID(), 0, "uuid", null));
+        columnDescriptors.add(new ColumnDescriptor("value", 
DataType.VARCHAR.getOID(), 1, "varchar", null));
+        context.setTupleDescription(columnDescriptors);
+
+        resolver.setRequestContext(context);
+        resolver.afterPropertiesSet();
+
+        List<OneField> fields = new ArrayList<>();
+        fields.add(new OneField(DataType.TEXT.getOID(), null));
+        fields.add(new OneField(DataType.TEXT.getOID(), "test"));
+
+        OneRow row = resolver.setFields(fields);
+        assertNotNull(row);
+        Object data = row.getData();
+        assertNotNull(data);
+        assertTrue(data instanceof Group);
+        Group group = (Group) data;
+
+        // assert null UUID is not written (repetition count 0)
+        assertEquals(0, group.getFieldRepetitionCount(0));
+        assertEquals(1, group.getFieldRepetitionCount(1));
+    }
+
+    @Test
+    public void testGetFields_UUID_FixedLenByteArray() throws IOException {
+        List<Type> typeFields = new ArrayList<>();
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                .length(16).as(uuidType()).named("id"));
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.BINARY)
+                
.as(org.apache.parquet.schema.LogicalTypeAnnotation.stringType()).named("value"));
+        schema = new MessageType("hive_schema", typeFields);
+        context.setMetadata(schema);
+        context.setTupleDescription(getColumnDescriptorsFromSchema(schema));
+        resolver.setRequestContext(context);
+        resolver.afterPropertiesSet();
+
+        List<Group> groups = readParquetFile("uuid_types.parquet", 3, schema);
+        assertEquals(3, groups.size());
+
+        // row 0
+        List<OneField> fields = assertRow(groups, 0, 2);
+        assertField(fields, 0, "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", 
DataType.UUID);
+        assertField(fields, 1, "test1", DataType.TEXT);
+
+        // row 1
+        fields = assertRow(groups, 1, 2);
+        assertField(fields, 0, "b1ffcd00-0d1c-5f09-cc7e-7ccace491b22", 
DataType.UUID);
+        assertField(fields, 1, "test2", DataType.TEXT);
+
+        // row 2 (null UUID)
+        fields = assertRow(groups, 2, 2);
+        assertField(fields, 0, null, DataType.UUID);
+        assertField(fields, 1, "test3", DataType.TEXT);
+    }
+
+    @Test
+    public void testRoundTrip_UUID_FixedLenByteArray() throws IOException {
+        List<Type> typeFields = new ArrayList<>();
+        
typeFields.add(org.apache.parquet.schema.Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                .length(16).as(uuidType()).named("id"));
+        schema = new MessageType("hive_schema", typeFields);
+        context.setMetadata(schema);
+
+        List<ColumnDescriptor> columnDescriptors = new ArrayList<>();
+        columnDescriptors.add(new ColumnDescriptor("id", 
DataType.UUID.getOID(), 0, "uuid", null));
+        context.setTupleDescription(columnDescriptors);
+
+        resolver.setRequestContext(context);
+        resolver.afterPropertiesSet();
+
+        // Write
+        String uuidValue = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+        List<OneField> fields = new ArrayList<>();
+        fields.add(new OneField(DataType.TEXT.getOID(), uuidValue));
+
+        OneRow row = resolver.setFields(fields);
+        assertNotNull(row);
+        Group group = (Group) row.getData();
+
+        // Read back
+        List<Group> groups = new ArrayList<>();
+        groups.add(group);
+        List<OneField> readFields = assertRow(groups, 0, 1);
+        assertField(readFields, 0, uuidValue, DataType.UUID);
+    }
+
     @Test
     public void testSetFields_Primitive_Nulls() throws IOException {
         schema = getParquetSchemaForPrimitiveTypes(Type.Repetition.OPTIONAL, 
false);
diff --git 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
index 5230a94e..e686da39 100644
--- 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
+++ 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetTypeConverterTest.java
@@ -1,15 +1,23 @@
 package org.apache.cloudberry.pxf.plugins.hdfs.parquet;
 
 import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
 import org.apache.cloudberry.pxf.api.GreenplumDateTime;
+import org.apache.cloudberry.pxf.api.error.UnsupportedTypeException;
+import org.apache.cloudberry.pxf.api.io.DataType;
 import org.junit.jupiter.api.Test;
 
+import java.nio.ByteBuffer;
 import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.time.format.DateTimeParseException;
+import java.util.UUID;
 
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -94,6 +102,49 @@ public class ParquetTypeConverterTest {
         assertEquals(expectedTimestampInSystemTimeZone2, convertedTimestamp2);
     }
 
+    @Test
+    public void testUuidBytesRoundTrip() {
+        String uuidString = "a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11";
+        byte[] bytes = ParquetTypeConverter.uuidToBytes(uuidString);
+        assertEquals(16, bytes.length);
+        String result = ParquetTypeConverter.uuidFromBytes(bytes);
+        assertEquals(uuidString, result);
+    }
+
+    @Test
+    public void testUuidFromKnownBytes() {
+        UUID uuid = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+        ByteBuffer bb = ByteBuffer.allocate(16);
+        bb.putLong(uuid.getMostSignificantBits());
+        bb.putLong(uuid.getLeastSignificantBits());
+        String result = ParquetTypeConverter.uuidFromBytes(bb.array());
+        assertEquals("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", result);
+    }
+
+    @Test
+    public void testUuidToKnownBytes() {
+        byte[] bytes = 
ParquetTypeConverter.uuidToBytes("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+        UUID uuid = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11");
+        ByteBuffer bb = ByteBuffer.wrap(bytes);
+        assertEquals(uuid.getMostSignificantBits(), bb.getLong());
+        assertEquals(uuid.getLeastSignificantBits(), bb.getLong());
+    }
+
+    @Test
+    public void testFixedLenByteArray_NullLogicalType_FallbackToBytea() {
+        // FIXED_LEN_BYTE_ARRAY with no logical type should fallback to BYTEA
+        Type type = 
Types.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                .length(16).named("unknown");
+        assertEquals(DataType.BYTEA, 
ParquetTypeConverter.FIXED_LEN_BYTE_ARRAY.getDataType(type));
+    }
+
+    @Test
+    public void testFixedLenByteArray_UUIDLogicalType_ReturnsUUID() {
+        Type type = 
Types.optional(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+                
.length(16).as(LogicalTypeAnnotation.uuidType()).named("uuid_col");
+        assertEquals(DataType.UUID, 
ParquetTypeConverter.FIXED_LEN_BYTE_ARRAY.getDataType(type));
+    }
+
     // Helper function
     private String convertUTCToCurrentSystemTimeZone(String expectedUTC) {
         // convert expectedUTC string to ZonedDateTime zdt
diff --git 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
index c590b4ca..548faf9d 100644
--- 
a/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
+++ 
b/server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/parquet/ParquetUtilitiesTest.java
@@ -67,6 +67,15 @@ public class ParquetUtilitiesTest {
         assertIterableEquals(Arrays.asList("fizz", "buzz", "fizzbuzz"), 
result);
     }
 
+    @Test
+    public void testParsePostgresArrayUuidArray() {
+        // GPDB UUID is a parquet FIXED_LEN_BYTE_ARRAY(16) with UUID logical 
type annotation
+        String value = 
"{a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,b1ffcd00-0d1c-5f09-cc7e-7ccace491b22}";
+
+        List<Object> result = parquetUtilities.parsePostgresArray(value, 
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, 
LogicalTypeAnnotation.uuidType());
+        
assertIterableEquals(Arrays.asList("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", 
"b1ffcd00-0d1c-5f09-cc7e-7ccace491b22"), result);
+    }
+
     @Test
     public void testParsePostgresArrayDateArray() {
         // GPDB Date is an parquet INT64 primitive type with String annotation
diff --git a/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet 
b/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet
new file mode 100644
index 00000000..712dea34
Binary files /dev/null and 
b/server/pxf-hdfs/src/test/resources/parquet/uuid_types.parquet differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to