This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new dbc804289d [variant] Port changes of variant from spark (#5895)
dbc804289d is described below

commit dbc804289d4aed6710fc75ee8c45b5f4556cfbe3
Author: Zouxxyy <[email protected]>
AuthorDate: Tue Jul 15 14:25:51 2025 +0800

    [variant] Port changes of variant from spark (#5895)
---
 .../apache/paimon/data/variant/GenericVariant.java | 31 ++++++++++++++++---
 .../paimon/data/variant/GenericVariantBuilder.java | 26 +++++++++++++---
 .../paimon/data/variant/GenericVariantUtil.java    | 36 +++++++++++++++++++---
 3 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariant.java b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariant.java
index d79eb09054..17faf26097 100644
--- a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariant.java
+++ b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariant.java
@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.Base64;
 import java.util.Locale;
 import java.util.Objects;
+import java.util.UUID;
 
 import static org.apache.paimon.data.variant.GenericVariantUtil.BINARY_SEARCH_THRESHOLD;
 import static org.apache.paimon.data.variant.GenericVariantUtil.SIZE_LIMIT;
@@ -239,6 +240,11 @@ public final class GenericVariant implements Variant {
         return GenericVariantUtil.getType(value, pos);
     }
 
+    // Get a UUID value from the variant.
+    public UUID getUuid() {
+        return GenericVariantUtil.getUuid(value, pos);
+    }
+
     // Get the number of object fields in the variant.
     // It is only legal to call it when `getType()` is `Type.OBJECT`.
     public int objectSize() {
@@ -456,8 +462,15 @@ public final class GenericVariant implements Variant {
                 sb.append(escapeJson(GenericVariantUtil.getString(value, pos)));
                 break;
             case DOUBLE:
-                sb.append(GenericVariantUtil.getDouble(value, pos));
-                break;
+                {
+                    double d = GenericVariantUtil.getDouble(value, pos);
+                    if (Double.isFinite(d)) {
+                        sb.append(d);
+                    } else {
+                        appendQuoted(sb, Double.toString(d));
+                    }
+                    break;
+                }
             case DECIMAL:
                 sb.append(GenericVariantUtil.getDecimal(value, pos).toPlainString());
                 break;
@@ -482,14 +495,24 @@ public final class GenericVariant implements Variant {
                                         .atZone(ZoneOffset.UTC)));
                 break;
             case FLOAT:
-                sb.append(GenericVariantUtil.getFloat(value, pos));
-                break;
+                {
+                    float f = GenericVariantUtil.getFloat(value, pos);
+                    if (Float.isFinite(f)) {
+                        sb.append(f);
+                    } else {
+                        appendQuoted(sb, Float.toString(f));
+                    }
+                    break;
+                }
             case BINARY:
                 appendQuoted(
                         sb,
                         Base64.getEncoder()
                                 .encodeToString(GenericVariantUtil.getBinary(value, pos)));
                 break;
+            case UUID:
+                appendQuoted(sb, GenericVariantUtil.getUuid(value, pos).toString());
+                break;
         }
     }
 }
diff --git a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantBuilder.java b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantBuilder.java
index 187fb9259e..169a2f8c9b 100644
--- a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantBuilder.java
+++ b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantBuilder.java
@@ -27,12 +27,15 @@ import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.exc.InputCoer
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.UUID;
 
 import static org.apache.paimon.data.variant.GenericVariantUtil.ARRAY;
 import static org.apache.paimon.data.variant.GenericVariantUtil.BASIC_TYPE_MASK;
@@ -61,9 +64,9 @@ import static org.apache.paimon.data.variant.GenericVariantUtil.TIMESTAMP_NTZ;
 import static org.apache.paimon.data.variant.GenericVariantUtil.TRUE;
 import static org.apache.paimon.data.variant.GenericVariantUtil.U16_MAX;
 import static org.apache.paimon.data.variant.GenericVariantUtil.U24_MAX;
-import static org.apache.paimon.data.variant.GenericVariantUtil.U24_SIZE;
 import static org.apache.paimon.data.variant.GenericVariantUtil.U32_SIZE;
 import static org.apache.paimon.data.variant.GenericVariantUtil.U8_MAX;
+import static org.apache.paimon.data.variant.GenericVariantUtil.UUID;
 import static org.apache.paimon.data.variant.GenericVariantUtil.VERSION;
 import static org.apache.paimon.data.variant.GenericVariantUtil.arrayHeader;
 import static org.apache.paimon.data.variant.GenericVariantUtil.checkIndex;
@@ -283,6 +286,18 @@ public class GenericVariantBuilder {
         writePos += binary.length;
     }
 
+    public void appendUuid(UUID uuid) {
+        checkCapacity(1 + 16);
+        writeBuffer[writePos++] = primitiveHeader(UUID);
+
+        // UUID is stored big-endian, so don't use writeLong.
+        ByteBuffer buffer = ByteBuffer.wrap(writeBuffer, writePos, 16);
+        buffer.order(ByteOrder.BIG_ENDIAN);
+        buffer.putLong(writePos, uuid.getMostSignificantBits());
+        buffer.putLong(writePos + 8, uuid.getLeastSignificantBits());
+        writePos += 16;
+    }
+
     // Add a key to the variant dictionary. If the key already exists, the dictionary is not
     // modified.
     // In either case, return the id of the key.
@@ -592,16 +607,19 @@ public class GenericVariantBuilder {
     }
 
     // Choose the smallest unsigned integer type that can store `value`. It must be within
-    // `[0, U24_MAX]`.
+    // `[0, SIZE_LIMIT]`.
     private int getIntegerSize(int value) {
-        assert value >= 0 && value <= U24_MAX;
+        assert value >= 0 && value <= SIZE_LIMIT;
         if (value <= U8_MAX) {
             return 1;
         }
         if (value <= U16_MAX) {
             return 2;
         }
-        return U24_SIZE;
+        if (value <= U24_MAX) {
+            return 3;
+        }
+        return 4;
     }
 
     private void parseFloatingPoint(JsonParser parser) throws IOException {
diff --git a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantUtil.java b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantUtil.java
index b37cbd7f6f..c99428b9d0 100644
--- a/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantUtil.java
+++ b/paimon-common/src/main/java/org/apache/paimon/data/variant/GenericVariantUtil.java
@@ -20,7 +20,10 @@ package org.apache.paimon.data.variant;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.util.Arrays;
+import java.util.UUID;
 
 /* This file is based on source code from the Spark Project (http://spark.apache.org/), licensed by the Apache
  * Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE file distributed with this work for
@@ -120,6 +123,8 @@ public class GenericVariantUtil {
     // Long string value. The content is (4-byte little-endian unsigned integer representing the
     // string size) + (size bytes of string content).
     public static final int LONG_STR = 16;
+    // UUID, 16-byte big-endian.
+    public static final int UUID = 20;
 
     public static final byte VERSION = 1;
     // The lower 4 bits of the first metadata byte contain the version.
@@ -131,8 +136,8 @@ public class GenericVariantUtil {
     public static final int U24_SIZE = 3;
     public static final int U32_SIZE = 4;
 
-    // Both variant value and variant metadata need to be no longer than 16MiB.
-    public static final int SIZE_LIMIT = U24_MAX + 1;
+    // Both variant value and variant metadata need to be no longer than 128MiB.
+    public static final int SIZE_LIMIT = 128 * 1024 * 1024;
 
     public static final int MAX_DECIMAL4_PRECISION = 9;
     public static final int MAX_DECIMAL8_PRECISION = 18;
@@ -248,7 +253,8 @@ public class GenericVariantUtil {
         TIMESTAMP,
         TIMESTAMP_NTZ,
         FLOAT,
-        BINARY
+        BINARY,
+        UUID
     }
 
     public static int getTypeInfo(byte[] value, int pos) {
@@ -301,6 +307,8 @@ public class GenericVariantUtil {
                         return Type.BINARY;
                     case LONG_STR:
                         return Type.STRING;
+                    case UUID:
+                        return Type.UUID;
                     default:
                         throw unknownPrimitiveTypeInVariant(typeInfo);
                 }
@@ -367,6 +375,8 @@ public class GenericVariantUtil {
                     case BINARY:
                     case LONG_STR:
                         return 1 + U32_SIZE + readUnsigned(value, pos + 1, U32_SIZE);
+                    case UUID:
+                        return 17;
                     default:
                         throw unknownPrimitiveTypeInVariant(typeInfo);
                 }
@@ -531,7 +541,23 @@ public class GenericVariantUtil {
         throw unexpectedType(Type.STRING);
     }
 
-    /** 1. */
+    // Get a UUID value from variant value `value[pos...]`.
+    // Throw `MALFORMED_VARIANT` if the variant is malformed.
+    public static UUID getUuid(byte[] value, int pos) {
+        checkIndex(pos, value.length);
+        int basicType = value[pos] & BASIC_TYPE_MASK;
+        int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK;
+        if (basicType != PRIMITIVE || typeInfo != UUID) {
+            throw unexpectedType(Type.UUID);
+        }
+        int start = pos + 1;
+        checkIndex(start + 15, value.length);
+        // UUID values are big-endian, so we can't use VariantUtil.readLong().
+        ByteBuffer bb = ByteBuffer.wrap(value, start, 16).order(ByteOrder.BIG_ENDIAN);
+        return new UUID(bb.getLong(), bb.getLong());
+    }
+
+    /** ObjectHandler. */
     public interface ObjectHandler<T> {
         /**
          * @param size Number of object fields.
@@ -569,7 +595,7 @@ public class GenericVariantUtil {
         return handler.apply(size, idSize, offsetSize, idStart, offsetStart, dataStart);
     }
 
-    /** 1. */
+    /** ArrayHandler. */
     public interface ArrayHandler<T> {
         /**
          * @param size Number of array elements.

Reply via email to