This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new 8fc076c6 feat(java): row format supports Optional<T> (#2254)
8fc076c6 is described below

commit 8fc076c6fde670d260682ebe990672e87ecccff6
Author: Steven Schlansker <[email protected]>
AuthorDate: Tue May 27 09:49:44 2025 -0700

    feat(java): row format supports Optional<T> (#2254)
    
    ## What does this PR do?
    
    Support `Optional<T>` in the Java row format.
    The value is stored as an inline nullable field, so this implementation
    does not differentiate between `(Optional) null` and `Optional.empty()`:
    both are always deserialized as `empty`.
    
    ## Does this PR introduce any user-facing change?
    
    New type supported - docs updated
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
---
 .../main/java/org/apache/fury/type/TypeUtils.java  |   8 +-
 java/fury-format/README.md                         |   2 +
 .../format/encoder/BaseBinaryEncoderBuilder.java   |  18 +++
 .../org/apache/fury/format/encoder/Encoders.java   |  17 ++-
 .../fury/format/encoder/RowEncoderBuilder.java     |  22 ++-
 .../org/apache/fury/format/type/TypeInference.java |  14 +-
 .../apache/fury/format/encoder/OptionalTest.java   | 161 +++++++++++++++++++++
 7 files changed, 230 insertions(+), 12 deletions(-)

diff --git a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java 
b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
index 3262f7fd..1e8079c8 100644
--- a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
+++ b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
@@ -43,6 +43,7 @@ import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.fury.collection.IdentityMap;
@@ -101,6 +102,7 @@ public class TypeUtils {
   public static final TypeRef<?> MAP_TYPE = TypeRef.of(Map.class);
   public static final TypeRef<?> MAP_ENTRY_TYPE = TypeRef.of(Map.Entry.class);
   public static final TypeRef<?> HASHMAP_TYPE = TypeRef.of(HashMap.class);
+  public static final TypeRef<?> OPTIONAL_TYPE = TypeRef.of(Optional.class);
   public static final TypeRef<?> OBJECT_TYPE = TypeRef.of(Object.class);
 
   public static Type ITERATOR_RETURN_TYPE;
@@ -155,6 +157,7 @@ public class TypeUtils {
     SUPPORTED_TYPES.add(LOCAL_DATE_TYPE);
     SUPPORTED_TYPES.add(TIMESTAMP_TYPE);
     SUPPORTED_TYPES.add(INSTANT_TYPE);
+    SUPPORTED_TYPES.add(OPTIONAL_TYPE);
   }
 
   static {
@@ -729,7 +732,10 @@ public class TypeUtils {
 
     for (TypeRef<?> typeToken : typeRefs) {
       Class<?> type = getRawType(typeToken);
-      if (isCollection(type)) {
+      if (type == Optional.class) {
+        TypeRef<?> elemType = getTypeArguments(typeToken).get(0);
+        beans.addAll(listBeansRecursiveInclusive(elemType.getRawType(), 
newCtx));
+      } else if (isCollection(type)) {
         TypeRef<?> elementType = getElementType(typeToken);
         while (isContainer(elementType.getRawType())) {
           elementType = getElementType(elementType);
diff --git a/java/fury-format/README.md b/java/fury-format/README.md
index 4dee94f7..7a3c0792 100644
--- a/java/fury-format/README.md
+++ b/java/fury-format/README.md
@@ -12,6 +12,8 @@ Fury row format is heavily inspired by spark tungsten row 
format, but with chang
 
 The initial fury java row data structure implementation is modified from spark 
unsafe row/writer.
 
+See the `Encoders.bean` Javadoc for a list of built-in supported types.
+
 It is possible to register custom type handling and collection factories for 
the row format -
 see Encoders.registerCustomCodec and Encoders.registerCustomCollectionFactory. 
For an interface,
 Fury can synthesize a simple value implementation, such as the UuidType below.
diff --git 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
index 524ec8eb..91a11bf3 100644
--- 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
+++ 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
@@ -29,6 +29,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.fury.builder.CodecBuilder;
@@ -172,6 +173,14 @@ public abstract class BaseBinaryEncoderBuilder extends 
CodecBuilder {
           ExpressionUtils.eqNull(inputObject),
           new Invoke(writer, "setNullAt", ordinal),
           doSerialize);
+    } else if (rawType == Optional.class) {
+      TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+      Invoke orNull =
+          new Invoke(inputObject, "orElse", TypeUtils.OBJECT_TYPE, new 
Expression.Null(elemType));
+      Expression unwrapped =
+          new If(ExpressionUtils.eqNull(inputObject), new 
Expression.Null(elemType), orNull);
+      return serializeFor(
+          ordinal, new Expression.Cast(unwrapped, elemType), writer, elemType, 
arrowField);
     } else if (TypeUtils.isPrimitive(rawType)) {
       return new ListExpression(
           // notNull is by default, no need to call setNotNullAt
@@ -541,6 +550,15 @@ public abstract class BaseBinaryEncoderBuilder extends 
CodecBuilder {
         return newValue;
       }
       return deserializeFor(newValue, rewrittenType, ctx);
+    } else if (rawType == Optional.class) {
+      TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+      return new Expression.StaticInvoke(
+          Optional.class,
+          "ofNullable",
+          "optional",
+          typeRef,
+          true,
+          deserializeFor(value, elemType, ctx));
     } else if (TypeUtils.isPrimitive(rawType) || TypeUtils.isBoxed(rawType)) {
       return value;
     } else if (rawType == BigDecimal.class) {
diff --git 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
index c4c4255f..84ac0471 100644
--- 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
+++ 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
@@ -125,11 +125,18 @@ public class Encoders {
    *
    * <p>T must be publicly accessible.
    *
-   * <p>supported types for java bean field: - primitive types: boolean, int, 
double, etc. - boxed
-   * types: Boolean, Integer, Double, etc. - String - java.math.BigDecimal, 
java.math.BigInteger -
-   * time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, 
java.time.Instant -
-   * collection types: only array and java.util.List currently, map support is 
in progress - nested
-   * java bean.
+   * <p>supported types for java bean field:
+   *
+   * <ul>
+   *   <li>primitive types: boolean, int, double, etc.
+   *   <li>boxed types: Boolean, Integer, Double, etc.
+   *   <li>String
+   *   <li>java.math.BigDecimal, java.math.BigInteger
+   *   <li>time related: java.sql.Date, java.sql.Timestamp, 
java.time.LocalDate, java.time.Instant
+   *   <li>Optional and friends: OptionalInt, OptionalLong, OptionalDouble
+   *   <li>collection types: only array and java.util.List currently, map 
support is in progress
+   *   <li>nested java bean
+   * </ul>
    */
   public static <T> RowEncoder<T> bean(Class<T> beanClass, BinaryRowWriter 
writer, Fury fury) {
     Schema schema = writer.getSchema();
diff --git 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
index 4b56feac..b57f175f 100644
--- 
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
+++ 
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
@@ -23,6 +23,7 @@ import static org.apache.fury.type.TypeUtils.CLASS_TYPE;
 import static org.apache.fury.type.TypeUtils.getRawType;
 
 import java.lang.reflect.Modifier;
+import java.util.Optional;
 import java.util.SortedMap;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
@@ -228,7 +229,14 @@ public class RowEncoderBuilder extends 
BaseBinaryEncoderBuilder {
           new Expression.Variable(
               "decoded" + i, new Expression.Reference("decode" + i + "(row)", 
fieldType));
       Expression setActionExpr = setFieldValue(bean, d, value);
-      Expression action = new Expression.If(ExpressionUtils.not(isNullAt), 
setActionExpr);
+      Expression action;
+      if (fieldType.getRawType() == Optional.class) {
+        Expression setEmptyExpr =
+            setFieldValue(bean, d, new Expression.StaticInvoke(Optional.class, 
"empty"));
+        action = new Expression.If(isNullAt, setEmptyExpr, setActionExpr);
+      } else {
+        action = new Expression.If(ExpressionUtils.not(isNullAt), 
setActionExpr);
+      }
       expressions.add(action);
     }
 
@@ -250,12 +258,16 @@ public class RowEncoderBuilder extends 
BaseBinaryEncoderBuilder {
       } else {
         fieldCtx = typeCtx;
       }
-      CustomCodec<?, ?> customEncoder = customTypeHandler.findCodec(beanClass, 
rawFieldType);
       TypeRef<?> columnAccessType;
-      if (customEncoder == null) {
-        columnAccessType = fieldType;
+      if (rawFieldType == Optional.class) {
+        columnAccessType = TypeUtils.getTypeArguments(fieldType).get(0);
       } else {
-        columnAccessType = TypeRef.of(customEncoder.encodedType());
+        CustomCodec<?, ?> customEncoder = 
customTypeHandler.findCodec(beanClass, rawFieldType);
+        if (customEncoder == null) {
+          columnAccessType = fieldType;
+        } else {
+          columnAccessType = TypeRef.of(customEncoder.encodedType());
+        }
       }
       String columnAccessMethodName =
           BinaryUtils.getElemAccessMethodName(columnAccessType, fieldCtx);
diff --git 
a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java 
b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
index 935a0cd9..4659c58d 100644
--- 
a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
+++ 
b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
@@ -146,7 +146,19 @@ public class TypeInference {
     Class<?> enclosingType = ctx.getEnclosingType().getRawType();
     CustomCodec<?, ?> customEncoder =
         ((CustomTypeHandler) 
ctx.getCustomTypeRegistry()).findCodec(enclosingType, rawType);
-    if (customEncoder != null) {
+    if (rawType == Optional.class) {
+      TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+      Field result = inferField(name, elemType, ctx);
+      if (result.isNullable()) {
+        return result;
+      }
+      FieldType fieldType = result.getFieldType();
+      return new Field(
+          result.getName(),
+          new FieldType(
+              true, fieldType.getType(), fieldType.getDictionary(), 
fieldType.getMetadata()),
+          result.getChildren());
+    } else if (customEncoder != null) {
       return customEncoder.getField(name);
     } else if (rawType == boolean.class) {
       return field(name, DataTypes.notNullFieldType(ArrowType.Bool.INSTANCE));
diff --git 
a/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
 
b/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
new file mode 100644
index 00000000..1340b871
--- /dev/null
+++ 
b/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.format.encoder;
+
+import java.util.Optional;
+import java.util.OptionalDouble;
+import java.util.OptionalInt;
+import java.util.OptionalLong;
+import lombok.Data;
+import org.apache.fury.format.row.binary.BinaryRow;
+import org.apache.fury.memory.MemoryBuffer;
+import org.apache.fury.memory.MemoryUtils;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class OptionalTest {
+
+  @Data
+  public static class OptionalType {
+    public Optional<Integer> f1;
+    public Optional<String> f2;
+
+    public OptionalType() {}
+  }
+
+  @Test
+  public void testOptionalEmpty() {
+    final OptionalType bean = new OptionalType();
+    bean.f1 = Optional.empty();
+    bean.f2 = Optional.empty();
+    final RowEncoder<OptionalType> encoder = Encoders.bean(OptionalType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Test
+  public void testOptionalPresent() {
+    final OptionalType bean = new OptionalType();
+    bean.f1 = Optional.of(42);
+    bean.f2 = Optional.of("Indubitably");
+    final RowEncoder<OptionalType> encoder = Encoders.bean(OptionalType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Data
+  public static class OptionalIntType {
+    public OptionalInt f1;
+
+    public OptionalIntType() {}
+  }
+
+  @Test
+  public void testIntEmpty() {
+    final OptionalIntType bean = new OptionalIntType();
+    bean.f1 = OptionalInt.empty();
+    final RowEncoder<OptionalIntType> encoder = 
Encoders.bean(OptionalIntType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalIntType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Test
+  public void testIntPresent() {
+    final OptionalIntType bean = new OptionalIntType();
+    bean.f1 = OptionalInt.of(42);
+    final RowEncoder<OptionalIntType> encoder = 
Encoders.bean(OptionalIntType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalIntType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Data
+  public static class OptionalLongType {
+    public OptionalLong f1;
+
+    public OptionalLongType() {}
+  }
+
+  @Test
+  public void testLongEmpty() {
+    final OptionalLongType bean = new OptionalLongType();
+    bean.f1 = OptionalLong.empty();
+    final RowEncoder<OptionalLongType> encoder = 
Encoders.bean(OptionalLongType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalLongType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Test
+  public void testLongPresent() {
+    final OptionalLongType bean = new OptionalLongType();
+    bean.f1 = OptionalLong.of(42);
+    final RowEncoder<OptionalLongType> encoder = 
Encoders.bean(OptionalLongType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalLongType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Data
+  public static class OptionalDoubleType {
+    public OptionalDouble f1;
+
+    public OptionalDoubleType() {}
+  }
+
+  @Test
+  public void testDoubleEmpty() {
+    final OptionalDoubleType bean = new OptionalDoubleType();
+    bean.f1 = OptionalDouble.empty();
+    final RowEncoder<OptionalDoubleType> encoder = 
Encoders.bean(OptionalDoubleType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalDoubleType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+
+  @Test
+  public void testDoublePresent() {
+    final OptionalDoubleType bean = new OptionalDoubleType();
+    bean.f1 = OptionalDouble.of(42);
+    final RowEncoder<OptionalDoubleType> encoder = 
Encoders.bean(OptionalDoubleType.class);
+    final BinaryRow row = encoder.toRow(bean);
+    final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+    row.pointTo(buffer, 0, buffer.size());
+    final OptionalDoubleType deserializedBean = encoder.fromRow(row);
+    Assert.assertEquals(deserializedBean, bean);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to