This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 8fc076c6 feat(java): row format supports Optional<T> (#2254)
8fc076c6 is described below
commit 8fc076c6fde670d260682ebe990672e87ecccff6
Author: Steven Schlansker <[email protected]>
AuthorDate: Tue May 27 09:49:44 2025 -0700
feat(java): row format supports Optional<T> (#2254)
## What does this PR do?
Support `Optional<T>` in the Java row format.
An `Optional` is stored as an inline nullable field, so this implementation
does not differentiate between `(Optional) null` and `Optional.empty()`;
both are always deserialized as `Optional.empty()`.
## Does this PR introduce any user-facing change?
New type supported - docs updated
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
---
.../main/java/org/apache/fury/type/TypeUtils.java | 8 +-
java/fury-format/README.md | 2 +
.../format/encoder/BaseBinaryEncoderBuilder.java | 18 +++
.../org/apache/fury/format/encoder/Encoders.java | 17 ++-
.../fury/format/encoder/RowEncoderBuilder.java | 22 ++-
.../org/apache/fury/format/type/TypeInference.java | 14 +-
.../apache/fury/format/encoder/OptionalTest.java | 161 +++++++++++++++++++++
7 files changed, 230 insertions(+), 12 deletions(-)
diff --git a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
index 3262f7fd..1e8079c8 100644
--- a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
+++ b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java
@@ -43,6 +43,7 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.fury.collection.IdentityMap;
@@ -101,6 +102,7 @@ public class TypeUtils {
public static final TypeRef<?> MAP_TYPE = TypeRef.of(Map.class);
public static final TypeRef<?> MAP_ENTRY_TYPE = TypeRef.of(Map.Entry.class);
public static final TypeRef<?> HASHMAP_TYPE = TypeRef.of(HashMap.class);
+ public static final TypeRef<?> OPTIONAL_TYPE = TypeRef.of(Optional.class);
public static final TypeRef<?> OBJECT_TYPE = TypeRef.of(Object.class);
public static Type ITERATOR_RETURN_TYPE;
@@ -155,6 +157,7 @@ public class TypeUtils {
SUPPORTED_TYPES.add(LOCAL_DATE_TYPE);
SUPPORTED_TYPES.add(TIMESTAMP_TYPE);
SUPPORTED_TYPES.add(INSTANT_TYPE);
+ SUPPORTED_TYPES.add(OPTIONAL_TYPE);
}
static {
@@ -729,7 +732,10 @@ public class TypeUtils {
for (TypeRef<?> typeToken : typeRefs) {
Class<?> type = getRawType(typeToken);
- if (isCollection(type)) {
+ if (type == Optional.class) {
+ TypeRef<?> elemType = getTypeArguments(typeToken).get(0);
+ beans.addAll(listBeansRecursiveInclusive(elemType.getRawType(),
newCtx));
+ } else if (isCollection(type)) {
TypeRef<?> elementType = getElementType(typeToken);
while (isContainer(elementType.getRawType())) {
elementType = getElementType(elementType);
diff --git a/java/fury-format/README.md b/java/fury-format/README.md
index 4dee94f7..7a3c0792 100644
--- a/java/fury-format/README.md
+++ b/java/fury-format/README.md
@@ -12,6 +12,8 @@ Fury row format is heavily inspired by spark tungsten row
format, but with chang
The initial fury java row data structure implementation is modified from spark
unsafe row/writer.
+See `Encoders.bean` Javadoc for a list of built-in supported types.
+
It is possible to register custom type handling and collection factories for
the row format -
see Encoders.registerCustomCodec and Encoders.registerCustomCollectionFactory.
For an interface,
Fury can synthesize a simple value implementation, such as the UuidType below.
diff --git
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
index 524ec8eb..91a11bf3 100644
---
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
+++
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/BaseBinaryEncoderBuilder.java
@@ -29,6 +29,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
+import java.util.Optional;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.fury.builder.CodecBuilder;
@@ -172,6 +173,14 @@ public abstract class BaseBinaryEncoderBuilder extends
CodecBuilder {
ExpressionUtils.eqNull(inputObject),
new Invoke(writer, "setNullAt", ordinal),
doSerialize);
+ } else if (rawType == Optional.class) {
+ TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+ Invoke orNull =
+ new Invoke(inputObject, "orElse", TypeUtils.OBJECT_TYPE, new
Expression.Null(elemType));
+ Expression unwrapped =
+ new If(ExpressionUtils.eqNull(inputObject), new
Expression.Null(elemType), orNull);
+ return serializeFor(
+ ordinal, new Expression.Cast(unwrapped, elemType), writer, elemType,
arrowField);
} else if (TypeUtils.isPrimitive(rawType)) {
return new ListExpression(
// notNull is by default, no need to call setNotNullAt
@@ -541,6 +550,15 @@ public abstract class BaseBinaryEncoderBuilder extends
CodecBuilder {
return newValue;
}
return deserializeFor(newValue, rewrittenType, ctx);
+ } else if (rawType == Optional.class) {
+ TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+ return new Expression.StaticInvoke(
+ Optional.class,
+ "ofNullable",
+ "optional",
+ typeRef,
+ true,
+ deserializeFor(value, elemType, ctx));
} else if (TypeUtils.isPrimitive(rawType) || TypeUtils.isBoxed(rawType)) {
return value;
} else if (rawType == BigDecimal.class) {
diff --git
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
index c4c4255f..84ac0471 100644
---
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
+++
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java
@@ -125,11 +125,18 @@ public class Encoders {
*
* <p>T must be publicly accessible.
*
- * <p>supported types for java bean field: - primitive types: boolean, int,
double, etc. - boxed
- * types: Boolean, Integer, Double, etc. - String - java.math.BigDecimal,
java.math.BigInteger -
- * time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate,
java.time.Instant -
- * collection types: only array and java.util.List currently, map support is
in progress - nested
- * java bean.
+ * <p>supported types for java bean field:
+ *
+ * <ul>
+ * <li>primitive types: boolean, int, double, etc.
+ * <li>boxed types: Boolean, Integer, Double, etc.
+ * <li>String
+ * <li>java.math.BigDecimal, java.math.BigInteger
+ * <li>time related: java.sql.Date, java.sql.Timestamp,
java.time.LocalDate, java.time.Instant
+ * <li>Optional and friends: OptionalInt, OptionalLong, OptionalDouble
+ * <li>collection types: only array and java.util.List currently, map
support is in progress
+ * <li>nested java bean
+ * </ul>
*/
public static <T> RowEncoder<T> bean(Class<T> beanClass, BinaryRowWriter
writer, Fury fury) {
Schema schema = writer.getSchema();
diff --git
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
index 4b56feac..b57f175f 100644
---
a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
+++
b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java
@@ -23,6 +23,7 @@ import static org.apache.fury.type.TypeUtils.CLASS_TYPE;
import static org.apache.fury.type.TypeUtils.getRawType;
import java.lang.reflect.Modifier;
+import java.util.Optional;
import java.util.SortedMap;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
@@ -228,7 +229,14 @@ public class RowEncoderBuilder extends
BaseBinaryEncoderBuilder {
new Expression.Variable(
"decoded" + i, new Expression.Reference("decode" + i + "(row)",
fieldType));
Expression setActionExpr = setFieldValue(bean, d, value);
- Expression action = new Expression.If(ExpressionUtils.not(isNullAt),
setActionExpr);
+ Expression action;
+ if (fieldType.getRawType() == Optional.class) {
+ Expression setEmptyExpr =
+ setFieldValue(bean, d, new Expression.StaticInvoke(Optional.class,
"empty"));
+ action = new Expression.If(isNullAt, setEmptyExpr, setActionExpr);
+ } else {
+ action = new Expression.If(ExpressionUtils.not(isNullAt),
setActionExpr);
+ }
expressions.add(action);
}
@@ -250,12 +258,16 @@ public class RowEncoderBuilder extends
BaseBinaryEncoderBuilder {
} else {
fieldCtx = typeCtx;
}
- CustomCodec<?, ?> customEncoder = customTypeHandler.findCodec(beanClass,
rawFieldType);
TypeRef<?> columnAccessType;
- if (customEncoder == null) {
- columnAccessType = fieldType;
+ if (rawFieldType == Optional.class) {
+ columnAccessType = TypeUtils.getTypeArguments(fieldType).get(0);
} else {
- columnAccessType = TypeRef.of(customEncoder.encodedType());
+ CustomCodec<?, ?> customEncoder =
customTypeHandler.findCodec(beanClass, rawFieldType);
+ if (customEncoder == null) {
+ columnAccessType = fieldType;
+ } else {
+ columnAccessType = TypeRef.of(customEncoder.encodedType());
+ }
}
String columnAccessMethodName =
BinaryUtils.getElemAccessMethodName(columnAccessType, fieldCtx);
diff --git
a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
index 935a0cd9..4659c58d 100644
---
a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
+++
b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java
@@ -146,7 +146,19 @@ public class TypeInference {
Class<?> enclosingType = ctx.getEnclosingType().getRawType();
CustomCodec<?, ?> customEncoder =
((CustomTypeHandler)
ctx.getCustomTypeRegistry()).findCodec(enclosingType, rawType);
- if (customEncoder != null) {
+ if (rawType == Optional.class) {
+ TypeRef<?> elemType = TypeUtils.getTypeArguments(typeRef).get(0);
+ Field result = inferField(name, elemType, ctx);
+ if (result.isNullable()) {
+ return result;
+ }
+ FieldType fieldType = result.getFieldType();
+ return new Field(
+ result.getName(),
+ new FieldType(
+ true, fieldType.getType(), fieldType.getDictionary(),
fieldType.getMetadata()),
+ result.getChildren());
+ } else if (customEncoder != null) {
return customEncoder.getField(name);
} else if (rawType == boolean.class) {
return field(name, DataTypes.notNullFieldType(ArrowType.Bool.INSTANCE));
diff --git
a/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
b/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
new file mode 100644
index 00000000..1340b871
--- /dev/null
+++
b/java/fury-format/src/test/java/org/apache/fury/format/encoder/OptionalTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.format.encoder;
+
+import java.util.Optional;
+import java.util.OptionalDouble;
+import java.util.OptionalInt;
+import java.util.OptionalLong;
+import lombok.Data;
+import org.apache.fury.format.row.binary.BinaryRow;
+import org.apache.fury.memory.MemoryBuffer;
+import org.apache.fury.memory.MemoryUtils;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class OptionalTest {
+
+ @Data
+ public static class OptionalType {
+ public Optional<Integer> f1;
+ public Optional<String> f2;
+
+ public OptionalType() {}
+ }
+
+ @Test
+ public void testOptionalEmpty() {
+ final OptionalType bean = new OptionalType();
+ bean.f1 = Optional.empty();
+ bean.f2 = Optional.empty();
+ final RowEncoder<OptionalType> encoder = Encoders.bean(OptionalType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Test
+ public void testOptionalPresent() {
+ final OptionalType bean = new OptionalType();
+ bean.f1 = Optional.of(42);
+ bean.f2 = Optional.of("Indubitably");
+ final RowEncoder<OptionalType> encoder = Encoders.bean(OptionalType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Data
+ public static class OptionalIntType {
+ public OptionalInt f1;
+
+ public OptionalIntType() {}
+ }
+
+ @Test
+ public void testIntEmpty() {
+ final OptionalIntType bean = new OptionalIntType();
+ bean.f1 = OptionalInt.empty();
+ final RowEncoder<OptionalIntType> encoder =
Encoders.bean(OptionalIntType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalIntType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Test
+ public void testIntPresent() {
+ final OptionalIntType bean = new OptionalIntType();
+ bean.f1 = OptionalInt.of(42);
+ final RowEncoder<OptionalIntType> encoder =
Encoders.bean(OptionalIntType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalIntType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Data
+ public static class OptionalLongType {
+ public OptionalLong f1;
+
+ public OptionalLongType() {}
+ }
+
+ @Test
+ public void testLongEmpty() {
+ final OptionalLongType bean = new OptionalLongType();
+ bean.f1 = OptionalLong.empty();
+ final RowEncoder<OptionalLongType> encoder =
Encoders.bean(OptionalLongType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalLongType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Test
+ public void testLongPresent() {
+ final OptionalLongType bean = new OptionalLongType();
+ bean.f1 = OptionalLong.of(42);
+ final RowEncoder<OptionalLongType> encoder =
Encoders.bean(OptionalLongType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalLongType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Data
+ public static class OptionalDoubleType {
+ public OptionalDouble f1;
+
+ public OptionalDoubleType() {}
+ }
+
+ @Test
+ public void testDoubleEmpty() {
+ final OptionalDoubleType bean = new OptionalDoubleType();
+ bean.f1 = OptionalDouble.empty();
+ final RowEncoder<OptionalDoubleType> encoder =
Encoders.bean(OptionalDoubleType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalDoubleType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+
+ @Test
+ public void testDoublePresent() {
+ final OptionalDoubleType bean = new OptionalDoubleType();
+ bean.f1 = OptionalDouble.of(42);
+ final RowEncoder<OptionalDoubleType> encoder =
Encoders.bean(OptionalDoubleType.class);
+ final BinaryRow row = encoder.toRow(bean);
+ final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes());
+ row.pointTo(buffer, 0, buffer.size());
+ final OptionalDoubleType deserializedBean = encoder.fromRow(row);
+ Assert.assertEquals(deserializedBean, bean);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]