This is an automated email from the ASF dual-hosted git repository.

clesaec pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b78e69f6 AVRO-3649: Fix for union type to match default values for 
any innertype (#2503)
1b78e69f6 is described below

commit 1b78e69f6de50a511fb57fb275294168bbc49c24
Author: Christophe Le Saec <[email protected]>
AuthorDate: Tue Sep 19 16:00:46 2023 +0200

    AVRO-3649: Fix for union type to match default values for any innertype 
(#2503)
    
    * avro-3649: Union type to match default values for any innertype
---
 .../en/docs/++version++/Specification/_index.md    |  4 +-
 .../avro/src/main/java/org/apache/avro/Schema.java | 26 +++++++++--
 .../java/org/apache/avro/reflect/ReflectData.java  |  7 +--
 .../src/test/java/org/apache/avro/TestSchema.java  | 52 ++++++++++++++++++++++
 .../java/org/apache/avro/reflect/TestReflect.java  | 28 ++++++++++++
 .../src/test/java/org/apache/avro/TestSchema.java  | 26 +++++------
 6 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/doc/content/en/docs/++version++/Specification/_index.md 
b/doc/content/en/docs/++version++/Specification/_index.md
index 7cc5a1754..4772b0010 100755
--- a/doc/content/en/docs/++version++/Specification/_index.md
+++ b/doc/content/en/docs/++version++/Specification/_index.md
@@ -77,7 +77,7 @@ Records use the type name "record" and support the following 
attributes:
   * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema 
declaration"), as defined above
   * _order_: specifies how this field impacts sort ordering of this record 
(optional). Valid values are "ascending" (the default), "descending", or 
"ignore". For more details on how this is used, see the sort order section 
below.
   * _aliases_: a JSON array of strings, providing alternate names for this 
field (optional).
-  * _default_: A default value for this field, only used when reading 
instances that lack the field for schema evolution purposes. The presence of a 
default value does not make the field optional at encoding time. Permitted 
values depend on the field's schema type, according to the table below. Default 
values for union fields correspond to the first schema in the union. Default 
values for bytes and fixed fields are JSON strings, where Unicode code points 
0-255 are mapped to unsigned 8-bi [...]
+  * _default_: A default value for this field, only used when reading 
instances that lack the field for schema evolution purposes. The presence of a 
default value does not make the field optional at encoding time. Permitted 
values depend on the field's schema type, according to the table below. Default 
values for union fields correspond to the first schema that matches in the 
union. Default values for bytes and fixed fields are JSON strings, where 
Unicode code points 0-255 are mapped to  [...]
 
 *field default values*
 
@@ -160,7 +160,7 @@ For example, a map from string to long is declared with:
 ### Unions
 Unions, as mentioned above, are represented using JSON arrays. For example, 
`["null", "string"]` declares a schema which may be either a null or string.
 
-(Note that when a [default value]({{< ref "#schema-record" >}} "Schema 
record") is specified for a record field whose type is a union, the type of the 
default value must match the first element of the union. Thus, for unions 
containing "null", the "null" is usually listed first, since the default value 
of such unions is typically null.)
+(Note that when a [default value]({{< ref "#schema-record" >}} "Schema 
record") is specified for a record field whose type is a union, the type of the 
default value must match with one element of the union.
 
 Unions may not contain more than one schema with the same type, except for the 
named types record, fixed and enum. For example, unions containing two array 
types or two map types are not permitted, but two types with different names 
are permitted. (Names permit efficient resolution when reading and writing 
unions.)
 
diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java 
b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
index 8933f20f0..38a6e4a9e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java
@@ -1332,6 +1332,16 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
       }
     }
 
+    /**
+     * Checks if a JSON value matches the schema.
+     *
+     * @param jsonValue a value to check against the schema
+     * @return true if the value is valid according to this schema
+     */
+    public boolean isValidDefault(JsonNode jsonValue) {
+      return this.types.stream().anyMatch((Schema s) -> 
s.isValidDefault(jsonValue));
+    }
+
     @Override
     public List<Schema> getTypes() {
       return types;
@@ -1768,13 +1778,23 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
   private static final ThreadLocal<Boolean> VALIDATE_DEFAULTS = 
ThreadLocalWithInitial.of(() -> true);
 
   private static JsonNode validateDefault(String fieldName, Schema schema, 
JsonNode defaultValue) {
-    if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && 
!isValidDefault(schema, defaultValue)) { // invalid default
+    if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && 
!schema.isValidDefault(defaultValue)) { // invalid default
       String message = "Invalid default for field " + fieldName + ": " + 
defaultValue + " not a " + schema;
       throw new AvroTypeException(message); // throw exception
     }
     return defaultValue;
   }
 
+  /**
+   * Checks if a JSON value matches the schema.
+   *
+   * @param jsonValue a value to check against the schema
+   * @return true if the value is valid according to this schema
+   */
+  public boolean isValidDefault(JsonNode jsonValue) {
+    return isValidDefault(this, jsonValue);
+  }
+
   private static boolean isValidDefault(Schema schema, JsonNode defaultValue) {
     if (defaultValue == null)
       return false;
@@ -1809,8 +1829,8 @@ public abstract class Schema extends JsonProperties 
implements Serializable {
         if (!isValidDefault(schema.getValueType(), value))
           return false;
       return true;
-    case UNION: // union default: first branch
-      return isValidDefault(schema.getTypes().get(0), defaultValue);
+    case UNION: // union default: any branch
+      return schema.getTypes().stream().anyMatch((Schema s) -> isValidValue(s, 
defaultValue));
     case RECORD:
       if (!defaultValue.isObject())
         return false;
diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java 
b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index 9271cfa98..347490679 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -617,11 +617,8 @@ public class ReflectData extends SpecificData {
     AvroDefault defaultAnnotation = field.getAnnotation(AvroDefault.class);
     defaultValue = (defaultAnnotation == null) ? null : 
Schema.parseJsonToObject(defaultAnnotation.value());
 
-    if (defaultValue == null && fieldSchema.getType() == Schema.Type.UNION) {
-      Schema defaultType = fieldSchema.getTypes().get(0);
-      if (defaultType.getType() == Schema.Type.NULL) {
-        defaultValue = JsonProperties.NULL_VALUE;
-      }
+    if (defaultValue == null && fieldSchema.isNullable()) {
+      defaultValue = JsonProperties.NULL_VALUE;
     }
     return defaultValue;
   }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
index 9900fd635..64748da13 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java
@@ -39,6 +39,11 @@ import java.util.Set;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.IntNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.NullNode;
+import com.fasterxml.jackson.databind.node.TextNode;
 
 import org.apache.avro.Schema.Field;
 import org.apache.avro.Schema.Type;
@@ -421,6 +426,53 @@ public class TestSchema {
     assertEquals("Int", nameInt.getQualified("space"));
   }
 
+  @Test
+  void validValue() {
+    // Valid null value
+    final Schema nullSchema = Schema.create(Type.NULL);
+    assertTrue(nullSchema.isValidDefault(JsonNodeFactory.instance.nullNode()));
+
+    // Valid int value
+    final Schema intSchema = Schema.create(Type.INT);
+    
assertTrue(intSchema.isValidDefault(JsonNodeFactory.instance.numberNode(12)));
+
+    // Valid Text value
+    final Schema strSchema = Schema.create(Type.STRING);
+    assertTrue(strSchema.isValidDefault(new TextNode("textNode")));
+
+    // Valid Array value
+    final Schema arraySchema = Schema.createArray(Schema.create(Type.STRING));
+    final ArrayNode arrayValue = JsonNodeFactory.instance.arrayNode();
+    assertTrue(arraySchema.isValidDefault(arrayValue)); // empty array
+
+    arrayValue.add("Hello");
+    arrayValue.add("World");
+    assertTrue(arraySchema.isValidDefault(arrayValue));
+
+    arrayValue.add(5);
+    assertFalse(arraySchema.isValidDefault(arrayValue));
+
+    // Valid Union type
+    final Schema unionSchema = Schema.createUnion(strSchema, intSchema, 
nullSchema);
+    
assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.textNode("Hello")));
+    assertTrue(unionSchema.isValidDefault(new IntNode(23)));
+    
assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.nullNode()));
+
+    assertFalse(unionSchema.isValidDefault(arrayValue));
+
+    // Array of union
+    final Schema arrayUnion = Schema.createArray(unionSchema);
+    final ArrayNode arrayUnionValue = JsonNodeFactory.instance.arrayNode();
+    arrayUnionValue.add("Hello");
+    arrayUnionValue.add(NullNode.getInstance());
+    assertTrue(arrayUnion.isValidDefault(arrayUnionValue));
+
+    // Union String, bytes
+    final Schema unionStrBytes = Schema.createUnion(strSchema, 
Schema.create(Type.BYTES));
+    
assertTrue(unionStrBytes.isValidDefault(JsonNodeFactory.instance.textNode("Hello")));
+    
assertFalse(unionStrBytes.isValidDefault(JsonNodeFactory.instance.numberNode(123)));
+  }
+
   @Test
   void enumLateDefine() {
     String schemaString = "{\n" + "    \"type\":\"record\",\n" + "    
\"name\": \"Main\",\n" + "    \"fields\":[\n"
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java 
b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
index e8fadeea7..5f52a2cf7 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java
@@ -536,6 +536,34 @@ public class TestReflect {
     void error() throws E1;
   }
 
+  private static class NullableDefaultTest {
+    @Nullable
+    @AvroDefault("1")
+    int foo;
+  }
+
+  @Test
+  public void testAvroNullableDefault() {
+    check(NullableDefaultTest.class,
+        "{\"type\":\"record\",\"name\":\"NullableDefaultTest\","
+            + 
"\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":["
+            + 
"{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":1}]}");
+  }
+
+  private static class UnionDefaultTest {
+    @Union({ Integer.class, String.class })
+    @AvroDefault("1")
+    Object foo;
+  }
+
+  @Test
+  public void testAvroUnionDefault() {
+    check(UnionDefaultTest.class,
+        "{\"type\":\"record\",\"name\":\"UnionDefaultTest\","
+            + 
"\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":["
+            + 
"{\"name\":\"foo\",\"type\":[\"int\",\"string\"],\"default\":1}]}");
+  }
+
   @Test
   void p2() throws Exception {
     Schema e1 = ReflectData.get().getSchema(E1.class);
diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
index eb2d5c918..d85b28eff 100644
--- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
+++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
@@ -302,26 +302,20 @@ public class TestSchema {
   void union(TestInfo testInfo) throws Exception {
     check(new File(DIR, testInfo.getTestMethod().get().getName()), 
"[\"string\", \"long\"]", false);
     checkDefault("[\"double\", \"long\"]", "1.1", 1.1);
+    checkDefault("[\"double\", \"string\"]", "\"TheString\"", new 
Utf8("TheString"));
 
     // test that erroneous default values cause errors
     for (String type : new String[] { "int", "long", "float", "double", 
"string", "bytes", "boolean" }) {
-      checkValidateDefaults("[\"" + type + "\", \"null\"]", "null"); // schema 
parse time
-      boolean error = false;
-      try {
-        checkDefault("[\"" + type + "\", \"null\"]", "null", 0); // read time
-      } catch (AvroTypeException e) {
-        error = true;
-      }
-      assertTrue(error);
-      checkValidateDefaults("[\"null\", \"" + type + "\"]", "0"); // schema 
parse time
-      error = false;
-      try {
-        checkDefault("[\"null\", \"" + type + "\"]", "0", null); // read time
-      } catch (AvroTypeException e) {
-        error = true;
-      }
-      assertTrue(error);
+//      checkValidateDefaults("[\"" + type + "\", \"null\"]", "null"); // 
schema parse time
+      checkDefault("[\"" + type + "\", \"null\"]", "null", null); // read time
     }
+    checkDefault("[\"null\", \"int\"]", "0", 0);
+    checkDefault("[\"null\", \"long\"]", "0", 0l);
+    checkDefault("[\"null\", \"float\"]", "0.0", 0.0f);
+    checkDefault("[\"null\", \"double\"]", "0.0", 0.0d);
+    checkDefault("[\"null\", \"string\"]", "\"Hi\"", new Utf8("Hi"));
+    checkDefault("[\"null\", \"bytes\"]", "\"01\"", 
ByteBuffer.wrap("01".getBytes(StandardCharsets.UTF_8)));
+    checkDefault("[\"null\", \"boolean\"]", "true", true);
 
     // check union json
     String record = "{\"type\":\"record\",\"name\":\"Foo\",\"fields\":[]}";

Reply via email to