This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new fc20f37d fix(avro): correctly set nullability for ListType (#709)
fc20f37d is described below

commit fc20f37d89be6b81ae9dd8567e00ac6943671aec
Author: Willem Jan <[email protected]>
AuthorDate: Fri Mar 13 17:56:12 2026 +0100

    fix(avro): correctly set nullability for ListType (#709)
    
    ### Rationale for this change
    The nullability of a list field itself is lost when converting an Avro
    schema to an Arrow schema; only the nullability of the list members is
    applied.
    
    ### What changes are included in this PR?
    Use the buildArrowField function for list fields to reuse the Arrow
    field creation logic, which applies nullability as expected. Also use
    buildArrowField for the float, double, and boolean cases for consistency.
    
    ### Are these changes tested?
    Added a test case for a nullable list.
    
    ### Are there any user-facing changes?
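    Debatable: list fields declared as nullable in the Avro schema are now
    marked as nullable in the resulting Arrow schema.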
    
    ---------
    
    Co-authored-by: Willem Jan Noort <[email protected]>
---
 arrow/avro/reader_test.go         |  5 +++++
 arrow/avro/schema.go              | 10 ++++++----
 arrow/avro/schema_test.go         |  5 +++++
 arrow/avro/testdata/alltypes.avsc | 10 ++++++++++
 arrow/avro/testdata/testdata.go   | 17 +++++++++--------
 5 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/arrow/avro/reader_test.go b/arrow/avro/reader_test.go
index 0eb513b3..4aaac675 100644
--- a/arrow/avro/reader_test.go
+++ b/arrow/avro/reader_test.go
@@ -80,6 +80,11 @@ func TestReader(t *testing.T) {
                                        Type:     arrow.BinaryTypes.Binary,
                                        Nullable: true,
                                },
+                               {
+                                       Name:     "nullable_remote_ips",
+                                       Type:     
arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
+                                       Nullable: true,
+                               },
                                {
                                        Name: "person",
                                        Type: arrow.StructOf(
diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go
index 91b1729b..4d9e7670 100644
--- a/arrow/avro/schema.go
+++ b/arrow/avro/schema.go
@@ -106,7 +106,7 @@ func arrowSchemafromAvro(n *schemaNode) {
                        k := strconv.FormatInt(int64(index), 10)
                        symbols[k] = symbol
                }
-               var dt = arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: 
false}
+               dt := arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: 
false}
                sl := int64(len(symbols))
                switch {
                case sl <= math.MaxUint8:
@@ -125,12 +125,14 @@ func arrowSchemafromAvro(n *schemaNode) {
                } else {
                        arrowSchemafromAvro(c)
                }
+               var typ *arrow.ListType
                switch c.arrowField.Nullable {
                case true:
-                       n.arrowField = arrow.Field{Name: n.name, Type: 
arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata}
+                       typ = arrow.ListOfField(c.arrowField)
                case false:
-                       n.arrowField = arrow.Field{Name: n.name, Type: 
arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata}
+                       typ = arrow.ListOfNonNullable(c.arrowField.Type)
                }
+               n.arrowField = buildArrowField(n, typ, c.arrowField.Metadata)
        case "map":
                
n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(),
 n.schema.(*avro.MapSchema).Values())
                c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values())
@@ -160,7 +162,7 @@ func arrowSchemafromAvro(n *schemaNode) {
                        n.arrowField = buildArrowField(n, 
avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
                }
        case "float", "double", "boolean":
-               n.arrowField = arrow.Field{Name: n.name, Type: 
avroPrimitiveToArrowType(string(st)), Nullable: n.nullable}
+               n.arrowField = buildArrowField(n, 
avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
        case "<ref>":
                refSchema := 
n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name()))
                if refSchema == nil {
diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go
index 921e5e81..33b6d2a0 100644
--- a/arrow/avro/schema_test.go
+++ b/arrow/avro/schema_test.go
@@ -79,6 +79,11 @@ func TestSchemaStringEqual(t *testing.T) {
                                        Type:     arrow.BinaryTypes.Binary,
                                        Nullable: true,
                                },
+                               {
+                                       Name:     "nullable_remote_ips",
+                                       Type:     
arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
+                                       Nullable: true,
+                               },
                                {
                                        Name: "person",
                                        Type: arrow.StructOf(
diff --git a/arrow/avro/testdata/alltypes.avsc 
b/arrow/avro/testdata/alltypes.avsc
index a4e3037b..29a72e56 100644
--- a/arrow/avro/testdata/alltypes.avsc
+++ b/arrow/avro/testdata/alltypes.avsc
@@ -85,6 +85,16 @@
         "bytes"
       ]
     },
+    {
+      "name": "nullable_remote_ips",
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "bytes"
+        }
+      ]
+    },
     {
       "name": "person",
       "type": {
diff --git a/arrow/avro/testdata/testdata.go b/arrow/avro/testdata/testdata.go
index 9770cfec..235231da 100644
--- a/arrow/avro/testdata/testdata.go
+++ b/arrow/avro/testdata/testdata.go
@@ -126,6 +126,7 @@ type Example struct {
        Fraction          *float64          `avro:"fraction" json:"fraction"`
        IsEmergency       bool              `avro:"is_emergency" 
json:"is_emergency"`
        RemoteIP          *ByteArray        `avro:"remote_ip" json:"remote_ip"`
+       NullableRemoteIPS *[]ByteArray      `avro:"nullable_remote_ips" 
json:"nullable_remote_ips"`
        Person            PersonData        `avro:"person" json:"person"`
        DecimalField      DecimalType       `avro:"decimalField" 
json:"decimalField"`
        Decimal256Field   DecimalType       `avro:"decimal256Field" 
json:"decimal256Field"`
@@ -215,12 +216,12 @@ func sampleData() Example {
                        InheritNamespace: "d",
                        Md5:              MD5{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15},
                },
-               ID:          42,
-               BigID:       42000000000,
-               Temperature: func() *float32 { v := float32(36.6); return &v 
}(),
-               Fraction:    func() *float64 { v := float64(0.75); return &v 
}(),
-               IsEmergency: true,
-               RemoteIP:    func() *ByteArray { v := ByteArray{192, 168, 1, 
1}; return &v }(),
+               ID:                42,
+               BigID:             42000000000,
+               Temperature:       func() *float32 { v := float32(36.6); return 
&v }(),
+               Fraction:          func() *float64 { v := float64(0.75); return 
&v }(),
+               IsEmergency:       true,
+               RemoteIP:          func() *ByteArray { v := ByteArray{192, 168, 
1, 1}; return &v }(),
                Person: PersonData{
                        Lastname: "Doe",
                        Address: AddressUSRecord{
@@ -248,7 +249,7 @@ func sampleData() Example {
 
 func writeOCFSampleData(td string, data Example) string {
        path := filepath.Join(td, sampleAvroFileName)
-       ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 
0644)
+       ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 
0o644)
        if err != nil {
                log.Fatal(err)
        }
@@ -272,7 +273,7 @@ func writeOCFSampleData(td string, data Example) string {
 
 func writeJSONSampleData(td string, data Example) string {
        path := filepath.Join(td, sampleJSONFileName)
-       jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 
0644)
+       jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 
0o644)
        if err != nil {
                log.Fatal(err)
        }

Reply via email to