This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 04f207c685 [Json] Remove arrow-data dependency from arrow-json (#9812)
04f207c685 is described below

commit 04f207c6851863d42e88cee95d72e80a4afc02ea
Author: Liam Bao <[email protected]>
AuthorDate: Sat Apr 25 11:22:18 2026 -0400

    [Json] Remove arrow-data dependency from arrow-json (#9812)
    
    # Which issue does this PR close?
    
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    -->
    
    - Part of #9298.
    
    # Rationale for this change
    
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    # What changes are included in this PR?
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
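    Remove the `arrow-data` dev-dependency from `arrow-json`. The reader and
    writer tests now build their expected arrays with the typed constructors
    (`StructArray::new`, `ListArray::new`, `MapArray::new`) instead of
    `ArrayData` builders.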
    
    # Are these changes tested?
    
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    Yes
    
    # Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 arrow-json/Cargo.toml        |   1 -
 arrow-json/src/reader/mod.rs |  53 ++++++++--------
 arrow-json/src/writer/mod.rs | 142 +++++++++++++++++--------------------------
 3 files changed, 82 insertions(+), 114 deletions(-)

diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index 2ab1af1fd0..d65f587e0c 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -55,7 +55,6 @@ ryu = "1.0"
 itoa = "1.0"
 
 [dev-dependencies]
-arrow-data = { workspace = true }
 flate2 = { version = "1", default-features = false, features = 
["rust_backend"] }
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 futures = "0.3"
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 04fe207c27..3cd45bb824 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -859,11 +859,10 @@ mod tests {
     use arrow_array::cast::AsArray;
     use arrow_array::{
         Array, BooleanArray, Float64Array, GenericListViewArray, Int32Array, 
ListArray, MapArray,
-        NullArray, OffsetSizeTrait, StringArray, StringViewArray, StructArray, 
make_array,
+        NullArray, OffsetSizeTrait, StringArray, StringViewArray, StructArray,
     };
-    use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer};
+    use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, 
ScalarBuffer};
     use arrow_cast::display::{ArrayFormatter, FormatOptions};
-    use arrow_data::ArrayDataBuilder;
     use arrow_schema::{Field, Fields};
     use serde_json::json;
     use std::fs::File;
@@ -2177,12 +2176,13 @@ mod tests {
             None,
             None,
         ]);
-        let c = ArrayDataBuilder::new(c_field.data_type().clone())
-            .len(7)
-            .add_child_data(d.to_data())
-            .null_bit_buffer(Some(Buffer::from([0b00111011])))
-            .build()
-            .unwrap();
+        let c = StructArray::new(
+            vec![Field::new("d", DataType::Utf8, true)].into(),
+            vec![Arc::new(d.clone()) as ArrayRef],
+            Some(NullBuffer::from(vec![
+                true, true, false, true, true, true, false,
+            ])),
+        );
         let b = BooleanArray::from(vec![
             Some(true),
             Some(false),
@@ -2192,21 +2192,22 @@ mod tests {
             Some(true),
             None,
         ]);
-        let a = ArrayDataBuilder::new(a_struct_field.data_type().clone())
-            .len(7)
-            .add_child_data(b.to_data())
-            .add_child_data(c.clone())
-            .null_bit_buffer(Some(Buffer::from([0b00111111])))
-            .build()
-            .unwrap();
-        let a_list = ArrayDataBuilder::new(a_field.data_type().clone())
-            .len(6)
-            .add_buffer(Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7]))
-            .add_child_data(a)
-            .null_bit_buffer(Some(Buffer::from([0b00110111])))
-            .build()
-            .unwrap();
-        let expected = make_array(a_list);
+        let a = StructArray::new(
+            vec![Field::new("b", DataType::Boolean, true), 
c_field.clone()].into(),
+            vec![
+                Arc::new(b.clone()) as ArrayRef,
+                Arc::new(c.clone()) as ArrayRef,
+            ],
+            Some(NullBuffer::from(vec![
+                true, true, true, true, true, true, false,
+            ])),
+        );
+        let a_list = ListArray::new(
+            Arc::new(a_struct_field.clone()),
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 3, 6, 6, 6, 
7])),
+            Arc::new(a),
+            Some(NullBuffer::from(vec![true, true, true, false, true, true])),
+        );
 
         // compare `a` with result from json reader
         let batch = reader.next().unwrap().unwrap();
@@ -2214,7 +2215,7 @@ mod tests {
         assert_eq!(read.len(), 6);
         // compare the arrays the long way around, to better detect differences
         let read: &ListArray = read.as_list::<i32>();
-        let expected = expected.as_list::<i32>();
+        let expected = &a_list;
         assert_eq!(read.value_offsets(), &[0, 2, 3, 6, 6, 6, 7]);
         // compare list null buffers
         assert_eq!(read.nulls(), expected.nulls());
@@ -2232,7 +2233,7 @@ mod tests {
         let read_b = struct_array.column(0);
         assert_eq!(read_b.as_ref(), &b);
         let read_c = struct_array.column(1);
-        assert_eq!(read_c.to_data(), c);
+        assert_eq!(read_c.as_struct(), &c);
         let read_c = read_c.as_struct();
         let read_d = read_c.column(0);
         assert_eq!(read_d.as_ref(), &d);
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index 04cc8c9e2a..559916d06a 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -491,8 +491,7 @@ mod tests {
     use super::{Encoder, WriterBuilder};
     use arrow_array::builder::*;
     use arrow_array::types::*;
-    use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, 
ToByteSlice, i256};
-    use arrow_data::ArrayData;
+    use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, i256};
 
     use crate::reader::*;
 
@@ -1070,25 +1069,19 @@ mod tests {
 
     #[test]
     fn write_struct_with_list_field() {
-        let field_c1 = Field::new(
-            "c1",
-            DataType::List(Arc::new(Field::new("c_list", DataType::Utf8, 
false))),
-            false,
-        );
+        let field_c_list = Arc::new(Field::new("c_list", DataType::Utf8, 
false));
+        let field_c1 = Field::new("c1", DataType::List(field_c_list.clone()), 
false);
         let field_c2 = Field::new("c2", DataType::Int32, false);
         let schema = Schema::new(vec![field_c1.clone(), field_c2]);
 
         let a_values = StringArray::from(vec!["a", "a1", "b", "c", "d", "e"]);
         // list column rows: ["a", "a1"], ["b"], ["c"], ["d"], ["e"]
-        let a_value_offsets = Buffer::from([0, 2, 3, 4, 5, 6].to_byte_slice());
-        let a_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(5)
-            .add_buffer(a_value_offsets)
-            .add_child_data(a_values.into_data())
-            .null_bit_buffer(Some(Buffer::from([0b00011111])))
-            .build()
-            .unwrap();
-        let a = ListArray::from(a_list_data);
+        let a = ListArray::new(
+            field_c_list,
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 3, 4, 5, 6])),
+            Arc::new(a_values),
+            None,
+        );
 
         let b = Int32Array::from(vec![1, 2, 3, 4, 5]);
 
@@ -1113,41 +1106,28 @@ mod tests {
 
     #[test]
     fn write_nested_list() {
-        let list_inner_type = Field::new(
-            "a",
-            DataType::List(Arc::new(Field::new("b", DataType::Int32, false))),
-            false,
-        );
-        let field_c1 = Field::new(
-            "c1",
-            DataType::List(Arc::new(list_inner_type.clone())),
-            false,
-        );
+        let field_b = Arc::new(Field::new("b", DataType::Int32, false));
+        let field_a = Arc::new(Field::new("a", 
DataType::List(field_b.clone()), false));
+        let field_c1 = Field::new("c1", DataType::List(field_a.clone()), 
false);
         let field_c2 = Field::new("c2", DataType::Utf8, true);
         let schema = Schema::new(vec![field_c1.clone(), field_c2]);
 
         // list column rows: [[1, 2], [3]], [], [[4, 5, 6]]
         let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
 
-        let a_value_offsets = Buffer::from([0, 2, 3, 6].to_byte_slice());
-        // Construct a list array from the above two
-        let a_list_data = 
ArrayData::builder(list_inner_type.data_type().clone())
-            .len(3)
-            .add_buffer(a_value_offsets)
-            .null_bit_buffer(Some(Buffer::from([0b00000111])))
-            .add_child_data(a_values.into_data())
-            .build()
-            .unwrap();
-
-        let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
-        let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(3)
-            .add_buffer(c1_value_offsets)
-            .add_child_data(a_list_data)
-            .build()
-            .unwrap();
+        let a_list = ListArray::new(
+            field_b,
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 3, 6])),
+            Arc::new(a_values),
+            None,
+        );
 
-        let c1 = ListArray::from(c1_list_data);
+        let c1 = ListArray::new(
+            field_a,
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 2, 3])),
+            Arc::new(a_list),
+            None,
+        );
         let c2 = StringArray::from(vec![Some("foo"), Some("bar"), None]);
 
         let batch =
@@ -1211,15 +1191,16 @@ mod tests {
         // [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}],
         // null,
         // [{"c11": 5, "c12": {"c121": "g"}}]
-        let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
-        let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
-            .len(3)
-            .add_buffer(c1_value_offsets)
-            .add_child_data(struct_values.into_data())
-            .null_bit_buffer(Some(Buffer::from([0b00000101])))
-            .build()
-            .unwrap();
-        let c1 = ListArray::from(c1_list_data);
+        let c1_inner = match field_c1.data_type() {
+            DataType::List(f) => f.clone(),
+            _ => unreachable!(),
+        };
+        let c1 = ListArray::new(
+            c1_inner,
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 2, 3])),
+            Arc::new(struct_values),
+            Some(NullBuffer::from(vec![true, false, true])),
+        );
 
         let c2 = Int32Array::from(vec![1, 2, 3]);
 
@@ -1447,30 +1428,22 @@ mod tests {
             (values_field, Arc::new(values_array) as ArrayRef),
         ]);
 
-        let map_data_type = DataType::Map(
-            Arc::new(Field::new(
-                "entries",
-                entry_struct.data_type().clone(),
-                false,
-            )),
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            entry_struct.data_type().clone(),
             false,
-        );
+        ));
 
         // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}, {"quux": 
50}, {}]
-        let entry_offsets = Buffer::from([0, 1, 1, 1, 4, 5, 
5].to_byte_slice());
-        let valid_buffer = Buffer::from([0b00111101]);
-
-        let map_data = ArrayData::builder(map_data_type.clone())
-            .len(6)
-            .null_bit_buffer(Some(valid_buffer))
-            .add_buffer(entry_offsets)
-            .add_child_data(entry_struct.into_data())
-            .build()
-            .unwrap();
-
-        let map = MapArray::from(map_data);
+        let map = MapArray::new(
+            entries_field.clone(),
+            OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 1, 1, 1, 4, 5, 
5])),
+            entry_struct,
+            Some(NullBuffer::from(vec![true, false, true, true, true, true])),
+            false,
+        );
 
-        let map_field = Field::new("map", map_data_type, true);
+        let map_field = Field::new("map", DataType::Map(entries_field, false), 
true);
         let schema = Arc::new(Schema::new(vec![map_field]));
 
         let batch = RecordBatch::try_new(schema, vec![Arc::new(map)]).unwrap();
@@ -1648,22 +1621,17 @@ mod tests {
                 ),
             ]);
 
-            let field = Field::new_list(
-                "list",
-                Field::new("struct", struct_array.data_type().clone(), true),
-                true,
-            );
+            let values_field =
+                Arc::new(Field::new("struct", 
struct_array.data_type().clone(), true));
+            let field = Field::new_list("list", values_field.as_ref().clone(), 
true);
 
             // 
[{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{int32":5,"utf8":null}]},{"list":null}]
-            let entry_offsets = Buffer::from([0, 2, 2, 3, 3].to_byte_slice());
-            let data = ArrayData::builder(field.data_type().clone())
-                .len(4)
-                .add_buffer(entry_offsets)
-                .add_child_data(struct_array.into_data())
-                .null_bit_buffer(Some([0b00000101].into()))
-                .build()
-                .unwrap();
-            let array = Arc::new(ListArray::from(data));
+            let array = Arc::new(ListArray::new(
+                values_field,
+                OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 2, 2, 3, 3])),
+                Arc::new(struct_array),
+                Some(NullBuffer::from(vec![true, false, true, false])),
+            ));
             (array, field)
         }
 

Reply via email to