This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new ddde66de91 Add a test for reading nested REE data in json (#9634)
ddde66de91 is described below

commit ddde66de9139af42448d775cc2b49c77570d2386
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Apr 16 01:54:06 2026 -0400

    Add a test for reading nested REE data in json (#9634)
    
    # Which issue does this PR close?
    
    - related to https://github.com/apache/arrow-rs/pull/9497
    
    # Rationale for this change
    
    I (well, Codex) found a regression in the changes in
    https://github.com/apache/arrow-rs/pull/9497, but the affected behavior
    is not covered by a test. So we should add one.
    
    # What changes are included in this PR?
    
    Add a test for reading nested REE data from json (that currently passes
    on main but fails on https://github.com/apache/arrow-rs/pull/9497)
    
    # Are these changes tested?
    
    Only tests
    
    # Are there any user-facing changes?
    
    No, this is just a test.
    
    Co-authored-by: Jeffrey Vo <[email protected]>
---
 arrow-json/src/reader/mod.rs | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 62c13c70ed..32ac243283 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -3457,4 +3457,43 @@ mod tests {
         assert_eq!(run_array.len(), 3);
         assert_eq!(run_array.run_ends().values(), &[2i16, 3]);
     }
+
+    #[test]
+    fn test_read_nested_run_end_encoded() {
+        let buf = r#"
+        {"a": "x"}
+        {"a": "x"}
+        {"a": "y"}
+        "#;
+
+        // The outer REE compresses whole rows, while the inner REE compresses the
+        // repeated string values produced by decoding those rows.
+        let inner_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int64, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let outer_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int64, false)),
+            Arc::new(Field::new("values", inner_type, true)),
+        );
+        let schema = Arc::new(Schema::new(vec![Field::new("a", outer_type, true)]));
+        let batches = do_read(buf, 1024, false, false, schema);
+        assert_eq!(batches.len(), 1);
+
+        let col = batches[0].column(0);
+        let outer = col.as_run::<arrow_array::types::Int64Type>();
+        // Three logical rows compress to two outer runs: ["x", "x"] and ["y"].
+        assert_eq!(outer.len(), 3);
+        assert_eq!(outer.run_ends().values(), &[2, 3]);
+
+        let nested = outer.values().as_run::<arrow_array::types::Int64Type>();
+        // The physical values of the outer REE are themselves a two-element REE.
+        assert_eq!(nested.len(), 2);
+        assert_eq!(nested.run_ends().values(), &[1, 2]);
+
+        let nested_values = nested.values().as_string::<i32>();
+        assert_eq!(nested_values.len(), 2);
+        assert_eq!(nested_values.value(0), "x");
+        assert_eq!(nested_values.value(1), "y");
+    }
 }

Reply via email to