This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new e99815a280 fix(ipc): reader misalignment when skipping ListView / LargeListView columns (#9806)
e99815a280 is described below

commit e99815a280f575f2fd9f280137fa6c2efc255418
Author: pchintar <[email protected]>
AuthorDate: Fri Apr 24 18:49:20 2026 -0400

    fix(ipc): reader misalignment when skipping ListView / LargeListView columns (#9806)
    
    # Which issue does this PR close?
    
    - Closes #9805.
    
    # Rationale for this change
    
    When reading IPC data with column selection enabled, skipping a
    `ListView` or `LargeListView` column can lead to buffer misalignment and
    incorrect decoding of subsequent columns.
    
    In `arrow-ipc/src/reader.rs`, `skip_field` currently does not handle
    these types explicitly and falls back to the default case:
    
    ```rust
    _ => {
        self.skip_buffer();
        self.skip_buffer();
    }
    ```
    
    However, `create_array` for `ListView` / `LargeListView` reads three
    buffers:
    
    ```rust
    self.next_buffer()?; // null
    self.next_buffer()?; // offsets
    self.next_buffer()?; // sizes
    ```
    
    This mismatch means that when a `ListView` column is skipped, fewer
    buffers are consumed than expected. As a result, the next column reads
    from incorrect buffer positions, which can lead to runtime errors or
    incorrect values/results.
    
    This change aligns the skip behavior with the read path to ensure
    buffers remain correctly aligned when columns are skipped.
    
    # What changes are included in this PR?
    
    * Updated `skip_field` in:
    
      * `arrow-ipc/src/reader.rs`
    * Added explicit handling for:
    
      * `ListView`
      * `LargeListView`
    * Ensures the number of skipped buffers matches how these types are
    encoded and read.
    
    # Are these changes tested?
    
    Yes.
    
    * Added a regression test:
    
      * `test_projection_skip_list_view` in `arrow-ipc/src/reader.rs`
    * The test:
    
      * creates a batch with a `ListView` column followed by a primitive
        column
      * reads only the second column
      * verifies the result matches expected values
    
    Before the fix, the current code failed this test with a buffer size
    error:
    ```
    InvalidArgumentError("Need at least 16 bytes in buffers[0] in array of type Int32, but got 1")
    ```
    After the changes made in `skip_field`, it passes.
    
    All existing `arrow-ipc` tests also pass.
    
    # Are there any user-facing changes?
    
    No.
---
 arrow-ipc/src/reader.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 411c1f14c2..5312fad3ae 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -669,6 +669,12 @@ impl<'a> RecordBatchDecoder<'a> {
                 self.skip_buffer();
                 self.skip_field(list_field, variadic_count)?;
             }
+            ListView(list_field) | LargeListView(list_field) => {
+                self.skip_buffer(); // Null buffer
+                self.skip_buffer(); // Offsets
+                self.skip_buffer(); // Sizes
+                self.skip_field(list_field, variadic_count)?;
+            }
             FixedSizeList(list_field, _) => {
                 self.skip_buffer();
                 self.skip_field(list_field, variadic_count)?;
@@ -3453,4 +3459,62 @@ mod tests {
         // The result must be a dictionary-typed array.
         assert!(matches!(col.data_type(), DataType::Dictionary(_, _)));
     }
+
+    // Tests projected reads where a ListView column is skipped before another column.
+    // This catches cases where skipping the ListView consumes the wrong number of buffers.
+    #[test]
+    fn test_projection_skip_list_view() {
+        use crate::reader::FileReader;
+        use crate::writer::FileWriter;
+        use arrow_array::{
+            GenericListViewArray, Int32Array, RecordBatch,
+            builder::{GenericListViewBuilder, UInt32Builder},
+        };
+        use arrow_schema::{DataType, Field, Schema};
+        use std::sync::Arc;
+
+        // Build a small ListView column with a mix of valid and null entries
+        let mut builder = GenericListViewBuilder::<i32, _>::new(UInt32Builder::new());
+
+        builder.values().append_value(1);
+        builder.values().append_value(2);
+        builder.append(true);
+
+        builder.append(false);
+
+        builder.values().append_value(3);
+        builder.values().append_value(4);
+        builder.append(true);
+
+        let list_view: GenericListViewArray<i32> = builder.finish();
+
+        // Second column with simple values
+        let values = Int32Array::from(vec![10, 20, 30]);
+
+        // Schema: first column is ListView, second is Int32
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", list_view.data_type().clone(), true),
+            Field::new("b", DataType::Int32, false),
+        ]));
+        // Create a batch with both columns
+        let batch =
+            RecordBatch::try_new(schema, vec![Arc::new(list_view), Arc::new(values.clone())])
+                .unwrap();
+
+        // Write the batch to IPC
+        let mut buf = Vec::new();
+        {
+            let mut writer = FileWriter::try_new(&mut buf, &batch.schema()).unwrap();
+            writer.write(&batch).unwrap();
+            writer.finish().unwrap();
+        }
+
+        // Skip ListView column and Project only column "b"
+        let mut reader = FileReader::try_new(std::io::Cursor::new(buf), Some(vec![1])).unwrap();
+        let read_batch = reader.next().unwrap().unwrap();
+
+        // Verify that the projected column is read correctly
+        assert_eq!(read_batch.num_columns(), 1);
+        assert_eq!(read_batch.column(0).as_ref(), &values);
+    }
 }

Reply via email to