(arrow-rs) branch main updated: REE row conversion speed up (#9845)

alamb Wed, 29 Apr 2026 13:39:06 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 91102e7d2a REE row conversion speed up (#9845)
91102e7d2a is described below

commit 91102e7d2a136146f73a0a3ce9cb2f8eb9d91093
Author: RIchard Baah <[email protected]>
AuthorDate: Wed Apr 29 16:38:53 2026 -0400

    REE row conversion speed up (#9845)
    
    # Which issue does this PR close?
    this PR closes  #7693
    <!--
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    -->
    
    # Rationale for this change
    Previously in https://github.com/apache/arrow-rs/pull/7649 within
    **run::encode()**, **encode_one()** was called repeatedly even in cases
    where the **row.row()** bytes didn't change in between calls.
    <!--
    Why are you proposing this change? If this is already explained clearly
    in the issue then this section is not needed.
    Explaining clearly why changes are proposed helps reviewers understand
    your changes and offer better suggestions for fixes.
    -->
    
    # What changes are included in this PR?
    I added benchmarks to show the improvement between the two approaches.
    In my implementation, **encode_one()** is called once per run, if the
    run length is greater than one, the encoded bytes are copied into the
    remaining output slots and the offsets are updated in a tight loop. This
    removes the nested while loop and its per-iteration overhead, leaving
    only a memcpy which is unavoidable.
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    # Are these changes tested?
    yes, thankfully @brancz added extensive test in his PR, i also wrote a
    test do debug some things but it mostly covered behavior that his test
    caught so I didn't include it in this PR
    <!--
    We typically require tests for all PRs in order to:
    1. Prevent the code from being accidentally broken by subsequent changes
    2. Serve as another way to document the expected behavior of the code
    
    If tests are not included in your PR, please explain why (for example,
    are they covered by existing tests)?
    -->
    
    # Are there any user-facing changes?
    no
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    
    If there are any breaking changes to public APIs, please call them out.
    -->
---
 arrow-row/src/run.rs        | 32 +++++++++++++-------------------
 arrow/benches/row_format.rs | 21 ++++++++++++++++++---
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/arrow-row/src/run.rs b/arrow-row/src/run.rs
index f775abb635..3cb158924e 100644
--- a/arrow-row/src/run.rs
+++ b/arrow-row/src/run.rs
@@ -56,32 +56,26 @@ pub fn encode<R: RunEndIndexType>(
     array: &RunArray<R>,
 ) {
     let run_ends = array.run_ends().sliced_values();
-
     let mut logical_idx = 0;
     let mut offset_idx = 1; // Skip first offset
 
     // Iterate over each run
     for (physical_idx, run_end) in run_ends.enumerate() {
         let run_end = run_end.as_usize();
-
-        // Process all elements in this run
-        while logical_idx < run_end && offset_idx < offsets.len() {
-            let offset = &mut offsets[offset_idx];
-            let out = &mut data[*offset..];
-
-            // Use variable-length encoding to make the data self-describing
-            let row = rows.row(physical_idx);
-            let bytes_written = variable::encode_one(out, Some(row.data), 
opts);
-            *offset += bytes_written;
-
-            logical_idx += 1;
-            offset_idx += 1;
-        }
-
-        // Break if we've processed all offsets
-        if offset_idx >= offsets.len() {
-            break;
+        let iteration_count = run_end - logical_idx;
+
+        let first_offset = offsets[offset_idx];
+        let out = &mut data[first_offset..];
+        let bytes_written = variable::encode_one(out, 
Some(rows.row(physical_idx).data), opts);
+        offsets[offset_idx] += bytes_written;
+        // now if there are multiple logical positions in this run, we can 
just copy the same encoded data to the next offsets without re-encoding
+        for i in 1..iteration_count {
+            let dst = offsets[offset_idx + i];
+            data.copy_within(first_offset..first_offset + bytes_written, dst);
+            offsets[offset_idx + i] += bytes_written;
         }
+        logical_idx = run_end;
+        offset_idx += iteration_count;
     }
 }
 
diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs
index 1c120bb2f2..f33d8d16b8 100644
--- a/arrow/benches/row_format.rs
+++ b/arrow/benches/row_format.rs
@@ -25,10 +25,11 @@ use arrow::row::{RowConverter, SortField};
 use arrow::util::bench_util::{
     create_boolean_array, create_boolean_array_with_seed, 
create_dict_from_values,
     create_f64_array_with_seed, create_primitive_array, 
create_primitive_array_with_seed,
-    create_string_array_with_len, 
create_string_array_with_len_range_and_prefix_and_seed,
-    create_string_dict_array, create_string_view_array_with_len,
-    create_string_view_array_with_max_len,
+    create_primitive_run_array, create_string_array_with_len,
+    create_string_array_with_len_range_and_prefix_and_seed, 
create_string_dict_array,
+    create_string_view_array_with_len, create_string_view_array_with_max_len,
 };
+
 use arrow::util::data_gen::create_random_array;
 use arrow_array::Array;
 use arrow_array::types::{Int8Type, Int32Type};
@@ -282,6 +283,20 @@ fn row_bench(c: &mut Criterion) {
         "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), 
string_dictionary(100, 0), i64(0)",
         cols,
     );
+    let cols = vec![Arc::new(create_primitive_run_array::<Int32Type, 
Int64Type>(
+        4096, 1024,
+    )) as ArrayRef];
+    do_bench(c, "4096 run_primitive(1024 physical)", cols);
+
+    let cols = vec![Arc::new(create_primitive_run_array::<Int32Type, 
Int64Type>(
+        4096, 512,
+    )) as ArrayRef];
+    do_bench(c, "4096 run_primitive(512 physical)", cols);
+
+    let cols = vec![Arc::new(create_primitive_run_array::<Int32Type, 
Int64Type>(
+        4096, 256,
+    )) as ArrayRef];
+    do_bench(c, "4096 run_primitive(256 physical)", cols);
 
     // List

(arrow-rs) branch main updated: REE row conversion speed up (#9845)

Reply via email to