Jefffrey commented on code in PR #8716:
URL: https://github.com/apache/arrow-rs/pull/8716#discussion_r2772009422


##########
arrow-cast/src/cast/run_array.rs:
##########
@@ -163,3 +170,486 @@ pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
     let run_array = RunArray::<K>::try_new(&run_ends_array, 
values_array.as_ref())?;
     Ok(Arc::new(run_array))
 }
+
+fn compute_run_boundaries(array: &ArrayRef) -> (Vec<usize>, Vec<usize>) {
+    if array.is_empty() {
+        return (Vec::new(), Vec::new());
+    }
+
+    use arrow_schema::{DataType::*, IntervalUnit, TimeUnit};
+
+    match array.data_type() {
+        Null => (vec![array.len()], vec![0]),
+        Boolean => runs_for_boolean(array.as_boolean()),
+        Int8 => runs_for_primitive(array.as_primitive::<Int8Type>()),
+        Int16 => runs_for_primitive(array.as_primitive::<Int16Type>()),
+        Int32 => runs_for_primitive(array.as_primitive::<Int32Type>()),
+        Int64 => runs_for_primitive(array.as_primitive::<Int64Type>()),
+        UInt8 => runs_for_primitive(array.as_primitive::<UInt8Type>()),
+        UInt16 => runs_for_primitive(array.as_primitive::<UInt16Type>()),
+        UInt32 => runs_for_primitive(array.as_primitive::<UInt32Type>()),
+        UInt64 => runs_for_primitive(array.as_primitive::<UInt64Type>()),
+        Float16 => runs_for_primitive(array.as_primitive::<Float16Type>()),
+        Float32 => runs_for_primitive(array.as_primitive::<Float32Type>()),
+        Float64 => runs_for_primitive(array.as_primitive::<Float64Type>()),
+        Date32 => runs_for_primitive(array.as_primitive::<Date32Type>()),
+        Date64 => runs_for_primitive(array.as_primitive::<Date64Type>()),
+        Time32(TimeUnit::Second) => 
runs_for_primitive(array.as_primitive::<Time32SecondType>()),
+        Time32(TimeUnit::Millisecond) => {
+            runs_for_primitive(array.as_primitive::<Time32MillisecondType>())
+        }
+        Time64(TimeUnit::Microsecond) => {
+            runs_for_primitive(array.as_primitive::<Time64MicrosecondType>())
+        }
+        Time64(TimeUnit::Nanosecond) => {
+            runs_for_primitive(array.as_primitive::<Time64NanosecondType>())
+        }
+        Duration(TimeUnit::Second) => {
+            runs_for_primitive(array.as_primitive::<DurationSecondType>())
+        }
+        Duration(TimeUnit::Millisecond) => {
+            runs_for_primitive(array.as_primitive::<DurationMillisecondType>())
+        }
+        Duration(TimeUnit::Microsecond) => {
+            runs_for_primitive(array.as_primitive::<DurationMicrosecondType>())
+        }
+        Duration(TimeUnit::Nanosecond) => {
+            runs_for_primitive(array.as_primitive::<DurationNanosecondType>())
+        }
+        Timestamp(TimeUnit::Second, _) => {
+            runs_for_primitive(array.as_primitive::<TimestampSecondType>())
+        }
+        Timestamp(TimeUnit::Millisecond, _) => {
+            
runs_for_primitive(array.as_primitive::<TimestampMillisecondType>())
+        }
+        Timestamp(TimeUnit::Microsecond, _) => {
+            
runs_for_primitive(array.as_primitive::<TimestampMicrosecondType>())
+        }
+        Timestamp(TimeUnit::Nanosecond, _) => {
+            runs_for_primitive(array.as_primitive::<TimestampNanosecondType>())
+        }
+        Interval(IntervalUnit::YearMonth) => {
+            runs_for_primitive(array.as_primitive::<IntervalYearMonthType>())
+        }
+        Interval(IntervalUnit::DayTime) => {
+            runs_for_primitive(array.as_primitive::<IntervalDayTimeType>())
+        }
+        Interval(IntervalUnit::MonthDayNano) => {
+            
runs_for_primitive(array.as_primitive::<IntervalMonthDayNanoType>())
+        }
+        Decimal128(_, _) => 
runs_for_primitive(array.as_primitive::<Decimal128Type>()),
+        Decimal256(_, _) => 
runs_for_primitive(array.as_primitive::<Decimal256Type>()),
+        Utf8 => runs_for_string_i32(array.as_string::<i32>()),
+        LargeUtf8 => runs_for_string_i64(array.as_string::<i64>()),
+        Utf8View => runs_for_string_view(array.as_string_view()),
+        Binary => runs_for_binary_i32(array.as_binary::<i32>()),
+        LargeBinary => runs_for_binary_i64(array.as_binary::<i64>()),
+        BinaryView => runs_for_binary_view(array.as_binary_view()),
+        FixedSizeBinary(_) => 
runs_for_fixed_size_binary(array.as_fixed_size_binary()),
+        Dictionary(key_type, _) => match key_type.as_ref() {
+            Int8 => runs_for_dictionary::<Int8Type>(array.as_dictionary()),
+            Int16 => runs_for_dictionary::<Int16Type>(array.as_dictionary()),
+            Int32 => runs_for_dictionary::<Int32Type>(array.as_dictionary()),
+            Int64 => runs_for_dictionary::<Int64Type>(array.as_dictionary()),
+            UInt8 => runs_for_dictionary::<UInt8Type>(array.as_dictionary()),
+            UInt16 => runs_for_dictionary::<UInt16Type>(array.as_dictionary()),
+            UInt32 => runs_for_dictionary::<UInt32Type>(array.as_dictionary()),
+            UInt64 => runs_for_dictionary::<UInt64Type>(array.as_dictionary()),
+            _ => runs_generic(array.as_ref()),
+        },
+        _ => runs_generic(array.as_ref()),
+    }
+}
+
+fn runs_for_boolean(array: &BooleanArray) -> (Vec<usize>, Vec<usize>) {
+    let len = array.len();
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+    let mut current_valid = array.is_valid(0);
+    let mut current_value = if current_valid { array.value(0) } else { false };
+
+    for idx in 1..len {
+        // Treat a change in validity the same as a change in value so null 
boundaries are recorded.
+        let valid = array.is_valid(idx);
+        let mut boundary = false;
+        if current_valid && valid {
+            let value = array.value(idx);
+            if value != current_value {
+                current_value = value;
+                boundary = true;
+            }
+        } else if current_valid != valid {
+            boundary = true;
+            if valid {
+                current_value = array.value(idx);
+            }
+        }
+
+        if boundary {
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(idx);
+        }
+        current_valid = valid;
+    }
+
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_primitive<T: ArrowPrimitiveType>(
+    array: &PrimitiveArray<T>,
+) -> (Vec<usize>, Vec<usize>) {
+    let len = array.len();
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let values = array.values();
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+
+    if array.null_count() == 0 {
+        let mut current = unsafe { *values.get_unchecked(0) };
+        let mut idx = 1;
+        while idx < len {
+            // Attempt to advance in 16-byte chunks before falling back to 
scalar comparison.
+            let boundary = scan_run_end::<T>(values, current, idx);
+            if boundary == len {
+                break;
+            }
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(boundary);
+            current = unsafe { *values.get_unchecked(boundary) };
+            idx = boundary + 1;
+        }
+        return finalize_runs(run_boundaries, len);
+    }
+
+    let nulls = array
+        .nulls()
+        .expect("null_count > 0 implies a null buffer is present");
+    let mut current_valid = nulls.is_valid(0);
+    let mut current_value = unsafe { *values.get_unchecked(0) };
+    for idx in 1..len {
+        let valid = nulls.is_valid(idx);
+        let mut boundary = false;
+        if current_valid && valid {
+            let value = unsafe { *values.get_unchecked(idx) };
+            if value != current_value {
+                current_value = value;
+                boundary = true;
+            }
+        } else if current_valid != valid {
+            boundary = true;
+            if valid {
+                current_value = unsafe { *values.get_unchecked(idx) };
+            }
+        }
+        if boundary {
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(idx);
+        }
+        current_valid = valid;
+    }
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_binary_i32(array: &GenericBinaryArray<i32>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i32| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_binary_i64(array: &GenericBinaryArray<i64>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i64| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_binary_like<T: Copy>(
+    len: usize,
+    null_count: usize,
+    offsets: &[T],
+    values: &[u8],
+    mut is_valid: impl FnMut(usize) -> bool,
+    to_usize: &mut impl FnMut(T) -> usize,
+) -> (Vec<usize>, Vec<usize>) {
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+
+    if null_count == 0 {
+        let mut current_start = to_usize(offsets[0]);
+        let mut current_end = to_usize(offsets[1]);
+        for idx in 1..len {
+            let start = to_usize(offsets[idx]);
+            let end = to_usize(offsets[idx + 1]);
+            // Any difference in byte length or payload means a new run.
+            if (end - start) != (current_end - current_start)
+                || values[start..end] != values[current_start..current_end]
+            {
+                ensure_capacity(&mut run_boundaries, len);
+                run_boundaries.push(idx);
+                current_start = start;
+                current_end = end;
+            }
+        }
+    } else {
+        let mut current_valid = is_valid(0);
+        let mut current_range = (to_usize(offsets[0]), to_usize(offsets[1]));
+        for idx in 1..len {
+            let valid = is_valid(idx);
+            let mut boundary = false;
+            if current_valid && valid {
+                let start = to_usize(offsets[idx]);
+                let end = to_usize(offsets[idx + 1]);
+                let (current_start, current_end) = current_range;
+                // Keep reusing the current byte-range as long as both 
validity and payload match.
+                if (end - start) != (current_end - current_start)
+                    || values[start..end] != values[current_start..current_end]
+                {
+                    boundary = true;
+                    current_range = (start, end);
+                }
+            } else if current_valid != valid {
+                boundary = true;
+                if valid {
+                    current_range = (to_usize(offsets[idx]), 
to_usize(offsets[idx + 1]));
+                }
+            }
+            if boundary {
+                ensure_capacity(&mut run_boundaries, len);
+                run_boundaries.push(idx);
+            }
+            current_valid = valid;
+        }
+    }
+
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_string_i32(array: &GenericStringArray<i32>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i32| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_string_i64(array: &GenericStringArray<i64>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i64| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_string_view(array: &StringViewArray) -> (Vec<usize>, Vec<usize>) {

Review Comment:
   We could probably remove these thin wrappers, since the big match above
   already has a catch-all for non-specialized types?



##########
arrow-cast/src/cast/run_array.rs:
##########
@@ -163,3 +170,486 @@ pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
     let run_array = RunArray::<K>::try_new(&run_ends_array, 
values_array.as_ref())?;
     Ok(Arc::new(run_array))
 }
+
+fn compute_run_boundaries(array: &ArrayRef) -> (Vec<usize>, Vec<usize>) {
+    if array.is_empty() {
+        return (Vec::new(), Vec::new());
+    }
+
+    use arrow_schema::{DataType::*, IntervalUnit, TimeUnit};
+
+    match array.data_type() {
+        Null => (vec![array.len()], vec![0]),
+        Boolean => runs_for_boolean(array.as_boolean()),
+        Int8 => runs_for_primitive(array.as_primitive::<Int8Type>()),

Review Comment:
   We could probably use
   [`downcast_primitive_array`](https://docs.rs/arrow/latest/arrow/macro.downcast_primitive_array.html)
   to cut down on some of the boilerplate here?



##########
arrow-cast/src/cast/run_array.rs:
##########
@@ -163,3 +170,486 @@ pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
     let run_array = RunArray::<K>::try_new(&run_ends_array, 
values_array.as_ref())?;
     Ok(Arc::new(run_array))
 }
+
+fn compute_run_boundaries(array: &ArrayRef) -> (Vec<usize>, Vec<usize>) {
+    if array.is_empty() {
+        return (Vec::new(), Vec::new());
+    }
+
+    use arrow_schema::{DataType::*, IntervalUnit, TimeUnit};
+
+    match array.data_type() {
+        Null => (vec![array.len()], vec![0]),
+        Boolean => runs_for_boolean(array.as_boolean()),
+        Int8 => runs_for_primitive(array.as_primitive::<Int8Type>()),
+        Int16 => runs_for_primitive(array.as_primitive::<Int16Type>()),
+        Int32 => runs_for_primitive(array.as_primitive::<Int32Type>()),
+        Int64 => runs_for_primitive(array.as_primitive::<Int64Type>()),
+        UInt8 => runs_for_primitive(array.as_primitive::<UInt8Type>()),
+        UInt16 => runs_for_primitive(array.as_primitive::<UInt16Type>()),
+        UInt32 => runs_for_primitive(array.as_primitive::<UInt32Type>()),
+        UInt64 => runs_for_primitive(array.as_primitive::<UInt64Type>()),
+        Float16 => runs_for_primitive(array.as_primitive::<Float16Type>()),
+        Float32 => runs_for_primitive(array.as_primitive::<Float32Type>()),
+        Float64 => runs_for_primitive(array.as_primitive::<Float64Type>()),
+        Date32 => runs_for_primitive(array.as_primitive::<Date32Type>()),
+        Date64 => runs_for_primitive(array.as_primitive::<Date64Type>()),
+        Time32(TimeUnit::Second) => 
runs_for_primitive(array.as_primitive::<Time32SecondType>()),
+        Time32(TimeUnit::Millisecond) => {
+            runs_for_primitive(array.as_primitive::<Time32MillisecondType>())
+        }
+        Time64(TimeUnit::Microsecond) => {
+            runs_for_primitive(array.as_primitive::<Time64MicrosecondType>())
+        }
+        Time64(TimeUnit::Nanosecond) => {
+            runs_for_primitive(array.as_primitive::<Time64NanosecondType>())
+        }
+        Duration(TimeUnit::Second) => {
+            runs_for_primitive(array.as_primitive::<DurationSecondType>())
+        }
+        Duration(TimeUnit::Millisecond) => {
+            runs_for_primitive(array.as_primitive::<DurationMillisecondType>())
+        }
+        Duration(TimeUnit::Microsecond) => {
+            runs_for_primitive(array.as_primitive::<DurationMicrosecondType>())
+        }
+        Duration(TimeUnit::Nanosecond) => {
+            runs_for_primitive(array.as_primitive::<DurationNanosecondType>())
+        }
+        Timestamp(TimeUnit::Second, _) => {
+            runs_for_primitive(array.as_primitive::<TimestampSecondType>())
+        }
+        Timestamp(TimeUnit::Millisecond, _) => {
+            
runs_for_primitive(array.as_primitive::<TimestampMillisecondType>())
+        }
+        Timestamp(TimeUnit::Microsecond, _) => {
+            
runs_for_primitive(array.as_primitive::<TimestampMicrosecondType>())
+        }
+        Timestamp(TimeUnit::Nanosecond, _) => {
+            runs_for_primitive(array.as_primitive::<TimestampNanosecondType>())
+        }
+        Interval(IntervalUnit::YearMonth) => {
+            runs_for_primitive(array.as_primitive::<IntervalYearMonthType>())
+        }
+        Interval(IntervalUnit::DayTime) => {
+            runs_for_primitive(array.as_primitive::<IntervalDayTimeType>())
+        }
+        Interval(IntervalUnit::MonthDayNano) => {
+            
runs_for_primitive(array.as_primitive::<IntervalMonthDayNanoType>())
+        }
+        Decimal128(_, _) => 
runs_for_primitive(array.as_primitive::<Decimal128Type>()),
+        Decimal256(_, _) => 
runs_for_primitive(array.as_primitive::<Decimal256Type>()),
+        Utf8 => runs_for_string_i32(array.as_string::<i32>()),
+        LargeUtf8 => runs_for_string_i64(array.as_string::<i64>()),
+        Utf8View => runs_for_string_view(array.as_string_view()),
+        Binary => runs_for_binary_i32(array.as_binary::<i32>()),
+        LargeBinary => runs_for_binary_i64(array.as_binary::<i64>()),
+        BinaryView => runs_for_binary_view(array.as_binary_view()),
+        FixedSizeBinary(_) => 
runs_for_fixed_size_binary(array.as_fixed_size_binary()),
+        Dictionary(key_type, _) => match key_type.as_ref() {
+            Int8 => runs_for_dictionary::<Int8Type>(array.as_dictionary()),
+            Int16 => runs_for_dictionary::<Int16Type>(array.as_dictionary()),
+            Int32 => runs_for_dictionary::<Int32Type>(array.as_dictionary()),
+            Int64 => runs_for_dictionary::<Int64Type>(array.as_dictionary()),
+            UInt8 => runs_for_dictionary::<UInt8Type>(array.as_dictionary()),
+            UInt16 => runs_for_dictionary::<UInt16Type>(array.as_dictionary()),
+            UInt32 => runs_for_dictionary::<UInt32Type>(array.as_dictionary()),
+            UInt64 => runs_for_dictionary::<UInt64Type>(array.as_dictionary()),
+            _ => runs_generic(array.as_ref()),
+        },
+        _ => runs_generic(array.as_ref()),
+    }
+}
+
+fn runs_for_boolean(array: &BooleanArray) -> (Vec<usize>, Vec<usize>) {
+    let len = array.len();
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+    let mut current_valid = array.is_valid(0);
+    let mut current_value = if current_valid { array.value(0) } else { false };
+
+    for idx in 1..len {
+        // Treat a change in validity the same as a change in value so null 
boundaries are recorded.
+        let valid = array.is_valid(idx);
+        let mut boundary = false;
+        if current_valid && valid {
+            let value = array.value(idx);
+            if value != current_value {
+                current_value = value;
+                boundary = true;
+            }
+        } else if current_valid != valid {
+            boundary = true;
+            if valid {
+                current_value = array.value(idx);
+            }
+        }
+
+        if boundary {
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(idx);
+        }
+        current_valid = valid;
+    }
+
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_primitive<T: ArrowPrimitiveType>(
+    array: &PrimitiveArray<T>,
+) -> (Vec<usize>, Vec<usize>) {
+    let len = array.len();
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let values = array.values();
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+
+    if array.null_count() == 0 {
+        let mut current = unsafe { *values.get_unchecked(0) };
+        let mut idx = 1;
+        while idx < len {
+            // Attempt to advance in 16-byte chunks before falling back to 
scalar comparison.
+            let boundary = scan_run_end::<T>(values, current, idx);
+            if boundary == len {
+                break;
+            }
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(boundary);
+            current = unsafe { *values.get_unchecked(boundary) };
+            idx = boundary + 1;
+        }
+        return finalize_runs(run_boundaries, len);
+    }
+
+    let nulls = array
+        .nulls()
+        .expect("null_count > 0 implies a null buffer is present");
+    let mut current_valid = nulls.is_valid(0);
+    let mut current_value = unsafe { *values.get_unchecked(0) };
+    for idx in 1..len {
+        let valid = nulls.is_valid(idx);
+        let mut boundary = false;
+        if current_valid && valid {
+            let value = unsafe { *values.get_unchecked(idx) };
+            if value != current_value {
+                current_value = value;
+                boundary = true;
+            }
+        } else if current_valid != valid {
+            boundary = true;
+            if valid {
+                current_value = unsafe { *values.get_unchecked(idx) };
+            }
+        }
+        if boundary {
+            ensure_capacity(&mut run_boundaries, len);
+            run_boundaries.push(idx);
+        }
+        current_valid = valid;
+    }
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_binary_i32(array: &GenericBinaryArray<i32>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i32| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_binary_i64(array: &GenericBinaryArray<i64>) -> (Vec<usize>, 
Vec<usize>) {
+    let mut to_usize = |v: i64| v as usize;
+    runs_for_binary_like(
+        array.len(),
+        array.null_count(),
+        array.value_offsets(),
+        array.value_data(),
+        |idx| array.is_valid(idx),
+        &mut to_usize,
+    )
+}
+
+fn runs_for_binary_like<T: Copy>(
+    len: usize,
+    null_count: usize,
+    offsets: &[T],
+    values: &[u8],
+    mut is_valid: impl FnMut(usize) -> bool,
+    to_usize: &mut impl FnMut(T) -> usize,
+) -> (Vec<usize>, Vec<usize>) {
+    if let Some(runs) = trivial_runs(len) {
+        return runs;
+    }
+
+    let mut run_boundaries = Vec::with_capacity(len / 64 + 2);
+
+    if null_count == 0 {
+        let mut current_start = to_usize(offsets[0]);
+        let mut current_end = to_usize(offsets[1]);
+        for idx in 1..len {
+            let start = to_usize(offsets[idx]);
+            let end = to_usize(offsets[idx + 1]);
+            // Any difference in byte length or payload means a new run.
+            if (end - start) != (current_end - current_start)
+                || values[start..end] != values[current_start..current_end]
+            {
+                ensure_capacity(&mut run_boundaries, len);
+                run_boundaries.push(idx);
+                current_start = start;
+                current_end = end;
+            }
+        }
+    } else {
+        let mut current_valid = is_valid(0);
+        let mut current_range = (to_usize(offsets[0]), to_usize(offsets[1]));
+        for idx in 1..len {
+            let valid = is_valid(idx);
+            let mut boundary = false;
+            if current_valid && valid {
+                let start = to_usize(offsets[idx]);
+                let end = to_usize(offsets[idx + 1]);
+                let (current_start, current_end) = current_range;
+                // Keep reusing the current byte-range as long as both 
validity and payload match.
+                if (end - start) != (current_end - current_start)
+                    || values[start..end] != values[current_start..current_end]
+                {
+                    boundary = true;
+                    current_range = (start, end);
+                }
+            } else if current_valid != valid {
+                boundary = true;
+                if valid {
+                    current_range = (to_usize(offsets[idx]), 
to_usize(offsets[idx + 1]));
+                }
+            }
+            if boundary {
+                ensure_capacity(&mut run_boundaries, len);
+                run_boundaries.push(idx);
+            }
+            current_valid = valid;
+        }
+    }
+
+    finalize_runs(run_boundaries, len)
+}
+
+fn runs_for_string_i32(array: &GenericStringArray<i32>) -> (Vec<usize>, 
Vec<usize>) {

Review Comment:
   ```rust
   fn runs_for_string<O: OffsetSizeTrait>(array: &GenericStringArray<O>) -> (Vec<usize>, Vec<usize>) {
       let mut to_usize = |v: O| v.as_usize();
       runs_for_binary_like(
           array.len(),
           array.null_count(),
           array.value_offsets(),
           array.value_data(),
           |idx| array.is_valid(idx),
           &mut to_usize,
       )
   }
   ```
   
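   A single generic function like this would be a nice deduplication of the
   `runs_for_string_i32` / `runs_for_string_i64` wrappers.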



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to