zhuqi-lucas commented on code in PR #7962: URL: https://github.com/apache/arrow-rs/pull/7962#discussion_r2217145627
########## arrow-ord/src/sort.rs: ########## @@ -178,44 +178,136 @@ where } } -// partition indices into valid and null indices -fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) { +/// Partition indices of an Arrow array into two categories: +/// - `valid`: indices of non-null elements +/// - `nulls`: indices of null elements +/// +/// Optimized for performance with fast-path for all-valid arrays +/// and bit-parallel scan for null-containing arrays. +#[inline(always)] +pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) { let len = array.len(); let null_count = array.null_count(); - match array.nulls() { - Some(nulls) if null_count > 0 => { - let mut valid_indices = Vec::with_capacity(len - null_count); - let mut null_indices = Vec::with_capacity(null_count); - - let valid_slice = valid_indices.spare_capacity_mut(); - let null_slice = null_indices.spare_capacity_mut(); - let mut valid_idx = 0; - let mut null_idx = 0; - - nulls.into_iter().enumerate().for_each(|(i, v)| { - if v { - valid_slice[valid_idx].write(i as u32); - valid_idx += 1; - } else { - null_slice[null_idx].write(i as u32); - null_idx += 1; + + // Fast path: if there are no nulls, all elements are valid + if null_count == 0 { + // Simply return a range of indices [0, len) + let valid = (0..len as u32).collect(); + return (valid, Vec::new()); + } + + // null bitmap exists and some values are null + partition_validity_scan(array, len, null_count) +} + +/// Scans the null bitmap and partitions valid/null indices efficiently. +/// Uses bit-level operations to extract bit positions. +/// This function is only called when nulls exist. +#[inline(always)] Review Comment: Thank you @alamb for review , i added testing in latest PR, and i also using our existed logic to do this. So it will be more safe. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org