alamb commented on code in PR #7937: URL: https://github.com/apache/arrow-rs/pull/7937#discussion_r2210834216
########## arrow-ord/src/sort.rs: ########## @@ -180,13 +180,36 @@ where // partition indices into valid and null indices fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) { - match array.null_count() { - // faster path - 0 => ((0..(array.len() as u32)).collect(), vec![]), - _ => { - let indices = 0..(array.len() as u32); - indices.partition(|index| array.is_valid(*index as usize)) + let len = array.len(); + match array.nulls() { + Some(nulls) if nulls.null_count() > 0 => { + let mut valid_indices = Vec::with_capacity(len - nulls.null_count()); + let mut null_indices = Vec::with_capacity(nulls.null_count()); + + let valid_slice = valid_indices.spare_capacity_mut(); + let null_slice = null_indices.spare_capacity_mut(); + let mut valid_idx = 0; + let mut null_idx = 0; + + nulls.into_iter().enumerate().for_each(|(i, v)| { + if v { + valid_slice[valid_idx].write(i as u32); + valid_idx += 1; + } else { + null_slice[null_idx].write(i as u32); + null_idx += 1; + } + }); + + unsafe { Review Comment: Can we perhaps get a comment here explaining the safety -- specifically we know it is safe because the vector was initialized with the correct capacity. Could we also add an assert here that `valid_idx == len - nulls.null_count()` and likewise that `null_idx == nulls.null_count()` as an extra check? 
########## arrow-ord/src/sort.rs: ########## @@ -180,13 +180,36 @@ where // partition indices into valid and null indices fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) { - match array.null_count() { - // faster path - 0 => ((0..(array.len() as u32)).collect(), vec![]), - _ => { - let indices = 0..(array.len() as u32); - indices.partition(|index| array.is_valid(*index as usize)) + let len = array.len(); + match array.nulls() { + Some(nulls) if nulls.null_count() > 0 => { + let mut valid_indices = Vec::with_capacity(len - nulls.null_count()); Review Comment: I wonder if saving `null_count` into a local would make a difference ########## arrow-ord/src/sort.rs: ########## @@ -180,13 +180,36 @@ where // partition indices into valid and null indices fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) { - match array.null_count() { - // faster path - 0 => ((0..(array.len() as u32)).collect(), vec![]), - _ => { - let indices = 0..(array.len() as u32); - indices.partition(|index| array.is_valid(*index as usize)) + let len = array.len(); + match array.nulls() { + Some(nulls) if nulls.null_count() > 0 => { + let mut valid_indices = Vec::with_capacity(len - nulls.null_count()); + let mut null_indices = Vec::with_capacity(nulls.null_count()); + + let valid_slice = valid_indices.spare_capacity_mut(); Review Comment: TIL: https://doc.rust-lang.org/std/vec/struct.Vec.html#method.spare_capacity_mut 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org