jhorstmann commented on code in PR #7962:
URL: https://github.com/apache/arrow-rs/pull/7962#discussion_r2216741483


##########
arrow-ord/src/sort.rs:
##########
@@ -178,44 +178,136 @@ where
     }
 }
 
-// partition indices into valid and null indices
-fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
+/// Partition indices of an Arrow array into two categories:
+/// - `valid`: indices of non-null elements
+/// - `nulls`: indices of null elements
+///
+/// Optimized for performance with fast-path for all-valid arrays
+/// and bit-parallel scan for null-containing arrays.
+#[inline(always)]
+pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
     let len = array.len();
     let null_count = array.null_count();
-    match array.nulls() {
-        Some(nulls) if null_count > 0 => {
-            let mut valid_indices = Vec::with_capacity(len - null_count);
-            let mut null_indices = Vec::with_capacity(null_count);
-
-            let valid_slice = valid_indices.spare_capacity_mut();
-            let null_slice = null_indices.spare_capacity_mut();
-            let mut valid_idx = 0;
-            let mut null_idx = 0;
-
-            nulls.into_iter().enumerate().for_each(|(i, v)| {
-                if v {
-                    valid_slice[valid_idx].write(i as u32);
-                    valid_idx += 1;
-                } else {
-                    null_slice[null_idx].write(i as u32);
-                    null_idx += 1;
+
+    // Fast path: if there are no nulls, all elements are valid
+    if null_count == 0 {
+        // Simply return a range of indices [0, len)
+        let valid = (0..len as u32).collect();
+        return (valid, Vec::new());
+    }
+
+    // null bitmap exists and some values are null
+    partition_validity_scan(array, len, null_count)
+}
+
+/// Scans the null bitmap and partitions valid/null indices efficiently.
+/// Uses bit-level operations to extract bit positions.
+/// This function is only called when nulls exist.
+#[inline(always)]
+fn partition_validity_scan(
+    array: &dyn Array,
+    len: usize,
+    null_count: usize,
+) -> (Vec<u32>, Vec<u32>) {
+    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
+    let bitmap = array.nulls().unwrap();
+    let buffer = bitmap.buffer(); // Raw byte slice of bitmap bits
+
+    // Preallocate result vectors with exact capacities (avoids reallocations)
+    let mut valid = Vec::with_capacity(len - null_count);
+    let mut nulls = Vec::with_capacity(null_count);
+
+    unsafe {
+        // Access spare capacity of vectors to write directly without bounds 
checks
+        let valid_slice = valid.spare_capacity_mut();
+        let null_slice = nulls.spare_capacity_mut();
+        let mut vi = 0; // index for writing into valid_slice
+        let mut ni = 0; // index for writing into null_slice
+
+        let mut base = 0; // bit index offset
+        let n_words = buffer.len() / 8; // number of 64-bit chunks in bitmap
+
+        // Process the bitmap in 64-bit (8 byte) chunks for efficiency
+        for word_idx in 0..n_words {

Review Comment:
   I think this needs to use `UnalignedBitChunk` to deal with arbitrarily 
sliced arrays.
   
   Fascinating results! I did not expect this to be faster since we still have 
to visit each bit once. Must be related to branch prediction, or just writing 
to one of the slices at a time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to