korowa commented on code in PR #5858:
URL: https://github.com/apache/arrow-rs/pull/5858#discussion_r1644964810


##########
arrow-row/src/fixed.rs:
##########
@@ -216,14 +216,80 @@ where
 ///
 /// - 1 byte `0` if null or `1` if valid
 /// - bytes of [`FixedLengthEncoding`]
-pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
+pub fn encode<T: ArrowPrimitiveType>(
     data: &mut [u8],
     offsets: &mut [usize],
-    i: I,
+    array: &PrimitiveArray<T>,
+    opts: SortOptions,
+) where
+    T::Native: FixedLengthEncoding,
+{
+    let mut offset_idx = 1;
+    for maybe_val in array {
+        let offset = &mut offsets[offset_idx];
+        let end_offset = *offset + T::Native::ENCODED_LEN;
+        if let Some(val) = maybe_val {
+            let to_write = &mut data[*offset..end_offset];
+            to_write[0] = 1;
+            let mut encoded = val.encode();
+            if opts.descending {
+                // Flip bits to reverse order
+                encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
+            }
+            to_write[1..].copy_from_slice(encoded.as_ref())
+        } else {
+            data[*offset] = null_sentinel(opts);
+        }
+        *offset = end_offset;
+        offset_idx += 1;
+    }
+}
+
+/// Encoding for non-nullable primitive arrays.
+/// Iterates directly over the `values`, and skips NULLs-checking.
+pub fn encode_not_null<T: ArrowPrimitiveType>(
+    data: &mut [u8],
+    offsets: &mut [usize],
+    array: &PrimitiveArray<T>,
+    opts: SortOptions,
+) where
+    T::Native: FixedLengthEncoding,
+{
+    assert!(!array.is_nullable());
+
+    let mut offset_idx = 1;
+    for val in array.values() {
+        let offset = &mut offsets[offset_idx];
+        let end_offset = *offset + T::Native::ENCODED_LEN;
+
+        let to_write = &mut data[*offset..end_offset];
+        to_write[0] = 1;
+        let mut encoded = val.encode();
+        if opts.descending {

Review Comment:
   I guess it'll be a follow up (need a bit more time with it) -- as for now 
after quick check, for non-nullable columns, encoded with descending order
   ```rs
       if opts.descending {
           // Flip bits to reverse order for encoded data skipping validity byte
           for offset_idx in 1..offsets.len() {
               let end_offset = offsets[offset_idx];
               let buf = &mut data[end_offset-T::ENCODED_LEN+1..end_offset];
               buf.iter_mut().for_each(|v| *v = !*v);
           }
       }
   ```
   after "main" loop shows  `[9.7890 µs 9.9628 µs 10.196 µs]` vs `[12.391 µs 
12.448 µs 12.545 µs]`, so there may be an improvement, but it's a bit unclear 
for nullable columns (don't have any solution with meaningful improvement yet).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to