pitrou commented on code in PR #46789:
URL: https://github.com/apache/arrow/pull/46789#discussion_r2142290799


##########
cpp/src/arrow/util/byte_stream_split_internal.h:
##########
@@ -123,95 +186,80 @@ void ByteStreamSplitEncodeSimd128(const uint8_t* 
raw_values, int width,
       output_buffer_raw[j * num_values + i] = byte_in_value;
     }
   }
-  // The current shuffling algorithm diverges for float and double types but 
the compiler
-  // should be able to remove the branch since only one path is taken for each 
template
-  // instantiation.
-  // Example run for 32-bit variables:
-  // Step 0: copy from unaligned input bytes:
-  //   0: ABCD ABCD ABCD ABCD 1: ABCD ABCD ABCD ABCD ...
-  // Step 1: simd_batch<int8_t, 8>::zip_lo and simd_batch<int8_t, 8>::zip_hi:
-  //   0: AABB CCDD AABB CCDD 1: AABB CCDD AABB CCDD ...
-  // Step 2: apply simd_batch<int8_t, 8>::zip_lo and  simd_batch<int8_t, 
8>::zip_hi again:
-  //   0: AAAA BBBB CCCC DDDD 1: AAAA BBBB CCCC DDDD ...
-  // Step 3: simd_batch<int8_t, 8>::zip_lo and simd_batch<int8_t, 8>::zip_hi:
-  //   0: AAAA AAAA BBBB BBBB 1: CCCC CCCC DDDD DDDD ...
-  // Step 4: simd_batch<int64_t, 2>::zip_lo and simd_batch<int64_t, 2>::zip_hi:
-  //   0: AAAA AAAA AAAA AAAA 1: BBBB BBBB BBBB BBBB ...
+
+  // Number of input values we can fit in a simd register
+  constexpr int NumValuesInBatch = sizeof(simd_batch) / kNumStreams;

Review Comment:
   Let's call this `kNumValuesInBatch`. Same for others below of course :)



##########
cpp/src/arrow/util/byte_stream_split_internal.h:
##########
@@ -555,7 +603,7 @@ inline void ByteStreamSplitEncode(const uint8_t* 
raw_values, int width,
       memcpy(out, raw_values, num_values);
       return;
     case 2:
-      return ByteStreamSplitEncodeScalar<2>(raw_values, width, num_values, 
out);
+      return ByteStreamSplitEncodeSimd128<2>(raw_values, width, num_values, 
out);

Review Comment:
   We want this to be conditional, see the `ByteStreamSplitEncodePerhapsSimd` 
macro above.



##########
cpp/src/arrow/util/byte_stream_split_internal.h:
##########
@@ -95,18 +109,67 @@ void ByteStreamSplitDecodeSimd128(const uint8_t* data, int 
width, int64_t num_va
   }
 }
 
+template <int N>
+struct grouped_bytes_impl;
+
+template <>
+struct grouped_bytes_impl<1> {
+  using type = int8_t;
+};
+
+template <>
+struct grouped_bytes_impl<2> {
+  using type = int16_t;
+};
+
+template <>
+struct grouped_bytes_impl<4> {
+  using type = int32_t;
+};
+
+template <>
+struct grouped_bytes_impl<8> {
+  using type = int64_t;
+};
+
+// Map a number of bytes to a type
+template <int N>
+using grouped_bytes_t = typename grouped_bytes_impl<N>::type;
+
+// Like xsimd::zlip_lo, but zip groups of NBytes at once
+template <int NBytes, int BatchSize = 16,
+          typename Batch = xsimd::make_sized_batch_t<int8_t, BatchSize>>

Review Comment:
   We preferably use `kCamelCase` for constants, including integral template 
parameters.
   ```suggestion
   template <int kGroupSize, int kBatchSize = 16,
             typename Batch = xsimd::make_sized_batch_t<int8_t, kBatchSize>>
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to