This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new c1656ffea Convince the compiler to auto-vectorize the range check in parquet DictionaryBuffer (#4453)
c1656ffea is described below

commit c1656ffea5bba726d7af892e013b6c5b184dd3b4
Author: Jörn Horstmann <[email protected]>
AuthorDate: Tue Jun 27 19:03:43 2023 +0200

    Convince the compiler to auto-vectorize the range check in parquet DictionaryBuffer (#4453)
---
 parquet/src/arrow/buffer/dictionary_buffer.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index 6344d9dd3..a0a47e3b9 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -152,8 +152,15 @@ impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
                     let min = K::from_usize(0).unwrap();
                     let max = K::from_usize(values.len()).unwrap();
 
-                    // It may be possible to use SIMD here
-                    if keys.as_slice().iter().any(|x| *x < min || *x >= max) {
+                    // using copied and fold gets auto-vectorized since rust 1.70
+                    // all/any would allow early exit on invalid values
+                    // but in the happy case all values have to be checked anyway
+                    if !keys
+                        .as_slice()
+                        .iter()
+                        .copied()
+                        .fold(true, |a, x| a && x >= min && x < max)
+                    {
                         return Err(general_err!(
                             "dictionary key beyond bounds of dictionary: 
0..{}",
                             values.len()

Reply via email to