This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c1656ffea Convince the compiler to auto-vectorize the range check in
parquet DictionaryBuffer (#4453)
c1656ffea is described below
commit c1656ffea5bba726d7af892e013b6c5b184dd3b4
Author: Jörn Horstmann <[email protected]>
AuthorDate: Tue Jun 27 19:03:43 2023 +0200
Convince the compiler to auto-vectorize the range check in parquet
DictionaryBuffer (#4453)
---
parquet/src/arrow/buffer/dictionary_buffer.rs | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index 6344d9dd3..a0a47e3b9 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -152,8 +152,15 @@ impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
let min = K::from_usize(0).unwrap();
let max = K::from_usize(values.len()).unwrap();
- // It may be possible to use SIMD here
- if keys.as_slice().iter().any(|x| *x < min || *x >= max) {
+ // using copied and fold gets auto-vectorized since rust 1.70
+ // all/any would allow early exit on invalid values
+ // but in the happy case all values have to be checked anyway
+ if !keys
+ .as_slice()
+ .iter()
+ .copied()
+ .fold(true, |a, x| a && x >= min && x < max)
+ {
return Err(general_err!(
"dictionary key beyond bounds of dictionary: 0..{}",
values.len()