This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new bd75582fd Optimize `min_boolean` and `bool_and` (#6144)
bd75582fd is described below
commit bd75582fd08813949030d5a2303a80fb463be76f
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Thu Aug 8 13:43:08 2024 +0200
Optimize `min_boolean` and `bool_and` (#6144)
* Optimize `min_boolean` and `bool_and`
Closes #https://github.com/apache/arrow-rs/issues/6103
* use any
---
arrow-arith/src/aggregate.rs | 58 +++++++++++++++++++++++++++++++++++++-------
1 file changed, 49 insertions(+), 9 deletions(-)
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index a81e7fc1a..a4915f589 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -350,11 +350,46 @@ pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
}
// Note the min bool is false (0), so short circuit as soon as we see it
- array
- .iter()
- .find(|&b| b == Some(false))
- .flatten()
- .or(Some(true))
+ match array.nulls() {
+ None => {
+ let bit_chunks = array.values().bit_chunks();
+ if bit_chunks.iter().any(|x| {
+ // u64::MAX has all bits set, so if the value is not that,
then there is a false
+ x != u64::MAX
+ }) {
+ return Some(false);
+ }
+ // If the remainder bits are not all set, then there is a false
+ if bit_chunks.remainder_bits().count_ones() as usize !=
bit_chunks.remainder_len() {
+ Some(false)
+ } else {
+ Some(true)
+ }
+ }
+ Some(nulls) => {
+ let validity_chunks = nulls.inner().bit_chunks();
+ let value_chunks = array.values().bit_chunks();
+
+ if value_chunks
+ .iter()
+ .zip(validity_chunks.iter())
+ .any(|(value, validity)| {
+ // We are looking for a false value, but because applying
the validity mask
+ // can create a false for a true value (e.g. value: true,
validity: false), we instead invert the value, so that we have to look for a
true.
+ (!value & validity) != 0
+ })
+ {
+ return Some(false);
+ }
+
+ // Same trick as above: Instead of looking for a false, we invert
the value bits and look for a true
+ if (!value_chunks.remainder_bits() &
validity_chunks.remainder_bits()) != 0 {
+ Some(false)
+ } else {
+ Some(true)
+ }
+ }
+ }
}
/// Returns the maximum value in the boolean array
@@ -707,10 +742,7 @@ bit_operation!(
///
/// Returns `None` if the array is empty or only contains null values.
pub fn bool_and(array: &BooleanArray) -> Option<bool> {
- if array.null_count() == array.len() {
- return None;
- }
- Some(array.false_count() == 0)
+ min_boolean(array)
}
/// Returns true if any non-null input value is true, otherwise false.
@@ -1382,6 +1414,14 @@ mod tests {
let a = BooleanArray::from(vec![Some(false), None]);
assert_eq!(Some(false), min_boolean(&a));
assert_eq!(Some(false), max_boolean(&a));
+
+ let a = BooleanArray::from(vec![Some(true)]);
+ assert_eq!(Some(true), min_boolean(&a));
+ assert_eq!(Some(true), max_boolean(&a));
+
+ let a = BooleanArray::from(vec![Some(false)]);
+ assert_eq!(Some(false), min_boolean(&a));
+ assert_eq!(Some(false), max_boolean(&a));
}
#[test]