This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0f1133e9f8 fix: partition pruning stats pruning when multiple values
are present (#18923)
0f1133e9f8 is described below
commit 0f1133e9f828309562751ac2f3607ab42e7685c6
Author: Nimalan <[email protected]>
AuthorDate: Mon Dec 1 20:07:55 2025 +0530
fix: partition pruning stats pruning when multiple values are present
(#18923)
- Closes #18922
---------
Signed-off-by: Nimalan <[email protected]>
---
datafusion/common/src/pruning.rs | 75 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 74 insertions(+), 1 deletion(-)
diff --git a/datafusion/common/src/pruning.rs b/datafusion/common/src/pruning.rs
index 4307b4d786..8f82eada75 100644
--- a/datafusion/common/src/pruning.rs
+++ b/datafusion/common/src/pruning.rs
@@ -245,7 +245,7 @@ impl PruningStatistics for PartitionPruningStatistics {
match acc {
None => Some(Some(eq_result)),
Some(acc_array) => {
- arrow::compute::kernels::boolean::and(&acc_array,
&eq_result)
+ arrow::compute::kernels::boolean::or_kleene(&acc_array,
&eq_result)
.map(Some)
.ok()
}
@@ -560,6 +560,79 @@ mod tests {
assert_eq!(partition_stats.num_containers(), 2);
}
+ #[test]
+ fn test_partition_pruning_statistics_multiple_positive_values() {
+ let partition_values = vec![
+ vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
+ vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
+ ];
+ let partition_fields = vec![
+ Arc::new(Field::new("a", DataType::Int32, false)),
+ Arc::new(Field::new("b", DataType::Int32, false)),
+ ];
+ let partition_stats =
+ PartitionPruningStatistics::try_new(partition_values,
partition_fields)
+ .unwrap();
+
+ let column_a = Column::new_unqualified("a");
+
+ let values = HashSet::from([ScalarValue::from(1i32),
ScalarValue::from(3i32)]);
+ let contained_a = partition_stats.contained(&column_a,
&values).unwrap();
+ let expected_contained_a = BooleanArray::from(vec![true, true]);
+ assert_eq!(contained_a, expected_contained_a);
+ }
+
+ #[test]
+ fn test_partition_pruning_statistics_null_in_values() {
+ let partition_values = vec![
+ vec![
+ ScalarValue::from(1i32),
+ ScalarValue::from(2i32),
+ ScalarValue::from(3i32),
+ ],
+ vec![
+ ScalarValue::from(4i32),
+ ScalarValue::from(5i32),
+ ScalarValue::from(6i32),
+ ],
+ ];
+ let partition_fields = vec![
+ Arc::new(Field::new("a", DataType::Int32, false)),
+ Arc::new(Field::new("b", DataType::Int32, false)),
+ Arc::new(Field::new("c", DataType::Int32, false)),
+ ];
+ let partition_stats =
+ PartitionPruningStatistics::try_new(partition_values,
partition_fields)
+ .unwrap();
+
+ let column_a = Column::new_unqualified("a");
+ let column_b = Column::new_unqualified("b");
+ let column_c = Column::new_unqualified("c");
+
+ let values_a = HashSet::from([ScalarValue::from(1i32),
ScalarValue::Int32(None)]);
+ let contained_a = partition_stats.contained(&column_a,
&values_a).unwrap();
+ let mut builder = BooleanArray::builder(2);
+ builder.append_value(true);
+ builder.append_null();
+ let expected_contained_a = builder.finish();
+ assert_eq!(contained_a, expected_contained_a);
+
+ // If NULL is compared first (HashSet order is unspecified) the accumulator starts all-NULL;
+ // combining it with the match on 5 must still yield true for the second container
+ let values_b = HashSet::from([ScalarValue::Int32(None),
ScalarValue::from(5i32)]);
+ let contained_b = partition_stats.contained(&column_b,
&values_b).unwrap();
+ let mut builder = BooleanArray::builder(2);
+ builder.append_null();
+ builder.append_value(true);
+ let expected_contained_b = builder.finish();
+ assert_eq!(contained_b, expected_contained_b);
+
+ // The only value is NULL, so every match is NULL and `contained` should return None
+ let values_c = HashSet::from([ScalarValue::Int32(None)]);
+ let contained_c = partition_stats.contained(&column_c, &values_c);
+ assert!(contained_c.is_none());
+ }
+
#[test]
fn test_partition_pruning_statistics_empty() {
let partition_values = vec![];
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]