This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e43e8bf134 Add additional test coverage of multi-value PartitionPruningStats (#19021)
e43e8bf134 is described below
commit e43e8bf13406790184ea0e29a79b52d0af81fa1e
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Dec 1 13:42:02 2025 -0500
Add additional test coverage of multi-value PartitionPruningStats (#19021)
## Which issue does this PR close?
- Follow-on to https://github.com/apache/datafusion/pull/18923
## Rationale for this change
I was confused by some of the tests for `PartitionPruningStatistics`, so this PR
adds comments explaining what they do, plus additional coverage for
multi-value columns (a sketch of the behavior being covered follows below).
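For illustration, here is a minimal, self-contained sketch of the multi-value behavior this coverage targets. It is not part of the commit; the import paths (`datafusion_common::pruning::{PartitionPruningStatistics, PruningStatistics}`, `datafusion_common::{Column, ScalarValue}`) and the standalone `main` wrapper are assumptions, while the calls themselves mirror the test code in the diff below.

```rust
use std::collections::HashSet;
use std::sync::Arc;

use arrow::array::BooleanArray;
use arrow::datatypes::{DataType, Field};
// Assumed import paths; the trait providing `contained` is brought into scope here.
use datafusion_common::pruning::{PartitionPruningStatistics, PruningStatistics};
use datafusion_common::{Column, ScalarValue};

fn main() {
    // Two containers (e.g. partitions), each with one value per partition column:
    //   container 0: a = 1, b = 2
    //   container 1: a = 3, b = 4
    let partition_values = vec![
        vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
        vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
    ];
    let partition_fields = vec![
        Arc::new(Field::new("a", DataType::Int32, false)),
        Arc::new(Field::new("b", DataType::Int32, false)),
    ];
    let stats =
        PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap();

    let column_a = Column::new_unqualified("a");

    // Multi-value set {1, 2}: container 0 (a = 1) holds only values from the set,
    // container 1 (a = 3) does not, so `contained` reports [true, false].
    let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(2i32)]);
    let contained = stats.contained(&column_a, &values).unwrap();
    assert_eq!(contained, BooleanArray::from(vec![true, false]));
}
```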
## What changes are included in this PR?
Add a new test
## Are these changes tested?
Only tests
## Are there any user-facing changes?
No
---
datafusion/common/src/pruning.rs | 47 ++++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 16 deletions(-)
diff --git a/datafusion/common/src/pruning.rs b/datafusion/common/src/pruning.rs
index 8f82eada75..441b3b852d 100644
--- a/datafusion/common/src/pruning.rs
+++ b/datafusion/common/src/pruning.rs
@@ -493,8 +493,14 @@ mod tests {
use arrow::datatypes::{DataType, Field};
use std::sync::Arc;
- #[test]
- fn test_partition_pruning_statistics() {
+ /// Return a `PartitionPruningStatistics` for two columns 'a' and 'b'
+ /// with the following stats:
+ ///
+ /// | a | b |
+ /// | - | - |
+ /// | 1 | 2 |
+ /// | 3 | 4 |
+ fn partition_pruning_statistics_setup() -> PartitionPruningStatistics {
let partition_values = vec![
vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
@@ -503,9 +509,12 @@ mod tests {
Arc::new(Field::new("a", DataType::Int32, false)),
Arc::new(Field::new("b", DataType::Int32, false)),
];
- let partition_stats =
- PartitionPruningStatistics::try_new(partition_values, partition_fields)
- .unwrap();
+ PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap()
+ }
+
+ #[test]
+ fn test_partition_pruning_statistics() {
+ let partition_stats = partition_pruning_statistics_setup();
let column_a = Column::new_unqualified("a");
let column_b = Column::new_unqualified("b");
@@ -562,26 +571,32 @@ mod tests {
#[test]
fn test_partition_pruning_statistics_multiple_positive_values() {
- let partition_values = vec![
- vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
- vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
- ];
- let partition_fields = vec![
- Arc::new(Field::new("a", DataType::Int32, false)),
- Arc::new(Field::new("b", DataType::Int32, false)),
- ];
- let partition_stats =
- PartitionPruningStatistics::try_new(partition_values, partition_fields)
- .unwrap();
+ let partition_stats = partition_pruning_statistics_setup();
let column_a = Column::new_unqualified("a");
+ // The two containers have `a` values 1 and 3, so both contain only values from the set {1, 3}
let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(3i32)]);
let contained_a = partition_stats.contained(&column_a, &values).unwrap();
let expected_contained_a = BooleanArray::from(vec![true, true]);
assert_eq!(contained_a, expected_contained_a);
}
+ #[test]
+ fn test_partition_pruning_statistics_multiple_negative_values() {
+ let partition_stats = partition_pruning_statistics_setup();
+
+ let column_a = Column::new_unqualified("a");
+
+ // The two containers have `a` values 1 and 3:
+ // the first contains ONLY values from the set {1, 2},
+ // but the second does not (3 is not in the set)
+ let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(2i32)]);
+ let contained_a = partition_stats.contained(&column_a, &values).unwrap();
+ let expected_contained_a = BooleanArray::from(vec![true, false]);
+ assert_eq!(contained_a, expected_contained_a);
+ }
+
#[test]
fn test_partition_pruning_statistics_null_in_values() {
let partition_values = vec![