(datafusion) branch main updated: Add additional test coverage of multi-value PartitionPruningStats (#19021)

github-bot Mon, 01 Dec 2025 10:43:35 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new e43e8bf134 Add additional test coverage of multi-value PartitionPruningStats (#19021)
e43e8bf134 is described below

commit e43e8bf13406790184ea0e29a79b52d0af81fa1e
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Dec 1 13:42:02 2025 -0500

    Add additional test coverage of multi-value PartitionPruningStats (#19021)
    
    ## Which issue does this PR close?
    
    - Follow on to https://github.com/apache/datafusion/pull/18923
    
    ## Rationale for this change
    
    I was confused about some of the tests for `PartitionPruningStatistics`,
    so let's add some more comments to explain what they are doing, and add
    additional coverage for multi-value columns.
    
    
    ## What changes are included in this PR?
    
    Add a new test
    
    ## Are these changes tested?
    
    Only tests
    
    ## Are there any user-facing changes?
    
    No
---
 datafusion/common/src/pruning.rs | 47 ++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/datafusion/common/src/pruning.rs b/datafusion/common/src/pruning.rs
index 8f82eada75..441b3b852d 100644
--- a/datafusion/common/src/pruning.rs
+++ b/datafusion/common/src/pruning.rs
@@ -493,8 +493,14 @@ mod tests {
     use arrow::datatypes::{DataType, Field};
     use std::sync::Arc;
 
-    #[test]
-    fn test_partition_pruning_statistics() {
+    /// return a PartitionPruningStatistics for two columns 'a' and 'b'
+    /// and the following stats
+    ///
+    /// | a | b |
+    /// | - | - |
+    /// | 1 | 2 |
+    /// | 3 | 4 |
+    fn partition_pruning_statistics_setup() -> PartitionPruningStatistics {
         let partition_values = vec![
             vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
             vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
@@ -503,9 +509,12 @@ mod tests {
             Arc::new(Field::new("a", DataType::Int32, false)),
             Arc::new(Field::new("b", DataType::Int32, false)),
         ];
-        let partition_stats =
-            PartitionPruningStatistics::try_new(partition_values, partition_fields)
-                .unwrap();
+        PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap()
+    }
+
+    #[test]
+    fn test_partition_pruning_statistics() {
+        let partition_stats = partition_pruning_statistics_setup();
 
         let column_a = Column::new_unqualified("a");
         let column_b = Column::new_unqualified("b");
@@ -562,26 +571,32 @@ mod tests {
 
     #[test]
     fn test_partition_pruning_statistics_multiple_positive_values() {
-        let partition_values = vec![
-            vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
-            vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
-        ];
-        let partition_fields = vec![
-            Arc::new(Field::new("a", DataType::Int32, false)),
-            Arc::new(Field::new("b", DataType::Int32, false)),
-        ];
-        let partition_stats =
-            PartitionPruningStatistics::try_new(partition_values, partition_fields)
-                .unwrap();
+        let partition_stats = partition_pruning_statistics_setup();
 
         let column_a = Column::new_unqualified("a");
 
+        // The two containers have `a` values 1 and 3, so they both only contain values from 1 and 3
         let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(3i32)]);
         let contained_a = partition_stats.contained(&column_a, &values).unwrap();
         let expected_contained_a = BooleanArray::from(vec![true, true]);
         assert_eq!(contained_a, expected_contained_a);
     }
 
+    #[test]
+    fn test_partition_pruning_statistics_multiple_negative_values() {
+        let partition_stats = partition_pruning_statistics_setup();
+
+        let column_a = Column::new_unqualified("a");
+
+        // The two containers have `a` values 1 and 3,
+        // so the first contains ONLY values from 1,2
+        // but the second does not
+        let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(2i32)]);
+        let contained_a = partition_stats.contained(&column_a, &values).unwrap();
+        let expected_contained_a = BooleanArray::from(vec![true, false]);
+        assert_eq!(contained_a, expected_contained_a);
+    }
+
     #[test]
     fn test_partition_pruning_statistics_null_in_values() {
         let partition_values = vec![


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Add additional test coverage of multi-value PartitionPruningStats (#19021)

Reply via email to