(paimon) branch master updated: [python] do not prune file when null_count stats are missing during isNull filter (#7250)

lzljs3620320 Tue, 10 Feb 2026 03:15:22 -0800

This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git



The following commit(s) were added to refs/heads/master by this push:
     new eaf18d79f7 [python] do not prune file when null_count stats are missing during isNull filter (#7250)
eaf18d79f7 is described below

commit eaf18d79f7d3111d90695b1c78de7972d4aacdac
Author: XiaoHongbo <[email protected]>
AuthorDate: Tue Feb 10 19:15:07 2026 +0800

    [python] do not prune file when null_count stats are missing during isNull filter (#7250)
---
 paimon-python/pypaimon/common/predicate.py      |  2 +-
 paimon-python/pypaimon/tests/predicates_test.py | 31 +++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/paimon-python/pypaimon/common/predicate.py b/paimon-python/pypaimon/common/predicate.py
index c48dc6811c..e6f69934e9 100644
--- a/paimon-python/pypaimon/common/predicate.py
+++ b/paimon-python/pypaimon/common/predicate.py
@@ -76,7 +76,7 @@ class Predicate:
         null_count = stat.null_counts[self.index]
 
         if self.method == 'isNull':
-            return null_count is not None and null_count > 0
+            return null_count is None or null_count > 0
         if self.method == 'isNotNull':
             return null_count is None or row_count is None or null_count < row_count
 
diff --git a/paimon-python/pypaimon/tests/predicates_test.py b/paimon-python/pypaimon/tests/predicates_test.py
index 1a260fbf2b..bceae05c4c 100644
--- a/paimon-python/pypaimon/tests/predicates_test.py
+++ b/paimon-python/pypaimon/tests/predicates_test.py
@@ -24,7 +24,9 @@ import pandas as pd
 import pyarrow as pa
 
 from pypaimon import CatalogFactory, Schema
-from pypaimon.table.row.generic_row import GenericRowDeserializer
+from pypaimon.common.predicate import Predicate
+from pypaimon.manifest.schema.simple_stats import SimpleStats
+from pypaimon.table.row.generic_row import GenericRow, GenericRowDeserializer
 
 
 def _check_filtered_result(read_builder, expected_df):
@@ -373,8 +375,33 @@ class PredicateTest(unittest.TestCase):
         _check_filtered_result(table.new_read_builder().with_filter(predicate),
                                self.df.loc[[0, 3, 4]])
 
+    def test_is_null(self):
+        stat_no_count = SimpleStats(
+            min_values=GenericRow([], []),
+            max_values=GenericRow([], []),
+            null_counts=[None],
+        )
+        pred = Predicate(method="isNull", index=0, field="c", literals=None)
+        self.assertTrue(
+            pred.test_by_simple_stats(stat_no_count, 10),
+            "isNull must keep file when null_count is missing",
+        )
+        # null_count == 0 -> can prune
+        stat_zero = SimpleStats(
+            min_values=GenericRow([], []),
+            max_values=GenericRow([], []),
+            null_counts=[0],
+        )
+        self.assertFalse(pred.test_by_simple_stats(stat_zero, 10))
+        # null_count > 0 -> keep
+        stat_positive = SimpleStats(
+            min_values=GenericRow([], []),
+            max_values=GenericRow([], []),
+            null_counts=[3],
+        )
+        self.assertTrue(pred.test_by_simple_stats(stat_positive, 10))
+
     def test_filter_with_null_and_or(self):
-        from pypaimon.common.predicate import Predicate
         from pypaimon.table.row.offset_row import OffsetRow
 
         p_gt = Predicate(method='greaterThan', index=1, field='score', literals=[10])

(paimon) branch master updated: [python] do not prune file when null_count stats are missing during isNull filter (#7250)

Reply via email to