This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new eaf18d79f7 [python] do not prune file when null_count stats are
missing during isNull filter (#7250)
eaf18d79f7 is described below
commit eaf18d79f7d3111d90695b1c78de7972d4aacdac
Author: XiaoHongbo <[email protected]>
AuthorDate: Tue Feb 10 19:15:07 2026 +0800
[python] do not prune file when null_count stats are missing during isNull
filter (#7250)
---
paimon-python/pypaimon/common/predicate.py | 2 +-
paimon-python/pypaimon/tests/predicates_test.py | 31 +++++++++++++++++++++++--
2 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/paimon-python/pypaimon/common/predicate.py b/paimon-python/pypaimon/common/predicate.py
index c48dc6811c..e6f69934e9 100644
--- a/paimon-python/pypaimon/common/predicate.py
+++ b/paimon-python/pypaimon/common/predicate.py
@@ -76,7 +76,7 @@ class Predicate:
null_count = stat.null_counts[self.index]
if self.method == 'isNull':
- return null_count is not None and null_count > 0
+ return null_count is None or null_count > 0
if self.method == 'isNotNull':
return null_count is None or row_count is None or null_count < row_count
diff --git a/paimon-python/pypaimon/tests/predicates_test.py b/paimon-python/pypaimon/tests/predicates_test.py
index 1a260fbf2b..bceae05c4c 100644
--- a/paimon-python/pypaimon/tests/predicates_test.py
+++ b/paimon-python/pypaimon/tests/predicates_test.py
@@ -24,7 +24,9 @@ import pandas as pd
import pyarrow as pa
from pypaimon import CatalogFactory, Schema
-from pypaimon.table.row.generic_row import GenericRowDeserializer
+from pypaimon.common.predicate import Predicate
+from pypaimon.manifest.schema.simple_stats import SimpleStats
+from pypaimon.table.row.generic_row import GenericRow, GenericRowDeserializer
def _check_filtered_result(read_builder, expected_df):
@@ -373,8 +375,33 @@ class PredicateTest(unittest.TestCase):
_check_filtered_result(table.new_read_builder().with_filter(predicate),
self.df.loc[[0, 3, 4]])
+ def test_is_null(self):
+ stat_no_count = SimpleStats(
+ min_values=GenericRow([], []),
+ max_values=GenericRow([], []),
+ null_counts=[None],
+ )
+ pred = Predicate(method="isNull", index=0, field="c", literals=None)
+ self.assertTrue(
+ pred.test_by_simple_stats(stat_no_count, 10),
+ "isNull must keep file when null_count is missing",
+ )
+ # null_count == 0 -> can prune
+ stat_zero = SimpleStats(
+ min_values=GenericRow([], []),
+ max_values=GenericRow([], []),
+ null_counts=[0],
+ )
+ self.assertFalse(pred.test_by_simple_stats(stat_zero, 10))
+ # null_count > 0 -> keep
+ stat_positive = SimpleStats(
+ min_values=GenericRow([], []),
+ max_values=GenericRow([], []),
+ null_counts=[3],
+ )
+ self.assertTrue(pred.test_by_simple_stats(stat_positive, 10))
+
def test_filter_with_null_and_or(self):
- from pypaimon.common.predicate import Predicate
from pypaimon.table.row.offset_row import OffsetRow
p_gt = Predicate(method='greaterThan', index=1, field='score',
literals=[10])