This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 0fd82ce70a [python] Fix null handling when filter with or expression (#7234)
0fd82ce70a is described below
commit 0fd82ce70a807909fc5fa4887f1a78a8cf5f2f91
Author: XiaoHongbo <[email protected]>
AuthorDate: Mon Feb 9 10:56:26 2026 +0800
[python] Fix null handling when filter with or expression (#7234)
---
paimon-python/pypaimon/common/predicate.py | 24 ++++++++++++++++++++++++
paimon-python/pypaimon/tests/predicates_test.py | 19 ++++++++++++++++++-
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/paimon-python/pypaimon/common/predicate.py b/paimon-python/pypaimon/common/predicate.py
index 9ae2cdfce3..c48dc6811c 100644
--- a/paimon-python/pypaimon/common/predicate.py
+++ b/paimon-python/pypaimon/common/predicate.py
@@ -178,6 +178,8 @@ class Equal(Tester):
name = 'equal'
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val == literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -191,6 +193,8 @@ class NotEqual(Tester):
name = "notEqual"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val != literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -204,6 +208,8 @@ class LessThan(Tester):
name = "lessThan"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val < literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -217,6 +223,8 @@ class LessOrEqual(Tester):
name = "lessOrEqual"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val <= literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -230,6 +238,8 @@ class GreaterThan(Tester):
name = "greaterThan"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val > literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -243,6 +253,8 @@ class GreaterOrEqual(Tester):
name = "greaterOrEqual"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return val >= literals[0]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -256,6 +268,8 @@ class In(Tester):
name = "in"
def test_by_value(self, val, literals) -> bool:
+ if val is None:
+ return False
return val in literals
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -269,6 +283,8 @@ class NotIn(Tester):
name = "notIn"
def test_by_value(self, val, literals) -> bool:
+ if val is None:
+ return False
return val not in literals
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -282,6 +298,8 @@ class Between(Tester):
name = "between"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals or len(literals) < 2:
+ return False
return literals[0] <= val <= literals[1]
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -295,6 +313,8 @@ class StartsWith(Tester):
name = "startsWith"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return isinstance(val, str) and val.startswith(literals[0])
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -310,6 +330,8 @@ class EndsWith(Tester):
name = "endsWith"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return isinstance(val, str) and val.endswith(literals[0])
def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -323,6 +345,8 @@ class Contains(Tester):
name = "contains"
def test_by_value(self, val, literals) -> bool:
+ if val is None or not literals:
+ return False
return isinstance(val, str) and literals[0] in val
def test_by_stats(self, min_v, max_v, literals) -> bool:
diff --git a/paimon-python/pypaimon/tests/predicates_test.py b/paimon-python/pypaimon/tests/predicates_test.py
index f54a18dd93..1a260fbf2b 100644
--- a/paimon-python/pypaimon/tests/predicates_test.py
+++ b/paimon-python/pypaimon/tests/predicates_test.py
@@ -342,7 +342,7 @@ class PredicateTest(unittest.TestCase):
table = self.catalog.get_table('default.test_pk')
predicate_builder = table.new_read_builder().new_predicate_builder()
predicate = predicate_builder.is_not_in('f1', ['abc', 'abbc'])
- _check_filtered_result(table.new_read_builder().with_filter(predicate), self.df.loc[2:4])
+ _check_filtered_result(table.new_read_builder().with_filter(predicate), self.df.loc[[2, 3]])
def test_between_append(self):
table = self.catalog.get_table('default.test_append')
@@ -373,6 +373,23 @@ class PredicateTest(unittest.TestCase):
_check_filtered_result(table.new_read_builder().with_filter(predicate), self.df.loc[[0, 3, 4]])
+ def test_filter_with_null_and_or(self):
+ from pypaimon.common.predicate import Predicate
+ from pypaimon.table.row.offset_row import OffsetRow
+
+ p_gt = Predicate(method='greaterThan', index=1, field='score', literals=[10])
+ p_null = Predicate(method='isNull', index=1, field='score', literals=[])
+ predicate = Predicate(method='or', index=None, field=None, literals=[p_gt, p_null])
+
+ record_null = OffsetRow([1, None], 0, 2) # id=1, score=None
+ self.assertTrue(predicate.test(record_null))
+
+ record_ok = OffsetRow([1, 15], 0, 2)
+ self.assertTrue(predicate.test(record_ok))
+
+ predicate_safe = Predicate(method='or', index=None, field=None, literals=[p_null, p_gt])
+ self.assertTrue(predicate_safe.test(record_null))
+
def test_pk_reader_with_filter(self):
pa_schema = pa.schema([
pa.field('key1', pa.int32(), nullable=False),