This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 0fd82ce70a [python] Fix null handling when filter with or expression (#7234)
0fd82ce70a is described below

commit 0fd82ce70a807909fc5fa4887f1a78a8cf5f2f91
Author: XiaoHongbo <[email protected]>
AuthorDate: Mon Feb 9 10:56:26 2026 +0800

    [python] Fix null handling when filter with or expression (#7234)
---
 paimon-python/pypaimon/common/predicate.py      | 24 ++++++++++++++++++++++++
 paimon-python/pypaimon/tests/predicates_test.py | 19 ++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/paimon-python/pypaimon/common/predicate.py b/paimon-python/pypaimon/common/predicate.py
index 9ae2cdfce3..c48dc6811c 100644
--- a/paimon-python/pypaimon/common/predicate.py
+++ b/paimon-python/pypaimon/common/predicate.py
@@ -178,6 +178,8 @@ class Equal(Tester):
     name = 'equal'
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val == literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -191,6 +193,8 @@ class NotEqual(Tester):
     name = "notEqual"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val != literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -204,6 +208,8 @@ class LessThan(Tester):
     name = "lessThan"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val < literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -217,6 +223,8 @@ class LessOrEqual(Tester):
     name = "lessOrEqual"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val <= literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -230,6 +238,8 @@ class GreaterThan(Tester):
     name = "greaterThan"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val > literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -243,6 +253,8 @@ class GreaterOrEqual(Tester):
     name = "greaterOrEqual"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return val >= literals[0]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -256,6 +268,8 @@ class In(Tester):
     name = "in"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None:
+            return False
         return val in literals
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -269,6 +283,8 @@ class NotIn(Tester):
     name = "notIn"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None:
+            return False
         return val not in literals
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -282,6 +298,8 @@ class Between(Tester):
     name = "between"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals or len(literals) < 2:
+            return False
         return literals[0] <= val <= literals[1]
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -295,6 +313,8 @@ class StartsWith(Tester):
     name = "startsWith"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return isinstance(val, str) and val.startswith(literals[0])
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -310,6 +330,8 @@ class EndsWith(Tester):
     name = "endsWith"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return isinstance(val, str) and val.endswith(literals[0])
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
@@ -323,6 +345,8 @@ class Contains(Tester):
     name = "contains"
 
     def test_by_value(self, val, literals) -> bool:
+        if val is None or not literals:
+            return False
         return isinstance(val, str) and literals[0] in val
 
     def test_by_stats(self, min_v, max_v, literals) -> bool:
diff --git a/paimon-python/pypaimon/tests/predicates_test.py b/paimon-python/pypaimon/tests/predicates_test.py
index f54a18dd93..1a260fbf2b 100644
--- a/paimon-python/pypaimon/tests/predicates_test.py
+++ b/paimon-python/pypaimon/tests/predicates_test.py
@@ -342,7 +342,7 @@ class PredicateTest(unittest.TestCase):
         table = self.catalog.get_table('default.test_pk')
         predicate_builder = table.new_read_builder().new_predicate_builder()
         predicate = predicate_builder.is_not_in('f1', ['abc', 'abbc'])
-        _check_filtered_result(table.new_read_builder().with_filter(predicate), self.df.loc[2:4])
+        _check_filtered_result(table.new_read_builder().with_filter(predicate), self.df.loc[[2, 3]])
 
     def test_between_append(self):
         table = self.catalog.get_table('default.test_append')
@@ -373,6 +373,23 @@ class PredicateTest(unittest.TestCase):
         _check_filtered_result(table.new_read_builder().with_filter(predicate),
                                self.df.loc[[0, 3, 4]])
 
+    def test_filter_with_null_and_or(self):
+        from pypaimon.common.predicate import Predicate
+        from pypaimon.table.row.offset_row import OffsetRow
+
+        p_gt = Predicate(method='greaterThan', index=1, field='score', literals=[10])
+        p_null = Predicate(method='isNull', index=1, field='score', literals=[])
+        predicate = Predicate(method='or', index=None, field=None, literals=[p_gt, p_null])
+
+        record_null = OffsetRow([1, None], 0, 2)  # id=1, score=None
+        self.assertTrue(predicate.test(record_null))
+
+        record_ok = OffsetRow([1, 15], 0, 2)
+        self.assertTrue(predicate.test(record_ok))
+
+        predicate_safe = Predicate(method='or', index=None, field=None, literals=[p_null, p_gt])
+        self.assertTrue(predicate_safe.test(record_null))
+
     def test_pk_reader_with_filter(self):
         pa_schema = pa.schema([
             pa.field('key1', pa.int32(), nullable=False),

Reply via email to