(paimon) branch master updated: [python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)

lzljs3620320 Wed, 04 Feb 2026 00:53:38 -0800

This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git



The following commit(s) were added to refs/heads/master by this push:
     new 8f2a4ad962 [python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)
8f2a4ad962 is described below

commit 8f2a4ad9623175593209eaa61bc8dec25217121b
Author: XiaoHongbo <[email protected]>
AuthorDate: Wed Feb 4 16:53:27 2026 +0800

    [python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)
---
 .../pypaimon/read/scanner/data_evolution_split_generator.py   |  5 ++---
 paimon-python/pypaimon/tests/data_evolution_test.py           | 11 +++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py b/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
index 6d39628635..d9329af302 100644
--- a/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
+++ b/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
@@ -69,9 +69,8 @@ class DataEvolutionSplitGenerator(AbstractSplitGenerator):
         if self.start_pos_of_this_subtask is not None:
             # Calculate Row ID range for slice-based filtering
             slice_row_ranges = self._calculate_slice_row_ranges(partitioned_files)
-            if slice_row_ranges:
-                # Filter files by Row ID range
-                partitioned_files = self._filter_files_by_row_ranges(partitioned_files, slice_row_ranges)
+            # Filter files by Row ID range
+            partitioned_files = self._filter_files_by_row_ranges(partitioned_files, slice_row_ranges)
         elif self.idx_of_this_subtask is not None:
             partitioned_files = self._filter_by_shard(
                 partitioned_files, self.idx_of_this_subtask, self.number_of_para_subtasks
diff --git a/paimon-python/pypaimon/tests/data_evolution_test.py b/paimon-python/pypaimon/tests/data_evolution_test.py
index f92034f367..a9f0de508b 100644
--- a/paimon-python/pypaimon/tests/data_evolution_test.py
+++ b/paimon-python/pypaimon/tests/data_evolution_test.py
@@ -226,6 +226,17 @@ class DataEvolutionTest(unittest.TestCase):
             "with_slice(1, 4) should return id in (2, 1001, 2001). Got ids=%s" % ids,
         )
 
+        # Out-of-bounds slice: 6 rows total, slice(10, 12) should return 0 rows
+        scan_oob = rb.new_scan().with_slice(10, 12)
+        splits_oob = scan_oob.plan().splits()
+        result_oob = rb.new_read().to_pandas(splits_oob)
+        self.assertEqual(
+            len(result_oob),
+            0,
+            "with_slice(10, 12) on 6 rows should return 0 rows (out of bounds), got %d"
+            % len(result_oob),
+        )
+
     def test_multiple_appends(self):
         simple_pa_schema = pa.schema([
             ('f0', pa.int32()),

(paimon) branch master updated: [python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)

Reply via email to