This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8f2a4ad962 [python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)
8f2a4ad962 is described below
commit 8f2a4ad9623175593209eaa61bc8dec25217121b
Author: XiaoHongbo <[email protected]>
AuthorDate: Wed Feb 4 16:53:27 2026 +0800
[python] fix data evolution with_slice out-of-bounds: return empty rows instead of all data (#7207)
---
.../pypaimon/read/scanner/data_evolution_split_generator.py | 5 ++---
paimon-python/pypaimon/tests/data_evolution_test.py | 11 +++++++++++
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py b/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
index 6d39628635..d9329af302 100644
--- a/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
+++ b/paimon-python/pypaimon/read/scanner/data_evolution_split_generator.py
@@ -69,9 +69,8 @@ class DataEvolutionSplitGenerator(AbstractSplitGenerator):
if self.start_pos_of_this_subtask is not None:
# Calculate Row ID range for slice-based filtering
slice_row_ranges = self._calculate_slice_row_ranges(partitioned_files)
- if slice_row_ranges:
- # Filter files by Row ID range
- partitioned_files = self._filter_files_by_row_ranges(partitioned_files, slice_row_ranges)
+ # Filter files by Row ID range
+ partitioned_files = self._filter_files_by_row_ranges(partitioned_files, slice_row_ranges)
elif self.idx_of_this_subtask is not None:
partitioned_files = self._filter_by_shard(
partitioned_files, self.idx_of_this_subtask,
self.number_of_para_subtasks
diff --git a/paimon-python/pypaimon/tests/data_evolution_test.py b/paimon-python/pypaimon/tests/data_evolution_test.py
index f92034f367..a9f0de508b 100644
--- a/paimon-python/pypaimon/tests/data_evolution_test.py
+++ b/paimon-python/pypaimon/tests/data_evolution_test.py
@@ -226,6 +226,17 @@ class DataEvolutionTest(unittest.TestCase):
"with_slice(1, 4) should return id in (2, 1001, 2001). Got ids=%s"
% ids,
)
+ # Out-of-bounds slice: 6 rows total, slice(10, 12) should return 0 rows
+ scan_oob = rb.new_scan().with_slice(10, 12)
+ splits_oob = scan_oob.plan().splits()
+ result_oob = rb.new_read().to_pandas(splits_oob)
+ self.assertEqual(
+ len(result_oob),
+ 0,
+ "with_slice(10, 12) on 6 rows should return 0 rows (out of bounds), got %d"
+ % len(result_oob),
+ )
+
def test_multiple_appends(self):
simple_pa_schema = pa.schema([
('f0', pa.int32()),