This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 9661f05741 [python] fix ray dataset not lazy loading issue when parallelism = 1 (#6916)
9661f05741 is described below
commit 9661f05741d39131c66158c89d433e24a3198b72
Author: XiaoHongbo <[email protected]>
AuthorDate: Mon Dec 29 00:30:43 2025 +0800
[python] fix ray dataset not lazy loading issue when parallelism = 1 (#6916)
---
paimon-python/pypaimon/read/table_read.py | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/paimon-python/pypaimon/read/table_read.py b/paimon-python/pypaimon/read/table_read.py
index 6487610cc6..626cbc2be7 100644
--- a/paimon-python/pypaimon/read/table_read.py
+++ b/paimon-python/pypaimon/read/table_read.py
@@ -144,14 +144,9 @@ class TableRead:
if parallelism < 1:
raise ValueError(f"parallelism must be at least 1, got
{parallelism}")
- if parallelism == 1:
- # Single-task read (simple mode)
- return ray.data.from_arrow(self.to_arrow(splits))
- else:
- # Distributed read with specified parallelism
- from pypaimon.read.ray_datasource import PaimonDatasource
- datasource = PaimonDatasource(self, splits)
- return ray.data.read_datasource(datasource,
parallelism=parallelism)
+ from pypaimon.read.ray_datasource import PaimonDatasource
+ datasource = PaimonDatasource(self, splits)
+ return ray.data.read_datasource(datasource, parallelism=parallelism)
def _create_split_read(self, split: Split) -> SplitRead:
if self.table.is_primary_key_table and not split.raw_convertible: