This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 77078566bd [Python] fix oss file_io client_kwargs for pyarrow 16.0 (#6255)
77078566bd is described below
commit 77078566bd9212dba54e348a27aa68a74c93fc49
Author: ChengHui Chen <[email protected]>
AuthorDate: Mon Sep 15 15:59:31 2025 +0800
[Python] fix oss file_io client_kwargs for pyarrow 16.0 (#6255)
---
paimon-python/pypaimon/common/config.py | 1 +
paimon-python/pypaimon/common/file_io.py | 12 ++++++++++--
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/paimon-python/pypaimon/common/config.py b/paimon-python/pypaimon/common/config.py
index 0478c207bb..b3c9a673b3 100644
--- a/paimon-python/pypaimon/common/config.py
+++ b/paimon-python/pypaimon/common/config.py
@@ -21,6 +21,7 @@ class OssOptions:
OSS_SECURITY_TOKEN = "fs.oss.securityToken"
OSS_ENDPOINT = "fs.oss.endpoint"
OSS_REGION = "fs.oss.region"
+ OSS_BUCKET = "fs.oss.bucket"
class S3Options:
diff --git a/paimon-python/pypaimon/common/file_io.py b/paimon-python/pypaimon/common/file_io.py
index 3ec930b24b..809d65338c 100644
--- a/paimon-python/pypaimon/common/file_io.py
+++ b/paimon-python/pypaimon/common/file_io.py
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional
from urllib.parse import splitport, urlparse
import pyarrow
+from packaging.version import parse
from pyarrow._fs import FileSystem
from pypaimon.common.config import OssOptions, S3Options
@@ -57,15 +58,22 @@ class FileIO:
def _initialize_oss_fs(self) -> FileSystem:
from pyarrow.fs import S3FileSystem
- bucket_name = self.properties.get("prefix")
+
client_kwargs = {
- "endpoint_override": bucket_name + "." + self.properties.get(OssOptions.OSS_ENDPOINT),
"access_key": self.properties.get(OssOptions.OSS_ACCESS_KEY_ID),
"secret_key": self.properties.get(OssOptions.OSS_ACCESS_KEY_SECRET),
"session_token": self.properties.get(OssOptions.OSS_SECURITY_TOKEN),
"region": self.properties.get(OssOptions.OSS_REGION),
}
+ # Based on https://github.com/apache/arrow/issues/40506
+ if parse(pyarrow.__version__) >= parse("7.0.0"):
+ client_kwargs['force_virtual_addressing'] = True
+ client_kwargs['endpoint_override'] = self.properties.get(OssOptions.OSS_ENDPOINT)
+ else:
+ client_kwargs['endpoint_override'] = (self.properties.get(OssOptions.OSS_BUCKET) + "." +
+ self.properties.get(OssOptions.OSS_ENDPOINT))
+
return S3FileSystem(**client_kwargs)
def _initialize_s3_fs(self) -> FileSystem: