This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 65ff5c56c5 [python] Auto check code style (#5917)
65ff5c56c5 is described below
commit 65ff5c56c5a0e0b733aa2931d59fd2bf90068440
Author: HeavenZH <[email protected]>
AuthorDate: Fri Jul 18 18:06:13 2025 +0800
[python] Auto check code style (#5917)
---
.github/workflows/paimon-python-checks.yml | 2 +-
paimon-python/dev/{cfg.ini => auto-check.sh} | 16 ++-
paimon-python/dev/cfg.ini | 9 ++
paimon-python/dev/lint-python.sh | 11 +-
paimon-python/pypaimon/api/__init__.py | 110 ++++++++++++-------
paimon-python/pypaimon/api/api_response.py | 79 ++++++++------
paimon-python/pypaimon/api/auth.py | 156 +++++++++++++++------------
paimon-python/pypaimon/api/data_types.py | 139 ++++++++++++++----------
paimon-python/pypaimon/api/rest_json.py | 10 +-
paimon-python/pypaimon/api/typedef.py | 11 +-
paimon-python/pypaimon/tests/api_test.py | 1 -
paimon-python/setup.py | 37 +++----
12 files changed, 338 insertions(+), 243 deletions(-)
diff --git a/.github/workflows/paimon-python-checks.yml b/.github/workflows/paimon-python-checks.yml
index e07b93e65c..7426bb4dcd 100644
--- a/.github/workflows/paimon-python-checks.yml
+++ b/.github/workflows/paimon-python-checks.yml
@@ -16,7 +16,7 @@
# limitations under the License.
################################################################################
-name: Check Code Style by Flake8 and Mypy
+name: Check Code Style and Test
on:
push:
diff --git a/paimon-python/dev/cfg.ini b/paimon-python/dev/auto-check.sh
old mode 100644
new mode 100755
similarity index 75%
copy from paimon-python/dev/cfg.ini
copy to paimon-python/dev/auto-check.sh
index 195ed1c6cf..4f6ed56617
--- a/paimon-python/dev/cfg.ini
+++ b/paimon-python/dev/auto-check.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -16,13 +17,10 @@
# limitations under the License.
################################################################################
-[flake8]
-# We follow PEP 8 (https://www.python.org/dev/peps/pep-0008/) with one exception: lines can be
-# up to 100 characters in length, not 79.
-ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504,F821
-max-line-length=120
-exclude=.tox/*,dev/*,build/*,dist/*
-[mypy]
-ignore_missing_imports = True
-strict_optional=False
+# CURRENT_DIR is "paimon-python/"
+SCRIPT_PATH="$(readlink -f "$0")"
+cd "$(dirname "$SCRIPT_PATH")/.." || exit
+
+autopep8 ./ --global-config=./dev/cfg.ini
+autoflake --config=./dev/cfg.ini ./
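
As a side note on what the new script drives: autopep8 exposes the same fixes through a Python API, so the effect of the aggressive/line-length settings in cfg.ini can be previewed on a string. A minimal sketch (assumes autopep8 is installed; the sample source is invented):

    import autopep8

    # Mirrors the cfg.ini settings the script passes via --global-config;
    # fix_code is autopep8's documented programmatic entry point.
    src = "x=1;import os\n"
    fixed = autopep8.fix_code(src, options={"aggressive": 3, "max_line_length": 120})
    print(fixed)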
diff --git a/paimon-python/dev/cfg.ini b/paimon-python/dev/cfg.ini
index 195ed1c6cf..c90c2f61db 100644
--- a/paimon-python/dev/cfg.ini
+++ b/paimon-python/dev/cfg.ini
@@ -22,6 +22,15 @@
ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504,F821
max-line-length=120
exclude=.tox/*,dev/*,build/*,dist/*
+# autopep8 setting
+in-place = true
+aggressive = 3
+recursive = true
+
+[autoflake]
+in-place=true
+recursive=true
+remove-all-unused-imports=true
[mypy]
ignore_missing_imports = True
diff --git a/paimon-python/dev/lint-python.sh b/paimon-python/dev/lint-python.sh
index 02ca321161..9c53310399 100755
--- a/paimon-python/dev/lint-python.sh
+++ b/paimon-python/dev/lint-python.sh
@@ -131,7 +131,6 @@ function check_stage() {
# This part defines all check functions such as tox_check and flake8_check
# We make a rule that all check functions are suffixed with _check. e.g. tox_check, flake8_check
#########################
-
# Flake8 check
function flake8_check() {
local PYTHON_SOURCE="$(find . \( -path ./dev -o -path ./.tox \) -prune -o -type f -name "*.py" -print )"
@@ -166,8 +165,7 @@ function pytest_check() {
print_function "STAGE" "pytest checks"
if [ ! -f "$PYTEST_PATH" ]; then
- echo "For some unknown reasons, the pytest package is not complete,\
- you should exec the script with the parameter: -f"
+ echo "For some unknown reasons, the pytest package is not complete."
fi
# the return value of a pipeline is the status of the last command to exit
@@ -185,7 +183,6 @@ function pytest_check() {
fi
}
###############################################################All Checks Definitions###############################################################
-
# CURRENT_DIR is "paimon-python/"
SCRIPT_PATH="$(readlink -f "$0")"
cd "$(dirname "$SCRIPT_PATH")/.." || exit
@@ -194,10 +191,6 @@ echo ${CURRENT_DIR}
# flake8 path
FLAKE8_PATH="$(which flake8)"
-echo $FLAKE8_PATH
-# mypy path
-MYPY_PATH="$(which mypy)"
-echo $MYPY_PATH
# pytest path
PYTEST_PATH="$(which pytest)"
echo $PYTEST_PATH
@@ -270,6 +263,6 @@ done
# collect checks according to the options
collect_checks
-
+# run checks
check_stage
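
For reference, the two stages this script still drives after the change can be reproduced directly; a rough sketch (paths and flags are assumptions, run from paimon-python/ as the script's cd implies):

    import subprocess

    # flake8 stage, pointed at the shared config the repo keeps in dev/cfg.ini
    subprocess.run(["flake8", "--config", "dev/cfg.ini", "pypaimon"], check=False)
    # pytest stage; the test package path is an assumption for illustration
    subprocess.run(["pytest", "pypaimon/tests"], check=False)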
diff --git a/paimon-python/pypaimon/api/__init__.py b/paimon-python/pypaimon/api/__init__.py
index ef17c2ad7b..9920eef4bb 100644
--- a/paimon-python/pypaimon/api/__init__.py
+++ b/paimon-python/pypaimon/api/__init__.py
@@ -19,8 +19,15 @@ import logging
from typing import Dict, List, Optional, Callable
from urllib.parse import unquote
from .auth import RESTAuthFunction
-from .api_response import PagedList, GetTableResponse, ListDatabasesResponse, ListTablesResponse, \
- GetDatabaseResponse, ConfigResponse, PagedResponse
+from .api_response import (
+ PagedList,
+ GetTableResponse,
+ ListDatabasesResponse,
+ ListTablesResponse,
+ GetDatabaseResponse,
+ ConfigResponse,
+ PagedResponse,
+)
from .api_resquest import CreateDatabaseRequest, AlterDatabaseRequest
from .typedef import Identifier
from .client import HttpClient
@@ -36,7 +43,7 @@ class RESTCatalogOptions:
DLF_ACCESS_KEY_ID = "dlf.access-key-id"
DLF_ACCESS_KEY_SECRET = "dlf.access-key-secret"
DLF_ACCESS_SECURITY_TOKEN = "dlf.security-token"
- PREFIX = 'prefix'
+ PREFIX = "prefix"
class RESTException(Exception):
@@ -59,6 +66,7 @@ class RESTUtil:
@staticmethod
def encode_string(value: str) -> str:
import urllib.parse
+
return urllib.parse.quote(value)
@staticmethod
@@ -67,7 +75,8 @@ class RESTUtil:
return unquote(encoded)
@staticmethod
- def extract_prefix_map(options: Dict[str, str], prefix: str) -> Dict[str, str]:
+ def extract_prefix_map(
+ options: Dict[str, str], prefix: str) -> Dict[str, str]:
result = {}
config = options
for key, value in config.items():
@@ -84,10 +93,11 @@ class ResourcePaths:
TABLE_DETAILS = "table-details"
def __init__(self, base_path: str = ""):
- self.base_path = base_path.rstrip('/')
+ self.base_path = base_path.rstrip("/")
@classmethod
- def for_catalog_properties(cls, options: dict[str, str]) -> 'ResourcePaths':
+ def for_catalog_properties(
+ cls, options: dict[str, str]) -> "ResourcePaths":
prefix = options.get(RESTCatalogOptions.PREFIX, "")
return cls(f"/{cls.V1}/{prefix}" if prefix else f"/{cls.V1}")
@@ -131,24 +141,29 @@ class RESTApi:
warehouse = options.get(RESTCatalogOptions.WAREHOUSE)
query_params = {}
if warehouse:
- query_params[RESTCatalogOptions.WAREHOUSE] = RESTUtil.encode_string(warehouse)
+ query_params[RESTCatalogOptions.WAREHOUSE] = RESTUtil.encode_string(
+ warehouse)
config_response = self.client.get_with_params(
ResourcePaths().config(),
query_params,
ConfigResponse,
- RESTAuthFunction({}, auth_provider)
+ RESTAuthFunction({}, auth_provider),
)
options = config_response.merge(options)
- base_headers.update(RESTUtil.extract_prefix_map(options, self.HEADER_PREFIX))
+ base_headers.update(
+ RESTUtil.extract_prefix_map(options, self.HEADER_PREFIX)
+ )
self.rest_auth_function = RESTAuthFunction(base_headers, auth_provider)
self.options = options
self.resource_paths = ResourcePaths.for_catalog_properties(options)
- def __build_paged_query_params(max_results: Optional[int],
- page_token: Optional[str],
- name_patterns: Dict[str, str]) -> Dict[str, str]:
+ def __build_paged_query_params(
+ max_results: Optional[int],
+ page_token: Optional[str],
+ name_patterns: Dict[str, str],
+ ) -> Dict[str, str]:
query_params = {}
if max_results is not None and max_results > 0:
query_params[RESTApi.MAX_RESULTS] = str(max_results)
@@ -156,13 +171,15 @@ class RESTApi:
if page_token is not None and page_token.strip():
query_params[RESTApi.PAGE_TOKEN] = page_token
- for (key, value) in name_patterns:
+ for key, value in name_patterns:
if key and value and key.strip() and value.strip():
query_params[key] = value
return query_params
- def __list_data_from_page_api(self, page_api: Callable[[Dict[str, str]], PagedResponse[T]]) -> List[T]:
+ def __list_data_from_page_api(
+ self, page_api: Callable[[Dict[str, str]], PagedResponse[T]]
+ ) -> List[T]:
results = []
query_params = {}
page_token = None
@@ -194,24 +211,26 @@ class RESTApi:
self.resource_paths.databases(),
query_params,
ListDatabasesResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
)
- def list_databases_paged(self,
- max_results: Optional[int] = None,
- page_token: Optional[str] = None,
- database_name_pattern: Optional[str] = None) -> PagedList[str]:
+ def list_databases_paged(
+ self,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
+ database_name_pattern: Optional[str] = None,
+ ) -> PagedList[str]:
response = self.client.get_with_params(
self.resource_paths.databases(),
self.__build_paged_query_params(
max_results,
page_token,
- {self.DATABASE_NAME_PATTERN: database_name_pattern}
+ {self.DATABASE_NAME_PATTERN: database_name_pattern},
),
ListDatabasesResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
databases = response.data() or []
@@ -219,20 +238,28 @@ class RESTApi:
def create_database(self, name: str, options: Dict[str, str]) -> None:
request = CreateDatabaseRequest(name, options)
- self.client.post(self.resource_paths.databases(), request, self.rest_auth_function)
+ self.client.post(
+ self.resource_paths.databases(), request, self.rest_auth_function
+ )
def get_database(self, name: str) -> GetDatabaseResponse:
return self.client.get(
self.resource_paths.database(name),
GetDatabaseResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
def drop_database(self, name: str) -> None:
- self.client.delete(self.resource_paths.database(name), self.rest_auth_function)
-
- def alter_database(self, name: str, removals: Optional[List[str]] = None,
- updates: Optional[Dict[str, str]] = None):
+ self.client.delete(
+ self.resource_paths.database(name),
+ self.rest_auth_function)
+
+ def alter_database(
+ self,
+ name: str,
+ removals: Optional[List[str]] = None,
+ updates: Optional[Dict[str, str]] = None,
+ ):
if not name or not name.strip():
raise ValueError("Database name cannot be empty")
removals = removals or []
@@ -242,8 +269,7 @@ class RESTApi:
return self.client.post(
self.resource_paths.database(name),
request,
- self.rest_auth_function
- )
+ self.rest_auth_function)
def list_tables(self, database_name: str) -> List[str]:
return self.__list_data_from_page_api(
@@ -251,24 +277,24 @@ class RESTApi:
self.resource_paths.tables(database_name),
query_params,
ListTablesResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
)
- def list_tables_paged(self,
- database_name: str,
- max_results: Optional[int] = None,
- page_token: Optional[str] = None,
- table_name_pattern: Optional[str] = None) -> PagedList[str]:
+ def list_tables_paged(
+ self,
+ database_name: str,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
+ table_name_pattern: Optional[str] = None,
+ ) -> PagedList[str]:
response = self.client.get_with_params(
self.resource_paths.tables(database_name),
self.__build_paged_query_params(
- max_results,
- page_token,
- {self.TABLE_NAME_PATTERN: table_name_pattern}
+ max_results, page_token, {self.TABLE_NAME_PATTERN: table_name_pattern}
),
ListTablesResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
tables = response.data() or []
@@ -276,7 +302,9 @@ class RESTApi:
def get_table(self, identifier: Identifier) -> GetTableResponse:
return self.client.get(
- self.resource_paths.table(identifier.database_name, identifier.object_name),
+ self.resource_paths.table(
+ identifier.database_name,
+ identifier.object_name),
GetTableResponse,
- self.rest_auth_function
+ self.rest_auth_function,
)
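
All the *_paged methods above funnel their arguments through __build_paged_query_params; a self-contained sketch of that logic (the constant values are assumptions, since the hunk only shows the RESTApi.MAX_RESULTS/PAGE_TOKEN names, and this sketch iterates the dict with .items()):

    from typing import Dict, Optional

    MAX_RESULTS = "maxResults"  # assumed value of RESTApi.MAX_RESULTS
    PAGE_TOKEN = "pageToken"    # assumed value of RESTApi.PAGE_TOKEN

    def build_paged_query_params(
        max_results: Optional[int],
        page_token: Optional[str],
        name_patterns: Dict[str, str],
    ) -> Dict[str, str]:
        # Only positive limits and non-blank tokens/patterns become query params.
        query_params: Dict[str, str] = {}
        if max_results is not None and max_results > 0:
            query_params[MAX_RESULTS] = str(max_results)
        if page_token is not None and page_token.strip():
            query_params[PAGE_TOKEN] = page_token
        for key, value in name_patterns.items():
            if key and value and key.strip() and value.strip():
                query_params[key] = value
        return query_params

    print(build_paged_query_params(10, None, {"databaseNamePattern": "db%"}))
    # {'maxResults': '10', 'databaseNamePattern': 'db%'}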
diff --git a/paimon-python/pypaimon/api/api_response.py b/paimon-python/pypaimon/api/api_response.py
index e93af5ae05..61169f7cd3 100644
--- a/paimon-python/pypaimon/api/api_response.py
+++ b/paimon-python/pypaimon/api/api_response.py
@@ -43,11 +43,13 @@ class ErrorResponse(RESTResponse):
message: Optional[str] = json_field("message", default=None)
code: Optional[int] = json_field("code", default=None)
- def __init__(self,
- resource_type: Optional[str] = None,
- resource_name: Optional[str] = None,
- message: Optional[str] = None,
- code: Optional[int] = None):
+ def __init__(
+ self,
+ resource_type: Optional[str] = None,
+ resource_name: Optional[str] = None,
+ message: Optional[str] = None,
+ code: Optional[int] = None,
+ ):
self.resource_type = resource_type
self.resource_name = resource_name
self.message = message
@@ -115,7 +117,8 @@ class ListTablesResponse(PagedResponse[str]):
FIELD_TABLES = "tables"
tables: Optional[List[str]] = json_field(FIELD_TABLES)
- next_page_token: Optional[str] = json_field(PagedResponse.FIELD_NEXT_PAGE_TOKEN)
+ next_page_token: Optional[str] = json_field(
+ PagedResponse.FIELD_NEXT_PAGE_TOKEN)
def data(self) -> Optional[List[str]]:
return self.tables
@@ -133,8 +136,10 @@ class Schema:
FIELD_COMMENT = "comment"
fields: List[DataField] = json_field(FIELD_FIELDS, default_factory=list)
- partition_keys: List[str] = json_field(FIELD_PARTITION_KEYS, default_factory=list)
- primary_keys: List[str] = json_field(FIELD_PRIMARY_KEYS, default_factory=list)
+ partition_keys: List[str] = json_field(
+ FIELD_PARTITION_KEYS, default_factory=list)
+ primary_keys: List[str] = json_field(
+ FIELD_PRIMARY_KEYS, default_factory=list)
options: Dict[str, str] = json_field(FIELD_OPTIONS, default_factory=dict)
comment: Optional[str] = json_field(FIELD_COMMENT, default=None)
@@ -142,6 +147,7 @@ class Schema:
@dataclass
class TableSchema:
"""Table schema with ID"""
+
id: int
fields: List[DataField]
highest_field_id: int
@@ -156,13 +162,14 @@ class TableSchema:
partition_keys=self.partition_keys,
primary_keys=self.primary_keys,
options=self.options,
- comment=self.comment
+ comment=self.comment,
)
@dataclass
class TableMetadata:
"""Table metadata"""
+
schema: TableSchema
is_external: bool
uuid: str
@@ -171,6 +178,7 @@ class TableMetadata:
@dataclass
class RESTToken:
"""REST authentication token"""
+
token: Dict[str, str]
expire_at_millis: int
@@ -194,18 +202,20 @@ class GetTableResponse(AuditRESTResponse):
schema_id: Optional[int] = json_field(FIELD_SCHEMA_ID, default=None)
schema: Optional[Schema] = json_field(FIELD_SCHEMA, default=None)
- def __init__(self,
- id: str,
- name: str,
- path: str,
- is_external: bool,
- schema_id: int,
- schema: Schema,
- owner: Optional[str] = None,
- created_at: Optional[int] = None,
- created_by: Optional[str] = None,
- updated_at: Optional[int] = None,
- updated_by: Optional[str] = None):
+ def __init__(
+ self,
+ id: str,
+ name: str,
+ path: str,
+ is_external: bool,
+ schema_id: int,
+ schema: Schema,
+ owner: Optional[str] = None,
+ created_at: Optional[int] = None,
+ created_by: Optional[str] = None,
+ updated_at: Optional[int] = None,
+ updated_by: Optional[str] = None,
+ ):
super().__init__(owner, created_at, created_by, updated_at, updated_by)
self.id = id
self.name = name
@@ -225,18 +235,21 @@ class GetDatabaseResponse(AuditRESTResponse):
id: Optional[str] = json_field(FIELD_ID, default=None)
name: Optional[str] = json_field(FIELD_NAME, default=None)
location: Optional[str] = json_field(FIELD_LOCATION, default=None)
- options: Optional[Dict[str, str]] = json_field(FIELD_OPTIONS, default_factory=dict)
-
- def __init__(self,
- id: Optional[str] = None,
- name: Optional[str] = None,
- location: Optional[str] = None,
- options: Optional[Dict[str, str]] = None,
- owner: Optional[str] = None,
- created_at: Optional[int] = None,
- created_by: Optional[str] = None,
- updated_at: Optional[int] = None,
- updated_by: Optional[str] = None):
+ options: Optional[Dict[str, str]] = json_field(
+ FIELD_OPTIONS, default_factory=dict)
+
+ def __init__(
+ self,
+ id: Optional[str] = None,
+ name: Optional[str] = None,
+ location: Optional[str] = None,
+ options: Optional[Dict[str, str]] = None,
+ owner: Optional[str] = None,
+ created_at: Optional[int] = None,
+ created_by: Optional[str] = None,
+ updated_at: Optional[int] = None,
+ updated_by: Optional[str] = None,
+ ):
super().__init__(owner, created_at, created_by, updated_at, updated_by)
self.id = id
self.name = name
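
For illustration, the keyword-style constructor shown above can be exercised directly (import path taken from this diff; the field values are invented):

    from pypaimon.api.api_response import GetDatabaseResponse

    resp = GetDatabaseResponse(
        id="db-1",  # invented sample values
        name="default",
        location="oss://bucket/warehouse/default.db",
        options={"owner": "paimon"},
    )
    print(resp.name, resp.options)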
diff --git a/paimon-python/pypaimon/api/auth.py b/paimon-python/pypaimon/api/auth.py
index ddeca5e5e6..83e0e44c91 100644
--- a/paimon-python/pypaimon/api/auth.py
+++ b/paimon-python/pypaimon/api/auth.py
@@ -43,26 +43,37 @@ class DLFToken:
def __init__(self, options: Dict[str, str]):
from . import RESTCatalogOptions
+
self.access_key_id = options.get(RESTCatalogOptions.DLF_ACCESS_KEY_ID)
- self.access_key_secret = options.get(RESTCatalogOptions.DLF_ACCESS_KEY_SECRET)
- self.security_token = options.get(RESTCatalogOptions.DLF_ACCESS_SECURITY_TOKEN)
+ self.access_key_secret = options.get(
+ RESTCatalogOptions.DLF_ACCESS_KEY_SECRET)
+ self.security_token = options.get(
+ RESTCatalogOptions.DLF_ACCESS_SECURITY_TOKEN)
class AuthProvider(ABC):
@abstractmethod
- def merge_auth_header(self, base_header: Dict[str, str], parammeter: RESTAuthParameter) -> Dict[str, str]:
+ def merge_auth_header(
+ self, base_header: Dict[str, str], parammeter: RESTAuthParameter
+ ) -> Dict[str, str]:
"""Merge authorization header into header."""
class RESTAuthFunction:
- def __init__(self, init_header: Dict[str, str], auth_provider: AuthProvider):
+ def __init__(self,
+ init_header: Dict[str,
+ str],
+ auth_provider: AuthProvider):
self.init_header = init_header.copy() if init_header else {}
self.auth_provider = auth_provider
- def __call__(self, rest_auth_parameter: RESTAuthParameter) -> Dict[str, str]:
- return self.auth_provider.merge_auth_header(self.init_header, rest_auth_parameter)
+ def __call__(
+ self, rest_auth_parameter: RESTAuthParameter) -> Dict[str, str]:
+ return self.auth_provider.merge_auth_header(
+ self.init_header, rest_auth_parameter
+ )
def apply(self, rest_auth_parameter: RESTAuthParameter) -> Dict[str, str]:
return self.__call__(rest_auth_parameter)
@@ -81,28 +92,24 @@ class DLFAuthProvider(AuthProvider):
AUTH_DATE_TIME_FORMAT = "%Y%m%dT%H%M%SZ"
MEDIA_TYPE = "application/json"
- def __init__(self,
- token: DLFToken,
- region: str):
+ def __init__(self, token: DLFToken, region: str):
self.logger = logging.getLogger(self.__class__.__name__)
self.token = token
self.region = region
self._lock = threading.Lock()
- def merge_auth_header(self,
- base_header: Dict[str, str],
- rest_auth_parameter: RESTAuthParameter) -> Dict[str, str]:
+ def merge_auth_header(
+ self, base_header: Dict[str, str], rest_auth_parameter: RESTAuthParameter
+ ) -> Dict[str, str]:
try:
date_time = base_header.get(
- self.DLF_DATE_HEADER_KEY.lower(),
- datetime.now(timezone.utc).strftime(self.AUTH_DATE_TIME_FORMAT)
- )
+ self.DLF_DATE_HEADER_KEY.lower(), datetime.now(
+ timezone.utc).strftime(
+ self.AUTH_DATE_TIME_FORMAT), )
date = date_time[:8]
sign_headers = self.generate_sign_headers(
- rest_auth_parameter.data,
- date_time,
- self.token.security_token
+ rest_auth_parameter.data, date_time, self.token.security_token
)
authorization = DLFAuthSignature.get_authorization(
@@ -111,7 +118,7 @@ class DLFAuthProvider(AuthProvider):
region=self.region,
headers=sign_headers,
date_time=date_time,
- date=date
+ date=date,
)
headers_with_auth = base_header.copy()
@@ -124,19 +131,20 @@ class DLFAuthProvider(AuthProvider):
raise RuntimeError(f"Failed to merge auth header: {e}")
@classmethod
- def generate_sign_headers(cls,
- data: Optional[str],
- date_time: str,
- security_token: Optional[str]) -> Dict[str, str]:
+ def generate_sign_headers(
+ cls, data: Optional[str], date_time: str, security_token: Optional[str]
+ ) -> Dict[str, str]:
sign_headers = {}
sign_headers[cls.DLF_DATE_HEADER_KEY] = date_time
sign_headers[cls.DLF_CONTENT_SHA56_HEADER_KEY] = cls.DLF_CONTENT_SHA56_VALUE
- sign_headers[cls.DLF_AUTH_VERSION_HEADER_KEY] = "v1" #
DLFAuthSignature.VERSION
+ # DLFAuthSignature.VERSION
+ sign_headers[cls.DLF_AUTH_VERSION_HEADER_KEY] = "v1"
if data is not None and data != "":
sign_headers[cls.DLF_CONTENT_TYPE_KEY] = cls.MEDIA_TYPE
- sign_headers[cls.DLF_CONTENT_MD5_HEADER_KEY] = DLFAuthSignature.md5(data)
+ sign_headers[cls.DLF_CONTENT_MD5_HEADER_KEY] = DLFAuthSignature.md5(
+ data)
if security_token is not None:
sign_headers[cls.DLF_SECURITY_TOKEN_HEADER_KEY] = security_token
@@ -157,31 +165,40 @@ class DLFAuthSignature:
DLFAuthProvider.DLF_CONTENT_SHA56_HEADER_KEY.lower(),
DLFAuthProvider.DLF_DATE_HEADER_KEY.lower(),
DLFAuthProvider.DLF_AUTH_VERSION_HEADER_KEY.lower(),
- DLFAuthProvider.DLF_SECURITY_TOKEN_HEADER_KEY.lower()
+ DLFAuthProvider.DLF_SECURITY_TOKEN_HEADER_KEY.lower(),
]
@classmethod
- def get_authorization(cls,
- rest_auth_parameter: RESTAuthParameter,
- dlf_token: DLFToken,
- region: str,
- headers: Dict[str, str],
- date_time: str,
- date: str) -> str:
+ def get_authorization(
+ cls,
+ rest_auth_parameter: RESTAuthParameter,
+ dlf_token: DLFToken,
+ region: str,
+ headers: Dict[str, str],
+ date_time: str,
+ date: str,
+ ) -> str:
try:
- canonical_request = cls.get_canonical_request(rest_auth_parameter, headers)
-
- string_to_sign = cls.NEW_LINE.join([
- cls.SIGNATURE_ALGORITHM,
- date_time,
- f"{date}/{region}/{cls.PRODUCT}/{cls.REQUEST_TYPE}",
- cls._sha256_hex(canonical_request)
- ])
+ canonical_request = cls.get_canonical_request(
+ rest_auth_parameter, headers)
+
+ string_to_sign = cls.NEW_LINE.join(
+ [
+ cls.SIGNATURE_ALGORITHM,
+ date_time,
+ f"{date}/{region}/{cls.PRODUCT}/{cls.REQUEST_TYPE}",
+ cls._sha256_hex(canonical_request),
+ ]
+ )
- date_key = cls._hmac_sha256(f"aliyun_v4{dlf_token.access_key_secret}".encode('utf-8'), date)
+ date_key = cls._hmac_sha256(
+ f"aliyun_v4{dlf_token.access_key_secret}".encode("utf-8"), date
+ )
date_region_key = cls._hmac_sha256(date_key, region)
- date_region_service_key = cls._hmac_sha256(date_region_key, cls.PRODUCT)
- signing_key = cls._hmac_sha256(date_region_service_key, cls.REQUEST_TYPE)
+ date_region_service_key = cls._hmac_sha256(
+ date_region_key, cls.PRODUCT)
+ signing_key = cls._hmac_sha256(
+ date_region_service_key, cls.REQUEST_TYPE)
result = cls._hmac_sha256(signing_key, string_to_sign)
signature = cls._hex_encode(result)
@@ -198,46 +215,49 @@ class DLFAuthSignature:
@classmethod
def md5(cls, raw: str) -> str:
try:
- md5_hash = hashlib.md5(raw.encode('utf-8')).digest()
- return base64.b64encode(md5_hash).decode('utf-8')
+ md5_hash = hashlib.md5(raw.encode("utf-8")).digest()
+ return base64.b64encode(md5_hash).decode("utf-8")
except Exception as e:
raise RuntimeError(f"Failed to calculate MD5: {e}")
@classmethod
def _hmac_sha256(cls, key: bytes, data: str) -> bytes:
try:
- return hmac.new(key, data.encode('utf-8'), hashlib.sha256).digest()
+ return hmac.new(key, data.encode("utf-8"), hashlib.sha256).digest()
except Exception as e:
raise RuntimeError(f"Failed to calculate HMAC-SHA256: {e}")
@classmethod
- def get_canonical_request(cls,
- rest_auth_parameter: RESTAuthParameter,
- headers: Dict[str, str]) -> str:
- canonical_request = cls.NEW_LINE.join([
- rest_auth_parameter.method,
- rest_auth_parameter.path
- ])
-
- canonical_query_string = cls._build_canonical_query_string(rest_auth_parameter.parameters)
- canonical_request = cls.NEW_LINE.join([canonical_request, canonical_query_string])
-
- sorted_signed_headers_map = cls._build_sorted_signed_headers_map(headers)
+ def get_canonical_request(
+ cls, rest_auth_parameter: RESTAuthParameter, headers: Dict[str, str]
+ ) -> str:
+ canonical_request = cls.NEW_LINE.join(
+ [rest_auth_parameter.method, rest_auth_parameter.path]
+ )
+
+ canonical_query_string = cls._build_canonical_query_string(
+ rest_auth_parameter.parameters
+ )
+ canonical_request = cls.NEW_LINE.join(
+ [canonical_request, canonical_query_string]
+ )
+
+ sorted_signed_headers_map = cls._build_sorted_signed_headers_map(
+ headers)
for key, value in sorted_signed_headers_map.items():
- canonical_request = cls.NEW_LINE.join([
- canonical_request,
- f"{key}:{value}"
- ])
+ canonical_request = cls.NEW_LINE.join(
+ [canonical_request, f"{key}:{value}"])
content_sha256 = headers.get(
DLFAuthProvider.DLF_CONTENT_SHA56_HEADER_KEY,
- DLFAuthProvider.DLF_CONTENT_SHA56_VALUE
+ DLFAuthProvider.DLF_CONTENT_SHA56_VALUE,
)
return cls.NEW_LINE.join([canonical_request, content_sha256])
@classmethod
- def _build_canonical_query_string(cls, parameters: Optional[Dict[str, str]]) -> str:
+ def _build_canonical_query_string(
+ cls, parameters: Optional[Dict[str, str]]) -> str:
if not parameters:
return ""
@@ -255,7 +275,9 @@ class DLFAuthSignature:
return "&".join(query_parts)
@classmethod
- def _build_sorted_signed_headers_map(cls, headers: Optional[Dict[str, str]]) -> OrderedDict:
+ def _build_sorted_signed_headers_map(
+ cls, headers: Optional[Dict[str, str]]
+ ) -> OrderedDict:
sorted_headers = OrderedDict()
if headers:
@@ -269,7 +291,7 @@ class DLFAuthSignature:
@classmethod
def _sha256_hex(cls, raw: str) -> str:
try:
- sha256_hash = hashlib.sha256(raw.encode('utf-8')).digest()
+ sha256_hash = hashlib.sha256(raw.encode("utf-8")).digest()
return cls._hex_encode(sha256_hash)
except Exception as e:
raise RuntimeError(f"Failed to calculate SHA256: {e}")
diff --git a/paimon-python/pypaimon/api/data_types.py b/paimon-python/pypaimon/api/data_types.py
index 9a8080fdab..6e6ea97456 100644
--- a/paimon-python/pypaimon/api/data_types.py
+++ b/paimon-python/pypaimon/api/data_types.py
@@ -71,13 +71,10 @@ class AtomicType(DataType):
self.type = type
def to_dict(self) -> Dict[str, Any]:
- return {
- 'type': self.type,
- 'nullable': self.nullable
- }
+ return {"type": self.type, "nullable": self.nullable}
def __str__(self) -> str:
- null_suffix = '' if self.nullable else ' NOT NULL'
+ null_suffix = "" if self.nullable else " NOT NULL"
return f"{self.type}{null_suffix}"
@@ -91,13 +88,13 @@ class ArrayType(DataType):
def to_dict(self) -> Dict[str, Any]:
return {
- 'type': f"ARRAY{'<' + str(self.element) + '>' if self.element else
''}",
- 'element': self.element.to_dict() if self.element else None,
- 'nullable': self.nullable
+ "type": f"ARRAY{'<' + str(self.element) + '>' if self.element else
''}",
+ "element": self.element.to_dict() if self.element else None,
+ "nullable": self.nullable,
}
def __str__(self) -> str:
- null_suffix = '' if self.nullable else ' NOT NULL'
+ null_suffix = "" if self.nullable else " NOT NULL"
return f"ARRAY<{self.element}>{null_suffix}"
@@ -111,13 +108,13 @@ class MultisetType(DataType):
def to_dict(self) -> Dict[str, Any]:
return {
- 'type': f"MULTISET{'<' + str(self.element) + '>' if self.element
else ''}",
- 'element': self.element.to_dict() if self.element else None,
- 'nullable': self.nullable
+ "type": f"MULTISET{'<' + str(self.element) + '>' if self.element
else ''}",
+ "element": self.element.to_dict() if self.element else None,
+ "nullable": self.nullable,
}
def __str__(self) -> str:
- null_suffix = '' if self.nullable else ' NOT NULL'
+ null_suffix = "" if self.nullable else " NOT NULL"
return f"MULTISET<{self.element}>{null_suffix}"
@@ -126,21 +123,25 @@ class MapType(DataType):
key: DataType
value: DataType
- def __init__(self, nullable: bool, key_type: DataType, value_type: DataType):
+ def __init__(
+ self,
+ nullable: bool,
+ key_type: DataType,
+ value_type: DataType):
super().__init__(nullable)
self.key = key_type
self.value = value_type
def to_dict(self) -> Dict[str, Any]:
return {
- 'type': f"MAP<{self.key}, {self.value}>",
- 'key': self.key.to_dict() if self.key else None,
- 'value': self.value.to_dict() if self.value else None,
- 'nullable': self.nullable
+ "type": f"MAP<{self.key}, {self.value}>",
+ "key": self.key.to_dict() if self.key else None,
+ "value": self.value.to_dict() if self.value else None,
+ "nullable": self.nullable,
}
def __str__(self) -> str:
- null_suffix = '' if self.nullable else ' NOT NULL'
+ null_suffix = "" if self.nullable else " NOT NULL"
return f"MAP<{self.key}, {self.value}>{null_suffix}"
@@ -158,8 +159,14 @@ class DataField:
description: Optional[str] = None
default_value: Optional[str] = None
- def __init__(self, id: int, name: str, type: DataType, description: Optional[str] = None,
- default_value: Optional[str] = None):
+ def __init__(
+ self,
+ id: int,
+ name: str,
+ type: DataType,
+ description: Optional[str] = None,
+ default_value: Optional[str] = None,
+ ):
self.id = id
self.name = name
self.type = type
@@ -167,14 +174,14 @@ class DataField:
self.default_value = default_value
@classmethod
- def from_dict(cls, data: Dict[str, Any]) -> 'DataField':
+ def from_dict(cls, data: Dict[str, Any]) -> "DataField":
return DataTypeParser.parse_data_field(data)
def to_dict(self) -> Dict[str, Any]:
result = {
self.FIELD_ID: self.id,
self.FIELD_NAME: self.name,
- self.FIELD_TYPE: self.type.to_dict() if self.type else None
+ self.FIELD_TYPE: self.type.to_dict() if self.type else None,
}
if self.description is not None:
@@ -196,14 +203,14 @@ class RowType(DataType):
def to_dict(self) -> Dict[str, Any]:
return {
- 'type': 'ROW' + ('' if self.nullable else ' NOT NULL'),
- 'fields': [field.to_dict() for field in self.fields],
- 'nullable': self.nullable
+ "type": "ROW" + ("" if self.nullable else " NOT NULL"),
+ "fields": [field.to_dict() for field in self.fields],
+ "nullable": self.nullable,
}
def __str__(self) -> str:
field_strs = [f"{field.name}: {field.type}" for field in self.fields]
- null_suffix = '' if self.nullable else ' NOT NULL'
+ null_suffix = "" if self.nullable else " NOT NULL"
return f"ROW<{', '.join(field_strs)}>{null_suffix}"
@@ -236,9 +243,9 @@ class DataTypeParser:
@staticmethod
def parse_nullability(type_string: str) -> bool:
- if ('NOT NULL' in type_string):
+ if "NOT NULL" in type_string:
return False
- elif ('NULL' in type_string):
+ elif "NULL" in type_string:
return True
return True
@@ -246,51 +253,63 @@ class DataTypeParser:
def parse_atomic_type_sql_string(type_string: str) -> DataType:
type_upper = type_string.upper().strip()
- if '(' in type_upper:
- base_type = type_upper.split('(')[0]
+ if "(" in type_upper:
+ base_type = type_upper.split("(")[0]
else:
base_type = type_upper
try:
Keyword(base_type)
- return AtomicType(type_string, DataTypeParser.parse_nullability(type_string))
+ return AtomicType(
+ type_string, DataTypeParser.parse_nullability(type_string)
+ )
except ValueError:
raise Exception(f"Unknown type: {base_type}")
@staticmethod
- def parse_data_type(json_data: Union[Dict[str, Any], str], field_id: Optional[AtomicInteger] = None) -> DataType:
+ def parse_data_type(
+ json_data: Union[Dict[str, Any], str], field_id: Optional[AtomicInteger] = None
+ ) -> DataType:
if isinstance(json_data, str):
return DataTypeParser.parse_atomic_type_sql_string(json_data)
if isinstance(json_data, dict):
- if 'type' not in json_data:
+ if "type" not in json_data:
raise ValueError(f"Missing 'type' field in JSON: {json_data}")
- type_string = json_data['type']
+ type_string = json_data["type"]
if type_string.startswith("ARRAY"):
- element = DataTypeParser.parse_data_type(json_data.get('element'), field_id)
- nullable = 'NOT NULL' not in type_string
+ element = DataTypeParser.parse_data_type(
+ json_data.get("element"), field_id
+ )
+ nullable = "NOT NULL" not in type_string
return ArrayType(nullable, element)
elif type_string.startswith("MULTISET"):
- element = DataTypeParser.parse_data_type(json_data.get('element'), field_id)
- nullable = 'NOT NULL' not in type_string
+ element = DataTypeParser.parse_data_type(
+ json_data.get("element"), field_id
+ )
+ nullable = "NOT NULL" not in type_string
return MultisetType(nullable, element)
elif type_string.startswith("MAP"):
- key = DataTypeParser.parse_data_type(json_data.get('key'), field_id)
- value = DataTypeParser.parse_data_type(json_data.get('value'), field_id)
- nullable = 'NOT NULL' not in type_string
+ key = DataTypeParser.parse_data_type(
+ json_data.get("key"), field_id)
+ value = DataTypeParser.parse_data_type(
+ json_data.get("value"), field_id)
+ nullable = "NOT NULL" not in type_string
return MapType(nullable, key, value)
elif type_string.startswith("ROW"):
- field_array = json_data.get('fields', [])
+ field_array = json_data.get("fields", [])
fields = []
for field_json in field_array:
- fields.append(DataTypeParser.parse_data_field(field_json, field_id))
- nullable = 'NOT NULL' not in type_string
+ fields.append(
+ DataTypeParser.parse_data_field(
+ field_json, field_id))
+ nullable = "NOT NULL" not in type_string
return RowType(nullable, fields)
else:
@@ -299,15 +318,21 @@ class DataTypeParser:
raise ValueError(f"Cannot parse data type: {json_data}")
@staticmethod
- def parse_data_field(json_data: Dict[str, Any], field_id: Optional[AtomicInteger] = None) -> DataField:
-
- if DataField.FIELD_ID in json_data and json_data[DataField.FIELD_ID] is not None:
+ def parse_data_field(
+ json_data: Dict[str, Any], field_id: Optional[AtomicInteger] = None
+ ) -> DataField:
+
+ if (
+ DataField.FIELD_ID in json_data
+ and json_data[DataField.FIELD_ID] is not None
+ ):
if field_id is not None and field_id.get() != -1:
raise ValueError("Partial field id is not allowed.")
- field_id_value = int(json_data['id'])
+ field_id_value = int(json_data["id"])
else:
if field_id is None:
- raise ValueError("Field ID is required when not provided in
JSON")
+ raise ValueError(
+ "Field ID is required when not provided in JSON")
field_id_value = field_id.increment_and_get()
if DataField.FIELD_NAME not in json_data:
@@ -316,7 +341,9 @@ class DataTypeParser:
if DataField.FIELD_TYPE not in json_data:
raise ValueError("Missing 'type' field in JSON")
- data_type = DataTypeParser.parse_data_type(json_data[DataField.FIELD_TYPE], field_id)
+ data_type = DataTypeParser.parse_data_type(
+ json_data[DataField.FIELD_TYPE], field_id
+ )
description = json_data.get(DataField.FIELD_DESCRIPTION)
@@ -327,15 +354,19 @@ class DataTypeParser:
name=name,
type=data_type,
description=description,
- default_value=default_value
+ default_value=default_value,
)
-def parse_data_type_from_json(json_str: str, field_id: Optional[AtomicInteger] = None) -> DataType:
+def parse_data_type_from_json(
+ json_str: str, field_id: Optional[AtomicInteger] = None
+) -> DataType:
json_data = json.loads(json_str)
return DataTypeParser.parse_data_type(json_data, field_id)
-def parse_data_field_from_json(json_str: str, field_id: Optional[AtomicInteger] = None) -> DataField:
+def parse_data_field_from_json(
+ json_str: str, field_id: Optional[AtomicInteger] = None
+) -> DataField:
json_data = json.loads(json_str)
return DataTypeParser.parse_data_field(json_data, field_id)
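
The module-level parser entry points at the end of this file accept either a SQL-style string or a JSON object; a short usage sketch (assumes INT and STRING are recognized keywords):

    from pypaimon.api.data_types import parse_data_type_from_json

    atomic = parse_data_type_from_json('"INT"')  # SQL-style string form
    nested = parse_data_type_from_json('{"type": "ARRAY", "element": "STRING"}')
    print(atomic, nested)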
diff --git a/paimon-python/pypaimon/api/rest_json.py b/paimon-python/pypaimon/api/rest_json.py
index c416c1c680..95291ecb03 100644
--- a/paimon-python/pypaimon/api/rest_json.py
+++ b/paimon-python/pypaimon/api/rest_json.py
@@ -24,7 +24,7 @@ from .typedef import T
def json_field(json_name: str, **kwargs):
"""Create a field with custom JSON name"""
- return field(metadata={'json_name': json_name}, **kwargs)
+ return field(metadata={"json_name": json_name}, **kwargs)
class JSON:
@@ -48,16 +48,16 @@ class JSON:
field_value = getattr(obj, field_info.name)
# Get custom JSON name from metadata
- json_name = field_info.metadata.get('json_name', field_info.name)
+ json_name = field_info.metadata.get("json_name", field_info.name)
# Handle nested objects
if is_dataclass(field_value):
result[json_name] = JSON.__to_dict(field_value)
- elif hasattr(field_value, 'to_dict'):
+ elif hasattr(field_value, "to_dict"):
result[json_name] = field_value.to_dict()
elif isinstance(field_value, list):
result[json_name] = [
- item.to_dict() if hasattr(item, 'to_dict') else item
+ item.to_dict() if hasattr(item, "to_dict") else item
for item in field_value
]
else:
@@ -71,7 +71,7 @@ class JSON:
# Create field name mapping (json_name -> field_name)
field_mapping = {}
for field_info in fields(target_class):
- json_name = field_info.metadata.get('json_name', field_info.name)
+ json_name = field_info.metadata.get("json_name", field_info.name)
field_mapping[json_name] = field_info.name
# Map JSON data to field names
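
json_field simply stores the wire name in dataclass field metadata, which the JSON class reads back when (de)serializing; a minimal illustration:

    from dataclasses import dataclass, fields
    from pypaimon.api.rest_json import json_field

    @dataclass
    class Page:
        next_page_token: str = json_field("nextPageToken", default="")

    print(fields(Page)[0].metadata["json_name"])  # nextPageToken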
diff --git a/paimon-python/pypaimon/api/typedef.py b/paimon-python/pypaimon/api/typedef.py
index bb6ce4cef4..9c79e48323 100644
--- a/paimon-python/pypaimon/api/typedef.py
+++ b/paimon-python/pypaimon/api/typedef.py
@@ -18,23 +18,24 @@
from dataclasses import dataclass
from typing import Optional, TypeVar
-T = TypeVar('T')
+T = TypeVar("T")
@dataclass
class Identifier:
"""Table/View/Function identifier"""
+
database_name: str
object_name: str
branch_name: Optional[str] = None
@classmethod
- def create(cls, database_name: str, object_name: str) -> 'Identifier':
+ def create(cls, database_name: str, object_name: str) -> "Identifier":
return cls(database_name, object_name)
@classmethod
- def from_string(cls, full_name: str) -> 'Identifier':
- parts = full_name.split('.')
+ def from_string(cls, full_name: str) -> "Identifier":
+ parts = full_name.split(".")
if len(parts) == 2:
return cls(parts[0], parts[1])
elif len(parts) == 3:
@@ -60,4 +61,4 @@ class Identifier:
return self.branch_name
def is_system_table(self) -> bool:
- return self.object_name.startswith('$')
+ return self.object_name.startswith("$")
diff --git a/paimon-python/pypaimon/tests/api_test.py b/paimon-python/pypaimon/tests/api_test.py
index 0fcbcb420d..0150c4919e 100644
--- a/paimon-python/pypaimon/tests/api_test.py
+++ b/paimon-python/pypaimon/tests/api_test.py
@@ -79,7 +79,6 @@ class ResourcePaths:
# Exception classes
class CatalogException(Exception):
"""Base catalog exception"""
- pass
class DatabaseNotExistException(CatalogException):
diff --git a/paimon-python/setup.py b/paimon-python/setup.py
index d1ec439905..3fbc6108dc 100644
--- a/paimon-python/setup.py
+++ b/paimon-python/setup.py
@@ -1,4 +1,4 @@
-################################################################################
+##########################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,37 +14,38 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-################################################################################
+##########################################################################
from setuptools import setup, find_packages
VERSION = "0.3.dev" # noqa
-PACKAGES = find_packages(include=['pypaimon*'])
+PACKAGES = find_packages(include=["pypaimon*"])
install_requires = []
-long_description = 'See Apache Paimon Python API \
-[Doc](https://paimon.apache.org/docs/master/program-api/python-api/) for usage.'
+long_description = "See Apache Paimon Python API \
+[Doc](https://paimon.apache.org/docs/master/program-api/python-api/) for usage."
setup(
- name='pypaimon',
+ name="pypaimon",
version=VERSION,
packages=PACKAGES,
include_package_data=True,
install_requires=install_requires,
extras_require={},
- description='Apache Paimon Python API',
+ description="Apache Paimon Python API",
long_description=long_description,
- long_description_content_type='text/markdown',
- author='Apache Software Foundation',
- author_email='[email protected]',
- url='https://paimon.apache.org',
+ long_description_content_type="text/markdown",
+ author="Apache Software Foundation",
+ author_email="[email protected]",
+ url="https://paimon.apache.org",
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'License :: OSI Approved :: Apache Software License',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'Programming Language :: Python :: 3.11'],
- python_requires='>=3.8'
+ "Development Status :: 5 - Production/Stable",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ ],
+ python_requires=">=3.8",
)