This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 01215e8143 [Python] Miscellaneous fixes for hybrid write/read (#6220)
01215e8143 is described below
commit 01215e814333a804fc615eed1215263d4bd28482
Author: ChengHui Chen <[email protected]>
AuthorDate: Tue Sep 9 10:51:34 2025 +0800
[Python] Miscellaneous fixes for hybrid write/read (#6220)
---
paimon-python/pypaimon/manifest/manifest_file_manager.py | 2 +-
paimon-python/pypaimon/read/reader/sort_merge_reader.py | 7 +++----
paimon-python/pypaimon/schema/table_schema.py | 2 +-
paimon-python/pypaimon/table/row/binary_row.py | 3 ---
4 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/paimon-python/pypaimon/manifest/manifest_file_manager.py b/paimon-python/pypaimon/manifest/manifest_file_manager.py
index b2cd7868bb..15d880356c 100644
--- a/paimon-python/pypaimon/manifest/manifest_file_manager.py
+++ b/paimon-python/pypaimon/manifest/manifest_file_manager.py
@@ -106,7 +106,7 @@ class ManifestFileManager:
"_KIND": entry.kind,
"_PARTITION": BinaryRowSerializer.to_bytes(entry.partition),
"_BUCKET": entry.bucket,
- "_TOTAL_BUCKETS": entry.bucket,
+ "_TOTAL_BUCKETS": entry.total_buckets,
"_FILE": {
"_FILE_NAME": entry.file.file_name,
"_FILE_SIZE": entry.file.file_size,
diff --git a/paimon-python/pypaimon/read/reader/sort_merge_reader.py b/paimon-python/pypaimon/read/reader/sort_merge_reader.py
index 9a99b5acdc..aedd593b70 100644
--- a/paimon-python/pypaimon/read/reader/sort_merge_reader.py
+++ b/paimon-python/pypaimon/read/reader/sort_merge_reader.py
@@ -21,7 +21,7 @@ from typing import Any, Callable, List, Optional
from pypaimon.read.reader.iface.record_iterator import RecordIterator
from pypaimon.read.reader.iface.record_reader import RecordReader
-from pypaimon.schema.data_types import DataField
+from pypaimon.schema.data_types import DataField, Keyword
from pypaimon.schema.table_schema import TableSchema
from pypaimon.table.row.internal_row import InternalRow
from pypaimon.table.row.key_value import KeyValue
@@ -180,9 +180,8 @@ class HeapEntry:
def builtin_key_comparator(key_schema: List[DataField]) -> Callable[[Any, Any], int]:
# Precompute comparability flags to avoid repeated type checks
- comparable_types = ["BOOLEAN", "DECIMAL", "NUMERIC", "TINYINT", "SMALLINT", "INT", "INTEGER", "BIGINT", "FLOAT",
- "DOUBLE", "TIMESTAMP", "TIMESTAMP_LTZ"]
- comparable_flags = [field.type.type in comparable_types for field in key_schema]
+ comparable_types = {member.value for member in Keyword if member is not Keyword.VARIANT}
+ comparable_flags = [field.type.type.split(' ')[0] in comparable_types for field in key_schema]
def comparator(key1: InternalRow, key2: InternalRow) -> int:
if key1 is None and key2 is None:
diff --git a/paimon-python/pypaimon/schema/table_schema.py b/paimon-python/pypaimon/schema/table_schema.py
index 852ef8c4ea..f74d713e9f 100644
--- a/paimon-python/pypaimon/schema/table_schema.py
+++ b/paimon-python/pypaimon/schema/table_schema.py
@@ -122,7 +122,7 @@ class TableSchema:
highest_field_id=data[TableSchema.FIELD_HIGHEST_FIELD_ID],
partition_keys=data[TableSchema.FIELD_PARTITION_KEYS],
primary_keys=data[TableSchema.FIELD_PRIMARY_KEYS],
- options=options,
+ options=options or {},
comment=data.get(TableSchema.FIELD_COMMENT),
time_millis=data.get(TableSchema.FIELD_TIME_MILLIS)
)
diff --git a/paimon-python/pypaimon/table/row/binary_row.py b/paimon-python/pypaimon/table/row/binary_row.py
index f1f8e740df..468556dcb0 100644
--- a/paimon-python/pypaimon/table/row/binary_row.py
+++ b/paimon-python/pypaimon/table/row/binary_row.py
@@ -241,9 +241,6 @@ class BinaryRowSerializer:
@classmethod
def to_bytes(cls, binary_row: BinaryRow) -> bytes:
- if not binary_row.values:
- return b''
-
arity = len(binary_row.fields)
null_bits_size_in_bytes = cls._calculate_bit_set_width_in_bytes(arity)
fixed_part_size = null_bits_size_in_bytes + arity * 8