(paimon) branch master updated: [Python] Miscellaneous fixes for hybrid write/read (#6220)

lzljs3620320 Mon, 08 Sep 2025 20:00:26 -0700

This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git



The following commit(s) were added to refs/heads/master by this push:
     new 01215e8143 [Python] Miscellaneous fixes for hybrid write/read (#6220)
01215e8143 is described below

commit 01215e814333a804fc615eed1215263d4bd28482
Author: ChengHui Chen <[email protected]>
AuthorDate: Tue Sep 9 10:51:34 2025 +0800

    [Python] Miscellaneous fixes for hybrid write/read (#6220)
---
 paimon-python/pypaimon/manifest/manifest_file_manager.py | 2 +-
 paimon-python/pypaimon/read/reader/sort_merge_reader.py  | 7 +++----
 paimon-python/pypaimon/schema/table_schema.py            | 2 +-
 paimon-python/pypaimon/table/row/binary_row.py           | 3 ---
 4 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/paimon-python/pypaimon/manifest/manifest_file_manager.py b/paimon-python/pypaimon/manifest/manifest_file_manager.py
index b2cd7868bb..15d880356c 100644
--- a/paimon-python/pypaimon/manifest/manifest_file_manager.py
+++ b/paimon-python/pypaimon/manifest/manifest_file_manager.py
@@ -106,7 +106,7 @@ class ManifestFileManager:
                 "_KIND": entry.kind,
                 "_PARTITION": BinaryRowSerializer.to_bytes(entry.partition),
                 "_BUCKET": entry.bucket,
-                "_TOTAL_BUCKETS": entry.bucket,
+                "_TOTAL_BUCKETS": entry.total_buckets,
                 "_FILE": {
                     "_FILE_NAME": entry.file.file_name,
                     "_FILE_SIZE": entry.file.file_size,
diff --git a/paimon-python/pypaimon/read/reader/sort_merge_reader.py b/paimon-python/pypaimon/read/reader/sort_merge_reader.py
index 9a99b5acdc..aedd593b70 100644
--- a/paimon-python/pypaimon/read/reader/sort_merge_reader.py
+++ b/paimon-python/pypaimon/read/reader/sort_merge_reader.py
@@ -21,7 +21,7 @@ from typing import Any, Callable, List, Optional
 
 from pypaimon.read.reader.iface.record_iterator import RecordIterator
 from pypaimon.read.reader.iface.record_reader import RecordReader
-from pypaimon.schema.data_types import DataField
+from pypaimon.schema.data_types import DataField, Keyword
 from pypaimon.schema.table_schema import TableSchema
 from pypaimon.table.row.internal_row import InternalRow
 from pypaimon.table.row.key_value import KeyValue
@@ -180,9 +180,8 @@ class HeapEntry:
 
 def builtin_key_comparator(key_schema: List[DataField]) -> Callable[[Any, Any], int]:
     # Precompute comparability flags to avoid repeated type checks
-    comparable_types = ["BOOLEAN", "DECIMAL", "NUMERIC", "TINYINT", "SMALLINT", "INT", "INTEGER", "BIGINT", "FLOAT",
-                        "DOUBLE", "TIMESTAMP", "TIMESTAMP_LTZ"]
-    comparable_flags = [field.type.type in comparable_types for field in key_schema]
+    comparable_types = {member.value for member in Keyword if member is not Keyword.VARIANT}
+    comparable_flags = [field.type.type.split(' ')[0] in comparable_types for field in key_schema]
 
     def comparator(key1: InternalRow, key2: InternalRow) -> int:
         if key1 is None and key2 is None:
diff --git a/paimon-python/pypaimon/schema/table_schema.py b/paimon-python/pypaimon/schema/table_schema.py
index 852ef8c4ea..f74d713e9f 100644
--- a/paimon-python/pypaimon/schema/table_schema.py
+++ b/paimon-python/pypaimon/schema/table_schema.py
@@ -122,7 +122,7 @@ class TableSchema:
                 highest_field_id=data[TableSchema.FIELD_HIGHEST_FIELD_ID],
                 partition_keys=data[TableSchema.FIELD_PARTITION_KEYS],
                 primary_keys=data[TableSchema.FIELD_PRIMARY_KEYS],
-                options=options,
+                options=options or {},
                 comment=data.get(TableSchema.FIELD_COMMENT),
                 time_millis=data.get(TableSchema.FIELD_TIME_MILLIS)
             )
diff --git a/paimon-python/pypaimon/table/row/binary_row.py b/paimon-python/pypaimon/table/row/binary_row.py
index f1f8e740df..468556dcb0 100644
--- a/paimon-python/pypaimon/table/row/binary_row.py
+++ b/paimon-python/pypaimon/table/row/binary_row.py
@@ -241,9 +241,6 @@ class BinaryRowSerializer:
 
     @classmethod
     def to_bytes(cls, binary_row: BinaryRow) -> bytes:
-        if not binary_row.values:
-            return b''
-
         arity = len(binary_row.fields)
         null_bits_size_in_bytes = cls._calculate_bit_set_width_in_bytes(arity)
         fixed_part_size = null_bits_size_in_bytes + arity * 8

(paimon) branch master updated: [Python] Miscellaneous fixes for hybrid write/read (#6220)

Reply via email to