This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8e5378a520 [python] Python commit snapshot should not contain None
fields (#7103)
8e5378a520 is described below
commit 8e5378a520eec40ced124913fe1b52ead64f3c43
Author: Jingsong Lee <[email protected]>
AuthorDate: Fri Jan 23 04:37:06 2026 +0800
[python] Python commit snapshot should not contain None fields (#7103)
---
paimon-python/pypaimon/common/json_util.py | 10 +++++++++
paimon-python/pypaimon/snapshot/snapshot.py | 14 ++++++------
.../pypaimon/snapshot/snapshot_manager.py | 9 ++++++--
.../pypaimon/tests/write/table_write_test.py | 26 ++++++++++++++++++++++
4 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/paimon-python/pypaimon/common/json_util.py
b/paimon-python/pypaimon/common/json_util.py
index 5f4d513a27..6fdd7bbb62 100644
--- a/paimon-python/pypaimon/common/json_util.py
+++ b/paimon-python/pypaimon/common/json_util.py
@@ -27,6 +27,11 @@ def json_field(json_name: str, **kwargs):
return field(metadata={"json_name": json_name}, **kwargs)
+def optional_json_field(json_name: str, json_include: str):
+ """Create a None-defaulted field carrying a custom JSON name and a JSON inclusion rule."""
+ return field(metadata={"json_name": json_name, "json_include":
json_include}, default=None)
+
+
class JSON:
@staticmethod
@@ -55,6 +60,11 @@ class JSON:
# Get custom JSON name from metadata
json_name = field_info.metadata.get("json_name", field_info.name)
+ # Json include
+ if field_value is None:
+ if field_info.metadata.get("json_include", None) == "non_null":
+ continue
+
# Handle nested objects
if hasattr(field_value, "to_dict"):
result[json_name] = field_value.to_dict()
diff --git a/paimon-python/pypaimon/snapshot/snapshot.py
b/paimon-python/pypaimon/snapshot/snapshot.py
index b1fdbaafea..f269a9ac85 100644
--- a/paimon-python/pypaimon/snapshot/snapshot.py
+++ b/paimon-python/pypaimon/snapshot/snapshot.py
@@ -19,7 +19,7 @@
from dataclasses import dataclass
from typing import Optional
-from pypaimon.common.json_util import json_field
+from pypaimon.common.json_util import json_field, optional_json_field
BATCH_COMMIT_IDENTIFIER = 0x7fffffffffffffff
@@ -39,9 +39,9 @@ class Snapshot:
commit_kind: str = json_field("commitKind")
time_millis: int = json_field("timeMillis")
# Optional fields with defaults
- changelog_manifest_list: Optional[str] =
json_field("changelogManifestList", default=None)
- index_manifest: Optional[str] = json_field("indexManifest", default=None)
- changelog_record_count: Optional[int] = json_field("changelogRecordCount",
default=None)
- watermark: Optional[int] = json_field("watermark", default=None)
- statistics: Optional[str] = json_field("statistics", default=None)
- next_row_id: Optional[int] = json_field("nextRowId", default=None)
+ changelog_manifest_list: Optional[str] =
optional_json_field("changelogManifestList", "non_null")
+ index_manifest: Optional[str] = optional_json_field("indexManifest",
"non_null")
+ changelog_record_count: Optional[int] =
optional_json_field("changelogRecordCount", "non_null")
+ watermark: Optional[int] = optional_json_field("watermark", "non_null")
+ statistics: Optional[str] = optional_json_field("statistics", "non_null")
+ next_row_id: Optional[int] = optional_json_field("nextRowId", "non_null")
diff --git a/paimon-python/pypaimon/snapshot/snapshot_manager.py
b/paimon-python/pypaimon/snapshot/snapshot_manager.py
index 8291d9cf2c..cc044fbc1a 100644
--- a/paimon-python/pypaimon/snapshot/snapshot_manager.py
+++ b/paimon-python/pypaimon/snapshot/snapshot_manager.py
@@ -35,6 +35,12 @@ class SnapshotManager:
self.latest_file = f"{self.snapshot_dir}/LATEST"
def get_latest_snapshot(self) -> Optional[Snapshot]:
+ snapshot_json = self.get_latest_snapshot_json()
+ if snapshot_json is None:
+ return None
+ return JSON.from_json(snapshot_json, Snapshot)
+
+ def get_latest_snapshot_json(self) -> Optional[str]:
if not self.file_io.exists(self.latest_file):
return None
@@ -45,8 +51,7 @@ class SnapshotManager:
if not self.file_io.exists(snapshot_file):
return None
- snapshot_content = self.file_io.read_file_utf8(snapshot_file)
- return JSON.from_json(snapshot_content, Snapshot)
+ return self.file_io.read_file_utf8(snapshot_file)
def read_latest_file(self, max_retries: int = 5):
"""
diff --git a/paimon-python/pypaimon/tests/write/table_write_test.py
b/paimon-python/pypaimon/tests/write/table_write_test.py
index 04c9610a4b..a73dc631b1 100644
--- a/paimon-python/pypaimon/tests/write/table_write_test.py
+++ b/paimon-python/pypaimon/tests/write/table_write_test.py
@@ -53,6 +53,32 @@ class TableWriteTest(unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tempdir, ignore_errors=True)
+    def test_write_snapshot(self):
+        schema = Schema.from_pyarrow_schema(self.pa_schema, partition_keys=['dt'])
+        self.catalog.create_table('default.test_write_snapshot', schema, False)
+        table = self.catalog.get_table('default.test_write_snapshot')
+        write_builder = table.new_batch_write_builder()
+
+        # write: push a single batch through the table and commit it
+        table_write = write_builder.new_write()
+        table_commit = write_builder.new_commit()
+        table_write.write_arrow(self.expected)
+        table_commit.commit(table_write.prepare_commit())
+        table_write.close()
+        table_commit.close()
+
+        # read: the committed data must round-trip unchanged
+        read_builder = table.new_read_builder()
+        table_read = read_builder.new_read()
+        splits = read_builder.new_scan().plan().splits()
+        actual = table_read.to_arrow(splits).sort_by('user_id')
+        self.assertEqual(self.expected, actual)
+
+        # snapshot: raw JSON keeps populated fields and omits None-valued ones (nextRowId)
+        snapshot_json: str = table.snapshot_manager().get_latest_snapshot_json()
+        self.assertIn("baseManifestList", snapshot_json)
+        self.assertNotIn("nextRowId", snapshot_json)
+
def test_multi_prepare_commit_ao(self):
schema = Schema.from_pyarrow_schema(self.pa_schema,
partition_keys=['dt'])
self.catalog.create_table('default.test_append_only_parquet', schema,
False)