This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e5378a520 [python] Python commit snapshot should not contain None fields (#7103)
8e5378a520 is described below

commit 8e5378a520eec40ced124913fe1b52ead64f3c43
Author: Jingsong Lee <[email protected]>
AuthorDate: Fri Jan 23 04:37:06 2026 +0800

    [python] Python commit snapshot should not contain None fields (#7103)
---
 paimon-python/pypaimon/common/json_util.py         | 10 +++++++++
 paimon-python/pypaimon/snapshot/snapshot.py        | 14 ++++++------
 .../pypaimon/snapshot/snapshot_manager.py          |  9 ++++++--
 .../pypaimon/tests/write/table_write_test.py       | 26 ++++++++++++++++++++++
 4 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/paimon-python/pypaimon/common/json_util.py b/paimon-python/pypaimon/common/json_util.py
index 5f4d513a27..6fdd7bbb62 100644
--- a/paimon-python/pypaimon/common/json_util.py
+++ b/paimon-python/pypaimon/common/json_util.py
@@ -27,6 +27,11 @@ def json_field(json_name: str, **kwargs):
     return field(metadata={"json_name": json_name}, **kwargs)
 
 
+def optional_json_field(json_name: str, json_include: str):
+    """Create a field with custom JSON name"""
+    return field(metadata={"json_name": json_name, "json_include": 
json_include}, default=None)
+
+
 class JSON:
 
     @staticmethod
@@ -55,6 +60,11 @@ class JSON:
             # Get custom JSON name from metadata
             json_name = field_info.metadata.get("json_name", field_info.name)
 
+            # Json include
+            if field_value is None:
+                if field_info.metadata.get("json_include", None) == "non_null":
+                    continue
+
             # Handle nested objects
             if hasattr(field_value, "to_dict"):
                 result[json_name] = field_value.to_dict()
diff --git a/paimon-python/pypaimon/snapshot/snapshot.py b/paimon-python/pypaimon/snapshot/snapshot.py
index b1fdbaafea..f269a9ac85 100644
--- a/paimon-python/pypaimon/snapshot/snapshot.py
+++ b/paimon-python/pypaimon/snapshot/snapshot.py
@@ -19,7 +19,7 @@
 from dataclasses import dataclass
 from typing import Optional
 
-from pypaimon.common.json_util import json_field
+from pypaimon.common.json_util import json_field, optional_json_field
 
 BATCH_COMMIT_IDENTIFIER = 0x7fffffffffffffff
 
@@ -39,9 +39,9 @@ class Snapshot:
     commit_kind: str = json_field("commitKind")
     time_millis: int = json_field("timeMillis")
     # Optional fields with defaults
-    changelog_manifest_list: Optional[str] = 
json_field("changelogManifestList", default=None)
-    index_manifest: Optional[str] = json_field("indexManifest", default=None)
-    changelog_record_count: Optional[int] = json_field("changelogRecordCount", 
default=None)
-    watermark: Optional[int] = json_field("watermark", default=None)
-    statistics: Optional[str] = json_field("statistics", default=None)
-    next_row_id: Optional[int] = json_field("nextRowId", default=None)
+    changelog_manifest_list: Optional[str] = 
optional_json_field("changelogManifestList", "non_null")
+    index_manifest: Optional[str] = optional_json_field("indexManifest", 
"non_null")
+    changelog_record_count: Optional[int] = 
optional_json_field("changelogRecordCount", "non_null")
+    watermark: Optional[int] = optional_json_field("watermark", "non_null")
+    statistics: Optional[str] = optional_json_field("statistics", "non_null")
+    next_row_id: Optional[int] = optional_json_field("nextRowId", "non_null")
diff --git a/paimon-python/pypaimon/snapshot/snapshot_manager.py b/paimon-python/pypaimon/snapshot/snapshot_manager.py
index 8291d9cf2c..cc044fbc1a 100644
--- a/paimon-python/pypaimon/snapshot/snapshot_manager.py
+++ b/paimon-python/pypaimon/snapshot/snapshot_manager.py
@@ -35,6 +35,12 @@ class SnapshotManager:
         self.latest_file = f"{self.snapshot_dir}/LATEST"
 
     def get_latest_snapshot(self) -> Optional[Snapshot]:
+        snapshot_json = self.get_latest_snapshot_json()
+        if snapshot_json is None:
+            return None
+        return JSON.from_json(snapshot_json, Snapshot)
+
+    def get_latest_snapshot_json(self) -> Optional[str]:
         if not self.file_io.exists(self.latest_file):
             return None
 
@@ -45,8 +51,7 @@ class SnapshotManager:
         if not self.file_io.exists(snapshot_file):
             return None
 
-        snapshot_content = self.file_io.read_file_utf8(snapshot_file)
-        return JSON.from_json(snapshot_content, Snapshot)
+        return self.file_io.read_file_utf8(snapshot_file)
 
     def read_latest_file(self, max_retries: int = 5):
         """
diff --git a/paimon-python/pypaimon/tests/write/table_write_test.py b/paimon-python/pypaimon/tests/write/table_write_test.py
index 04c9610a4b..a73dc631b1 100644
--- a/paimon-python/pypaimon/tests/write/table_write_test.py
+++ b/paimon-python/pypaimon/tests/write/table_write_test.py
@@ -53,6 +53,32 @@ class TableWriteTest(unittest.TestCase):
     def tearDownClass(cls):
         shutil.rmtree(cls.tempdir, ignore_errors=True)
 
+    def test_write_snapshot(self):
+        schema = Schema.from_pyarrow_schema(self.pa_schema, 
partition_keys=['dt'])
+        self.catalog.create_table('default.test_write_snapshot', schema, False)
+        table = self.catalog.get_table('default.test_write_snapshot')
+        write_builder = table.new_batch_write_builder()
+
+        # write
+        table_write = write_builder.new_write()
+        table_commit = write_builder.new_commit()
+        table_write.write_arrow(self.expected)
+        table_commit.commit(table_write.prepare_commit())
+        table_write.close()
+        table_commit.close()
+
+        # read
+        read_builder = table.new_read_builder()
+        table_read = read_builder.new_read()
+        splits = read_builder.new_scan().plan().splits()
+        actual = table_read.to_arrow(splits).sort_by('user_id')
+        self.assertEqual(self.expected, actual)
+
+        # snapshot
+        snapshot_json: str = 
table.snapshot_manager().get_latest_snapshot_json()
+        self.assertEquals(True, snapshot_json.__contains__("baseManifestList"))
+        self.assertEquals(False, snapshot_json.__contains__("nextRowId"))
+
     def test_multi_prepare_commit_ao(self):
         schema = Schema.from_pyarrow_schema(self.pa_schema, 
partition_keys=['dt'])
         self.catalog.create_table('default.test_append_only_parquet', schema, 
False)

Reply via email to