This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch release-1.3
in repository https://gitbox.apache.org/repos/asf/paimon.git

commit ae574508537d1d7f98c819e44a0ade6d38c1b342
Author: umi <[email protected]>
AuthorDate: Mon Nov 10 11:20:32 2025 +0800

    [python] Fix File Source type in data file meta (#6571)
---
 .../pypaimon/manifest/schema/data_file_meta.py     |  4 +--
 paimon-python/pypaimon/tests/manifest/__init__.py  | 17 +++++++++++
 .../tests/{ => manifest}/manifest_schema_test.py   | 35 +++++++---------------
 paimon-python/pypaimon/write/file_store_commit.py  |  2 +-
 .../pypaimon/write/writer/data_blob_writer.py      |  2 +-
 paimon-python/pypaimon/write/writer/data_writer.py |  2 +-
 6 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py
index a414644e27..405c2e3483 100644
--- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py
+++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py
@@ -44,7 +44,7 @@ class DataFileMeta:
     creation_time: Optional[datetime] = None
     delete_row_count: Optional[int] = None
     embedded_index: Optional[bytes] = None
-    file_source: Optional[str] = None
+    file_source: Optional[int] = None
     value_stats_cols: Optional[List[str]] = None
     external_path: Optional[str] = None
     first_row_id: Optional[int] = None
@@ -163,7 +163,7 @@ DATA_FILE_META_SCHEMA = {
          "default": None},
         {"name": "_DELETE_ROW_COUNT", "type": ["null", "long"], "default": None},
         {"name": "_EMBEDDED_FILE_INDEX", "type": ["null", "bytes"], "default": None},
-        {"name": "_FILE_SOURCE", "type": ["null", "string"], "default": None},
+        {"name": "_FILE_SOURCE", "type": ["null", "int"], "default": None},
         {"name": "_VALUE_STATS_COLS",
          "type": ["null", {"type": "array", "items": "string"}],
          "default": None},
diff --git a/paimon-python/pypaimon/tests/manifest/__init__.py b/paimon-python/pypaimon/tests/manifest/__init__.py
new file mode 100644
index 0000000000..53ed4d36c2
--- /dev/null
+++ b/paimon-python/pypaimon/tests/manifest/__init__.py
@@ -0,0 +1,17 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
diff --git a/paimon-python/pypaimon/tests/manifest_schema_test.py b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
similarity index 87%
rename from paimon-python/pypaimon/tests/manifest_schema_test.py
rename to paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
index 3d82ededa6..b4b60ffd43 100644
--- a/paimon-python/pypaimon/tests/manifest_schema_test.py
+++ b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
@@ -1,23 +1,7 @@
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
 import unittest
 
+from pypaimon.manifest.schema import data_file_meta
+
 from pypaimon.manifest.schema.data_file_meta import DATA_FILE_META_SCHEMA
 from pypaimon.manifest.schema.manifest_file_meta import MANIFEST_FILE_META_SCHEMA
 from pypaimon.manifest.schema.simple_stats import (
@@ -28,7 +12,14 @@ from pypaimon.manifest.schema.simple_stats import (
 
 
 class ManifestSchemaTest(unittest.TestCase):
-    """Test cases for the manifest schema definitions."""
+    def test_file_source_field_type_and_default(self):
+        schema = data_file_meta.DATA_FILE_META_SCHEMA
+        fields = schema.get("fields", [])
+        file_source_field = next((f for f in fields if f.get("name") == "_FILE_SOURCE"), None)
+
+        self.assertIsNotNone(file_source_field, "_FILE_SOURCE field not found in DATA_FILE_META_SCHEMA")
+        self.assertEqual(file_source_field.get("type"), ["null", "int"])
+        self.assertIsNone(file_source_field.get("default"))
 
     def test_data_file_meta_schema_structure(self):
         """Test that DATA_FILE_META_SCHEMA has the correct structure."""
@@ -73,7 +64,7 @@ class ManifestSchemaTest(unittest.TestCase):
                          ["null", {"type": "long", "logicalType": "timestamp-millis"}])
         self.assertEqual(field_map["_DELETE_ROW_COUNT"]["type"], ["null", "long"])
         self.assertEqual(field_map["_EMBEDDED_FILE_INDEX"]["type"], ["null", "bytes"])
-        self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "string"])
+        self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "int"])
         self.assertEqual(field_map["_VALUE_STATS_COLS"]["type"], ["null", {"type": "array", "items": "string"}])
         self.assertEqual(field_map["_EXTERNAL_PATH"]["type"], ["null", "string"])
         self.assertEqual(field_map["_FIRST_ROW_ID"]["type"], ["null", "long"])
@@ -141,7 +132,3 @@ class ManifestSchemaTest(unittest.TestCase):
             PARTITION_STATS_SCHEMA["name"]
         ]
         self.assertEqual(len(names), len(set(names)), "Schema names should be unique")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/paimon-python/pypaimon/write/file_store_commit.py b/paimon-python/pypaimon/write/file_store_commit.py
index afeba52b3c..224e4a82c9 100644
--- a/paimon-python/pypaimon/write/file_store_commit.py
+++ b/paimon-python/pypaimon/write/file_store_commit.py
@@ -361,7 +361,7 @@ class FileStoreCommit:
         for entry in commit_entries:
             # Check if this is an append file that needs row ID assignment
             if (entry.kind == 0 and  # ADD kind
-                    entry.file.file_source == "APPEND" and  # APPEND file source
+                    entry.file.file_source == 0 and  # APPEND file source
                     entry.file.first_row_id is None):  # No existing first_row_id
 
                 if self._is_blob_file(entry.file.file_name):
diff --git a/paimon-python/pypaimon/write/writer/data_blob_writer.py b/paimon-python/pypaimon/write/writer/data_blob_writer.py
index b711d2e695..9d2e0982a4 100644
--- a/paimon-python/pypaimon/write/writer/data_blob_writer.py
+++ b/paimon-python/pypaimon/write/writer/data_blob_writer.py
@@ -301,7 +301,7 @@ class DataBlobWriter(DataWriter):
             extra_files=[],
             creation_time=datetime.now(),
             delete_row_count=0,
-            file_source="APPEND",
+            file_source=0,
             value_stats_cols=self.normal_column_names,
             file_path=str(file_path),
             write_cols=self.write_cols)
diff --git a/paimon-python/pypaimon/write/writer/data_writer.py b/paimon-python/pypaimon/write/writer/data_writer.py
index 24e3b0ca48..351ff32979 100644
--- a/paimon-python/pypaimon/write/writer/data_writer.py
+++ b/paimon-python/pypaimon/write/writer/data_writer.py
@@ -210,7 +210,7 @@ class DataWriter(ABC):
             extra_files=[],
             creation_time=datetime.now(),
             delete_row_count=0,
-            file_source="APPEND",
+            file_source=0,
             value_stats_cols=None,  # None means all columns in the data have statistics
             external_path=None,
             first_row_id=None,

Reply via email to