This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-1.3 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit ae574508537d1d7f98c819e44a0ade6d38c1b342 Author: umi <[email protected]> AuthorDate: Mon Nov 10 11:20:32 2025 +0800 [python] Fix File Source type in data file meta (#6571) --- .../pypaimon/manifest/schema/data_file_meta.py | 4 +-- paimon-python/pypaimon/tests/manifest/__init__.py | 17 +++++++++++ .../tests/{ => manifest}/manifest_schema_test.py | 35 +++++++--------------- paimon-python/pypaimon/write/file_store_commit.py | 2 +- .../pypaimon/write/writer/data_blob_writer.py | 2 +- paimon-python/pypaimon/write/writer/data_writer.py | 2 +- 6 files changed, 33 insertions(+), 29 deletions(-) diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py index a414644e27..405c2e3483 100644 --- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py +++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py @@ -44,7 +44,7 @@ class DataFileMeta: creation_time: Optional[datetime] = None delete_row_count: Optional[int] = None embedded_index: Optional[bytes] = None - file_source: Optional[str] = None + file_source: Optional[int] = None value_stats_cols: Optional[List[str]] = None external_path: Optional[str] = None first_row_id: Optional[int] = None @@ -163,7 +163,7 @@ DATA_FILE_META_SCHEMA = { "default": None}, {"name": "_DELETE_ROW_COUNT", "type": ["null", "long"], "default": None}, {"name": "_EMBEDDED_FILE_INDEX", "type": ["null", "bytes"], "default": None}, - {"name": "_FILE_SOURCE", "type": ["null", "string"], "default": None}, + {"name": "_FILE_SOURCE", "type": ["null", "int"], "default": None}, {"name": "_VALUE_STATS_COLS", "type": ["null", {"type": "array", "items": "string"}], "default": None}, diff --git a/paimon-python/pypaimon/tests/manifest/__init__.py b/paimon-python/pypaimon/tests/manifest/__init__.py new file mode 100644 index 0000000000..53ed4d36c2 --- /dev/null +++ b/paimon-python/pypaimon/tests/manifest/__init__.py @@ -0,0 +1,17 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" diff --git a/paimon-python/pypaimon/tests/manifest_schema_test.py b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py similarity index 87% rename from paimon-python/pypaimon/tests/manifest_schema_test.py rename to paimon-python/pypaimon/tests/manifest/manifest_schema_test.py index 3d82ededa6..b4b60ffd43 100644 --- a/paimon-python/pypaimon/tests/manifest_schema_test.py +++ b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py @@ -1,23 +1,7 @@ -""" -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - import unittest +from pypaimon.manifest.schema import data_file_meta + from pypaimon.manifest.schema.data_file_meta import DATA_FILE_META_SCHEMA from pypaimon.manifest.schema.manifest_file_meta import MANIFEST_FILE_META_SCHEMA from pypaimon.manifest.schema.simple_stats import ( @@ -28,7 +12,14 @@ from pypaimon.manifest.schema.simple_stats import ( class ManifestSchemaTest(unittest.TestCase): - """Test cases for the manifest schema definitions.""" + def test_file_source_field_type_and_default(self): + schema = data_file_meta.DATA_FILE_META_SCHEMA + fields = schema.get("fields", []) + file_source_field = next((f for f in fields if f.get("name") == "_FILE_SOURCE"), None) + + self.assertIsNotNone(file_source_field, "_FILE_SOURCE field not found in DATA_FILE_META_SCHEMA") + self.assertEqual(file_source_field.get("type"), ["null", "int"]) + self.assertIsNone(file_source_field.get("default")) def test_data_file_meta_schema_structure(self): """Test that DATA_FILE_META_SCHEMA has the correct structure.""" @@ -73,7 +64,7 @@ class ManifestSchemaTest(unittest.TestCase): ["null", {"type": "long", "logicalType": "timestamp-millis"}]) self.assertEqual(field_map["_DELETE_ROW_COUNT"]["type"], ["null", "long"]) self.assertEqual(field_map["_EMBEDDED_FILE_INDEX"]["type"], ["null", "bytes"]) - self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "string"]) + self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "int"]) self.assertEqual(field_map["_VALUE_STATS_COLS"]["type"], ["null", {"type": "array", "items": "string"}]) self.assertEqual(field_map["_EXTERNAL_PATH"]["type"], ["null", "string"]) self.assertEqual(field_map["_FIRST_ROW_ID"]["type"], ["null", "long"]) @@ -141,7 +132,3 @@ class ManifestSchemaTest(unittest.TestCase): PARTITION_STATS_SCHEMA["name"] ] self.assertEqual(len(names), len(set(names)), "Schema names should be unique") - - -if __name__ == "__main__": - unittest.main() diff --git a/paimon-python/pypaimon/write/file_store_commit.py b/paimon-python/pypaimon/write/file_store_commit.py index afeba52b3c..224e4a82c9 100644 --- a/paimon-python/pypaimon/write/file_store_commit.py +++ b/paimon-python/pypaimon/write/file_store_commit.py @@ -361,7 +361,7 @@ class FileStoreCommit: for entry in commit_entries: # Check if this is an append file that needs row ID assignment if (entry.kind == 0 and # ADD kind - entry.file.file_source == "APPEND" and # APPEND file source + entry.file.file_source == 0 and # APPEND file source entry.file.first_row_id is None): # No existing first_row_id if self._is_blob_file(entry.file.file_name): diff --git a/paimon-python/pypaimon/write/writer/data_blob_writer.py b/paimon-python/pypaimon/write/writer/data_blob_writer.py index b711d2e695..9d2e0982a4 100644 --- a/paimon-python/pypaimon/write/writer/data_blob_writer.py +++ b/paimon-python/pypaimon/write/writer/data_blob_writer.py @@ -301,7 +301,7 @@ class DataBlobWriter(DataWriter): extra_files=[], creation_time=datetime.now(), delete_row_count=0, - file_source="APPEND", + file_source=0, value_stats_cols=self.normal_column_names, file_path=str(file_path), write_cols=self.write_cols) diff --git a/paimon-python/pypaimon/write/writer/data_writer.py b/paimon-python/pypaimon/write/writer/data_writer.py index 24e3b0ca48..351ff32979 100644 --- a/paimon-python/pypaimon/write/writer/data_writer.py +++ b/paimon-python/pypaimon/write/writer/data_writer.py @@ -210,7 +210,7 @@ class DataWriter(ABC): extra_files=[], creation_time=datetime.now(), delete_row_count=0, - file_source="APPEND", + file_source=0, value_stats_cols=None, # None means all columns in the data have statistics external_path=None, first_row_id=None,
