This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c875333f9 [python] fix reading on legacy manifest list without _MIN_ROW_ID/_MAX_ROW_ID (#7248)
4c875333f9 is described below

commit 4c875333f9e7ee3bfdca645b36545ff8a40fafba
Author: XiaoHongbo <[email protected]>
AuthorDate: Tue Feb 10 18:26:50 2026 +0800

    [python] fix reading on legacy manifest list without _MIN_ROW_ID/_MAX_ROW_ID (#7248)
---
 .../pypaimon/manifest/manifest_list_manager.py     |  4 +-
 .../tests/manifest/manifest_schema_test.py         | 88 +++++++++++++++++++++-
 2 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/paimon-python/pypaimon/manifest/manifest_list_manager.py b/paimon-python/pypaimon/manifest/manifest_list_manager.py
index 81f1714b2b..0bc0cc2b88 100644
--- a/paimon-python/pypaimon/manifest/manifest_list_manager.py
+++ b/paimon-python/pypaimon/manifest/manifest_list_manager.py
@@ -81,8 +81,8 @@ class ManifestListManager:
                 num_deleted_files=record['_NUM_DELETED_FILES'],
                 partition_stats=partition_stats,
                 schema_id=record['_SCHEMA_ID'],
-                min_row_id=record['_MIN_ROW_ID'],
-                max_row_id=record['_MAX_ROW_ID'],
+                min_row_id=record.get('_MIN_ROW_ID'),
+                max_row_id=record.get('_MAX_ROW_ID'),
             )
             manifest_files.append(manifest_file_meta)
 
diff --git a/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
index 13890f783c..a6076998d4 100644
--- a/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
+++ b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
@@ -15,18 +15,44 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
+import os
+import tempfile
 import unittest
+from io import BytesIO
+from unittest.mock import Mock
 
-from pypaimon.manifest.schema import data_file_meta
+import fastavro
 
+from pypaimon.common.options import Options
+from pypaimon.filesystem.local_file_io import LocalFileIO
+from pypaimon.manifest.manifest_list_manager import ManifestListManager
+from pypaimon.manifest.schema import data_file_meta
 from pypaimon.manifest.schema.data_file_meta import DATA_FILE_META_SCHEMA
 from pypaimon.manifest.schema.manifest_file_meta import MANIFEST_FILE_META_SCHEMA
 from pypaimon.manifest.schema.simple_stats import (
     KEY_STATS_SCHEMA,
     VALUE_STATS_SCHEMA,
-    PARTITION_STATS_SCHEMA
+    PARTITION_STATS_SCHEMA,
 )
 
+LEGACY_MANIFEST_FILE_META_SCHEMA = {
+    "type": "record",
+    "name": "ManifestFileMeta",
+    "fields": [
+        {"name": "_VERSION", "type": "int"},
+        {"name": "_FILE_NAME", "type": "string"},
+        {"name": "_FILE_SIZE", "type": "long"},
+        {"name": "_NUM_ADDED_FILES", "type": "long"},
+        {"name": "_NUM_DELETED_FILES", "type": "long"},
+        {"name": "_PARTITION_STATS", "type": PARTITION_STATS_SCHEMA},
+        {"name": "_SCHEMA_ID", "type": "long"},
+    ],
+}
+
+
+def _empty_partition_stats_bytes():
+    return b"\x00\x00\x00\x00\x00"
+
 
 class ManifestSchemaTest(unittest.TestCase):
     def test_file_source_field_type_and_default(self):
@@ -103,7 +129,8 @@ class ManifestSchemaTest(unittest.TestCase):
         # Check that all expected fields are present
         expected_fields = [
             "_VERSION", "_FILE_NAME", "_FILE_SIZE", "_NUM_ADDED_FILES",
-            "_NUM_DELETED_FILES", "_PARTITION_STATS", "_SCHEMA_ID"
+            "_NUM_DELETED_FILES", "_PARTITION_STATS", "_SCHEMA_ID",
+            "_MIN_ROW_ID", "_MAX_ROW_ID",
         ]
 
         for field_name in expected_fields:
@@ -117,6 +144,16 @@ class ManifestSchemaTest(unittest.TestCase):
         self.assertEqual(field_map["_NUM_DELETED_FILES"]["type"], "long")
         self.assertEqual(field_map["_PARTITION_STATS"]["type"], PARTITION_STATS_SCHEMA)
         self.assertEqual(field_map["_SCHEMA_ID"]["type"], "long")
+        self.assertEqual(field_map["_MIN_ROW_ID"]["type"], ["null", "long"])
+        self.assertEqual(field_map["_MAX_ROW_ID"]["type"], ["null", "long"])
+        self.assertIsNone(
+            field_map["_MIN_ROW_ID"].get("default"),
+            "_MIN_ROW_ID should have default None for backward compatibility",
+        )
+        self.assertIsNone(
+            field_map["_MAX_ROW_ID"].get("default"),
+            "_MAX_ROW_ID should have default None for backward compatibility",
+        )
 
     def test_schema_references(self):
         """Test that schema references are correctly used."""
@@ -149,3 +186,48 @@ class ManifestSchemaTest(unittest.TestCase):
             PARTITION_STATS_SCHEMA["name"]
         ]
         self.assertEqual(len(names), len(set(names)), "Schema names should be unique")
+
+    def test_read_legacy_manifest_list(self):
+        temp_dir = tempfile.mkdtemp(prefix="manifest_schema_test_")
+        table_path = f"file://{temp_dir}"
+        file_io = LocalFileIO(table_path, Options({}))
+        os.makedirs(os.path.join(temp_dir, "manifest"), exist_ok=True)
+        manifest_list_name = "legacy-manifest-list-avro"
+
+        legacy_record = {
+            "_VERSION": 2,
+            "_FILE_NAME": "data-12345.parquet",
+            "_FILE_SIZE": 1000,
+            "_NUM_ADDED_FILES": 1,
+            "_NUM_DELETED_FILES": 0,
+            "_PARTITION_STATS": {
+                "_MIN_VALUES": _empty_partition_stats_bytes(),
+                "_MAX_VALUES": _empty_partition_stats_bytes(),
+                "_NULL_COUNTS": None,
+            },
+            "_SCHEMA_ID": 0,
+        }
+        with self.assertRaises(KeyError):
+            _ = legacy_record["_MIN_ROW_ID"]
+        self.assertIsNone(legacy_record.get("_MIN_ROW_ID"))
+        self.assertIsNone(legacy_record.get("_MAX_ROW_ID"))
+
+        buffer = BytesIO()
+        fastavro.writer(buffer, LEGACY_MANIFEST_FILE_META_SCHEMA, [legacy_record])
+        with file_io.new_output_stream(f"{table_path}/manifest/{manifest_list_name}") as out:
+            out.write(buffer.getvalue())
+
+        table = Mock()
+        table.table_path = table_path
+        table.file_io = file_io
+        table.partition_keys_fields = []
+        manager = ManifestListManager(table)
+        metas = manager.read(manifest_list_name)
+
+        self.assertEqual(len(metas), 1)
+        meta = metas[0]
+        self.assertEqual(meta.file_name, "data-12345.parquet")
+        self.assertEqual(meta.file_size, 1000)
+        self.assertEqual(meta.schema_id, 0)
+        self.assertIsNone(meta.min_row_id)
+        self.assertIsNone(meta.max_row_id)

Reply via email to