This is an automated email from the ASF dual-hosted git repository.

honahx pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 8cdf4abd 🐛 Write fields instead of spec object (#846)
8cdf4abd is described below

commit 8cdf4abdc5e4779ff888c62041f027bb3309d4c5
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Jun 24 08:47:35 2024 +0200

    🐛 Write fields instead of spec object (#846)
---
 pyiceberg/manifest.py        | 46 ++++++++++++++++++++------------------------
 tests/utils/test_manifest.py |  4 ++--
 2 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index defe5958..bf5749ce 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -31,13 +31,15 @@ from typing import (
     Type,
 )
 
+from pydantic_core import to_json
+
 from pyiceberg.avro.file import AvroFile, AvroOutputFile
 from pyiceberg.conversions import to_bytes
 from pyiceberg.exceptions import ValidationError
 from pyiceberg.io import FileIO, InputFile, OutputFile
 from pyiceberg.partitioning import PartitionSpec
 from pyiceberg.schema import Schema
-from pyiceberg.typedef import EMPTY_DICT, Record, TableVersion
+from pyiceberg.typedef import Record, TableVersion
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -645,7 +647,6 @@ class ManifestWriter(ABC):
     _output_file: OutputFile
     _writer: AvroOutputFile[ManifestEntry]
     _snapshot_id: int
-    _meta: Dict[str, str]
     _added_files: int
     _added_rows: int
     _existing_files: int
@@ -655,15 +656,12 @@ class ManifestWriter(ABC):
     _min_data_sequence_number: Optional[int]
     _partitions: List[Record]
 
-    def __init__(
-        self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int, meta: Dict[str, str] = EMPTY_DICT
-    ) -> None:
+    def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int) -> None:
         self.closed = False
         self._spec = spec
         self._schema = schema
         self._output_file = output_file
         self._snapshot_id = snapshot_id
-        self._meta = meta
 
         self._added_files = 0
         self._added_rows = 0
@@ -697,6 +695,15 @@ class ManifestWriter(ABC):
     @abstractmethod
     def version(self) -> TableVersion: ...
 
+    @property
+    def _meta(self) -> Dict[str, str]:
+        return {
+            "schema": self._schema.model_dump_json(),
+            "partition-spec": to_json(self._spec.fields).decode("utf-8"),
+            "partition-spec-id": str(self._spec.spec_id),
+            "format-version": str(self.version),
+        }
+
     def _with_partition(self, format_version: TableVersion) -> Schema:
         data_file_type = data_file_with_partition(
             format_version=format_version, partition_type=self._spec.partition_type(self._schema)
@@ -771,12 +778,6 @@ class ManifestWriterV1(ManifestWriter):
             schema,
             output_file,
             snapshot_id,
-            {
-                "schema": schema.model_dump_json(),
-                "partition-spec": spec.model_dump_json(),
-                "partition-spec-id": str(spec.spec_id),
-                "format-version": "1",
-            },
         )
 
     def content(self) -> ManifestContent:
@@ -792,19 +793,7 @@ class ManifestWriterV1(ManifestWriter):
 
 class ManifestWriterV2(ManifestWriter):
     def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int):
-        super().__init__(
-            spec,
-            schema,
-            output_file,
-            snapshot_id,
-            meta={
-                "schema": schema.model_dump_json(),
-                "partition-spec": spec.model_dump_json(),
-                "partition-spec-id": str(spec.spec_id),
-                "format-version": "2",
-                "content": "data",
-            },
-        )
+        super().__init__(spec, schema, output_file, snapshot_id)
 
     def content(self) -> ManifestContent:
         return ManifestContent.DATA
@@ -813,6 +802,13 @@ class ManifestWriterV2(ManifestWriter):
     def version(self) -> TableVersion:
         return 2
 
+    @property
+    def _meta(self) -> Dict[str, str]:
+        return {
+            **super()._meta,
+            "content": "data",
+        }
+
     def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
         if entry.data_sequence_number is None:
             if entry.snapshot_id is not None and entry.snapshot_id != self._snapshot_id:
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index 8bb03cd8..a812b384 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -348,8 +348,8 @@ def test_write_manifest(
 
         expected_metadata = {
             "schema": test_schema.model_dump_json(),
-            "partition-spec": test_spec.model_dump_json(),
-            "partition-spec-id": str(test_spec.spec_id),
+            "partition-spec": """[{"source-id":1,"field-id":1,"transform":"identity","name":"VendorID"},{"source-id":2,"field-id":2,"transform":"identity","name":"tpep_pickup_datetime"}]""",
+            "partition-spec-id": str(demo_manifest_file.partition_spec_id),
             "format-version": str(format_version),
         }
         _verify_metadata_with_fastavro(

Reply via email to