zeroshade commented on code in PR #177:
URL: https://github.com/apache/iceberg-go/pull/177#discussion_r1817390906
##########
manifest.go:
##########
@@ -567,6 +570,97 @@ func ReadManifestList(in io.Reader) ([]ManifestFile,
error) {
return out, dec.Error()
}
+// WriteManifestListV2 writes a list of v2 manifest files to an avro file.
+func WriteManifestListV2(out io.Writer, files []ManifestFile) error {
+ return writeManifestList(out, files, 2)
+}
+
+// WriteManifestListV1 writes a list of v1 manifest files to an avro file.
+func WriteManifestListV1(out io.Writer, files []ManifestFile) error {
+ return writeManifestList(out, files, 1)
+}
Review Comment:
Since we enforce that the version has to match for all of the manifest
files, we can avoid needing two separate functions and simply determine our
version from the version of the first file in the slice, erroring on an empty
slice.
I think that would be a better API than having separate
`WriteManifestListV1` and `WriteManifestListV2` functions.
##########
manifest.go:
##########
@@ -567,6 +570,97 @@ func ReadManifestList(in io.Reader) ([]ManifestFile,
error) {
return out, dec.Error()
}
+// WriteManifestListV2 writes a list of v2 manifest files to an avro file.
+func WriteManifestListV2(out io.Writer, files []ManifestFile) error {
+ return writeManifestList(out, files, 2)
+}
+
+// WriteManifestListV1 writes a list of v1 manifest files to an avro file.
+func WriteManifestListV1(out io.Writer, files []ManifestFile) error {
+ return writeManifestList(out, files, 1)
+}
+
+func writeManifestList(out io.Writer, files []ManifestFile, version int) error
{
+ for _, file := range files {
+ if file.Version() != version {
+ return fmt.Errorf(
+ "%w: ManifestFile '%s' has non-matching version
%d instead of %d",
+ ErrInvalidArgument, file.FilePath(),
file.Version(), version,
+ )
+ }
+ }
+
+ var key string
+ switch version {
+ case 1:
+ key = internal.ManifestListV1Key
+ case 2:
+ key = internal.ManifestListV2Key
+ default:
+ return fmt.Errorf("%w: non-recognized version %d",
ErrInvalidArgument, version)
+ }
+
+ enc, err := ocf.NewEncoderWithSchema(
+ internal.AvroSchemaCache.Get(key),
+ out, ocf.WithMetadata(map[string][]byte{
+ "format-version": []byte(strconv.Itoa(version)),
+ }),
+ ocf.WithCodec(ocf.Deflate),
+ )
+ if err != nil {
+ return err
+ }
+
+ for _, file := range files {
+ if err := enc.Encode(file); err != nil {
+ return err
+ }
+ }
+
+ return enc.Close()
+}
+
+// WriteManifestEntriesV2 writes a list of v2 manifest entries to an avro file.
+func WriteManifestEntriesV2(out io.Writer, entries []ManifestEntry) error {
+ return writeManifestEntries(out, entries, 2)
+}
+
+// WriteManifestEntriesV1 writes a list of v1 manifest entries to an avro file.
+func WriteManifestEntriesV1(out io.Writer, entries []ManifestEntry) error {
+ return writeManifestEntries(out, entries, 1)
+}
+
+func writeManifestEntries(out io.Writer, entries []ManifestEntry, version int)
error {
Review Comment:
do we need to validate that all the entries are the same version like we do
for the ManifestList?
If so, then my previous comment applies here too for simplifying the API
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]