This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 2fcff157 perf(table): dedupe shared manifest reads during snapshot
expiration (#1118)
2fcff157 is described below
commit 2fcff157abdaec817077f423da54c754ee8634ae
Author: Ondřej Pavela <[email protected]>
AuthorDate: Mon May 25 09:13:54 2026 +0200
perf(table): dedupe shared manifest reads during snapshot expiration (#1118)
Fixes https://github.com/apache/iceberg-go/issues/1117
### What changed
Reworked `removeSnapshotsUpdate.PostCommit` so each unique manifest file
is opened at most once per call, regardless of how many expired or
retained snapshots reference it.
Three steps, all deduped:
1) Build the set of manifest paths reachable from any retained snapshot,
reading only manifest-lists. Cache the unique `ManifestFile` descriptors
in a single slice, deduplicated by manifest path, so the retained-side
pass below doesn't re-download each list.
2) Walk expired snapshots' manifest lists; for each manifest, skip it if
it's in the retained set (its data files are live by definition and the
manifest itself must not be deleted) or if a prior expired snapshot
already enumerated it. Otherwise read its entries once.
3) Subtract live data files via a single walk over each unique retained
manifest. DELETED entries remain tombstones (unchanged from prior
semantics).
### Behavior
Semantically equivalent to the previous implementation — the final
`filesToDelete` set is the same on well-formed metadata. No spec change,
no API change. The only difference is the I/O cost.
### Performance impact
For a 491-snapshot incremental-append table where expiring 490 snapshots
previously triggered ~sum(1..490) ≈ 120,000 manifest-file downloads, the
rewrite reduces that to roughly the count of unique orphaned manifests
(a few hundred in practice). Two-to-three orders of magnitude fewer
object-store reads, in our test.
---
table/updates.go | 50 ++++++--
table/updates_test.go | 335 +++++++++++++++++++++++++++++++++++---------------
2 files changed, 276 insertions(+), 109 deletions(-)
diff --git a/table/updates.go b/table/updates.go
index a1596a10..31352d44 100644
--- a/table/updates.go
+++ b/table/updates.go
@@ -506,10 +506,33 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx
context.Context, preTable *Table,
}
}
+ // Preload retained manifest lists once so the live-data-file pass
below can walk them without re-fetching.
+ retainedSnapshots := postTable.Metadata().Snapshots()
+ retainedManifests := make(map[string]struct{})
+ retainedSnapshotManifests := make([]iceberg.ManifestFile, 0,
len(retainedSnapshots))
+ for _, snap := range retainedSnapshots {
+ mans, err := snap.Manifests(prefs)
+ if err != nil {
+ return err
+ }
+ for _, man := range mans {
+ manPath := man.FilePath()
+ if _, ok := retainedManifests[manPath]; !ok {
+ retainedManifests[manPath] = struct{}{}
+ retainedSnapshotManifests =
append(retainedSnapshotManifests, man)
+ }
+ }
+ }
+
+ // Open each orphaned manifest at most once: skip manifests that
+ // retained snapshots still reference, and dedupe across expired
+ // snapshots that share manifests by reference.
+ visitedManifests := make(map[string]struct{})
+
for _, snapId := range u.SnapshotIDs {
snap := preTable.SnapshotByID(snapId)
if snap == nil {
- return errors.New("missing snapshot")
+ return fmt.Errorf("missing snapshot %d", snapId)
}
mans, err := snap.Manifests(prefs)
@@ -518,7 +541,17 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx
context.Context, preTable *Table,
}
for _, man := range mans {
- filesToDelete[man.FilePath()] = struct{}{}
+ manPath := man.FilePath()
+
+ if _, ok := retainedManifests[manPath]; ok {
+ continue
+ }
+ if _, ok := visitedManifests[manPath]; ok {
+ continue
+ }
+ visitedManifests[manPath] = struct{}{}
+
+ filesToDelete[manPath] = struct{}{}
for entry, err := range man.Entries(prefs, false) {
if err != nil {
@@ -529,15 +562,10 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx
context.Context, preTable *Table,
}
}
- for _, snap := range postTable.Metadata().Snapshots() {
- mans, err := snap.Manifests(prefs)
- if err != nil {
- return err
- }
-
- for _, man := range mans {
- delete(filesToDelete, man.FilePath())
-
+ // Keep files still referenced (non-DELETED) by retained manifests,
+ // including data files carried forward as EXISTING by manifest merges.
+ if len(filesToDelete) > 0 {
+ for _, man := range retainedSnapshotManifests {
for entry, err := range man.Entries(prefs, false) {
if err != nil {
return err
diff --git a/table/updates_test.go b/table/updates_test.go
index 73b40e2f..3273713b 100644
--- a/table/updates_test.go
+++ b/table/updates_test.go
@@ -18,34 +18,109 @@
package table
import (
+ "bytes"
"context"
"encoding/json"
+ "fmt"
"testing"
"github.com/apache/iceberg-go"
+ iceio "github.com/apache/iceberg-go/io"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
-func TestRemoveSnapshotsPostCommitSkipped(t *testing.T) {
- update := NewRemoveSnapshotsUpdate([]int64{1, 2, 3}, false)
+func testFSF(io iceio.IO) FSysF {
+ return func(context.Context) (iceio.IO, error) { return io, nil }
+}
- // PostCommit should return nil immediately when postCommit is false,
- // without accessing the table arguments (which are nil here)
- err := update.PostCommit(context.Background(), nil, nil)
- assert.NoError(t, err)
+// trackingCallsIO wraps trackingIO to count Open and Remove calls per path.
+type trackingCallsIO struct {
+ *trackingIO
+ openCount map[string]int
+ removeCount map[string]int
}
-func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t *testing.T) {
- // preTable has snapshot 1 with associated statistics and partition
statistics files.
- // postTable has no snapshots and no statistics (snapshot 1 has been
expired).
- // PostCommit must delete the statistics paths that belonged to the
expired snapshot.
- const preMeta = `{
+func newTrackingCallsIO() *trackingCallsIO {
+ return &trackingCallsIO{
+ trackingIO: newTrackingIO(),
+ openCount: make(map[string]int),
+ removeCount: make(map[string]int),
+ }
+}
+
+func (c *trackingCallsIO) Open(name string) (iceio.File, error) {
+ c.openCount[name]++
+
+ return c.trackingIO.Open(name)
+}
+
+func (c *trackingCallsIO) Remove(name string) error {
+ c.removeCount[name]++
+
+ return c.trackingIO.Remove(name)
+}
+
+// writeManifest writes a v2 data manifest with a single ADDED
+// entry pointing at dataPath into tio.files at manifestPath, and returns a
+// ManifestFile descriptor with seqNum pre-assigned so the same descriptor
+// can be referenced from multiple manifest lists.
+func writeManifest(t *testing.T, tio *trackingIO, snapshotID, seqNum int64,
manifestPath, dataPath string) iceberg.ManifestFile {
+ t.Helper()
+
+ dataSchema := iceberg.NewSchema(0,
+ iceberg.NestedField{ID: 1, Name: "x", Type:
iceberg.PrimitiveTypes.Int64, Required: true},
+ )
+ spec := iceberg.NewPartitionSpec()
+
+ df, err := iceberg.NewDataFileBuilder(
+ spec, iceberg.EntryContentData, dataPath, iceberg.ParquetFile,
+ nil, nil, nil, 1, 1024,
+ )
+ require.NoError(t, err)
+
+ entry := iceberg.NewManifestEntryBuilder(iceberg.EntryStatusADDED,
&snapshotID, df.Build()).
+ SequenceNum(seqNum).
+ Build()
+
+ var buf bytes.Buffer
+ _, err = iceberg.WriteManifest(manifestPath, &buf, 2, spec, dataSchema,
snapshotID, []iceberg.ManifestEntry{entry})
+ require.NoError(t, err)
+ require.NoError(t, tio.WriteFile(manifestPath, buf.Bytes()))
+
+ return iceberg.NewManifestFile(2, manifestPath, int64(buf.Len()), 0,
snapshotID).
+ SequenceNum(seqNum, seqNum).
+ AddedFiles(1).
+ AddedRows(1).
+ Build()
+}
+
+// writeManifestList writes a v2 manifest list referencing the
+// given manifests into tio.files at listPath.
+func writeManifestList(t *testing.T, tio *trackingIO, snapshotID int64,
listPath string, manifests []iceberg.ManifestFile) {
+ t.Helper()
+
+ var buf bytes.Buffer
+ seqNum := int64(1)
+ require.NoError(t, iceberg.WriteManifestList(2, &buf, snapshotID, nil,
&seqNum, 0, manifests))
+ require.NoError(t, tio.WriteFile(listPath, buf.Bytes()))
+}
+
+// metaJSONOpts configures the metadata document built by buildMetaJSON.
+type metaJSONOpts struct {
+ snapshots string
+ statistics string
+ partitionStatistics string
+}
+
+// buildMetaJSON returns the minimal v2 metadata document that
ParseMetadataString will accept
+func buildMetaJSON(o metaJSONOpts) string {
+ return fmt.Sprintf(`{
"format-version": 2,
"table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
"location": "s3://bucket/table",
- "last-sequence-number": 1,
+ "last-sequence-number": 0,
"last-updated-ms": 1000,
"last-column-id": 1,
"current-schema-id": 0,
@@ -55,43 +130,42 @@ func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t
*testing.T) {
"last-partition-id": 0,
"default-sort-order-id": 0,
"sort-orders": [{"order-id":0,"fields":[]}],
- "current-snapshot-id": 1,
- "snapshots":
[{"snapshot-id":1,"timestamp-ms":1000,"sequence-number":1,"schema-id":0}],
- "snapshot-log": [],
- "metadata-log": [],
- "statistics":
[{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}],
- "partition-statistics":
[{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50}]
- }`
- const postMeta = `{
- "format-version": 2,
- "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
- "location": "s3://bucket/table",
- "last-sequence-number": 1,
- "last-updated-ms": 2000,
- "last-column-id": 1,
- "current-schema-id": 0,
- "schemas":
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
- "default-spec-id": 0,
- "partition-specs": [{"spec-id":0,"fields":[]}],
- "last-partition-id": 0,
- "default-sort-order-id": 0,
- "sort-orders": [{"order-id":0,"fields":[]}],
- "snapshot-log": [],
- "metadata-log": []
- }`
+ "snapshots": [%s],
+ "statistics": [%s],
+ "partition-statistics": [%s]
+ }`, o.snapshots, o.statistics, o.partitionStatistics)
+}
+
+func TestRemoveSnapshotsPostCommitSkipped(t *testing.T) {
+ update := NewRemoveSnapshotsUpdate([]int64{1, 2, 3}, false)
+
+ // PostCommit should return nil immediately when postCommit is false,
+ // without accessing the table arguments (which are nil here)
+ err := update.PostCommit(context.Background(), nil, nil)
+ assert.NoError(t, err)
+}
+
+func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t *testing.T) {
+ // preTable has snapshot 1 with associated statistics and partition
statistics files.
+ // postTable has no snapshots and no statistics (snapshot 1 has been
expired).
+ // PostCommit must delete the statistics paths that belonged to the
expired snapshot.
+ preMeta := buildMetaJSON(metaJSONOpts{
+ snapshots: `{"snapshot-id":1,"timestamp-ms":1000}`,
+ statistics:
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+ partitionStatistics:
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50}`,
+ })
+ postMeta := buildMetaJSON(metaJSONOpts{})
pre, err := ParseMetadataString(preMeta)
require.NoError(t, err)
post, err := ParseMetadataString(postMeta)
require.NoError(t, err)
- // Pass *trackingIO to createTestTransaction — Go converts it to
iceio.IO implicitly,
tio := newTrackingIO()
tio.files["s3://bucket/stats/snap1.puffin"] = []byte("puffin")
tio.files["s3://bucket/stats/snap1-part.puffin"] = []byte("puffin")
- txn := createTestTransaction(t, tio, iceberg.NewPartitionSpec())
- fsF := txn.tbl.fsF
+ fsF := testFSF(tio)
preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF,
nil)
@@ -107,63 +181,18 @@ func
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
// pre: snapshots 1 and 2, statistics for both.
// post: snapshot 1 expired, snapshot 2 kept with its statistics still
present.
// PostCommit must delete only snap1's statistics files; snap2's must
survive.
- const preMeta = `{
- "format-version": 2,
- "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
- "location": "s3://bucket/table",
- "last-sequence-number": 2,
- "last-updated-ms": 1000,
- "last-column-id": 1,
- "current-schema-id": 0,
- "schemas":
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
- "default-spec-id": 0,
- "partition-specs": [{"spec-id":0,"fields":[]}],
- "last-partition-id": 0,
- "default-sort-order-id": 0,
- "sort-orders": [{"order-id":0,"fields":[]}],
- "current-snapshot-id": 2,
- "snapshots": [
-
{"snapshot-id":1,"timestamp-ms":1000,"sequence-number":1,"schema-id":0},
-
{"snapshot-id":2,"timestamp-ms":2000,"sequence-number":2,"schema-id":0}
- ],
- "snapshot-log": [],
- "metadata-log": [],
- "statistics": [
-
{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]},
-
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}
- ],
- "partition-statistics": [
-
{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50},
-
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}
- ]
- }`
- const postMeta = `{
- "format-version": 2,
- "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
- "location": "s3://bucket/table",
- "last-sequence-number": 2,
- "last-updated-ms": 3000,
- "last-column-id": 1,
- "current-schema-id": 0,
- "schemas":
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
- "default-spec-id": 0,
- "partition-specs": [{"spec-id":0,"fields":[]}],
- "last-partition-id": 0,
- "default-sort-order-id": 0,
- "sort-orders": [{"order-id":0,"fields":[]}],
- "current-snapshot-id": 2,
- "snapshots": [
-
{"snapshot-id":2,"timestamp-ms":2000,"sequence-number":2,"schema-id":0}
- ],
- "snapshot-log": [],
- "metadata-log": [],
- "statistics": [
-
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}
- ],
- "partition-statistics": [
-
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}
- ]
- }`
+ preMeta := buildMetaJSON(metaJSONOpts{
+ snapshots:
`{"snapshot-id":1,"timestamp-ms":1000},{"snapshot-id":2,"timestamp-ms":2000}`,
+ statistics:
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]},`
+
+
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+ partitionStatistics:
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50},`
+
+
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}`,
+ })
+ postMeta := buildMetaJSON(metaJSONOpts{
+ snapshots: `{"snapshot-id":2,"timestamp-ms":2000}`,
+ statistics:
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+ partitionStatistics:
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}`,
+ })
pre, err := ParseMetadataString(preMeta)
require.NoError(t, err)
@@ -176,8 +205,7 @@ func
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
tio.files["s3://bucket/stats/snap2.puffin"] = []byte("puffin")
tio.files["s3://bucket/stats/snap2-part.puffin"] = []byte("puffin")
- txn := createTestTransaction(t, tio, iceberg.NewPartitionSpec())
- fsF := txn.tbl.fsF
+ fsF := testFSF(tio)
preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF,
nil)
@@ -194,6 +222,117 @@ func
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
assert.Contains(t, tio.files, "s3://bucket/stats/snap2-part.puffin")
}
+func TestRemoveSnapshotsPostCommitSharedManifestRetained(t *testing.T) {
+ // Manifest M is referenced by both an expired snapshot (1) and a
+ // retained snapshot (2). PostCommit must keep M and its data file
+ // because a retained snapshot still references them; only snap1's
+ // manifest list should be deleted.
+ const (
+ dataPath = "s3://bucket/data/file-1.parquet"
+ manifestPath = "s3://bucket/meta/manifest-shared.avro"
+ manifestList1 = "s3://bucket/meta/snap-1.avro"
+ manifestList2 = "s3://bucket/meta/snap-2.avro"
+ )
+
+ tio := newTrackingIO()
+ mf := writeManifest(t, tio, 1, 1, manifestPath, dataPath)
+ writeManifestList(t, tio, 1, manifestList1, []iceberg.ManifestFile{mf})
+ writeManifestList(t, tio, 2, manifestList2, []iceberg.ManifestFile{mf})
+ tio.files[dataPath] = []byte("data")
+
+ preMeta := buildMetaJSON(metaJSONOpts{
+ snapshots: fmt.Sprintf(
+
`{"snapshot-id":1,"timestamp-ms":1000,"manifest-list":%q},`+
+
`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`,
+ manifestList1, manifestList2),
+ })
+ postMeta := buildMetaJSON(metaJSONOpts{
+ snapshots:
fmt.Sprintf(`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`,
manifestList2),
+ })
+
+ pre, err := ParseMetadataString(preMeta)
+ require.NoError(t, err)
+ post, err := ParseMetadataString(postMeta)
+ require.NoError(t, err)
+
+ fsF := testFSF(tio)
+ preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
+ postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF,
nil)
+
+ update := NewRemoveSnapshotsUpdate([]int64{1}, true)
+ err = update.PostCommit(context.Background(), preTable, postTable)
+ require.NoError(t, err)
+
+ // snap1's manifest list must be deleted.
+ assert.NotContains(t, tio.files, manifestList1)
+
+ // The shared manifest, its data file, and the retained snapshot's
+ // manifest list must all survive because snap2 still references them.
+ assert.Contains(t, tio.files, manifestPath)
+ assert.Contains(t, tio.files, dataPath)
+ assert.Contains(t, tio.files, manifestList2)
+}
+
+func TestRemoveSnapshotsPostCommitSharedManifestExpiredOnce(t *testing.T) {
+ // Manifest M is referenced by two expired snapshots (1 and 2) and no
+ // retained snapshot. PostCommit must delete M and its data file, and
+ // must open M exactly once across the two expired snapshots — the
+ // dedup invariant the PR is built on. Open count on the shared
+ // manifest is the perf signal: a regression that drops the dedup
+ // would open it twice.
+ const (
+ dataPath = "s3://bucket/data/file-1.parquet"
+ manifestPath = "s3://bucket/meta/manifest-shared.avro"
+ manifestList1 = "s3://bucket/meta/snap-1.avro"
+ manifestList2 = "s3://bucket/meta/snap-2.avro"
+ )
+
+ tio := newTrackingCallsIO()
+ mf := writeManifest(t, tio.trackingIO, 1, 1, manifestPath, dataPath)
+ writeManifestList(t, tio.trackingIO, 1, manifestList1,
[]iceberg.ManifestFile{mf})
+ writeManifestList(t, tio.trackingIO, 2, manifestList2,
[]iceberg.ManifestFile{mf})
+ tio.files[dataPath] = []byte("data")
+
+ preMeta := buildMetaJSON(metaJSONOpts{
+ snapshots: fmt.Sprintf(
+
`{"snapshot-id":1,"timestamp-ms":1000,"manifest-list":%q},`+
+
`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`,
+ manifestList1, manifestList2),
+ })
+ postMeta := buildMetaJSON(metaJSONOpts{})
+
+ pre, err := ParseMetadataString(preMeta)
+ require.NoError(t, err)
+ post, err := ParseMetadataString(postMeta)
+ require.NoError(t, err)
+
+ fsF := testFSF(tio)
+ preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
+ postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF,
nil)
+
+ update := NewRemoveSnapshotsUpdate([]int64{1, 2}, true)
+ err = update.PostCommit(context.Background(), preTable, postTable)
+ require.NoError(t, err)
+
+ // Both manifest lists, the shared manifest, and its data file must
+ // all be deleted.
+ assert.NotContains(t, tio.files, manifestList1)
+ assert.NotContains(t, tio.files, manifestList2)
+ assert.NotContains(t, tio.files, manifestPath)
+ assert.NotContains(t, tio.files, dataPath)
+
+ // The shared manifest must be opened exactly once: snap1's pass
+ // records it in visitedManifests, snap2's pass must skip it.
+ assert.Equal(t, 1, tio.openCount[manifestPath],
+ "shared manifest must be read only once across expired
snapshots")
+
+ // Each file must be deleted exactly once.
+ assert.Equal(t, 1, tio.removeCount[manifestPath],
+ "shared manifest must be deleted exactly once")
+ assert.Equal(t, 1, tio.removeCount[dataPath],
+ "data file must be deleted exactly once")
+}
+
func TestUnmarshalUpdates(t *testing.T) {
spec := iceberg.NewPartitionSpecID(3,
iceberg.PartitionField{