This is an automated email from the ASF dual-hosted git repository.

laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 2fcff157 perf(table): dedupe shared manifest reads during snapshot 
expiration (#1118)
2fcff157 is described below

commit 2fcff157abdaec817077f423da54c754ee8634ae
Author: Ondřej Pavela <[email protected]>
AuthorDate: Mon May 25 09:13:54 2026 +0200

    perf(table): dedupe shared manifest reads during snapshot expiration (#1118)
    
    Fixes https://github.com/apache/iceberg-go/issues/1117
    
    ### What changed
    Reworked `removeSnapshotsUpdate.PostCommit` so each unique manifest file
    is opened at most once per call, regardless of how many expired or
    retained snapshots reference it.
    
    Three steps, each deduped:
    
    1) Build the set of manifest paths reachable from any retained snapshot,
    reading only manifest-lists. Cache the resulting `[]ManifestFile`
    entries, deduplicated by manifest path, so the retained-side pass
    (step 3) doesn't re-download each list.
    2) Walk expired snapshots' manifest lists; for each manifest, skip if
    it's in the retained set (its data files are live by definition and the
    manifest itself must not be deleted) or if a prior expired snapshot
    already enumerated it. Otherwise read its entries once.
    3) Subtract live data files via a single walk over each unique retained
    manifest. DELETED entries remain tombstones (unchanged from prior
    semantics).
    
    ### Behavior
    Semantically equivalent to the previous implementation — the final
    `filesToDelete` set is the same on well-formed metadata. No spec change,
    no API change. The only difference is the I/O cost.
    
    ### Performance impact
    For a 491-snapshot incremental-append table where expiring 490 snapshots
    previously triggered ~sum(1..490) ≈ 120,000 manifest-file downloads, the
    rewrite reduces that to roughly the count of unique orphaned manifests
    (a few hundred in practice). Two-to-three orders of magnitude fewer
    object-store reads, in our test.
---
 table/updates.go      |  50 ++++++--
 table/updates_test.go | 335 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 276 insertions(+), 109 deletions(-)

diff --git a/table/updates.go b/table/updates.go
index a1596a10..31352d44 100644
--- a/table/updates.go
+++ b/table/updates.go
@@ -506,10 +506,33 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx 
context.Context, preTable *Table,
                }
        }
 
+       // Preload retained manifest lists once so the live-data-file pass 
below can walk them without re-fetching.
+       retainedSnapshots := postTable.Metadata().Snapshots()
+       retainedManifests := make(map[string]struct{})
+       retainedSnapshotManifests := make([]iceberg.ManifestFile, 0, 
len(retainedSnapshots))
+       for _, snap := range retainedSnapshots {
+               mans, err := snap.Manifests(prefs)
+               if err != nil {
+                       return err
+               }
+               for _, man := range mans {
+                       manPath := man.FilePath()
+                       if _, ok := retainedManifests[manPath]; !ok {
+                               retainedManifests[manPath] = struct{}{}
+                               retainedSnapshotManifests = 
append(retainedSnapshotManifests, man)
+                       }
+               }
+       }
+
+       // Open each orphaned manifest at most once: skip manifests that
+       // retained snapshots still reference, and dedupe across expired
+       // snapshots that share manifests by reference.
+       visitedManifests := make(map[string]struct{})
+
        for _, snapId := range u.SnapshotIDs {
                snap := preTable.SnapshotByID(snapId)
                if snap == nil {
-                       return errors.New("missing snapshot")
+                       return fmt.Errorf("missing snapshot %d", snapId)
                }
 
                mans, err := snap.Manifests(prefs)
@@ -518,7 +541,17 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx 
context.Context, preTable *Table,
                }
 
                for _, man := range mans {
-                       filesToDelete[man.FilePath()] = struct{}{}
+                       manPath := man.FilePath()
+
+                       if _, ok := retainedManifests[manPath]; ok {
+                               continue
+                       }
+                       if _, ok := visitedManifests[manPath]; ok {
+                               continue
+                       }
+                       visitedManifests[manPath] = struct{}{}
+
+                       filesToDelete[manPath] = struct{}{}
 
                        for entry, err := range man.Entries(prefs, false) {
                                if err != nil {
@@ -529,15 +562,10 @@ func (u *removeSnapshotsUpdate) PostCommit(ctx 
context.Context, preTable *Table,
                }
        }
 
-       for _, snap := range postTable.Metadata().Snapshots() {
-               mans, err := snap.Manifests(prefs)
-               if err != nil {
-                       return err
-               }
-
-               for _, man := range mans {
-                       delete(filesToDelete, man.FilePath())
-
+       // Keep files still referenced (non-DELETED) by retained manifests,
+       // including data files carried forward as EXISTING by manifest merges.
+       if len(filesToDelete) > 0 {
+               for _, man := range retainedSnapshotManifests {
                        for entry, err := range man.Entries(prefs, false) {
                                if err != nil {
                                        return err
diff --git a/table/updates_test.go b/table/updates_test.go
index 73b40e2f..3273713b 100644
--- a/table/updates_test.go
+++ b/table/updates_test.go
@@ -18,34 +18,109 @@
 package table
 
 import (
+       "bytes"
        "context"
        "encoding/json"
+       "fmt"
        "testing"
 
        "github.com/apache/iceberg-go"
+       iceio "github.com/apache/iceberg-go/io"
        "github.com/google/uuid"
        "github.com/stretchr/testify/assert"
        "github.com/stretchr/testify/require"
 )
 
-func TestRemoveSnapshotsPostCommitSkipped(t *testing.T) {
-       update := NewRemoveSnapshotsUpdate([]int64{1, 2, 3}, false)
+func testFSF(io iceio.IO) FSysF {
+       return func(context.Context) (iceio.IO, error) { return io, nil }
+}
 
-       // PostCommit should return nil immediately when postCommit is false,
-       // without accessing the table arguments (which are nil here)
-       err := update.PostCommit(context.Background(), nil, nil)
-       assert.NoError(t, err)
+// trackingCallsIO wraps trackingIO to count Open and Remove calls per path.
+type trackingCallsIO struct {
+       *trackingIO
+       openCount   map[string]int
+       removeCount map[string]int
 }
 
-func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t *testing.T) {
-       // preTable has snapshot 1 with associated statistics and partition 
statistics files.
-       // postTable has no snapshots and no statistics (snapshot 1 has been 
expired).
-       // PostCommit must delete the statistics paths that belonged to the 
expired snapshot.
-       const preMeta = `{
+func newTrackingCallsIO() *trackingCallsIO {
+       return &trackingCallsIO{
+               trackingIO:  newTrackingIO(),
+               openCount:   make(map[string]int),
+               removeCount: make(map[string]int),
+       }
+}
+
+func (c *trackingCallsIO) Open(name string) (iceio.File, error) {
+       c.openCount[name]++
+
+       return c.trackingIO.Open(name)
+}
+
+func (c *trackingCallsIO) Remove(name string) error {
+       c.removeCount[name]++
+
+       return c.trackingIO.Remove(name)
+}
+
+// writeManifest writes a v2 data manifest with a single ADDED
+// entry pointing at dataPath into tio.files at manifestPath, and returns a
+// ManifestFile descriptor with seqNum pre-assigned so the same descriptor
+// can be referenced from multiple manifest lists.
+func writeManifest(t *testing.T, tio *trackingIO, snapshotID, seqNum int64, 
manifestPath, dataPath string) iceberg.ManifestFile {
+       t.Helper()
+
+       dataSchema := iceberg.NewSchema(0,
+               iceberg.NestedField{ID: 1, Name: "x", Type: 
iceberg.PrimitiveTypes.Int64, Required: true},
+       )
+       spec := iceberg.NewPartitionSpec()
+
+       df, err := iceberg.NewDataFileBuilder(
+               spec, iceberg.EntryContentData, dataPath, iceberg.ParquetFile,
+               nil, nil, nil, 1, 1024,
+       )
+       require.NoError(t, err)
+
+       entry := iceberg.NewManifestEntryBuilder(iceberg.EntryStatusADDED, 
&snapshotID, df.Build()).
+               SequenceNum(seqNum).
+               Build()
+
+       var buf bytes.Buffer
+       _, err = iceberg.WriteManifest(manifestPath, &buf, 2, spec, dataSchema, 
snapshotID, []iceberg.ManifestEntry{entry})
+       require.NoError(t, err)
+       require.NoError(t, tio.WriteFile(manifestPath, buf.Bytes()))
+
+       return iceberg.NewManifestFile(2, manifestPath, int64(buf.Len()), 0, 
snapshotID).
+               SequenceNum(seqNum, seqNum).
+               AddedFiles(1).
+               AddedRows(1).
+               Build()
+}
+
+// writeManifestList writes a v2 manifest list referencing the
+// given manifests into tio.files at listPath.
+func writeManifestList(t *testing.T, tio *trackingIO, snapshotID int64, 
listPath string, manifests []iceberg.ManifestFile) {
+       t.Helper()
+
+       var buf bytes.Buffer
+       seqNum := int64(1)
+       require.NoError(t, iceberg.WriteManifestList(2, &buf, snapshotID, nil, 
&seqNum, 0, manifests))
+       require.NoError(t, tio.WriteFile(listPath, buf.Bytes()))
+}
+
+// metaJSONOpts configures the metadata document built by buildMetaJSON.
+type metaJSONOpts struct {
+       snapshots           string
+       statistics          string
+       partitionStatistics string
+}
+
+// buildMetaJSON returns the minimal v2 metadata document that 
ParseMetadataString will accept
+func buildMetaJSON(o metaJSONOpts) string {
+       return fmt.Sprintf(`{
          "format-version": 2,
          "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
          "location": "s3://bucket/table",
-         "last-sequence-number": 1,
+         "last-sequence-number": 0,
          "last-updated-ms": 1000,
          "last-column-id": 1,
          "current-schema-id": 0,
@@ -55,43 +130,42 @@ func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t 
*testing.T) {
          "last-partition-id": 0,
          "default-sort-order-id": 0,
          "sort-orders": [{"order-id":0,"fields":[]}],
-         "current-snapshot-id": 1,
-         "snapshots": 
[{"snapshot-id":1,"timestamp-ms":1000,"sequence-number":1,"schema-id":0}],
-         "snapshot-log": [],
-         "metadata-log": [],
-         "statistics": 
[{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}],
-         "partition-statistics": 
[{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50}]
-       }`
-       const postMeta = `{
-         "format-version": 2,
-         "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
-         "location": "s3://bucket/table",
-         "last-sequence-number": 1,
-         "last-updated-ms": 2000,
-         "last-column-id": 1,
-         "current-schema-id": 0,
-         "schemas": 
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
-         "default-spec-id": 0,
-         "partition-specs": [{"spec-id":0,"fields":[]}],
-         "last-partition-id": 0,
-         "default-sort-order-id": 0,
-         "sort-orders": [{"order-id":0,"fields":[]}],
-         "snapshot-log": [],
-         "metadata-log": []
-       }`
+         "snapshots": [%s],
+         "statistics": [%s],
+         "partition-statistics": [%s]
+       }`, o.snapshots, o.statistics, o.partitionStatistics)
+}
+
+func TestRemoveSnapshotsPostCommitSkipped(t *testing.T) {
+       update := NewRemoveSnapshotsUpdate([]int64{1, 2, 3}, false)
+
+       // PostCommit should return nil immediately when postCommit is false,
+       // without accessing the table arguments (which are nil here)
+       err := update.PostCommit(context.Background(), nil, nil)
+       assert.NoError(t, err)
+}
+
+func TestRemoveSnapshotsPostCommitDeletesStatisticsFiles(t *testing.T) {
+       // preTable has snapshot 1 with associated statistics and partition 
statistics files.
+       // postTable has no snapshots and no statistics (snapshot 1 has been 
expired).
+       // PostCommit must delete the statistics paths that belonged to the 
expired snapshot.
+       preMeta := buildMetaJSON(metaJSONOpts{
+               snapshots:           `{"snapshot-id":1,"timestamp-ms":1000}`,
+               statistics:          
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+               partitionStatistics: 
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50}`,
+       })
+       postMeta := buildMetaJSON(metaJSONOpts{})
 
        pre, err := ParseMetadataString(preMeta)
        require.NoError(t, err)
        post, err := ParseMetadataString(postMeta)
        require.NoError(t, err)
 
-       // Pass *trackingIO to createTestTransaction — Go converts it to 
iceio.IO implicitly,
        tio := newTrackingIO()
        tio.files["s3://bucket/stats/snap1.puffin"] = []byte("puffin")
        tio.files["s3://bucket/stats/snap1-part.puffin"] = []byte("puffin")
 
-       txn := createTestTransaction(t, tio, iceberg.NewPartitionSpec())
-       fsF := txn.tbl.fsF
+       fsF := testFSF(tio)
        preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
        postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF, 
nil)
 
@@ -107,63 +181,18 @@ func 
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
        // pre:  snapshots 1 and 2, statistics for both.
        // post: snapshot 1 expired, snapshot 2 kept with its statistics still 
present.
        // PostCommit must delete only snap1's statistics files; snap2's must 
survive.
-       const preMeta = `{
-         "format-version": 2,
-         "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
-         "location": "s3://bucket/table",
-         "last-sequence-number": 2,
-         "last-updated-ms": 1000,
-         "last-column-id": 1,
-         "current-schema-id": 0,
-         "schemas": 
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
-         "default-spec-id": 0,
-         "partition-specs": [{"spec-id":0,"fields":[]}],
-         "last-partition-id": 0,
-         "default-sort-order-id": 0,
-         "sort-orders": [{"order-id":0,"fields":[]}],
-         "current-snapshot-id": 2,
-         "snapshots": [
-           
{"snapshot-id":1,"timestamp-ms":1000,"sequence-number":1,"schema-id":0},
-           
{"snapshot-id":2,"timestamp-ms":2000,"sequence-number":2,"schema-id":0}
-         ],
-         "snapshot-log": [],
-         "metadata-log": [],
-         "statistics": [
-           
{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]},
-           
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}
-         ],
-         "partition-statistics": [
-           
{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50},
-           
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}
-         ]
-       }`
-       const postMeta = `{
-         "format-version": 2,
-         "table-uuid": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
-         "location": "s3://bucket/table",
-         "last-sequence-number": 2,
-         "last-updated-ms": 3000,
-         "last-column-id": 1,
-         "current-schema-id": 0,
-         "schemas": 
[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"x","required":true,"type":"long"}]}],
-         "default-spec-id": 0,
-         "partition-specs": [{"spec-id":0,"fields":[]}],
-         "last-partition-id": 0,
-         "default-sort-order-id": 0,
-         "sort-orders": [{"order-id":0,"fields":[]}],
-         "current-snapshot-id": 2,
-         "snapshots": [
-           
{"snapshot-id":2,"timestamp-ms":2000,"sequence-number":2,"schema-id":0}
-         ],
-         "snapshot-log": [],
-         "metadata-log": [],
-         "statistics": [
-           
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}
-         ],
-         "partition-statistics": [
-           
{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}
-         ]
-       }`
+       preMeta := buildMetaJSON(metaJSONOpts{
+               snapshots: 
`{"snapshot-id":1,"timestamp-ms":1000},{"snapshot-id":2,"timestamp-ms":2000}`,
+               statistics: 
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]},`
 +
+                       
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+               partitionStatistics: 
`{"snapshot-id":1,"statistics-path":"s3://bucket/stats/snap1-part.puffin","file-size-in-bytes":50},`
 +
+                       
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}`,
+       })
+       postMeta := buildMetaJSON(metaJSONOpts{
+               snapshots:           `{"snapshot-id":2,"timestamp-ms":2000}`,
+               statistics:          
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2.puffin","file-size-in-bytes":100,"file-footer-size-in-bytes":10,"blob-metadata":[]}`,
+               partitionStatistics: 
`{"snapshot-id":2,"statistics-path":"s3://bucket/stats/snap2-part.puffin","file-size-in-bytes":50}`,
+       })
 
        pre, err := ParseMetadataString(preMeta)
        require.NoError(t, err)
@@ -176,8 +205,7 @@ func 
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
        tio.files["s3://bucket/stats/snap2.puffin"] = []byte("puffin")
        tio.files["s3://bucket/stats/snap2-part.puffin"] = []byte("puffin")
 
-       txn := createTestTransaction(t, tio, iceberg.NewPartitionSpec())
-       fsF := txn.tbl.fsF
+       fsF := testFSF(tio)
        preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
        postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF, 
nil)
 
@@ -194,6 +222,117 @@ func 
TestRemoveSnapshotsPostCommitPreservesStatisticsOfSurvivingSnapshots(t *tes
        assert.Contains(t, tio.files, "s3://bucket/stats/snap2-part.puffin")
 }
 
+func TestRemoveSnapshotsPostCommitSharedManifestRetained(t *testing.T) {
+       // Manifest M is referenced by both an expired snapshot (1) and a
+       // retained snapshot (2). PostCommit must keep M and its data file
+       // because a retained snapshot still references them; only snap1's
+       // manifest list should be deleted.
+       const (
+               dataPath      = "s3://bucket/data/file-1.parquet"
+               manifestPath  = "s3://bucket/meta/manifest-shared.avro"
+               manifestList1 = "s3://bucket/meta/snap-1.avro"
+               manifestList2 = "s3://bucket/meta/snap-2.avro"
+       )
+
+       tio := newTrackingIO()
+       mf := writeManifest(t, tio, 1, 1, manifestPath, dataPath)
+       writeManifestList(t, tio, 1, manifestList1, []iceberg.ManifestFile{mf})
+       writeManifestList(t, tio, 2, manifestList2, []iceberg.ManifestFile{mf})
+       tio.files[dataPath] = []byte("data")
+
+       preMeta := buildMetaJSON(metaJSONOpts{
+               snapshots: fmt.Sprintf(
+                       
`{"snapshot-id":1,"timestamp-ms":1000,"manifest-list":%q},`+
+                               
`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`,
+                       manifestList1, manifestList2),
+       })
+       postMeta := buildMetaJSON(metaJSONOpts{
+               snapshots: 
fmt.Sprintf(`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`, 
manifestList2),
+       })
+
+       pre, err := ParseMetadataString(preMeta)
+       require.NoError(t, err)
+       post, err := ParseMetadataString(postMeta)
+       require.NoError(t, err)
+
+       fsF := testFSF(tio)
+       preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
+       postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF, 
nil)
+
+       update := NewRemoveSnapshotsUpdate([]int64{1}, true)
+       err = update.PostCommit(context.Background(), preTable, postTable)
+       require.NoError(t, err)
+
+       // snap1's manifest list must be deleted.
+       assert.NotContains(t, tio.files, manifestList1)
+
+       // The shared manifest, its data file, and the retained snapshot's
+       // manifest list must all survive because snap2 still references them.
+       assert.Contains(t, tio.files, manifestPath)
+       assert.Contains(t, tio.files, dataPath)
+       assert.Contains(t, tio.files, manifestList2)
+}
+
+func TestRemoveSnapshotsPostCommitSharedManifestExpiredOnce(t *testing.T) {
+       // Manifest M is referenced by two expired snapshots (1 and 2) and no
+       // retained snapshot. PostCommit must delete M and its data file, and
+       // must open M exactly once across the two expired snapshots — the
+       // dedup invariant the PR is built on. Open count on the shared
+       // manifest is the perf signal: a regression that drops the dedup
+       // would open it twice.
+       const (
+               dataPath      = "s3://bucket/data/file-1.parquet"
+               manifestPath  = "s3://bucket/meta/manifest-shared.avro"
+               manifestList1 = "s3://bucket/meta/snap-1.avro"
+               manifestList2 = "s3://bucket/meta/snap-2.avro"
+       )
+
+       tio := newTrackingCallsIO()
+       mf := writeManifest(t, tio.trackingIO, 1, 1, manifestPath, dataPath)
+       writeManifestList(t, tio.trackingIO, 1, manifestList1, 
[]iceberg.ManifestFile{mf})
+       writeManifestList(t, tio.trackingIO, 2, manifestList2, 
[]iceberg.ManifestFile{mf})
+       tio.files[dataPath] = []byte("data")
+
+       preMeta := buildMetaJSON(metaJSONOpts{
+               snapshots: fmt.Sprintf(
+                       
`{"snapshot-id":1,"timestamp-ms":1000,"manifest-list":%q},`+
+                               
`{"snapshot-id":2,"timestamp-ms":2000,"manifest-list":%q}`,
+                       manifestList1, manifestList2),
+       })
+       postMeta := buildMetaJSON(metaJSONOpts{})
+
+       pre, err := ParseMetadataString(preMeta)
+       require.NoError(t, err)
+       post, err := ParseMetadataString(postMeta)
+       require.NoError(t, err)
+
+       fsF := testFSF(tio)
+       preTable := New(Identifier{"ns", "tbl"}, pre, "metadata.json", fsF, nil)
+       postTable := New(Identifier{"ns", "tbl"}, post, "metadata.json", fsF, 
nil)
+
+       update := NewRemoveSnapshotsUpdate([]int64{1, 2}, true)
+       err = update.PostCommit(context.Background(), preTable, postTable)
+       require.NoError(t, err)
+
+       // Both manifest lists, the shared manifest, and its data file must
+       // all be deleted.
+       assert.NotContains(t, tio.files, manifestList1)
+       assert.NotContains(t, tio.files, manifestList2)
+       assert.NotContains(t, tio.files, manifestPath)
+       assert.NotContains(t, tio.files, dataPath)
+
+       // The shared manifest must be opened exactly once: snap1's pass
+       // records it in visitedManifests, snap2's pass must skip it.
+       assert.Equal(t, 1, tio.openCount[manifestPath],
+               "shared manifest must be read only once across expired 
snapshots")
+
+       // Each file must be deleted exactly once.
+       assert.Equal(t, 1, tio.removeCount[manifestPath],
+               "shared manifest must be deleted exactly once")
+       assert.Equal(t, 1, tio.removeCount[dataPath],
+               "data file must be deleted exactly once")
+}
+
 func TestUnmarshalUpdates(t *testing.T) {
        spec := iceberg.NewPartitionSpecID(3,
                iceberg.PartitionField{

Reply via email to