This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 554fd113 fix: handle missing parent snapshots in ExpireSnapshots (#671)
554fd113 is described below
commit 554fd113b33d65dcb3b6909477205024fa729a44
Author: Krutika Dhananjay <[email protected]>
AuthorDate: Sat Jan 17 05:48:15 2026 +0530
fix: handle missing parent snapshots in ExpireSnapshots (#671)
After snapshot expiration, remaining snapshots may have
parent-snapshot-id references pointing to snapshots that no longer
exist. ExpireSnapshots was failing when walking the parent chain and
encountering these dangling references.
Change the behavior to treat a missing parent snapshot as the end of the
chain rather than returning an error. This is expected and valid
behavior in Iceberg tables that have undergone previous snapshot
expiration.
---------
Co-authored-by: Krutika Dhananjay <[email protected]>
---
table/table_test.go | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
table/transaction.go | 4 ++-
2 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/table/table_test.go b/table/table_test.go
index 72997fef..b97f0a6f 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -970,6 +970,93 @@ func (t *TableWritingTestSuite) TestExpireSnapshots() {
t.Require().Equal(2, len(slices.Collect(tbl.Metadata().SnapshotLogs())))
}
+func (t *TableWritingTestSuite) TestExpireSnapshotsWithMissingParent() {
+ // This test validates the fix for handling missing parent snapshots.
+ // After expiring snapshots, remaining snapshots may have
parent-snapshot-id
+ // references pointing to snapshots that no longer exist.
ExpireSnapshots should
+ // treat missing parents as the end of the chain rather than returning
an error.
+
+ fs := iceio.LocalFS{}
+
+ files := make([]string, 0)
+ for i := range 5 {
+ filePath :=
fmt.Sprintf("%s/expire_with_missing_parent_v%d/data-%d.parquet", t.location,
t.formatVersion, i)
+ t.writeParquet(fs, filePath, t.arrTablePromotedTypes)
+ files = append(files, filePath)
+ }
+
+ ident := table.Identifier{"default", "expire_with_missing_parent_v" +
strconv.Itoa(t.formatVersion)}
+ meta, err := table.NewMetadata(t.tableSchemaPromotedTypes,
iceberg.UnpartitionedSpec,
+ table.UnsortedSortOrder, t.location,
iceberg.Properties{table.PropertyFormatVersion: strconv.Itoa(t.formatVersion)})
+ t.Require().NoError(err)
+
+ ctx := context.Background()
+
+ tbl := table.New(
+ ident,
+ meta,
+ t.getMetadataLoc(),
+ func(ctx context.Context) (iceio.IO, error) {
+ return fs, nil
+ },
+ &mockedCatalog{meta},
+ )
+
+ // Create 5 snapshots, each one with a parent pointing to the previous
+ for i := range 5 {
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.AddFiles(ctx, files[i:i+1], nil, false))
+ tbl, err = tx.Commit(ctx)
+ t.Require().NoError(err)
+ }
+
+ t.Require().Equal(5, len(tbl.Metadata().Snapshots()))
+
+ // Get the snapshot IDs before expiration
+ snapshotsBeforeExpire := tbl.Metadata().Snapshots()
+ snapshot3ID := snapshotsBeforeExpire[2].SnapshotID
+ snapshot4ID := snapshotsBeforeExpire[3].SnapshotID
+
+ // Expire the first 3 snapshots, keeping only the last 2
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.ExpireSnapshots(table.WithOlderThan(0),
table.WithRetainLast(2)))
+ tbl, err = tx.Commit(ctx)
+ t.Require().NoError(err)
+ t.Require().Equal(2, len(tbl.Metadata().Snapshots()))
+
+ // Verify that the 4th snapshot's parent (snapshot 3) is no longer in
the metadata
+ remainingSnapshots := tbl.Metadata().Snapshots()
+ var snapshot4 *table.Snapshot
+ for i := range remainingSnapshots {
+ if remainingSnapshots[i].SnapshotID == snapshot4ID {
+ snapshot4 = &remainingSnapshots[i]
+
+ break
+ }
+ }
+ t.Require().NotNil(snapshot4, "snapshot 4 should still exist")
+ t.Require().NotNil(snapshot4.ParentSnapshotID, "snapshot 4 should have
a parent ID")
+ t.Require().Equal(snapshot3ID, *snapshot4.ParentSnapshotID, "snapshot
4's parent should be snapshot 3")
+
+ // Verify snapshot 3 is no longer in the metadata
+ t.Nil(tbl.Metadata().SnapshotByID(snapshot3ID), "snapshot 3 should have
been removed")
+
+ // At this point, the 4th snapshot has a parent-snapshot-id pointing to
+ // the 3rd snapshot which no longer exists. Try to expire again - this
+ // should not fail even though the parent is missing. Use
WithRetainLast(2)
+ // to force walking the full parent chain.
+ tx = tbl.NewTransaction()
+ // This should succeed without error despite the missing parent.
+ // WithRetainLast(2) will cause it to walk back through snapshot 4's
parent chain,
+ // encountering the missing snapshot 3.
+ err = tx.ExpireSnapshots(table.WithOlderThan(0),
table.WithRetainLast(2))
+ t.Require().NoError(err, "ExpireSnapshots should handle missing parent
gracefully")
+
+ tbl, err = tx.Commit(ctx)
+ t.Require().NoError(err)
+ t.Require().Equal(2, len(tbl.Metadata().Snapshots()), "should still
have 2 snapshots")
+}
+
// validatingCatalog validates requirements before applying updates,
// simulating real catalog behavior for concurrent modification tests.
type validatingCatalog struct {
@@ -1042,6 +1129,7 @@ func (t *TableWritingTestSuite)
TestExpireSnapshotsRejectsOnRefRollback() {
tbl, err = tx.Commit(ctx)
t.Require().NoError(err)
}
+
t.Require().Equal(5, len(tbl.Metadata().Snapshots()))
// Get snapshot IDs for later use
diff --git a/table/transaction.go b/table/transaction.go
index 68961bf3..f9deaa62 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -259,7 +259,9 @@ func (t *Transaction) ExpireSnapshots(opts
...ExpireSnapshotsOpt) error {
for {
snap, err := t.meta.SnapshotByID(snapId)
if err != nil {
- return err
+ // Parent snapshot may have been removed by a
previous expiration.
+ // Treat missing parent as end of chain - this
is expected behavior.
+ break
}
snapAge := time.Now().UnixMilli() - snap.TimestampMs