This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 22e34f87 fix(table): skip RemoveSnapshotsUpdate when no snapshots to
expire (#679)
22e34f87 is described below
commit 22e34f87312b769b27ad9002a8183e3efefd2f3d
Author: Krutika Dhananjay <[email protected]>
AuthorDate: Tue Jan 20 04:11:00 2026 +0530
fix(table): skip RemoveSnapshotsUpdate when no snapshots to expire (#679)
ExpireSnapshots was adding a RemoveSnapshotsUpdate even when there were
no snapshots to delete. This caused Commit to create a new metadata file
on every call, even when nothing changed, leading to metadata file
proliferation in maintenance jobs that leverage iceberg-go's snapshot
expiration implementation.
Co-authored-by: Krutika Dhananjay <[email protected]>
---
table/table_test.go | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
table/transaction.go | 9 +++++---
2 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/table/table_test.go b/table/table_test.go
index b97f0a6f..f524142a 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -970,6 +970,65 @@ func (t *TableWritingTestSuite) TestExpireSnapshots() {
t.Require().Equal(2, len(slices.Collect(tbl.Metadata().SnapshotLogs())))
}
+// TestExpireSnapshotsNoOpWhenNothingToExpire verifies that when there are no
+// snapshots to expire, no new metadata file is created. This prevents
unnecessary
+// metadata file proliferation when the maintenance job runs but finds nothing
to do.
+func (t *TableWritingTestSuite) TestExpireSnapshotsNoOpWhenNothingToExpire() {
+ fs := iceio.LocalFS{}
+
+ files := make([]string, 0)
+ for i := range 3 {
+ filePath := fmt.Sprintf("%s/expire_noop_v%d/data-%d.parquet",
t.location, t.formatVersion, i)
+ t.writeParquet(fs, filePath, t.arrTablePromotedTypes)
+ files = append(files, filePath)
+ }
+
+ ident := table.Identifier{"default", "expire_noop_v" +
strconv.Itoa(t.formatVersion)}
+ meta, err := table.NewMetadata(t.tableSchemaPromotedTypes,
iceberg.UnpartitionedSpec,
+ table.UnsortedSortOrder, t.location,
iceberg.Properties{table.PropertyFormatVersion: strconv.Itoa(t.formatVersion)})
+ t.Require().NoError(err)
+
+ ctx := context.Background()
+
+ tbl := table.New(
+ ident,
+ meta,
+ t.getMetadataLoc(),
+ func(ctx context.Context) (iceio.IO, error) {
+ return fs, nil
+ },
+ &mockedCatalog{meta},
+ )
+
+ // Create 3 snapshots
+ for i := range 3 {
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.AddFiles(ctx, files[i:i+1], nil, false))
+ tbl, err = tx.Commit(ctx)
+ t.Require().NoError(err)
+ }
+
+ t.Require().Equal(3, len(tbl.Metadata().Snapshots()))
+
+ // Record the metadata location before ExpireSnapshots
+ metadataLocationBefore := tbl.MetadataLocation()
+
+ // Call ExpireSnapshots with parameters that won't expire anything:
+ // - RetainLast(10) keeps more snapshots than we have
+ // - OlderThan(time.Hour) won't expire recent snapshots
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.ExpireSnapshots(table.WithOlderThan(time.Hour),
table.WithRetainLast(10)))
+ tbl, err = tx.Commit(ctx)
+ t.Require().NoError(err)
+
+ // Verify no snapshots were removed
+ t.Require().Equal(3, len(tbl.Metadata().Snapshots()))
+
+ // Verify no new metadata file was created (metadata location unchanged)
+ t.Require().Equal(metadataLocationBefore, tbl.MetadataLocation(),
+ "metadata location should not change when there are no
snapshots to expire")
+}
+
func (t *TableWritingTestSuite) TestExpireSnapshotsWithMissingParent() {
// This test validates the fix for handling missing parent snapshots.
// After expiring snapshots, remaining snapshots may have
parent-snapshot-id
diff --git a/table/transaction.go b/table/transaction.go
index f9deaa62..e3a2fe90 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -288,9 +288,12 @@ func (t *Transaction) ExpireSnapshots(opts
...ExpireSnapshotsOpt) error {
}
}
- update := NewRemoveSnapshotsUpdate(snapsToDelete)
- update.postCommit = cfg.postCommit
- updates = append(updates, update)
+ // Only add the update if there are actually snapshots to delete
+ if len(snapsToDelete) > 0 {
+ update := NewRemoveSnapshotsUpdate(snapsToDelete)
+ update.postCommit = cfg.postCommit
+ updates = append(updates, update)
+ }
return t.apply(updates, reqs)
}