shangxinli commented on code in PR #592:
URL: https://github.com/apache/iceberg-cpp/pull/592#discussion_r2996574718
##########
src/iceberg/update/expire_snapshots.cc:
##########
@@ -285,7 +291,247 @@ Result<ExpireSnapshots::ApplyResult>
ExpireSnapshots::Apply() {
});
}
+ // Cache the result for use during Finalize()
+ apply_result_ = result;
+
return result;
}
+Status ExpireSnapshots::Finalize(std::optional<Error> commit_error) {
+ if (commit_error.has_value()) {
+ return {};
+ }
+
+ if (cleanup_level_ == CleanupLevel::kNone) {
+ return {};
+ }
+
+ if (!apply_result_.has_value() ||
apply_result_->snapshot_ids_to_remove.empty()) {
+ return {};
+ }
+
+ // File cleanup is best-effort: log and continue on individual file deletion
failures
+ // to avoid blocking metadata updates (matching Java behavior).
+ return CleanExpiredFiles(apply_result_->snapshot_ids_to_remove);
+}
+
+void ExpireSnapshots::DeleteFilePath(const std::string& path) {
+ try {
+ if (delete_func_) {
+ delete_func_(path);
+ } else {
+ auto status = ctx_->table->io()->DeleteFile(path);
+ // Best-effort: ignore NotFound (file already deleted) and other errors.
+ // Java uses suppressFailureWhenFinished + onFailure logging.
+ std::ignore = status;
+ }
+ } catch (...) {
+ // Suppress all exceptions during file cleanup to match Java's
+ // suppressFailureWhenFinished behavior.
+ }
+}
+
+Status ExpireSnapshots::ReadManifestsForSnapshot(
+ int64_t snapshot_id, std::unordered_set<std::string>& manifest_paths) {
+ const TableMetadata& metadata = base();
+ auto file_io = ctx_->table->io();
+
+ auto snapshot_result = metadata.SnapshotById(snapshot_id);
+ if (!snapshot_result.has_value()) {
+ return {};
+ }
+ auto& snapshot = snapshot_result.value();
+
+ SnapshotCache snapshot_cache(snapshot.get());
+ auto manifests_result = snapshot_cache.Manifests(file_io);
+ if (!manifests_result.has_value()) {
+ // Best-effort: skip this snapshot if we can't read its manifests
+ return {};
+ }
+
+ for (const auto& manifest : manifests_result.value()) {
+ manifest_paths.insert(manifest.manifest_path);
+ }
+
+ return {};
+}
+
+Status ExpireSnapshots::FindDataFilesToDelete(
+ const std::unordered_set<std::string>& manifests_to_delete,
+ const std::unordered_set<std::string>& retained_manifests,
+ std::unordered_set<std::string>& data_files_to_delete) {
+ const TableMetadata& metadata = base();
+ auto file_io = ctx_->table->io();
+
+ // Step 1: Collect all file paths from manifests being deleted
+ for (const auto& manifest_path : manifests_to_delete) {
+ // Find the ManifestFile for this path by scanning expired snapshots
+ for (const auto& snapshot : metadata.snapshots) {
+ if (!snapshot) continue;
+ SnapshotCache snapshot_cache(snapshot.get());
+ auto manifests_result = snapshot_cache.Manifests(file_io);
+ if (!manifests_result.has_value()) continue;
+
+ for (const auto& manifest : manifests_result.value()) {
+ if (manifest.manifest_path != manifest_path) continue;
+
+ auto schema_result = metadata.Schema();
+ if (!schema_result.has_value()) continue;
+ auto spec_result =
metadata.PartitionSpecById(manifest.partition_spec_id);
+ if (!spec_result.has_value()) continue;
+
+ auto reader_result = ManifestReader::Make(
+ manifest, file_io, schema_result.value(), spec_result.value());
+ if (!reader_result.has_value()) continue;
+
+ auto entries_result = reader_result.value()->Entries();
+ if (!entries_result.has_value()) continue;
+
+ for (const auto& entry : entries_result.value()) {
+ if (entry.data_file) {
+ data_files_to_delete.insert(entry.data_file->file_path);
+ }
+ }
+ goto next_manifest; // Found and processed this manifest, move to next
Review Comment:
Removed. The nested loop + goto was eliminated by introducing
`manifest_cache_` (populated in Phase 1) and a `MakeManifestReader()` helper.
`FindDataFilesToDelete()` now iterates over the cache directly with simple map
lookups.
##########
src/iceberg/update/expire_snapshots.cc:
##########
@@ -285,7 +291,247 @@ Result<ExpireSnapshots::ApplyResult>
ExpireSnapshots::Apply() {
});
}
+ // Cache the result for use during Finalize()
+ apply_result_ = result;
+
return result;
}
+Status ExpireSnapshots::Finalize(std::optional<Error> commit_error) {
+ if (commit_error.has_value()) {
+ return {};
+ }
+
+ if (cleanup_level_ == CleanupLevel::kNone) {
+ return {};
+ }
+
+ if (!apply_result_.has_value() ||
apply_result_->snapshot_ids_to_remove.empty()) {
+ return {};
+ }
+
+ // File cleanup is best-effort: log and continue on individual file deletion
failures
+ // to avoid blocking metadata updates (matching Java behavior).
+ return CleanExpiredFiles(apply_result_->snapshot_ids_to_remove);
+}
+
+void ExpireSnapshots::DeleteFilePath(const std::string& path) {
+ try {
+ if (delete_func_) {
+ delete_func_(path);
+ } else {
+ auto status = ctx_->table->io()->DeleteFile(path);
+ // Best-effort: ignore NotFound (file already deleted) and other errors.
+ // Java uses suppressFailureWhenFinished + onFailure logging.
+ std::ignore = status;
+ }
+ } catch (...) {
+ // Suppress all exceptions during file cleanup to match Java's
+ // suppressFailureWhenFinished behavior.
+ }
+}
+
+Status ExpireSnapshots::ReadManifestsForSnapshot(
+ int64_t snapshot_id, std::unordered_set<std::string>& manifest_paths) {
+ const TableMetadata& metadata = base();
+ auto file_io = ctx_->table->io();
+
+ auto snapshot_result = metadata.SnapshotById(snapshot_id);
+ if (!snapshot_result.has_value()) {
+ return {};
+ }
+ auto& snapshot = snapshot_result.value();
+
+ SnapshotCache snapshot_cache(snapshot.get());
+ auto manifests_result = snapshot_cache.Manifests(file_io);
+ if (!manifests_result.has_value()) {
+ // Best-effort: skip this snapshot if we can't read its manifests
+ return {};
+ }
+
+ for (const auto& manifest : manifests_result.value()) {
+ manifest_paths.insert(manifest.manifest_path);
+ }
+
+ return {};
+}
+
+Status ExpireSnapshots::FindDataFilesToDelete(
+ const std::unordered_set<std::string>& manifests_to_delete,
+ const std::unordered_set<std::string>& retained_manifests,
+ std::unordered_set<std::string>& data_files_to_delete) {
Review Comment:
Changed `FindDataFilesToDelete` to return
`Result<std::unordered_set<std::string>>` instead of using a mutable
out-parameter.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]