dongxiao1198 commented on code in PR #490:
URL: https://github.com/apache/iceberg-cpp/pull/490#discussion_r2667394097


##########
src/iceberg/update/expire_snapshots.h:
##########
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/update/pending_update.h"
+#include "iceberg/util/timepoint.h"
+
+/// \file iceberg/update/expire_snapshots.h
+/// \brief API for removing old snapshots from a table.
+
+namespace iceberg {
+
+/// \brief An enum representing possible clean up levels used in snapshot 
expiration.
+enum class CleanupLevel : uint8_t {
+  /// Skip all file cleanup, only remove snapshot metadata.
+  kNone,
+  /// Clean up only metadata files (manifests, manifest lists, statistics), 
retain data
+  /// files.
+  kMetadataOnly,
+  /// Clean up both metadata and data files (default).
+  kAll
+};
+
+/// \brief API for removing old snapshots from a table.
+///
+/// This API accumulates snapshot deletions and commits the new list to the 
table. This
+/// API does not allow deleting the current snapshot.
+///
+/// When committing, these changes will be applied to the latest table 
metadata. Commit
+/// conflicts will be resolved by applying the changes to the new latest 
metadata and
+/// reattempting the commit.
+///
+/// Manifest files that are no longer used by valid snapshots will be deleted. 
Data files
+/// that were deleted by snapshots that are expired will be deleted. 
DeleteWith() can be
+/// used to pass an alternative deletion method.
+///
+/// Apply() returns a list of the snapshots that will be removed.
+class ICEBERG_EXPORT ExpireSnapshots : public PendingUpdate {
+ public:
+  static Result<std::shared_ptr<ExpireSnapshots>> Make(
+      std::shared_ptr<Transaction> transaction);
+
+  ~ExpireSnapshots() override;
+
+  using SnapshotToRef = std::unordered_map<std::string, 
std::shared_ptr<SnapshotRef>>;
+
+  struct ExpireSnapshotsResult {
+    SnapshotToRef ref_to_remove;
+    std::vector<int64_t> snapshot_ids_to_remove;
+    std::vector<int32_t> partition_spec_to_remove;
+    std::unordered_set<int32_t> schema_to_remove;
+  };
+
+  /// \brief Expires a specific Snapshot identified by id.
+  ///
+  /// \param snapshot_id Long id of the snapshot to expire.
+  /// \return Reference to this for method chaining.
+  ExpireSnapshots& ExpireSnapshotId(int64_t snapshot_id);
+
+  /// \brief Expires all snapshots older than the given timestamp.
+  ///
+  /// \param timestamp_millis A long timestamp in milliseconds.
+  /// \return Reference to this for method chaining.
+  ExpireSnapshots& ExpireOlderThan(int64_t timestamp_millis);
+
+  /// \brief Retains the most recent ancestors of the current snapshot.
+  ///
+  /// If a snapshot would be expired because it is older than the expiration 
timestamp,
+  /// but is one of the num_snapshots most recent ancestors of the current 
state, it will
+  /// be retained. This will not cause snapshots explicitly identified by id 
from
+  /// expiring.
+  ///
+  /// This may keep more than num_snapshots ancestors if snapshots are added 
concurrently.
+  /// This may keep less than num_snapshots ancestors if the current table 
state does not
+  /// have that many.
+  ///
+  /// \param num_snapshots The number of snapshots to retain.
+  /// \return Reference to this for method chaining.
+  ExpireSnapshots& RetainLast(int num_snapshots);
+
+  /// \brief Passes an alternative delete implementation that will be used for 
manifests
+  /// and data files.
+  ///
+  /// Manifest files that are no longer used by valid snapshots will be 
deleted. Data
+  /// files that were deleted by snapshots that are expired will be deleted.
+  ///
+  /// If this method is not called, unnecessary manifests and data files will 
still be
+  /// deleted.
+  ///
+  /// \param delete_func A function that will be called to delete manifests 
and data files
+  /// \return Reference to this for method chaining.
+  ExpireSnapshots& DeleteWith(std::function<void(const std::string&)> 
delete_func);
+
+  /// \brief Configures the cleanup level for expired files.
+  ///
+  /// This method provides fine-grained control over which files are cleaned 
up during
+  /// snapshot expiration.
+  ///
+  /// Consider CleanupLevel::kMetadataOnly when data files are shared across 
tables or
+  /// when using procedures like add-files that may reference the same data 
files.
+  ///
+  /// Consider CleanupLevel::kNone when data and metadata files may be more 
efficiently
+  /// removed using a distributed framework through the actions API.
+  ///
+  /// \param level The cleanup level to use for expired snapshots.
+  /// \return Reference to this for method chaining.
+  ExpireSnapshots& CleanupLevel(enum CleanupLevel level);

Review Comment:
   CleanupLevel and function CleanupLevel will be ambiguous, maybe use 
SetCleanupLevel(CleanupLevel level); instead
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to