swamirishi commented on code in PR #9324:
URL: https://github.com/apache/ozone/pull/9324#discussion_r2564521182
##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/defrag/SnapshotDefragService.java:
##########
@@ -192,6 +527,103 @@ public BackgroundTaskResult call() throws Exception {
}
}
+  /**
+   * Creates a new checkpoint by modifying the metadata manager from a snapshot.
+   * This involves generating a temporary checkpoint and truncating specified
+   * column families from the checkpoint before returning the updated metadata manager.
+   *
+   * @param snapshotInfo Information about the snapshot for which the checkpoint
+   *                     is being created.
+   * @param incrementalColumnFamilies A set of table names representing incremental
+   *                                  column families to be retained in the checkpoint.
+   * @return A new instance of OmMetadataManagerImpl initialized with the modified checkpoint.
+   * @throws IOException If an I/O error occurs during snapshot processing,
+   *                     checkpoint creation, or table operations.
+   */
+  private OmMetadataManagerImpl createCheckpoint(SnapshotInfo snapshotInfo,
+      Set<String> incrementalColumnFamilies) throws IOException {
+    try (UncheckedAutoCloseableSupplier<OmSnapshot> snapshot = omSnapshotManager.getActiveSnapshot(
+        snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(), snapshotInfo.getName())) {
+      DBCheckpoint checkpoint = snapshot.get().getMetadataManager().getStore().getCheckpoint(tmpDefragDir, true);
+      try (OmMetadataManagerImpl metadataManagerBeforeTruncate =
+          OmMetadataManagerImpl.createCheckpointMetadataManager(conf, checkpoint, false)) {
+        DBStore dbStore = metadataManagerBeforeTruncate.getStore();
+        for (String table : metadataManagerBeforeTruncate.listTableNames()) {
+          if (!incrementalColumnFamilies.contains(table)) {
+            dbStore.dropTable(table);
+          }
+        }
+      } catch (Exception e) {
+        throw new IOException("Failed to close checkpoint of snapshot: " + snapshotInfo.getSnapshotId(), e);
+      }
+      // This will recreate the column families in the checkpoint.
+      return OmMetadataManagerImpl.createCheckpointMetadataManager(conf, checkpoint, false);
+    }
+  }
+
+  private void acquireContentLock(UUID snapshotID) throws IOException {
+    lockIds.clear();
+    lockIds.add(snapshotID);
+    OMLockDetails lockDetails = snapshotContentLocks.acquireLock(lockIds);
+    if (!lockDetails.isLockAcquired()) {
+      throw new IOException("Failed to acquire lock on snapshot: " + snapshotID);
+    }
+    LOG.debug("Acquired MultiSnapshotLocks on snapshot: {}", snapshotID);
+  }
+
+  private boolean checkAndDefragSnapshot(SnapshotChainManager chainManager, UUID snapshotId) throws IOException {
+    SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(ozoneManager, chainManager, snapshotId);
+
+    if (snapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE) {
+      LOG.debug("Skipping defragmentation for non-active snapshot: {} (ID: {})",
+          snapshotInfo.getName(), snapshotInfo.getSnapshotId());
+      return false;
+    }
+    Pair<Boolean, Integer> needsDefragVersionPair = needsDefragmentation(snapshotInfo);
+    if (!needsDefragVersionPair.getLeft()) {
+      return false;
+    }
+    // Create a checkpoint of the previous snapshot, or of the current snapshot if it is the first one in the chain.
+    SnapshotInfo checkpointSnapshotInfo = snapshotInfo.getPathPreviousSnapshotId() == null ? snapshotInfo :
+        SnapshotUtils.getSnapshotInfo(ozoneManager, chainManager, snapshotInfo.getPathPreviousSnapshotId());
+
+    OmMetadataManagerImpl checkpointMetadataManager = createCheckpoint(checkpointSnapshotInfo,
+        COLUMN_FAMILIES_TO_TRACK_IN_SNAPSHOT);
+    Path checkpointLocation = checkpointMetadataManager.getStore().getDbLocation().toPath();
+    try {
+      DBStore checkpointDBStore = checkpointMetadataManager.getStore();
+      TablePrefixInfo prefixInfo = ozoneManager.getMetadataManager().getTableBucketPrefix(snapshotInfo.getVolumeName(),
+          snapshotInfo.getBucketName());
+      // If this is the first snapshot in the chain, perform full defragmentation.
+      if (snapshotInfo.getPathPreviousSnapshotId() == null) {
+        performFullDefragmentation(checkpointDBStore, prefixInfo, COLUMN_FAMILIES_TO_TRACK_IN_SNAPSHOT);
+      } else {
+        performIncrementalDefragmentation(checkpointSnapshotInfo, snapshotInfo, needsDefragVersionPair.getValue(),
+            checkpointDBStore, prefixInfo, COLUMN_FAMILIES_TO_TRACK_IN_SNAPSHOT);
+      }
+
+      // Acquire the content lock on the snapshot to ensure the table contents don't change.
+      acquireContentLock(snapshotId);
+      try {
+        // Ingestion of the incremental tables (KeyTable/FileTable/DirectoryTable) is done; now we only need to
+        // reingest the remaining tables from the original snapshot.
+        ingestNonIncrementalTables(checkpointDBStore, snapshotInfo, prefixInfo, COLUMN_FAMILIES_TO_TRACK_IN_SNAPSHOT);
+        checkpointMetadataManager.close();
+        // Switch the snapshot DB location to the new version.
+        atomicSwitchSnapshotDB(snapshotId, checkpointLocation);
+      } finally {
+        snapshotContentLocks.releaseLock();
Review Comment:
https://issues.apache.org/jira/browse/HDDS-14015: we should have done the delete subsequently under the same content lock, so that no handles are left hanging around in the snapshot cache that could be used by a different thread after the defrag thread releases the snapshot cache lock.
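As a rough sketch of the ordering I have in mind (not the actual API; `invalidateAndDeleteOldSnapshotDb` is a hypothetical helper standing in for whatever cache-eviction and cleanup call we end up using), something along these lines:

```java
// Sketch only: keep the old-DB cleanup inside the content lock so no other
// thread can pick up a stale handle from the snapshot cache in between.
acquireContentLock(snapshotId);
try {
  ingestNonIncrementalTables(checkpointDBStore, snapshotInfo, prefixInfo, COLUMN_FAMILIES_TO_TRACK_IN_SNAPSHOT);
  checkpointMetadataManager.close();
  // Switch the snapshot DB location to the new version.
  atomicSwitchSnapshotDB(snapshotId, checkpointLocation);
  // Hypothetical helper: evict any cached OmSnapshot handles and delete the
  // old DB directory while the content lock is still held.
  invalidateAndDeleteOldSnapshotDb(snapshotId);
} finally {
  snapshotContentLocks.releaseLock();
}
```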