This is an automated email from the ASF dual-hosted git repository.
hemant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 7da5ecb585 HDDS-10590. [Snapshot] Synchronized snapshot purge, set
snapshot property and SstFilteringService (#6456)
7da5ecb585 is described below
commit 7da5ecb5855b773fd262a839f8791f50ebca6fca
Author: Hemant Kumar <[email protected]>
AuthorDate: Wed Apr 3 13:22:22 2024 -0700
HDDS-10590. [Snapshot] Synchronized snapshot purge, set snapshot property
and SstFilteringService (#6456)
---
.../hadoop/ozone/om/helpers/SnapshotInfo.java | 2 +-
.../request/snapshot/OMSnapshotPurgeRequest.java | 218 +++++++++++++--------
.../snapshot/OMSnapshotSetPropertyRequest.java | 38 +++-
.../response/snapshot/OMSnapshotPurgeResponse.java | 2 +-
4 files changed, 174 insertions(+), 86 deletions(-)
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
index 5a1ecee26f..47a48c37e8 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java
@@ -719,7 +719,7 @@ public final class SnapshotInfo implements Auditable,
CopyObject<SnapshotInfo> {
public String toString() {
return "SnapshotInfo{" +
"snapshotId: '" + snapshotId + '\'' +
- ", name: '" + name + "'," +
+ ", name: '" + name + '\'' +
", volumeName: '" + volumeName + '\'' +
", bucketName: '" + bucketName + '\'' +
", snapshotStatus: '" + snapshotStatus + '\'' +
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java
index 02c44e0084..7ac65b240e 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java
@@ -19,6 +19,9 @@
package org.apache.hadoop.ozone.om.request.snapshot;
+import org.apache.commons.lang3.tuple.Triple;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
@@ -40,11 +43,15 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.Set;
import java.util.UUID;
+import static
org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;
+
/**
* Handles OMSnapshotPurge Request.
* This is an OM internal request. Does not need @RequireSnapshotFeatureState.
@@ -60,6 +67,7 @@ public class OMSnapshotPurgeRequest extends OMClientRequest {
@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
TermIndex termIndex) {
final long trxnLogIndex = termIndex.getIndex();
+
OmSnapshotManager omSnapshotManager = ozoneManager.getOmSnapshotManager();
OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl)
ozoneManager.getMetadataManager();
@@ -80,33 +88,63 @@ public class OMSnapshotPurgeRequest extends OMClientRequest
{
Map<String, SnapshotInfo> updatedPathPreviousAndGlobalSnapshots =
new HashMap<>();
- // Snapshots that are purged by the SnapshotDeletingService
- // will update the next snapshot so that is can be deep cleaned
- // by the KeyDeletingService in the next run.
+ // Each snapshot purge operation does three things:
+ // 1. Update the snapshot chain,
+ // 2. Update the deep clean flag for the next active snapshot (So that
it can be
+ // deep cleaned by the KeyDeletingService in the next run),
+ // 3. Finally, purge the snapshot.
+ // All of these steps have to be performed only when it acquires all the
necessary
+ // locks (lock on the snapshot to be purged, lock on the next active
snapshot, and
+ // lock on the next path and global previous snapshots). Ideally, there
is no need
+ // for locks for snapshot purge and can rely on OMStateMachine because
OMStateMachine
+ // is going to process each request sequentially.
+ //
+ // But there is a problem with that. After filtering unnecessary SST
files for a snapshot,
+ // SstFilteringService updates that snapshot's SstFilter flag.
SstFilteringService cannot
+ // use SetSnapshotProperty API because it runs on each OM independently
and One OM does
+ // not know if the snapshot has been filtered on the other OM in HA
environment.
+ //
+ // If locks are not taken snapshot purge and SstFilteringService will
cause a race condition
+ // and override one's update with another.
for (String snapTableKey : snapshotDbKeys) {
- SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable()
- .get(snapTableKey);
-
- if (fromSnapshot == null) {
- // Snapshot may have been purged in the previous iteration of
SnapshotDeletingService.
- LOG.warn("The snapshot {} is not longer in snapshot table, It maybe
removed in the previous " +
- "Snapshot purge request.", snapTableKey);
- continue;
- }
+ // To acquire all the locks, a set is maintained which is keyed by
snapshotTableKey.
+ // snapshotTableKey is nothing but /volumeName/bucketName/snapshotName.
+ // Once all the locks are acquired, it performs the three steps
mentioned above and
+ // release all the locks after that.
+ Set<Triple<String, String, String>> lockSet = new HashSet<>(4, 1);
+ try {
+ if (omMetadataManager.getSnapshotInfoTable().get(snapTableKey) ==
null) {
+ // Snapshot may have been purged in the previous iteration of
SnapshotDeletingService.
+ LOG.warn("The snapshot {} is not longer in snapshot table, It
maybe removed in the previous " +
+ "Snapshot purge request.", snapTableKey);
+ continue;
+ }
+
+ acquireLock(lockSet, snapTableKey, omMetadataManager);
+ SnapshotInfo fromSnapshot =
omMetadataManager.getSnapshotInfoTable().get(snapTableKey);
- SnapshotInfo nextSnapshot = SnapshotUtils
- .getNextActiveSnapshot(fromSnapshot,
- snapshotChainManager, omSnapshotManager);
-
- updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager,
- trxnLogIndex, updatedSnapInfos);
- updateSnapshotChainAndCache(omMetadataManager, fromSnapshot,
- trxnLogIndex, updatedPathPreviousAndGlobalSnapshots);
- // Remove and close snapshot's RocksDB instance from SnapshotCache.
-
ozoneManager.getOmSnapshotManager().invalidateCacheEntry(fromSnapshot.getSnapshotId());
- // Update SnapshotInfoTable cache.
- ozoneManager.getMetadataManager().getSnapshotInfoTable()
- .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()),
CacheValue.get(trxnLogIndex));
+ SnapshotInfo nextSnapshot =
+ SnapshotUtils.getNextActiveSnapshot(fromSnapshot,
snapshotChainManager, omSnapshotManager);
+
+ if (nextSnapshot != null) {
+ acquireLock(lockSet, nextSnapshot.getTableKey(),
omMetadataManager);
+ }
+
+ // Update the chain first so that it has all the necessary locks
before updating deep clean.
+ updateSnapshotChainAndCache(lockSet, omMetadataManager,
fromSnapshot, trxnLogIndex,
+ updatedPathPreviousAndGlobalSnapshots);
+ updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager,
trxnLogIndex, updatedSnapInfos);
+ // Remove and close snapshot's RocksDB instance from SnapshotCache.
+ omSnapshotManager.invalidateCacheEntry(fromSnapshot.getSnapshotId());
+ // Update SnapshotInfoTable cache.
+ omMetadataManager.getSnapshotInfoTable()
+ .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()),
CacheValue.get(trxnLogIndex));
+ } finally {
+ for (Triple<String, String, String> lockKey: lockSet) {
+ omMetadataManager.getLock()
+ .releaseWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(),
lockKey.getMiddle(), lockKey.getRight());
+ }
+ }
}
omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(),
@@ -120,20 +158,41 @@ public class OMSnapshotPurgeRequest extends
OMClientRequest {
return omClientResponse;
}
+ private void acquireLock(Set<Triple<String, String, String>> lockSet, String
snapshotTableKey,
+ OMMetadataManager omMetadataManager) throws
IOException {
+ SnapshotInfo snapshotInfo =
omMetadataManager.getSnapshotInfoTable().get(snapshotTableKey);
+
+ // It should not be the case that lock is required for non-existing
snapshot.
+ if (snapshotInfo == null) {
+ LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.",
snapshotTableKey);
+ throw new OMException("Snapshot: '{" + snapshotTableKey + "}' doesn't
not exist in snapshot table.",
+ OMException.ResultCodes.FILE_NOT_FOUND);
+ }
+ Triple<String, String, String> lockKey =
Triple.of(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(),
+ snapshotInfo.getName());
+ if (!lockSet.contains(lockKey)) {
+ mergeOmLockDetails(omMetadataManager.getLock()
+ .acquireWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(),
lockKey.getMiddle(), lockKey.getRight()));
+ lockSet.add(lockKey);
+ }
+ }
+
private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo,
OmMetadataManagerImpl omMetadataManager, long trxnLogIndex,
- Map<String, SnapshotInfo> updatedSnapInfos) {
+ Map<String, SnapshotInfo> updatedSnapInfos) throws IOException {
if (snapInfo != null) {
+ // Fetch the latest value again after acquiring lock.
+ SnapshotInfo updatedSnapshotInfo =
omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey());
+
// Setting next snapshot deep clean to false, Since the
// current snapshot is deleted. We can potentially
// reclaim more keys in the next snapshot.
- snapInfo.setDeepClean(false);
+ updatedSnapshotInfo.setDeepClean(false);
// Update table cache first
- omMetadataManager.getSnapshotInfoTable().addCacheEntry(
- new CacheKey<>(snapInfo.getTableKey()),
- CacheValue.get(trxnLogIndex, snapInfo));
- updatedSnapInfos.put(snapInfo.getTableKey(), snapInfo);
+ omMetadataManager.getSnapshotInfoTable().addCacheEntry(new
CacheKey<>(updatedSnapshotInfo.getTableKey()),
+ CacheValue.get(trxnLogIndex, updatedSnapshotInfo));
+ updatedSnapInfos.put(updatedSnapshotInfo.getTableKey(),
updatedSnapshotInfo);
}
}
@@ -144,6 +203,7 @@ public class OMSnapshotPurgeRequest extends OMClientRequest
{
* update in DB.
*/
private void updateSnapshotChainAndCache(
+ Set<Triple<String, String, String>> lockSet,
OmMetadataManagerImpl metadataManager,
SnapshotInfo snapInfo,
long trxnLogIndex,
@@ -155,7 +215,6 @@ public class OMSnapshotPurgeRequest extends OMClientRequest
{
SnapshotChainManager snapshotChainManager = metadataManager
.getSnapshotChainManager();
- SnapshotInfo nextPathSnapInfo = null;
// If the snapshot is deleted in the previous run, then the in-memory
// SnapshotChainManager might throw NoSuchElementException as the snapshot
@@ -171,58 +230,63 @@ public class OMSnapshotPurgeRequest extends
OMClientRequest {
return;
}
- // Updates next path snapshot's previous snapshot ID
+ String nextPathSnapshotKey = null;
+
if (hasNextPathSnapshot) {
UUID nextPathSnapshotId = snapshotChainManager.nextPathSnapshot(
snapInfo.getSnapshotPath(), snapInfo.getSnapshotId());
-
- String snapshotTableKey = snapshotChainManager
+ nextPathSnapshotKey = snapshotChainManager
.getTableKey(nextPathSnapshotId);
- nextPathSnapInfo = metadataManager.getSnapshotInfoTable()
- .get(snapshotTableKey);
- if (nextPathSnapInfo != null) {
- nextPathSnapInfo.setPathPreviousSnapshotId(
- snapInfo.getPathPreviousSnapshotId());
- metadataManager.getSnapshotInfoTable().addCacheEntry(
- new CacheKey<>(nextPathSnapInfo.getTableKey()),
- CacheValue.get(trxnLogIndex, nextPathSnapInfo));
- updatedPathPreviousAndGlobalSnapshots
- .put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
- }
+
+ // Acquire lock from the snapshot
+ acquireLock(lockSet, nextPathSnapshotKey, metadataManager);
}
- // Updates next global snapshot's previous snapshot ID
+ String nextGlobalSnapshotKey = null;
if (hasNextGlobalSnapshot) {
- UUID nextGlobalSnapshotId =
- snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());
-
- String snapshotTableKey = snapshotChainManager
- .getTableKey(nextGlobalSnapshotId);
-
- SnapshotInfo nextGlobalSnapInfo = metadataManager.getSnapshotInfoTable()
- .get(snapshotTableKey);
- // If both next global and path snapshot are same, it may overwrite
- // nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
- // will prevent it.
- if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
- nextGlobalSnapInfo.getSnapshotId().equals(
- nextPathSnapInfo.getSnapshotId())) {
- nextPathSnapInfo.setGlobalPreviousSnapshotId(
- snapInfo.getGlobalPreviousSnapshotId());
- metadataManager.getSnapshotInfoTable().addCacheEntry(
- new CacheKey<>(nextPathSnapInfo.getTableKey()),
- CacheValue.get(trxnLogIndex, nextPathSnapInfo));
- updatedPathPreviousAndGlobalSnapshots
- .put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
- } else if (nextGlobalSnapInfo != null) {
- nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
- snapInfo.getGlobalPreviousSnapshotId());
- metadataManager.getSnapshotInfoTable().addCacheEntry(
- new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
- CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
- updatedPathPreviousAndGlobalSnapshots
- .put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
- }
+ UUID nextGlobalSnapshotId =
snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());
+ nextGlobalSnapshotKey =
snapshotChainManager.getTableKey(nextGlobalSnapshotId);
+
+ // Acquire lock from the snapshot
+ acquireLock(lockSet, nextGlobalSnapshotKey, metadataManager);
+ }
+
+ SnapshotInfo nextPathSnapInfo =
+ nextPathSnapshotKey != null ?
metadataManager.getSnapshotInfoTable().get(nextPathSnapshotKey) : null;
+
+ SnapshotInfo nextGlobalSnapInfo =
+ nextGlobalSnapshotKey != null ?
metadataManager.getSnapshotInfoTable().get(nextGlobalSnapshotKey) : null;
+
+ // Updates next path snapshot's previous snapshot ID
+ if (nextPathSnapInfo != null) {
+
nextPathSnapInfo.setPathPreviousSnapshotId(snapInfo.getPathPreviousSnapshotId());
+ metadataManager.getSnapshotInfoTable().addCacheEntry(
+ new CacheKey<>(nextPathSnapInfo.getTableKey()),
+ CacheValue.get(trxnLogIndex, nextPathSnapInfo));
+ updatedPathPreviousAndGlobalSnapshots
+ .put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
+ }
+
+ // Updates next global snapshot's previous snapshot ID
+ // If both next global and path snapshot are same, it may overwrite
+ // nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
+ // will prevent it.
+ if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
+
nextGlobalSnapInfo.getSnapshotId().equals(nextPathSnapInfo.getSnapshotId())) {
+
nextPathSnapInfo.setGlobalPreviousSnapshotId(snapInfo.getGlobalPreviousSnapshotId());
+ metadataManager.getSnapshotInfoTable().addCacheEntry(
+ new CacheKey<>(nextPathSnapInfo.getTableKey()),
+ CacheValue.get(trxnLogIndex, nextPathSnapInfo));
+ updatedPathPreviousAndGlobalSnapshots
+ .put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
+ } else if (nextGlobalSnapInfo != null) {
+ nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
+ snapInfo.getGlobalPreviousSnapshotId());
+ metadataManager.getSnapshotInfoTable().addCacheEntry(
+ new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
+ CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
+ updatedPathPreviousAndGlobalSnapshots
+ .put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
}
snapshotChainManager.deleteSnapshot(snapInfo);
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java
index b3dd5206c9..9f1ff24a6d 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotSetPropertyRequest.java
@@ -36,7 +36,8 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
-import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INVALID_SNAPSHOT_ERROR;
+import static
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
+import static
org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;
/**
* Updates the exclusive size of the snapshot.
@@ -62,16 +63,31 @@ public class OMSnapshotSetPropertyRequest extends
OMClientRequest {
.getSetSnapshotPropertyRequest();
SnapshotInfo updatedSnapInfo = null;
+ String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
+ boolean acquiredSnapshotLock = false;
+ String volumeName = null;
+ String bucketName = null;
+ String snapshotName = null;
+
try {
- String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
+ SnapshotInfo snapshotInfo =
metadataManager.getSnapshotInfoTable().get(snapshotKey);
+ if (snapshotInfo == null) {
+ LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.",
snapshotKey);
+ throw new OMException("Snapshot: '{" + snapshotKey + "}' doesn't not
exist in snapshot table.", FILE_NOT_FOUND);
+ }
+
+ volumeName = snapshotInfo.getVolumeName();
+ bucketName = snapshotInfo.getBucketName();
+ snapshotName = snapshotInfo.getName();
+
+ mergeOmLockDetails(metadataManager.getLock()
+ .acquireWriteLock(SNAPSHOT_LOCK, volumeName, bucketName,
snapshotName));
+
+ acquiredSnapshotLock = getOmLockDetails().isLockAcquired();
+
updatedSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotKey);
- if (updatedSnapInfo == null) {
- LOG.error("SnapshotInfo for Snapshot: {} is not found", snapshotKey);
- throw new OMException("SnapshotInfo for Snapshot: " + snapshotKey +
- " is not found", INVALID_SNAPSHOT_ERROR);
- }
if (setSnapshotPropertyRequest.hasDeepCleanedDeletedDir()) {
updatedSnapInfo.setDeepCleanedDeletedDir(setSnapshotPropertyRequest
@@ -104,6 +120,14 @@ public class OMSnapshotSetPropertyRequest extends
OMClientRequest {
} catch (IOException ex) {
omClientResponse = new OMSnapshotSetPropertyResponse(
createErrorOMResponse(omResponse, ex));
+ } finally {
+ if (acquiredSnapshotLock) {
+ mergeOmLockDetails(metadataManager.getLock()
+ .releaseWriteLock(SNAPSHOT_LOCK, volumeName, bucketName,
snapshotName));
+ }
+ if (omClientResponse != null) {
+ omClientResponse.setOmLockDetails(getOmLockDetails());
+ }
}
return omClientResponse;
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java
index 16411baa96..d300601b38 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotPurgeResponse.java
@@ -80,9 +80,9 @@ public class OMSnapshotPurgeResponse extends OMClientResponse
{
OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl)
omMetadataManager;
- updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
updateSnapInfo(metadataManager, batchOperation,
updatedPreviousAndGlobalSnapInfos);
+ updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
for (String dbKey: snapshotDbKeys) {
// Skip the cache here because snapshot is purged from cache in
OMSnapshotPurgeRequest.
SnapshotInfo snapshotInfo = omMetadataManager
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]