This is an automated email from the ASF dual-hosted git repository.
siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new c7de8d5871 HDDS-7741. [Snapshot] Delete keys in snapshot scope from
deleteTable during createSnapshot to accommodate snapshot garbage collection
(#4280)
c7de8d5871 is described below
commit c7de8d5871209a86e79f8422c6334697b32f113a
Author: Siyao Meng <[email protected]>
AuthorDate: Thu Mar 16 00:11:59 2023 -0700
HDDS-7741. [Snapshot] Delete keys in snapshot scope from deleteTable during
createSnapshot to accommodate snapshot garbage collection (#4280)
---
.../ozone/container/metadata/DatanodeTable.java | 5 +
.../metadata/SchemaOneDeletedBlocksTable.java | 5 +
.../org/apache/hadoop/hdds/utils/db/RDBTable.java | 5 +
.../apache/hadoop/hdds/utils/db/RocksDatabase.java | 16 ++++
.../org/apache/hadoop/hdds/utils/db/Table.java | 9 ++
.../apache/hadoop/hdds/utils/db/TypedTable.java | 5 +
.../hadoop/hdds/utils/db/TestRDBTableStore.java | 71 ++++++++++++++-
.../apache/hadoop/ozone/om/OMMetadataManager.java | 8 ++
.../org/apache/hadoop/ozone/om/KeyManagerImpl.java | 2 +-
.../hadoop/ozone/om/OmMetadataManagerImpl.java | 29 ++++++
.../apache/hadoop/ozone/om/OmSnapshotManager.java | 101 ++++++++++++++++++++-
.../request/snapshot/OMSnapshotCreateRequest.java | 4 +-
.../key/OMDirectoriesPurgeResponseWithFSO.java | 4 +
.../snapshot/OMSnapshotCreateResponse.java | 15 +--
.../ozone/om/service/KeyDeletingService.java | 6 ++
.../snapshot/TestOMSnapshotCreateRequest.java | 4 +-
.../snapshot/TestOMSnapshotCreateResponse.java | 79 +++++++++++++++-
.../snapshot/TestOMSnapshotDeleteResponse.java | 2 +-
.../recon/recovery/ReconOmMetadataManagerImpl.java | 6 ++
19 files changed, 355 insertions(+), 21 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
index 1d35fab9d5..fbb5cd7d64 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
@@ -65,6 +65,11 @@ public class DatanodeTable<KEY, VALUE> implements Table<KEY,
VALUE> {
table.delete(key);
}
+ @Override
+ public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
+ table.deleteRange(beginKey, endKey);
+ }
+
@Override
public void deleteWithBatch(BatchOperation batch, KEY key)
throws IOException {
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
index 42858c8b64..69b2eeb3bc 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
@@ -75,6 +75,11 @@ public class SchemaOneDeletedBlocksTable extends
DatanodeTable<String,
super.deleteWithBatch(batch, prefix(key));
}
+ @Override
+ public void deleteRange(String beginKey, String endKey) throws IOException {
+ super.deleteRange(prefix(beginKey), prefix(endKey));
+ }
+
@Override
public boolean isExist(String key) throws IOException {
return super.isExist(prefix(key));
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
index 09f58a9335..79d1037e8e 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
@@ -145,6 +145,11 @@ class RDBTable implements Table<byte[], byte[]> {
db.delete(family, key);
}
+ @Override
+ public void deleteRange(byte[] beginKey, byte[] endKey) throws IOException {
+ db.deleteRange(family, beginKey, endKey);
+ }
+
@Override
public void deleteWithBatch(BatchOperation batch, byte[] key)
throws IOException {
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
index ff1bc6cfa8..37159c9d88 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
@@ -792,6 +792,22 @@ public final class RocksDatabase {
}
}
+ public void deleteRange(ColumnFamily family, byte[] beginKey, byte[] endKey)
+ throws IOException {
+ assertClose();
+ try {
+ counter.incrementAndGet();
+ db.get().deleteRange(family.getHandle(), beginKey, endKey);
+ } catch (RocksDBException e) {
+ closeOnError(e, true);
+ final String message = "delete range " + bytes2String(beginKey) +
+ " to " + bytes2String(endKey) + " from " + family;
+ throw toIOException(this, message, e);
+ } finally {
+ counter.decrementAndGet();
+ }
+ }
+
@Override
public String toString() {
return name;
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
index 0e0e67be01..3d85b357b1 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
@@ -148,6 +148,15 @@ public interface Table<KEY, VALUE> extends AutoCloseable {
*/
void deleteWithBatch(BatchOperation batch, KEY key) throws IOException;
+ /**
+ * Deletes a range of keys from the metadata store.
+ *
+ * @param beginKey start metadata key
+ * @param endKey end metadata key
+ * @throws IOException on Failure
+ */
+ void deleteRange(KEY beginKey, KEY endKey) throws IOException;
+
/**
* Returns the iterator for this metadata store.
*
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
index ee15fa896d..147a22b2f5 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
@@ -272,7 +272,12 @@ public class TypedTable<KEY, VALUE> implements Table<KEY,
VALUE> {
public void deleteWithBatch(BatchOperation batch, KEY key)
throws IOException {
rawTable.deleteWithBatch(batch, codecRegistry.asRawData(key));
+ }
+ @Override
+ public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
+ rawTable.deleteRange(codecRegistry.asRawData(beginKey),
+ codecRegistry.asRawData(endKey));
}
@Override
diff --git
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
index dc65410646..c53c34fdb2 100644
---
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
+++
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
@@ -57,7 +57,7 @@ public class TestRDBTableStore {
"First", "Second", "Third",
"Fourth", "Fifth",
"Sixth", "Seventh",
- "Eighth");
+ "Eighth", "Ninth");
private final List<String> prefixedFamilies = Arrays.asList(
"PrefixFirst",
"PrefixTwo", "PrefixThree",
@@ -165,7 +165,7 @@ public class TestRDBTableStore {
}
// Write all the keys and delete the keys scheduled for delete.
- //Assert we find only expected keys in the Table.
+ // Assert we find only expected keys in the Table.
try (Table testTable = rdbStore.getTable("Fourth")) {
for (int x = 0; x < deletedKeys.size(); x++) {
testTable.put(deletedKeys.get(x), value);
@@ -177,15 +177,78 @@ public class TestRDBTableStore {
}
for (int x = 0; x < validKeys.size(); x++) {
- Assertions.assertNotNull(testTable.get(validKeys.get(0)));
+ Assertions.assertNotNull(testTable.get(validKeys.get(x)));
}
for (int x = 0; x < deletedKeys.size(); x++) {
- Assertions.assertNull(testTable.get(deletedKeys.get(0)));
+ Assertions.assertNull(testTable.get(deletedKeys.get(x)));
}
}
}
+ @Test
+ public void deleteRange() throws Exception {
+
+ // Prepare keys to be written to the test table
+ List<byte[]> keys = new ArrayList<>();
+ for (int x = 0; x < 100; x++) {
+ // Left pad DB keys with zeros
+ String k = String.format("%03d", x) + "-" + RandomStringUtils.random(6);
+ keys.add(k.getBytes(StandardCharsets.UTF_8));
+ }
+ // Some random value
+ byte[] val = RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8);
+
+ try (Table testTable = rdbStore.getTable("Ninth")) {
+
+ // Write keys to the table
+ for (int x = 0; x < keys.size(); x++) {
+ testTable.put(keys.get(x), val);
+ }
+
+ // All keys should exist at this point
+ for (int x = 0; x < keys.size(); x++) {
+ Assertions.assertNotNull(testTable.get(keys.get(x)));
+ }
+
+ // Delete a range of keys: [10th, 20th), zero-indexed
+ final int deleteRangeBegin = 10, deleteRangeEnd = 20;
+ byte[] dRangeBeginKey = keys.get(deleteRangeBegin);
+ byte[] dRangeEndKey = keys.get(deleteRangeEnd);
+
+ testTable.deleteRange(dRangeBeginKey, dRangeEndKey);
+
+ // Keys [10th, 20th) should be gone now
+ for (int x = deleteRangeBegin; x < deleteRangeEnd; x++) {
+ Assertions.assertNull(testTable.get(keys.get(x)));
+ }
+
+ // While the rest of the keys should be untouched
+ for (int x = 0; x < deleteRangeBegin; x++) {
+ Assertions.assertNotNull(testTable.get(keys.get(x)));
+ }
+ for (int x = deleteRangeEnd; x < 100; x++) {
+ Assertions.assertNotNull(testTable.get(keys.get(x)));
+ }
+
+ // Delete the rest of the keys
+ testTable.deleteRange(keys.get(0), keys.get(100 - 1));
+
+ // Confirm key deletion
+ for (int x = 0; x < 100 - 1; x++) {
+ Assertions.assertNull(testTable.get(keys.get(x)));
+ }
+ // The last key is still there because
+ // deleteRange() excludes the endKey by design
+ Assertions.assertNotNull(testTable.get(keys.get(100 - 1)));
+
+ // Delete the last key
+ testTable.delete(keys.get(100 - 1));
+ Assertions.assertNull(testTable.get(keys.get(100 - 1)));
+ }
+
+ }
+
@Test
public void batchPut() throws Exception {
try (Table testTable = rdbStore.getTable("Fifth");
diff --git
a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
index 285fb0245b..7e0f3511ef 100644
---
a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
+++
b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
@@ -22,6 +22,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.utils.DBStoreHAManager;
@@ -282,6 +283,13 @@ public interface OMMetadataManager extends
DBStoreHAManager {
ExpiredOpenKeys getExpiredOpenKeys(Duration expireThreshold, int count,
BucketLayout bucketLayout) throws IOException;
+ /**
+ * Retrieve RWLock for the table.
+ * @param tableName table name.
+ * @return ReentrantReadWriteLock
+ */
+ ReentrantReadWriteLock getTableLock(String tableName);
+
/**
* Returns the user Table.
*
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index a02cf37bc6..12f7b89d12 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -619,7 +619,7 @@ public class KeyManagerImpl implements KeyManager {
@Override
public List<BlockGroup> getPendingDeletionKeys(final int count)
throws IOException {
- return metadataManager.getPendingDeletionKeys(count);
+ return metadataManager.getPendingDeletionKeys(count);
}
@Override
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
index ede9b5893a..03414980e8 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
@@ -30,6 +30,7 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -281,6 +282,16 @@ public class OmMetadataManagerImpl implements
OMMetadataManager,
private boolean ignorePipelineinKey;
private Table deletedDirTable;
+ // Table-level locks that protects table read/write access. Note:
+ // Don't use this lock for tables other than deletedTable and
deletedDirTable.
+ // This is a stopgap solution. Will remove when HDDS-5905 (HDDS-6483) is
done.
+ private Map<String, ReentrantReadWriteLock> tableLockMap = new HashMap<>();
+
+ @Override
+ public ReentrantReadWriteLock getTableLock(String tableName) {
+ return tableLockMap.get(tableName);
+ }
+
// Epoch is used to generate the objectIDs. The most significant 2 bits of
// objectIDs is set to this epoch. For clusters before HDDS-4315 there is
// no epoch as such. But it can be safely assumed that the most significant
@@ -561,6 +572,8 @@ public class OmMetadataManagerImpl implements
OMMetadataManager,
deletedTable = this.store.getTable(DELETED_TABLE, String.class,
RepeatedOmKeyInfo.class);
checkTableStatus(deletedTable, DELETED_TABLE, addCacheMetrics);
+ // Currently, deletedTable is the only table that will need the table lock
+ tableLockMap.put(DELETED_TABLE, new ReentrantReadWriteLock(true));
openKeyTable =
this.store.getTable(OPEN_KEY_TABLE, String.class,
@@ -1385,8 +1398,24 @@ public class OmMetadataManagerImpl implements
OMMetadataManager,
while (keyIter.hasNext() && currentCount < keyCount) {
KeyValue<String, RepeatedOmKeyInfo> kv = keyIter.next();
if (kv != null) {
+ // Multiple keys with the same path can be queued in one DB entry
RepeatedOmKeyInfo infoList = kv.getValue();
for (OmKeyInfo info : infoList.cloneOmKeyInfoList()) {
+ // Skip the key if it exists in the previous snapshot (of the same
+ // scope) as in this case its blocks should not be reclaimed
+
+ // TODO: [SNAPSHOT] HDDS-7968
+ // 1. If previous snapshot keyTable has key info.getObjectID(),
+ // skip it. Pending HDDS-7740 merge to reuse the util methods to
+ // check previousSnapshot.
+ // 2. For efficient lookup, the addition in design doc 4.b)1.b
+ // is critical.
+ // 3. With snapshot it is possible that only some of the keys in
+ // the DB key's RepeatedOmKeyInfo list can be reclaimed,
+ // make sure to update deletedTable accordingly in this case.
+ // 4. Further optimization: Skip all snapshotted keys altogether
+ // e.g. by prefixing all unreclaimable keys, then calling seek
+
// Add all blocks from all versions of the key to the deletion list
for (OmKeyLocationInfoGroup keyLocations :
info.getKeyLocationVersions()) {
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
index 2c5a6c9b61..ffad3677da 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
@@ -31,12 +31,15 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.server.ServerUtils;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
import org.apache.hadoop.hdds.utils.db.RDBStore;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions;
import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.om.exceptions.OMException;
+import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus;
import org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager;
@@ -64,13 +67,18 @@ public final class OmSnapshotManager implements
AutoCloseable {
private static final Logger LOG =
LoggerFactory.getLogger(OmSnapshotManager.class);
+ // Threshold for the table iterator loop in nanoseconds.
+ private static final long DB_TABLE_ITER_LOOP_THRESHOLD_NS = 100000;
+
private final OzoneManager ozoneManager;
+ private final OMMetadataManager omMetadataManager;
private final SnapshotDiffManager snapshotDiffManager;
private final LoadingCache<String, OmSnapshot> snapshotCache;
private final ManagedRocksDB snapshotDiffDb;
OmSnapshotManager(OzoneManager ozoneManager) {
this.ozoneManager = ozoneManager;
+ this.omMetadataManager = ozoneManager.getMetadataManager();
// Pass in the differ
final RocksDBCheckpointDiffer differ = ozoneManager
@@ -164,8 +172,24 @@ public final class OmSnapshotManager implements
AutoCloseable {
throws IOException {
RDBStore store = (RDBStore) omMetadataManager.getStore();
- final DBCheckpoint dbCheckpoint = store.getSnapshot(
- snapshotInfo.getCheckpointDirName());
+ final DBCheckpoint dbCheckpoint;
+
+ // Acquire deletedTable write lock
+ omMetadataManager.getTableLock(OmMetadataManagerImpl.DELETED_TABLE)
+ .writeLock().lock();
+ try {
+ // Create DB checkpoint for snapshot
+ dbCheckpoint = store.getSnapshot(snapshotInfo.getCheckpointDirName());
+ // Clean up active DB's deletedTable right after checkpoint is taken,
+ // with table write lock held
+ deleteKeysInSnapshotScopeFromDTableInternal(omMetadataManager,
+ snapshotInfo.getVolumeName(), snapshotInfo.getBucketName());
+ // TODO: [SNAPSHOT] HDDS-8064. Clean up deletedDirTable as well
+ } finally {
+ // Release deletedTable write lock
+ omMetadataManager.getTableLock(OmMetadataManagerImpl.DELETED_TABLE)
+ .writeLock().unlock();
+ }
LOG.info("Created checkpoint : {} for snapshot {}",
dbCheckpoint.getCheckpointLocation(), snapshotInfo.getName());
@@ -193,6 +217,79 @@ public final class OmSnapshotManager implements
AutoCloseable {
return dbCheckpoint;
}
+ /**
+ * Helper method to delete keys in the snapshot scope from active DB's
+ * deletedTable.
+ *
+ * @param omMetadataManager OMMetadataManager instance
+ * @param volumeName volume name
+ * @param bucketName bucket name
+ */
+ private static void deleteKeysInSnapshotScopeFromDTableInternal(
+ OMMetadataManager omMetadataManager,
+ String volumeName,
+ String bucketName) throws IOException {
+
+ // Range delete start key (inclusive)
+ String beginKey =
+ omMetadataManager.getOzoneKey(volumeName, bucketName, "");
+
+ // Range delete end key (exclusive) to be found
+ String endKey;
+
+ // Start performance tracking timer
+ long startTime = System.nanoTime();
+
+ try (TableIterator<String,
+ ? extends Table.KeyValue<String, RepeatedOmKeyInfo>>
+ keyIter = omMetadataManager.getDeletedTable().iterator()) {
+
+ keyIter.seek(beginKey);
+ // Continue only when there are entries of snapshot (bucket) scope
+ // in deletedTable in the first place
+ if (!keyIter.hasNext()) {
+ // Use null as a marker. No need to do deleteRange() at all.
+ endKey = null;
+ } else {
+ // Remember the last key with a matching prefix
+ endKey = keyIter.next().getKey();
+
+ // Loop until prefix mismatches.
+ // TODO: [SNAPSHOT] Try to seek next predicted bucket name (speed up?)
+ while (keyIter.hasNext()) {
+ Table.KeyValue<String, RepeatedOmKeyInfo> entry = keyIter.next();
+ String dbKey = entry.getKey();
+ if (dbKey.startsWith(beginKey)) {
+ endKey = dbKey;
+ }
+ }
+ }
+ }
+
+ // Time took for the iterator to finish (in ns)
+ long timeElapsed = System.nanoTime() - startTime;
+ if (timeElapsed >= DB_TABLE_ITER_LOOP_THRESHOLD_NS) {
+ // Print time elapsed
+ LOG.warn("Took {} ns to clean up deletedTable", timeElapsed);
+ }
+
+ if (endKey != null) {
+ // Clean up deletedTable
+ omMetadataManager.getDeletedTable().deleteRange(beginKey, endKey);
+
+ // Remove range end key itself
+ omMetadataManager.getDeletedTable().delete(endKey);
+ }
+
+ // Note: We do not need to invalidate deletedTable cache since entries
+ // are not added to its table cache in the first place.
+ // See OMKeyDeleteRequest and OMKeyPurgeRequest#validateAndUpdateCache.
+
+ // This makes the table clean up efficient as we only need one
+ // deleteRange() operation. No need to invalidate cache entries
+ // one by one.
+ }
+
// Get OmSnapshot if the keyname has ".snapshot" key indicator
public IOmMetadataReader checkForSnapshot(String volumeName,
String bucketName, String keyname)
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
index 2ee67692ff..a337a791d8 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
@@ -137,7 +137,7 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
omMetadataManager.getLock().acquireWriteLock(SNAPSHOT_LOCK,
volumeName, bucketName, snapshotName);
- //Check if snapshot already exists
+ // Check if snapshot already exists
if (omMetadataManager.getSnapshotInfoTable().isExist(key)) {
LOG.debug("Snapshot '{}' already exists under '{}'", key,
snapshotPath);
throw new OMException("Snapshot already exists", FILE_ALREADY_EXISTS);
@@ -178,7 +178,7 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
CreateSnapshotResponse.newBuilder()
.setSnapshotInfo(snapshotInfo.getProtobuf()));
omClientResponse = new OMSnapshotCreateResponse(
- omResponse.build(), volumeName, bucketName, snapshotName);
+ omResponse.build(), snapshotInfo);
} catch (IOException ex) {
exception = ex;
omClientResponse = new OMSnapshotCreateResponse(
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
index b89d56dc04..dec84eaef3 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
@@ -118,9 +118,13 @@ public class OMDirectoriesPurgeResponseWithFSO extends
OmKeyResponse {
.getOzoneKey(keyInfo.getVolumeName(), keyInfo.getBucketName(),
keyInfo.getKeyName());
+ // TODO: [SNAPSHOT] Acquire deletedTable write table lock
+
omMetadataManager.getDeletedTable().putWithBatch(batchOperation,
deletedKey, repeatedOmKeyInfo);
+ // TODO: [SNAPSHOT] Release deletedTable write table lock
+
}
// Delete the visited directory from deleted directory table
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
index 718b67ec89..f970870e6a 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
@@ -33,19 +33,23 @@ import javax.annotation.Nonnull;
import java.io.IOException;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
+import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DELETED_TABLE;
import static
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.RENAMED_KEY_TABLE;
import static
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE;
/**
* Response for OMSnapshotCreateRequest.
*/
-@CleanupTableInfo(cleanupTables = {SNAPSHOT_INFO_TABLE, RENAMED_KEY_TABLE})
+@CleanupTableInfo(cleanupTables = {
+ DELETED_TABLE, RENAMED_KEY_TABLE, SNAPSHOT_INFO_TABLE})
public class OMSnapshotCreateResponse extends OMClientResponse {
+ private SnapshotInfo snapshotInfo;
+
public OMSnapshotCreateResponse(@Nonnull OMResponse omResponse,
- @Nonnull String volumeName, @Nonnull String bucketName,
- @Nonnull String snapshotName) {
+ @Nonnull SnapshotInfo snapshotInfo) {
super(omResponse);
+ this.snapshotInfo = snapshotInfo;
}
/**
@@ -61,11 +65,8 @@ public class OMSnapshotCreateResponse extends
OMClientResponse {
public void addToDBBatch(OMMetadataManager omMetadataManager,
BatchOperation batchOperation) throws IOException {
- SnapshotInfo snapshotInfo =
- SnapshotInfo.getFromProtobuf(
- getOMResponse().getCreateSnapshotResponse().getSnapshotInfo());
-
// Create the snapshot checkpoint
+ // Also cleans up deletedTable
OmSnapshotManager.createOmSnapshotCheckpoint(omMetadataManager,
snapshotInfo);
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
index b0c6b7a13a..897c1fa531 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
@@ -163,6 +163,12 @@ public class KeyDeletingService extends BackgroundService {
runCount.incrementAndGet();
try {
long startTime = Time.monotonicNow();
+ // TODO: [SNAPSHOT] HDDS-7968. Reclaim eligible key blocks in
+ // snapshot's deletedTable when active DB's deletedTable
+ // doesn't have enough entries left.
+ // OM would have to keep track of which snapshot the key is coming
+ // from. And PurgeKeysRequest would have to be adjusted to be able
+ // to operate on snapshot checkpoints.
List<BlockGroup> keyBlocksList = manager
.getPendingDeletionKeys(keyLimitPerTask);
if (keyBlocksList != null && !keyBlocksList.isEmpty()) {
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
index e530f712cb..8065e78bff 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
@@ -228,11 +228,11 @@ public class TestOMSnapshotCreateRequest {
// return null.
Assert.assertNull(omMetadataManager.getSnapshotInfoTable().get(key));
- // add key to cache
+ // run validateAndUpdateCache. add key to cache
OMClientResponse omClientResponse =
omSnapshotCreateRequest.validateAndUpdateCache(ozoneManager, 1,
ozoneManagerDoubleBufferHelper);
-
+
// check cache
SnapshotInfo snapshotInfo =
omMetadataManager.getSnapshotInfoTable().get(key);
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
index 09d6849ee1..26a4265417 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
@@ -20,9 +20,15 @@
package org.apache.hadoop.ozone.om.response.snapshot;
import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
import java.util.UUID;
import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
+import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
import org.junit.After;
import org.junit.Assert;
@@ -45,7 +51,6 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_DIR;
-
/**
* This class tests OMSnapshotCreateResponse.
*/
@@ -90,6 +95,10 @@ public class TestOMSnapshotCreateResponse {
omMetadataManager
.countRowsInTable(omMetadataManager.getSnapshotInfoTable()));
+ // Prepare for deletedTable clean up logic verification
+ Set<String> dtSentinelKeys = addTestKeysToDeletedTable(
+ volumeName, bucketName);
+
// commit to table
OMSnapshotCreateResponse omSnapshotCreateResponse =
new OMSnapshotCreateResponse(OMResponse.newBuilder()
@@ -98,7 +107,7 @@ public class TestOMSnapshotCreateResponse {
.setCreateSnapshotResponse(
CreateSnapshotResponse.newBuilder()
.setSnapshotInfo(snapshotInfo.getProtobuf())
- .build()).build(), volumeName, bucketName, snapshotName);
+ .build()).build(), snapshotInfo);
omSnapshotCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
omMetadataManager.getStore().commitBatchOperation(batchOperation);
@@ -118,5 +127,71 @@ public class TestOMSnapshotCreateResponse {
SnapshotInfo storedInfo = keyValue.getValue();
Assert.assertEquals(snapshotInfo.getTableKey(), keyValue.getKey());
Assert.assertEquals(snapshotInfo, storedInfo);
+
+ // Check deletedTable clean up works as expected
+ verifyEntriesLeftInDeletedTable(dtSentinelKeys);
+ }
+
+ private Set<String> addTestKeysToDeletedTable(
+ String volumeName, String bucketName) throws IOException {
+
+ RepeatedOmKeyInfo dummyRepeatedKeyInfo = new RepeatedOmKeyInfo.Builder()
+ .setOmKeyInfos(new ArrayList<>()).build();
+
+ // Add deletedTable key entries that surround the snapshot scope
+ Set<String> dtSentinelKeys = new HashSet<>();
+ // Get a bucket name right before and after the bucketName
+ // e.g. When bucketName is buck2, bucketNameBefore is buck1,
+ // bucketNameAfter is buck3
+ // This will not guarantee the bucket name is valid for Ozone but
+ // this would be good enough for this unit test.
+ char bucketNameLastChar = bucketName.charAt(bucketName.length() - 1);
+ String bucketNameBefore = bucketName.substring(0, bucketName.length() - 1)
+
+ Character.toString((char)(bucketNameLastChar - 1));
+ for (int i = 0; i < 3; i++) {
+ String dtKey = omMetadataManager.getOzoneKey(volumeName,
bucketNameBefore,
+ "dtkey" + i);
+ omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+ dtSentinelKeys.add(dtKey);
+ }
+ String bucketNameAfter = bucketName.substring(0, bucketName.length() - 1) +
+ Character.toString((char)(bucketNameLastChar + 1));
+ for (int i = 0; i < 3; i++) {
+ String dtKey = omMetadataManager.getOzoneKey(volumeName, bucketNameAfter,
+ "dtkey" + i);
+ omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+ dtSentinelKeys.add(dtKey);
+ }
+ // Add deletedTable key entries in the snapshot (bucket) scope
+ for (int i = 0; i < 10; i++) {
+ String dtKey = omMetadataManager.getOzoneKey(volumeName, bucketName,
+ "dtkey" + i);
+ omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+ }
+
+ return dtSentinelKeys;
+ }
+
+ private void verifyEntriesLeftInDeletedTable(Set<String> dtSentinelKeys)
+ throws IOException {
+
+ // Verify that only keys inside the snapshot scope are gone from
+ // deletedTable.
+ try (TableIterator<String,
+ ? extends Table.KeyValue<String, RepeatedOmKeyInfo>>
+ keyIter = omMetadataManager.getDeletedTable().iterator()) {
+ keyIter.seekToFirst();
+ while (keyIter.hasNext()) {
+ Table.KeyValue<String, RepeatedOmKeyInfo> entry = keyIter.next();
+ String dtKey = entry.getKey();
+ // deletedTable should not have bucketName keys
+ Assert.assertTrue("deletedTable should contain key",
+ dtSentinelKeys.contains(dtKey));
+ dtSentinelKeys.remove(dtKey);
+ }
+ }
+
+ Assert.assertTrue("deletedTable is missing keys that should be there",
+ dtSentinelKeys.isEmpty());
}
}
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
index 67a2afeea7..8c861735d6 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
@@ -97,7 +97,7 @@ public class TestOMSnapshotDeleteResponse {
.setCreateSnapshotResponse(
CreateSnapshotResponse.newBuilder()
.setSnapshotInfo(snapshotInfo.getProtobuf())
- .build()).build(), volumeName, bucketName, snapshotName);
+ .build()).build(), snapshotInfo);
omSnapshotCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
omMetadataManager.getStore().commitBatchOperation(batchOperation);
diff --git
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
index 4faced0e30..6bd1ab4751 100644
---
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
+++
b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
@@ -23,6 +23,7 @@ import static
org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_OM
import java.io.File;
import java.io.IOException;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.inject.Inject;
import javax.inject.Singleton;
@@ -60,6 +61,11 @@ public class ReconOmMetadataManagerImpl extends
OmMetadataManagerImpl
this.ozoneConfiguration = configuration;
}
+ @Override
+ public ReentrantReadWriteLock getTableLock(String tableName) {
+ return super.getTableLock(tableName);
+ }
+
@Override
public void start(OzoneConfiguration configuration) throws IOException {
LOG.info("Starting ReconOMMetadataManagerImpl");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]