This is an automated email from the ASF dual-hosted git repository.

siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new c7de8d5871 HDDS-7741. [Snapshot] Delete keys in snapshot scope from 
deleteTable during createSnapshot to accommodate snapshot garbage collection 
(#4280)
c7de8d5871 is described below

commit c7de8d5871209a86e79f8422c6334697b32f113a
Author: Siyao Meng <[email protected]>
AuthorDate: Thu Mar 16 00:11:59 2023 -0700

    HDDS-7741. [Snapshot] Delete keys in snapshot scope from deleteTable during 
createSnapshot to accommodate snapshot garbage collection (#4280)
---
 .../ozone/container/metadata/DatanodeTable.java    |   5 +
 .../metadata/SchemaOneDeletedBlocksTable.java      |   5 +
 .../org/apache/hadoop/hdds/utils/db/RDBTable.java  |   5 +
 .../apache/hadoop/hdds/utils/db/RocksDatabase.java |  16 ++++
 .../org/apache/hadoop/hdds/utils/db/Table.java     |   9 ++
 .../apache/hadoop/hdds/utils/db/TypedTable.java    |   5 +
 .../hadoop/hdds/utils/db/TestRDBTableStore.java    |  71 ++++++++++++++-
 .../apache/hadoop/ozone/om/OMMetadataManager.java  |   8 ++
 .../org/apache/hadoop/ozone/om/KeyManagerImpl.java |   2 +-
 .../hadoop/ozone/om/OmMetadataManagerImpl.java     |  29 ++++++
 .../apache/hadoop/ozone/om/OmSnapshotManager.java  | 101 ++++++++++++++++++++-
 .../request/snapshot/OMSnapshotCreateRequest.java  |   4 +-
 .../key/OMDirectoriesPurgeResponseWithFSO.java     |   4 +
 .../snapshot/OMSnapshotCreateResponse.java         |  15 +--
 .../ozone/om/service/KeyDeletingService.java       |   6 ++
 .../snapshot/TestOMSnapshotCreateRequest.java      |   4 +-
 .../snapshot/TestOMSnapshotCreateResponse.java     |  79 +++++++++++++++-
 .../snapshot/TestOMSnapshotDeleteResponse.java     |   2 +-
 .../recon/recovery/ReconOmMetadataManagerImpl.java |   6 ++
 19 files changed, 355 insertions(+), 21 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
index 1d35fab9d5..fbb5cd7d64 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeTable.java
@@ -65,6 +65,11 @@ public class DatanodeTable<KEY, VALUE> implements Table<KEY, 
VALUE> {
     table.delete(key);
   }
 
+  @Override
+  public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
+    table.deleteRange(beginKey, endKey);
+  }
+
   @Override
   public void deleteWithBatch(BatchOperation batch, KEY key)
           throws IOException {
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
index 42858c8b64..69b2eeb3bc 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneDeletedBlocksTable.java
@@ -75,6 +75,11 @@ public class SchemaOneDeletedBlocksTable extends 
DatanodeTable<String,
     super.deleteWithBatch(batch, prefix(key));
   }
 
+  @Override
+  public void deleteRange(String beginKey, String endKey) throws IOException {
+    super.deleteRange(prefix(beginKey), prefix(endKey));
+  }
+
   @Override
   public boolean isExist(String key) throws IOException {
     return super.isExist(prefix(key));
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
index 09f58a9335..79d1037e8e 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBTable.java
@@ -145,6 +145,11 @@ class RDBTable implements Table<byte[], byte[]> {
     db.delete(family, key);
   }
 
+  @Override
+  public void deleteRange(byte[] beginKey, byte[] endKey) throws IOException {
+    db.deleteRange(family, beginKey, endKey);
+  }
+
   @Override
   public void deleteWithBatch(BatchOperation batch, byte[] key)
       throws IOException {
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
index ff1bc6cfa8..37159c9d88 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
@@ -792,6 +792,22 @@ public final class RocksDatabase {
     }
   }
 
+  public void deleteRange(ColumnFamily family, byte[] beginKey, byte[] endKey)
+      throws IOException {
+    assertClose();
+    try {
+      counter.incrementAndGet();
+      db.get().deleteRange(family.getHandle(), beginKey, endKey);
+    } catch (RocksDBException e) {
+      closeOnError(e, true);
+      final String message = "delete range " + bytes2String(beginKey) +
+          " to " + bytes2String(endKey) + " from " + family;
+      throw toIOException(this, message, e);
+    } finally {
+      counter.decrementAndGet();
+    }
+  }
+
   @Override
   public String toString() {
     return name;
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
index 0e0e67be01..3d85b357b1 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/Table.java
@@ -148,6 +148,15 @@ public interface Table<KEY, VALUE> extends AutoCloseable {
    */
   void deleteWithBatch(BatchOperation batch, KEY key) throws IOException;
 
+  /**
+   * Deletes a range of keys from the metadata store.
+   *
+   * @param beginKey start metadata key
+   * @param endKey end metadata key
+   * @throws IOException on Failure
+   */
+  void deleteRange(KEY beginKey, KEY endKey) throws IOException;
+
   /**
    * Returns the iterator for this metadata store.
    *
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
index ee15fa896d..147a22b2f5 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
@@ -272,7 +272,12 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, 
VALUE> {
   public void deleteWithBatch(BatchOperation batch, KEY key)
       throws IOException {
     rawTable.deleteWithBatch(batch, codecRegistry.asRawData(key));
+  }
 
+  @Override
+  public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
+    rawTable.deleteRange(codecRegistry.asRawData(beginKey),
+        codecRegistry.asRawData(endKey));
   }
 
   @Override
diff --git 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
index dc65410646..c53c34fdb2 100644
--- 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
+++ 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBTableStore.java
@@ -57,7 +57,7 @@ public class TestRDBTableStore {
           "First", "Second", "Third",
           "Fourth", "Fifth",
           "Sixth", "Seventh",
-          "Eighth");
+          "Eighth", "Ninth");
   private final List<String> prefixedFamilies = Arrays.asList(
       "PrefixFirst",
       "PrefixTwo", "PrefixThree",
@@ -165,7 +165,7 @@ public class TestRDBTableStore {
     }
 
     // Write all the keys and delete the keys scheduled for delete.
-    //Assert we find only expected keys in the Table.
+    // Assert we find only expected keys in the Table.
     try (Table testTable = rdbStore.getTable("Fourth")) {
       for (int x = 0; x < deletedKeys.size(); x++) {
         testTable.put(deletedKeys.get(x), value);
@@ -177,15 +177,78 @@ public class TestRDBTableStore {
       }
 
       for (int x = 0; x < validKeys.size(); x++) {
-        Assertions.assertNotNull(testTable.get(validKeys.get(0)));
+        Assertions.assertNotNull(testTable.get(validKeys.get(x)));
       }
 
       for (int x = 0; x < deletedKeys.size(); x++) {
-        Assertions.assertNull(testTable.get(deletedKeys.get(0)));
+        Assertions.assertNull(testTable.get(deletedKeys.get(x)));
       }
     }
   }
 
+  @Test
+  public void deleteRange() throws Exception {
+
+    // Prepare keys to be written to the test table
+    List<byte[]> keys = new ArrayList<>();
+    for (int x = 0; x < 100; x++) {
+      // Left pad DB keys with zeros
+      String k = String.format("%03d", x) + "-" + RandomStringUtils.random(6);
+      keys.add(k.getBytes(StandardCharsets.UTF_8));
+    }
+    // Some random value
+    byte[] val = RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8);
+
+    try (Table testTable = rdbStore.getTable("Ninth")) {
+
+      // Write keys to the table
+      for (int x = 0; x < keys.size(); x++) {
+        testTable.put(keys.get(x), val);
+      }
+
+      // All keys should exist at this point
+      for (int x = 0; x < keys.size(); x++) {
+        Assertions.assertNotNull(testTable.get(keys.get(x)));
+      }
+
+      // Delete a range of keys: [10th, 20th), zero-indexed
+      final int deleteRangeBegin = 10, deleteRangeEnd = 20;
+      byte[] dRangeBeginKey = keys.get(deleteRangeBegin);
+      byte[] dRangeEndKey = keys.get(deleteRangeEnd);
+
+      testTable.deleteRange(dRangeBeginKey, dRangeEndKey);
+
+      // Keys [10th, 20th) should be gone now
+      for (int x = deleteRangeBegin; x < deleteRangeEnd; x++) {
+        Assertions.assertNull(testTable.get(keys.get(x)));
+      }
+
+      // While the rest of the keys should be untouched
+      for (int x = 0; x < deleteRangeBegin; x++) {
+        Assertions.assertNotNull(testTable.get(keys.get(x)));
+      }
+      for (int x = deleteRangeEnd; x < 100; x++) {
+        Assertions.assertNotNull(testTable.get(keys.get(x)));
+      }
+
+      // Delete the rest of the keys
+      testTable.deleteRange(keys.get(0), keys.get(100 - 1));
+
+      // Confirm key deletion
+      for (int x = 0; x < 100 - 1; x++) {
+        Assertions.assertNull(testTable.get(keys.get(x)));
+      }
+      // The last key is still there because
+      // deleteRange() excludes the endKey by design
+      Assertions.assertNotNull(testTable.get(keys.get(100 - 1)));
+
+      // Delete the last key
+      testTable.delete(keys.get(100 - 1));
+      Assertions.assertNull(testTable.get(keys.get(100 - 1)));
+    }
+
+  }
+
   @Test
   public void batchPut() throws Exception {
     try (Table testTable = rdbStore.getTable("Fifth");
diff --git 
a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
 
b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
index 285fb0245b..7e0f3511ef 100644
--- 
a/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
+++ 
b/hadoop-ozone/interface-storage/src/main/java/org/apache/hadoop/ozone/om/OMMetadataManager.java
@@ -22,6 +22,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.utils.DBStoreHAManager;
@@ -282,6 +283,13 @@ public interface OMMetadataManager extends 
DBStoreHAManager {
   ExpiredOpenKeys getExpiredOpenKeys(Duration expireThreshold, int count,
       BucketLayout bucketLayout) throws IOException;
 
+  /**
+   * Retrieve RWLock for the table.
+   * @param tableName table name.
+   * @return ReentrantReadWriteLock
+   */
+  ReentrantReadWriteLock getTableLock(String tableName);
+
   /**
    * Returns the user Table.
    *
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index a02cf37bc6..12f7b89d12 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -619,7 +619,7 @@ public class KeyManagerImpl implements KeyManager {
   @Override
   public List<BlockGroup> getPendingDeletionKeys(final int count)
       throws IOException {
-    return  metadataManager.getPendingDeletionKeys(count);
+    return metadataManager.getPendingDeletionKeys(count);
   }
 
   @Override
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
index ede9b5893a..03414980e8 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
@@ -30,6 +30,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -281,6 +282,16 @@ public class OmMetadataManagerImpl implements 
OMMetadataManager,
   private boolean ignorePipelineinKey;
   private Table deletedDirTable;
 
+  // Table-level locks that protect table read/write access. Note:
+  // Don't use this lock for tables other than deletedTable and 
deletedDirTable.
+  // This is a stopgap solution. Will remove when HDDS-5905 (HDDS-6483) is 
done.
+  private Map<String, ReentrantReadWriteLock> tableLockMap = new HashMap<>();
+
+  @Override
+  public ReentrantReadWriteLock getTableLock(String tableName) {
+    return tableLockMap.get(tableName);
+  }
+
   // Epoch is used to generate the objectIDs. The most significant 2 bits of
   // objectIDs is set to this epoch. For clusters before HDDS-4315 there is
   // no epoch as such. But it can be safely assumed that the most significant
@@ -561,6 +572,8 @@ public class OmMetadataManagerImpl implements 
OMMetadataManager,
     deletedTable = this.store.getTable(DELETED_TABLE, String.class,
         RepeatedOmKeyInfo.class);
     checkTableStatus(deletedTable, DELETED_TABLE, addCacheMetrics);
+    // Currently, deletedTable is the only table that will need the table lock
+    tableLockMap.put(DELETED_TABLE, new ReentrantReadWriteLock(true));
 
     openKeyTable =
         this.store.getTable(OPEN_KEY_TABLE, String.class,
@@ -1385,8 +1398,24 @@ public class OmMetadataManagerImpl implements 
OMMetadataManager,
       while (keyIter.hasNext() && currentCount < keyCount) {
         KeyValue<String, RepeatedOmKeyInfo> kv = keyIter.next();
         if (kv != null) {
+          // Multiple keys with the same path can be queued in one DB entry
           RepeatedOmKeyInfo infoList = kv.getValue();
           for (OmKeyInfo info : infoList.cloneOmKeyInfoList()) {
+            // Skip the key if it exists in the previous snapshot (of the same
+            // scope) as in this case its blocks should not be reclaimed
+
+            // TODO: [SNAPSHOT] HDDS-7968
+            //  1. If previous snapshot keyTable has key info.getObjectID(),
+            //  skip it. Pending HDDS-7740 merge to reuse the util methods to
+            //  check previousSnapshot.
+            //  2. For efficient lookup, the addition in design doc 4.b)1.b
+            //  is critical.
+            //  3. With snapshot it is possible that only some of the keys in
+            //  the DB key's RepeatedOmKeyInfo list can be reclaimed,
+            //  make sure to update deletedTable accordingly in this case.
+            //  4. Further optimization: Skip all snapshotted keys altogether
+            //  e.g. by prefixing all unreclaimable keys, then calling seek
+
             // Add all blocks from all versions of the key to the deletion list
             for (OmKeyLocationInfoGroup keyLocations :
                 info.getKeyLocationVersions()) {
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
index 2c5a6c9b61..ffad3677da 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java
@@ -31,12 +31,15 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.server.ServerUtils;
 import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
 import org.apache.hadoop.hdds.utils.db.RDBStore;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
 import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
+import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
 import org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus;
 import org.apache.hadoop.ozone.om.snapshot.SnapshotDiffManager;
@@ -64,13 +67,18 @@ public final class OmSnapshotManager implements 
AutoCloseable {
   private static final Logger LOG =
       LoggerFactory.getLogger(OmSnapshotManager.class);
 
+  // Threshold for the table iterator loop in nanoseconds.
+  private static final long DB_TABLE_ITER_LOOP_THRESHOLD_NS = 100000;
+
   private final OzoneManager ozoneManager;
+  private final OMMetadataManager omMetadataManager;
   private final SnapshotDiffManager snapshotDiffManager;
   private final LoadingCache<String, OmSnapshot> snapshotCache;
   private final ManagedRocksDB snapshotDiffDb;
 
   OmSnapshotManager(OzoneManager ozoneManager) {
     this.ozoneManager = ozoneManager;
+    this.omMetadataManager = ozoneManager.getMetadataManager();
 
     // Pass in the differ
     final RocksDBCheckpointDiffer differ = ozoneManager
@@ -164,8 +172,24 @@ public final class OmSnapshotManager implements 
AutoCloseable {
       throws IOException {
     RDBStore store = (RDBStore) omMetadataManager.getStore();
 
-    final DBCheckpoint dbCheckpoint = store.getSnapshot(
-        snapshotInfo.getCheckpointDirName());
+    final DBCheckpoint dbCheckpoint;
+
+    // Acquire deletedTable write lock
+    omMetadataManager.getTableLock(OmMetadataManagerImpl.DELETED_TABLE)
+        .writeLock().lock();
+    try {
+      // Create DB checkpoint for snapshot
+      dbCheckpoint = store.getSnapshot(snapshotInfo.getCheckpointDirName());
+      // Clean up active DB's deletedTable right after checkpoint is taken,
+      // with table write lock held
+      deleteKeysInSnapshotScopeFromDTableInternal(omMetadataManager,
+          snapshotInfo.getVolumeName(), snapshotInfo.getBucketName());
+      // TODO: [SNAPSHOT] HDDS-8064. Clean up deletedDirTable as well
+    } finally {
+      // Release deletedTable write lock
+      omMetadataManager.getTableLock(OmMetadataManagerImpl.DELETED_TABLE)
+          .writeLock().unlock();
+    }
 
     LOG.info("Created checkpoint : {} for snapshot {}",
         dbCheckpoint.getCheckpointLocation(), snapshotInfo.getName());
@@ -193,6 +217,79 @@ public final class OmSnapshotManager implements 
AutoCloseable {
     return dbCheckpoint;
   }
 
+  /**
+   * Helper method to delete keys in the snapshot scope from active DB's
+   * deletedTable.
+   *
+   * @param omMetadataManager OMMetadataManager instance
+   * @param volumeName volume name
+   * @param bucketName bucket name
+   */
+  private static void deleteKeysInSnapshotScopeFromDTableInternal(
+      OMMetadataManager omMetadataManager,
+      String volumeName,
+      String bucketName) throws IOException {
+
+    // Range delete start key (inclusive)
+    String beginKey =
+        omMetadataManager.getOzoneKey(volumeName, bucketName, "");
+
+    // Range delete end key (exclusive) to be found
+    String endKey;
+
+    // Start performance tracking timer
+    long startTime = System.nanoTime();
+
+    try (TableIterator<String,
+        ? extends Table.KeyValue<String, RepeatedOmKeyInfo>>
+        keyIter = omMetadataManager.getDeletedTable().iterator()) {
+
+      keyIter.seek(beginKey);
+      // Continue only when there are entries of snapshot (bucket) scope
+      // in deletedTable in the first place
+      if (!keyIter.hasNext()) {
+        // Use null as a marker. No need to do deleteRange() at all.
+        endKey = null;
+      } else {
+        // Remember the last key with a matching prefix
+        endKey = keyIter.next().getKey();
+
+        // Loop until prefix mismatches.
+        // TODO: [SNAPSHOT] Try to seek next predicted bucket name (speed up?)
+        while (keyIter.hasNext()) {
+          Table.KeyValue<String, RepeatedOmKeyInfo> entry = keyIter.next();
+          String dbKey = entry.getKey();
+          if (dbKey.startsWith(beginKey)) {
+            endKey = dbKey;
+          }
+        }
+      }
+    }
+
+    // Time taken for the iterator to finish (in ns)
+    long timeElapsed = System.nanoTime() - startTime;
+    if (timeElapsed >= DB_TABLE_ITER_LOOP_THRESHOLD_NS) {
+      // Print time elapsed
+      LOG.warn("Took {} ns to clean up deletedTable", timeElapsed);
+    }
+
+    if (endKey != null) {
+      // Clean up deletedTable
+      omMetadataManager.getDeletedTable().deleteRange(beginKey, endKey);
+
+      // Remove range end key itself
+      omMetadataManager.getDeletedTable().delete(endKey);
+    }
+
+    // Note: We do not need to invalidate deletedTable cache since entries
+    // are not added to its table cache in the first place.
+    // See OMKeyDeleteRequest and OMKeyPurgeRequest#validateAndUpdateCache.
+
+    // This makes the table clean up efficient as we only need one
+    // deleteRange() operation. No need to invalidate cache entries
+    // one by one.
+  }
+
   // Get OmSnapshot if the keyname has ".snapshot" key indicator
   public IOmMetadataReader checkForSnapshot(String volumeName,
                                      String bucketName, String keyname)
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
index 2ee67692ff..a337a791d8 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
@@ -137,7 +137,7 @@ public class OMSnapshotCreateRequest extends 
OMClientRequest {
           omMetadataManager.getLock().acquireWriteLock(SNAPSHOT_LOCK,
               volumeName, bucketName, snapshotName);
 
-      //Check if snapshot already exists
+      // Check if snapshot already exists
       if (omMetadataManager.getSnapshotInfoTable().isExist(key)) {
         LOG.debug("Snapshot '{}' already exists under '{}'", key, 
snapshotPath);
         throw new OMException("Snapshot already exists", FILE_ALREADY_EXISTS);
@@ -178,7 +178,7 @@ public class OMSnapshotCreateRequest extends 
OMClientRequest {
           CreateSnapshotResponse.newBuilder()
           .setSnapshotInfo(snapshotInfo.getProtobuf()));
       omClientResponse = new OMSnapshotCreateResponse(
-          omResponse.build(), volumeName, bucketName, snapshotName);
+          omResponse.build(), snapshotInfo);
     } catch (IOException ex) {
       exception = ex;
       omClientResponse = new OMSnapshotCreateResponse(
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
index b89d56dc04..dec84eaef3 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java
@@ -118,9 +118,13 @@ public class OMDirectoriesPurgeResponseWithFSO extends 
OmKeyResponse {
                 .getOzoneKey(keyInfo.getVolumeName(), keyInfo.getBucketName(),
                         keyInfo.getKeyName());
 
+        // TODO: [SNAPSHOT] Acquire deletedTable write table lock
+
         omMetadataManager.getDeletedTable().putWithBatch(batchOperation,
                 deletedKey, repeatedOmKeyInfo);
 
+        // TODO: [SNAPSHOT] Release deletedTable write table lock
+
       }
 
       // Delete the visited directory from deleted directory table
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
index 718b67ec89..f970870e6a 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/snapshot/OMSnapshotCreateResponse.java
@@ -33,19 +33,23 @@ import javax.annotation.Nonnull;
 import java.io.IOException;
 
 import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
+import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.DELETED_TABLE;
 import static 
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.RENAMED_KEY_TABLE;
 import static 
org.apache.hadoop.ozone.om.OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE;
 
 /**
  * Response for OMSnapshotCreateRequest.
  */
-@CleanupTableInfo(cleanupTables = {SNAPSHOT_INFO_TABLE, RENAMED_KEY_TABLE})
+@CleanupTableInfo(cleanupTables = {
+    DELETED_TABLE, RENAMED_KEY_TABLE, SNAPSHOT_INFO_TABLE})
 public class OMSnapshotCreateResponse extends OMClientResponse {
 
+  private SnapshotInfo snapshotInfo;
+
   public OMSnapshotCreateResponse(@Nonnull OMResponse omResponse,
-      @Nonnull String volumeName, @Nonnull String bucketName,
-                                  @Nonnull String snapshotName) {
+      @Nonnull SnapshotInfo snapshotInfo) {
     super(omResponse);
+    this.snapshotInfo = snapshotInfo;
   }
 
   /**
@@ -61,11 +65,8 @@ public class OMSnapshotCreateResponse extends 
OMClientResponse {
   public void addToDBBatch(OMMetadataManager omMetadataManager,
       BatchOperation batchOperation) throws IOException {
 
-    SnapshotInfo snapshotInfo =
-        SnapshotInfo.getFromProtobuf(
-        getOMResponse().getCreateSnapshotResponse().getSnapshotInfo());
-
     // Create the snapshot checkpoint
+    // Also cleans up deletedTable
     OmSnapshotManager.createOmSnapshotCheckpoint(omMetadataManager,
         snapshotInfo);
 
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
index b0c6b7a13a..897c1fa531 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
@@ -163,6 +163,12 @@ public class KeyDeletingService extends BackgroundService {
         runCount.incrementAndGet();
         try {
           long startTime = Time.monotonicNow();
+          // TODO: [SNAPSHOT] HDDS-7968. Reclaim eligible key blocks in
+          //  snapshot's deletedTable when active DB's deletedTable
+          //  doesn't have enough entries left.
+          //  OM would have to keep track of which snapshot the key is coming
+          //  from. And PurgeKeysRequest would have to be adjusted to be able
+          //  to operate on snapshot checkpoints.
           List<BlockGroup> keyBlocksList = manager
               .getPendingDeletionKeys(keyLimitPerTask);
           if (keyBlocksList != null && !keyBlocksList.isEmpty()) {
diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
index e530f712cb..8065e78bff 100644
--- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java
@@ -228,11 +228,11 @@ public class TestOMSnapshotCreateRequest {
     // return null.
     Assert.assertNull(omMetadataManager.getSnapshotInfoTable().get(key));
 
-    // add key to cache
+    // run validateAndUpdateCache. add key to cache
     OMClientResponse omClientResponse =
         omSnapshotCreateRequest.validateAndUpdateCache(ozoneManager, 1,
             ozoneManagerDoubleBufferHelper);
-    
+
     // check cache
     SnapshotInfo snapshotInfo =
         omMetadataManager.getSnapshotInfoTable().get(key);
diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
index 09d6849ee1..26a4265417 100644
--- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotCreateResponse.java
@@ -20,9 +20,15 @@
 package org.apache.hadoop.ozone.om.response.snapshot;
 
 import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.UUID;
 
 import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
+import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
 import org.junit.After;
 import org.junit.Assert;
@@ -45,7 +51,6 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
 import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
 import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_DIR;
 
-
 /**
  * This class tests OMSnapshotCreateResponse.
  */
@@ -90,6 +95,10 @@ public class TestOMSnapshotCreateResponse {
         omMetadataManager
         .countRowsInTable(omMetadataManager.getSnapshotInfoTable()));
 
+    // Prepare for deletedTable clean up logic verification
+    Set<String> dtSentinelKeys = addTestKeysToDeletedTable(
+        volumeName, bucketName);
+
     // commit to table
     OMSnapshotCreateResponse omSnapshotCreateResponse =
         new OMSnapshotCreateResponse(OMResponse.newBuilder()
@@ -98,7 +107,7 @@ public class TestOMSnapshotCreateResponse {
             .setCreateSnapshotResponse(
                 CreateSnapshotResponse.newBuilder()
                 .setSnapshotInfo(snapshotInfo.getProtobuf())
-                .build()).build(), volumeName, bucketName, snapshotName);
+                .build()).build(), snapshotInfo);
     omSnapshotCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
     omMetadataManager.getStore().commitBatchOperation(batchOperation);
 
@@ -118,5 +127,71 @@ public class TestOMSnapshotCreateResponse {
     SnapshotInfo storedInfo = keyValue.getValue();
     Assert.assertEquals(snapshotInfo.getTableKey(), keyValue.getKey());
     Assert.assertEquals(snapshotInfo, storedInfo);
+
+    // Check deletedTable clean up works as expected
+    verifyEntriesLeftInDeletedTable(dtSentinelKeys);
+  }
+
+  private Set<String> addTestKeysToDeletedTable(
+      String volumeName, String bucketName) throws IOException {
+
+    RepeatedOmKeyInfo dummyRepeatedKeyInfo = new RepeatedOmKeyInfo.Builder()
+        .setOmKeyInfos(new ArrayList<>()).build();
+
+    // Add deletedTable key entries that surround the snapshot scope
+    Set<String> dtSentinelKeys = new HashSet<>();
+    // Get a bucket name right before and after the bucketName
+    // e.g. When bucketName is buck2, bucketNameBefore is buck1,
+    // bucketNameAfter is buck3
+    // This will not guarantee the bucket name is valid for Ozone but
+    // this would be good enough for this unit test.
+    char bucketNameLastChar = bucketName.charAt(bucketName.length() - 1);
+    String bucketNameBefore = bucketName.substring(0, bucketName.length() - 1) +
+        Character.toString((char)(bucketNameLastChar - 1));
+    for (int i = 0; i < 3; i++) {
+      String dtKey = omMetadataManager.getOzoneKey(volumeName, bucketNameBefore,
+          "dtkey" + i);
+      omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+      dtSentinelKeys.add(dtKey);
+    }
+    String bucketNameAfter = bucketName.substring(0, bucketName.length() - 1) +
+        Character.toString((char)(bucketNameLastChar + 1));
+    for (int i = 0; i < 3; i++) {
+      String dtKey = omMetadataManager.getOzoneKey(volumeName, bucketNameAfter,
+          "dtkey" + i);
+      omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+      dtSentinelKeys.add(dtKey);
+    }
+    // Add deletedTable key entries in the snapshot (bucket) scope
+    for (int i = 0; i < 10; i++) {
+      String dtKey = omMetadataManager.getOzoneKey(volumeName, bucketName,
+          "dtkey" + i);
+      omMetadataManager.getDeletedTable().put(dtKey, dummyRepeatedKeyInfo);
+    }
+
+    return dtSentinelKeys;
+  }
+
+  private void verifyEntriesLeftInDeletedTable(Set<String> dtSentinelKeys)
+      throws IOException {
+
+    // Verify that only keys inside the snapshot scope are gone from
+    // deletedTable.
+    try (TableIterator<String,
+        ? extends Table.KeyValue<String, RepeatedOmKeyInfo>>
+        keyIter = omMetadataManager.getDeletedTable().iterator()) {
+      keyIter.seekToFirst();
+      while (keyIter.hasNext()) {
+        Table.KeyValue<String, RepeatedOmKeyInfo> entry = keyIter.next();
+        String dtKey = entry.getKey();
+        // deletedTable should not have bucketName keys
+        Assert.assertTrue("deletedTable should contain key",
+            dtSentinelKeys.contains(dtKey));
+        dtSentinelKeys.remove(dtKey);
+      }
+    }
+
+    Assert.assertTrue("deletedTable is missing keys that should be there",
+        dtSentinelKeys.isEmpty());
   }
 }
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
index 67a2afeea7..8c861735d6 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/snapshot/TestOMSnapshotDeleteResponse.java
@@ -97,7 +97,7 @@ public class TestOMSnapshotDeleteResponse {
             .setCreateSnapshotResponse(
                 CreateSnapshotResponse.newBuilder()
                 .setSnapshotInfo(snapshotInfo.getProtobuf())
-                .build()).build(), volumeName, bucketName, snapshotName);
+                .build()).build(), snapshotInfo);
     omSnapshotCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
     omMetadataManager.getStore().commitBatchOperation(batchOperation);
 
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
index 4faced0e30..6bd1ab4751 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/recovery/ReconOmMetadataManagerImpl.java
@@ -23,6 +23,7 @@ import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_OM
 
 import java.io.File;
 import java.io.IOException;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import javax.inject.Inject;
 import javax.inject.Singleton;
@@ -60,6 +61,11 @@ public class ReconOmMetadataManagerImpl extends OmMetadataManagerImpl
     this.ozoneConfiguration = configuration;
   }
 
+  @Override
+  public ReentrantReadWriteLock getTableLock(String tableName) {
+    return super.getTableLock(tableName);
+  }
+
   @Override
   public void start(OzoneConfiguration configuration) throws IOException {
     LOG.info("Starting ReconOMMetadataManagerImpl");


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to