This is an automated email from the ASF dual-hosted git repository.

zanderxu pushed a commit to branch HDFS-17384
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit 24139acc8652d5c2f8053aa18bd0e77a6c54c336
Author: ZanderXu <zande...@apache.org>
AuthorDate: Tue Mar 26 10:52:45 2024 +0800

    HDFS-17423. [FGL] BlockManagerSafeMode supports fine-grained lock (#6645)
---
 .../hdfs/server/blockmanagement/BlockManager.java  |  2 +-
 .../blockmanagement/BlockManagerSafeMode.java      | 33 ++++++++++++----------
 .../hadoop/hdfs/server/namenode/FSNamesystem.java  | 32 +++++++++++++++------
 .../blockmanagement/TestBlockManagerSafeMode.java  |  3 ++
 .../blockmanagement/TestReplicationPolicy.java     |  1 +
 5 files changed, 46 insertions(+), 25 deletions(-)

diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 433b1266bf7b..82b82433e70e 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3947,7 +3947,7 @@ public class BlockManager implements BlockStatsMXBean {
    * extra or low redundancy. Place it into the respective queue.
    */
   public void processMisReplicatedBlocks() {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     stopReconstructionInitializer();
     neededReconstruction.clear();
     reconstructionQueuesInitializer = new Daemon() {
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java
index 4349ba01401d..49cd409211e7 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java
@@ -26,6 +26,7 @@ import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeSt
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.Namesystem;
+import org.apache.hadoop.hdfs.server.namenode.fgl.FSNamesystemLockMode;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import 
org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
@@ -169,7 +170,7 @@ class BlockManagerSafeMode {
    * @param total initial total blocks
    */
   void activate(long total) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     assert status == BMSafeModeStatus.OFF;
 
     startTime = monotonicNow();
@@ -203,7 +204,7 @@ class BlockManagerSafeMode {
    * If safe mode is not currently on, this is a no-op.
    */
   void checkSafeMode() {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     if (namesystem.inTransitionToActive()) {
       return;
     }
@@ -219,6 +220,7 @@ class BlockManagerSafeMode {
           initializeReplQueuesIfNecessary();
           reportStatus("STATE* Safe mode extension entered.", true);
         } else {
+          // TODO: let the smmthread to leave the safemode.
           // PENDING_THRESHOLD -> OFF
           leaveSafeMode(false);
         }
@@ -244,7 +246,7 @@ class BlockManagerSafeMode {
    * @param deltaTotal the change in number of total blocks expected
    */
   void adjustBlockTotals(int deltaSafe, int deltaTotal) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     if (!isSafeModeTrackingBlocks()) {
       return;
     }
@@ -278,7 +280,7 @@ class BlockManagerSafeMode {
    * set after the image has been loaded.
    */
   boolean isSafeModeTrackingBlocks() {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     return haEnabled && status != BMSafeModeStatus.OFF;
   }
 
@@ -286,7 +288,7 @@ class BlockManagerSafeMode {
    * Set total number of blocks.
    */
   void setBlockTotal(long total) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     synchronized (this) {
       this.blockTotal = total;
       this.blockThreshold = (long) (total * threshold);
@@ -372,7 +374,7 @@ class BlockManagerSafeMode {
    * @return true if it leaves safe mode successfully else false
    */
   boolean leaveSafeMode(boolean force) {
-    assert namesystem.hasWriteLock() : "Leaving safe mode needs write lock!";
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM) : "Leaving safe 
mode needs write lock!";
 
     final long bytesInFuture = getBytesInFuture();
     if (bytesInFuture > 0) {
@@ -443,7 +445,7 @@ class BlockManagerSafeMode {
    */
   synchronized void incrementSafeBlockCount(int storageNum,
       BlockInfo storedBlock) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     if (status == BMSafeModeStatus.OFF) {
       return;
     }
@@ -475,7 +477,7 @@ class BlockManagerSafeMode {
    * If safe mode is not currently on, this is a no-op.
    */
   synchronized void decrementSafeBlockCount(BlockInfo b) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     if (status == BMSafeModeStatus.OFF) {
       return;
     }
@@ -498,7 +500,7 @@ class BlockManagerSafeMode {
    * @param brr block report replica which belongs to no file in BlockManager
    */
   void checkBlocksWithFutureGS(BlockReportReplica brr) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     if (status == BMSafeModeStatus.OFF) {
       return;
     }
@@ -532,7 +534,8 @@ class BlockManagerSafeMode {
   }
 
   void close() {
-    assert namesystem.hasWriteLock() : "Closing bmSafeMode needs write lock!";
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.GLOBAL)
+        : "Closing bmSafeMode needs write lock!";
     try {
       smmthread.interrupt();
       smmthread.join(3000);
@@ -566,7 +569,7 @@ class BlockManagerSafeMode {
 
   /** Check if we are ready to initialize replication queues. */
   private void initializeReplQueuesIfNecessary() {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     // Whether it has reached the threshold for initializing replication 
queues.
     boolean canInitializeReplQueues = blockManager.shouldPopulateReplQueues() 
&&
         blockSafe >= blockReplQueueThreshold;
@@ -581,7 +584,7 @@ class BlockManagerSafeMode {
    * @return true if both block and datanode threshold are met else false.
    */
   private boolean areThresholdsMet() {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     // Calculating the number of live datanodes is time-consuming
     // in large clusters. Skip it when datanodeThreshold is zero.
     // We need to evaluate getNumLiveDataNodes only when
@@ -626,7 +629,7 @@ class BlockManagerSafeMode {
    * Print status every 20 seconds.
    */
   private void reportStatus(String msg, boolean rightNow) {
-    assert namesystem.hasWriteLock();
+    assert namesystem.hasWriteLock(FSNamesystemLockMode.BM);
     long curTime = monotonicNow();
     if(!rightNow && (curTime - lastStatusReport < 20 * 1000)) {
       return;
@@ -660,7 +663,7 @@ class BlockManagerSafeMode {
     public void run() {
       while (namesystem.isRunning()) {
         try {
-          namesystem.writeLock();
+          namesystem.writeLock(FSNamesystemLockMode.GLOBAL);
           if (status == BMSafeModeStatus.OFF) { // Not in safe mode.
             break;
           }
@@ -670,7 +673,7 @@ class BlockManagerSafeMode {
             break;
           }
         } finally {
-          namesystem.writeUnlock("leaveSafeMode");
+          namesystem.writeUnlock(FSNamesystemLockMode.GLOBAL, "leaveSafeMode");
         }
 
         try {
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 62db0411e16b..f23db6e0f3a8 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1334,7 +1334,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
   
   @Override
   public void startSecretManagerIfNecessary() {
-    assert hasWriteLock() : "Starting secret manager needs write lock";
+    assert hasWriteLock(FSNamesystemLockMode.BM) : "Starting secret manager 
needs write lock";
     boolean shouldRun = shouldUseDelegationTokens() &&
       !isInSafeMode() && getEditLog().isOpenForWrite();
     boolean running = dtSecretManager.isRunning();
@@ -1354,7 +1354,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
    */
   void startCommonServices(Configuration conf, HAContext haContext) throws 
IOException {
     this.registerMBean(); // register the MBean for the FSNamesystemState
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     this.haContext = haContext;
     try {
       nnResourceChecker = new NameNodeResourceChecker(conf);
@@ -1367,7 +1367,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
           completeBlocksTotal);
       blockManager.activate(conf, completeBlocksTotal);
     } finally {
-      writeUnlock("startCommonServices");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, "startCommonServices");
     }
     
     registerMXBean();
@@ -1406,7 +1406,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
   void startActiveServices() throws IOException {
     startingActiveService = true;
     LOG.info("Starting services required for active state");
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
       FSEditLog editLog = getFSImage().getEditLog();
       
@@ -1499,7 +1499,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
     } finally {
       startingActiveService = false;
       blockManager.checkSafeMode();
-      writeUnlock("startActiveServices");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, "startActiveServices");
     }
   }
 
@@ -5231,7 +5231,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
    * @throws IOException
    */
   void enterSafeMode(boolean resourcesLow) throws IOException {
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
       // Stop the secret manager, since rolling the master key would
       // try to write to the edit log
@@ -5250,7 +5250,8 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
       NameNode.stateChangeLog.info("STATE* Safe mode is ON.\n" +
           getSafeModeTip());
     } finally {
-      writeUnlock("enterSafeMode", getLockReportInfoSupplier(null));
+      writeUnlock(FSNamesystemLockMode.GLOBAL,
+          "enterSafeMode", getLockReportInfoSupplier(null));
     }
   }
 
@@ -5259,7 +5260,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
    * @param force true if to leave safe mode forcefully with -forceExit option
    */
   void leaveSafeMode(boolean force) {
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
       if (!isInSafeMode()) {
         NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 
@@ -5270,7 +5271,8 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
         startSecretManagerIfNecessary();
       }
     } finally {
-      writeUnlock("leaveSafeMode", getLockReportInfoSupplier(null));
+      writeUnlock(FSNamesystemLockMode.GLOBAL,
+          "leaveSafeMode", getLockReportInfoSupplier(null));
     }
   }
 
@@ -8781,8 +8783,15 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
    */
   @Override
   public synchronized void checkAndProvisionSnapshotTrashRoots() {
+    assert hasWriteLock(FSNamesystemLockMode.BM);
     if (isSnapshotTrashRootEnabled && (haEnabled && inActiveState()
         || !haEnabled) && !blockManager.isInSafeMode()) {
+      boolean releaseFSLock = false;
+      if (!hasWriteLock(FSNamesystemLockMode.GLOBAL)) {
+        writeUnlock(FSNamesystemLockMode.BM, 
"CheckAndProvisionSnapshotTrashRoots");
+        writeLock(FSNamesystemLockMode.GLOBAL);
+        releaseFSLock = true;
+      }
       SnapshottableDirectoryStatus dirStatus = null;
       try {
         SnapshottableDirectoryStatus[] dirStatusList =
@@ -8815,6 +8824,11 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean,
           LOG.error("Could not provision Trash directory for existing "
               + "snapshottable directory {}", dirStatus, e);
         }
+      } finally {
+        if (releaseFSLock) {
+          writeUnlock(FSNamesystemLockMode.GLOBAL, 
"checkAndProvisionSnapshotTrashRoots");
+          writeLock(FSNamesystemLockMode.BM);
+        }
       }
     }
   }
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
index d32cde834736..ca823d455147 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManagerSafeMode.java
@@ -27,6 +27,7 @@ import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerSafeMode.BMSafe
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.fgl.FSNamesystemLockMode;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.Whitebox;
 
@@ -95,6 +96,8 @@ public class TestBlockManagerSafeMode {
     fsn = mock(FSNamesystem.class);
     doReturn(true).when(fsn).hasWriteLock();
     doReturn(true).when(fsn).hasReadLock();
+    doReturn(true).when(fsn).hasWriteLock(FSNamesystemLockMode.BM);
+    doReturn(true).when(fsn).hasReadLock(FSNamesystemLockMode.BM);
     doReturn(true).when(fsn).isRunning();
     NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
 
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
index fd7ea14446ab..5dbdef446e96 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
@@ -1462,6 +1462,7 @@ public class TestReplicationPolicy extends 
BaseReplicationPolicyTest {
           throws IOException {
     Namesystem mockNS = mock(Namesystem.class);
     when(mockNS.hasWriteLock()).thenReturn(true);
+    when(mockNS.hasWriteLock(FSNamesystemLockMode.BM)).thenReturn(true);
 
     BlockManager bm = new BlockManager(mockNS, false, new HdfsConfiguration());
     LowRedundancyBlocks lowRedundancyBlocks = bm.neededReconstruction;


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to