peterxcli commented on code in PR #8386:
URL: https://github.com/apache/ozone/pull/8386#discussion_r2072647505


##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java:
##########
@@ -122,19 +128,49 @@ protected synchronized boolean validate() {
 
   @Override
   protected synchronized void process(Pipeline pipeline) {
+    Preconditions.checkNotNull(pipeline);
 
     // When SCM is in safe mode for long time, already registered
     // datanode can send pipeline report again, or SCMPipelineManager will
     // create new pipelines.
-    Preconditions.checkNotNull(pipeline);
-    if (pipeline.getType() == HddsProtos.ReplicationType.RATIS &&
-        ((RatisReplicationConfig) pipeline.getReplicationConfig())
-            .getReplicationFactor() == HddsProtos.ReplicationFactor.THREE &&
-        !processedPipelineIDs.contains(pipeline.getId())) {
+
+    // Only handle RATIS + 3-replica pipelines.
+    if (pipeline.getType() != HddsProtos.ReplicationType.RATIS ||
+        ((RatisReplicationConfig) 
pipeline.getReplicationConfig()).getReplicationFactor() !=
+            HddsProtos.ReplicationFactor.THREE) {
+      SCMSafeModeManager.getLogger().warn(
+          "Skipping HealthyPipelineSafeModeRule check as Replication type 
isn't RATIS or replication factor isn't 3.");
+      return;
+    }
+
+    // Skip already processed ones.
+    if (processedPipelineIDs.contains(pipeline.getId())) {
+      LOG.info("Skipping HealthyPipelineSafeModeRule check as pipeline: {} is 
already recorded.", pipeline.getId());
+      return;
+    }
+
+    List<DatanodeDetails> dnsWithSCM = 
nodeManager.getNodes(NodeStatus.inServiceHealthy());
+    List<DatanodeDetails> pipelineDns = pipeline.getNodes();
+
+    if (pipelineDns.size() == 3 && new 
HashSet<>(dnsWithSCM).containsAll(pipelineDns)) {
       getSafeModeMetrics().incCurrentHealthyPipelinesCount();
       currentHealthyPipelineCount++;
       processedPipelineIDs.add(pipeline.getId());
       unProcessedPipelineSet.remove(pipeline.getId());
+    } else {

Review Comment:
   I think it would be better to form those check in a chain, and update the 
"healthy pipeline state" after all prerequisites are stratified.
   1. pipeline size equals to 3? If not, log an warning with some info like 
greater or less and return
   2. all dns in pipeline are healthy and in service? If not, log `DNs: {} 
reported by pipeline: {} aren't registered with SCM.` and return.
   3. This pipeline has been checked and is healthy, so do:
   ```java
   getSafeModeMetrics().incCurrentHealthyPipelinesCount();
   currentHealthyPipelineCount++;
   processedPipelineIDs.add(pipeline.getId());
   unProcessedPipelineSet.remove(pipeline.getId());
   ```



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java:
##########
@@ -122,19 +128,49 @@ protected synchronized boolean validate() {
 
   @Override
   protected synchronized void process(Pipeline pipeline) {
+    Preconditions.checkNotNull(pipeline);
 
     // When SCM is in safe mode for long time, already registered
     // datanode can send pipeline report again, or SCMPipelineManager will
     // create new pipelines.
-    Preconditions.checkNotNull(pipeline);
-    if (pipeline.getType() == HddsProtos.ReplicationType.RATIS &&
-        ((RatisReplicationConfig) pipeline.getReplicationConfig())
-            .getReplicationFactor() == HddsProtos.ReplicationFactor.THREE &&
-        !processedPipelineIDs.contains(pipeline.getId())) {
+
+    // Only handle RATIS + 3-replica pipelines.
+    if (pipeline.getType() != HddsProtos.ReplicationType.RATIS ||
+        ((RatisReplicationConfig) 
pipeline.getReplicationConfig()).getReplicationFactor() !=
+            HddsProtos.ReplicationFactor.THREE) {
+      SCMSafeModeManager.getLogger().warn(
+          "Skipping HealthyPipelineSafeModeRule check as Replication type 
isn't RATIS or replication factor isn't 3.");

Review Comment:
   ```suggestion
             "Skipping pipeline safemode report processing as Replication type 
isn't RATIS or replication factor isn't 3.");
   ```



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java:
##########
@@ -122,19 +128,49 @@ protected synchronized boolean validate() {
 
   @Override
   protected synchronized void process(Pipeline pipeline) {
+    Preconditions.checkNotNull(pipeline);
 
     // When SCM is in safe mode for long time, already registered
     // datanode can send pipeline report again, or SCMPipelineManager will
     // create new pipelines.
-    Preconditions.checkNotNull(pipeline);
-    if (pipeline.getType() == HddsProtos.ReplicationType.RATIS &&
-        ((RatisReplicationConfig) pipeline.getReplicationConfig())
-            .getReplicationFactor() == HddsProtos.ReplicationFactor.THREE &&
-        !processedPipelineIDs.contains(pipeline.getId())) {
+
+    // Only handle RATIS + 3-replica pipelines.
+    if (pipeline.getType() != HddsProtos.ReplicationType.RATIS ||
+        ((RatisReplicationConfig) 
pipeline.getReplicationConfig()).getReplicationFactor() !=
+            HddsProtos.ReplicationFactor.THREE) {
+      SCMSafeModeManager.getLogger().warn(
+          "Skipping HealthyPipelineSafeModeRule check as Replication type 
isn't RATIS or replication factor isn't 3.");
+      return;
+    }
+
+    // Skip already processed ones.
+    if (processedPipelineIDs.contains(pipeline.getId())) {
+      LOG.info("Skipping HealthyPipelineSafeModeRule check as pipeline: {} is 
already recorded.", pipeline.getId());

Review Comment:
   ```suggestion
         LOG.info("Skipping pipeline safemode report processing check as 
pipeline: {} is already recorded.", pipeline.getId());
   ```



##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java:
##########
@@ -122,19 +128,49 @@ protected synchronized boolean validate() {
 
   @Override
   protected synchronized void process(Pipeline pipeline) {
+    Preconditions.checkNotNull(pipeline);
 
     // When SCM is in safe mode for long time, already registered
     // datanode can send pipeline report again, or SCMPipelineManager will
     // create new pipelines.
-    Preconditions.checkNotNull(pipeline);
-    if (pipeline.getType() == HddsProtos.ReplicationType.RATIS &&
-        ((RatisReplicationConfig) pipeline.getReplicationConfig())
-            .getReplicationFactor() == HddsProtos.ReplicationFactor.THREE &&
-        !processedPipelineIDs.contains(pipeline.getId())) {
+
+    // Only handle RATIS + 3-replica pipelines.
+    if (pipeline.getType() != HddsProtos.ReplicationType.RATIS ||
+        ((RatisReplicationConfig) 
pipeline.getReplicationConfig()).getReplicationFactor() !=
+            HddsProtos.ReplicationFactor.THREE) {
+      SCMSafeModeManager.getLogger().warn(
+          "Skipping HealthyPipelineSafeModeRule check as Replication type 
isn't RATIS or replication factor isn't 3.");
+      return;
+    }
+
+    // Skip already processed ones.
+    if (processedPipelineIDs.contains(pipeline.getId())) {
+      LOG.info("Skipping HealthyPipelineSafeModeRule check as pipeline: {} is 
already recorded.", pipeline.getId());
+      return;
+    }
+
+    List<DatanodeDetails> dnsWithSCM = 
nodeManager.getNodes(NodeStatus.inServiceHealthy());
+    List<DatanodeDetails> pipelineDns = pipeline.getNodes();
+
+    if (pipelineDns.size() == 3 && new 
HashSet<>(dnsWithSCM).containsAll(pipelineDns)) {

Review Comment:
   I think this part could be optimized by using `nodemanager.getNode(DNId)` to 
get the datanode status of the pipeline.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to