devmadhuu commented on code in PR #10326:
URL: https://github.com/apache/ozone/pull/10326#discussion_r3373521631


##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java:
##########
@@ -210,28 +215,161 @@ void transitionOpenToClosing(ContainerID containerID, 
ContainerInfo containerInf
         pipelineToOpenContainer.put(pipelineID, curCnt - 1);
       }
     }
-    updateContainerState(containerID, FINALIZE);  // OPEN → CLOSING
+    updateContainerState(containerID, FINALIZE);
   }
 
   /**
-   * Check if an OPEN container should move to CLOSING based on a healthy
-   * non-OPEN DN replica report.
+   * Check if Recon's container lifecycle state can be corrected based on a DN
+   * replica report and SCM's authoritative state.
+   *
+   * <p>Closed or quasi-closed DN reports wake Recon to query SCM and reconcile
+   * the existing container using SCM's authoritative state. Other healthy
+   * non-OPEN reports can still move an OPEN Recon container to CLOSING. For
+   * DELETED, any non-DELETED DN report can trigger recovery, but Recon still
+   * recovers only when SCM reports the container as live.
+   *
+   * @param containerID containerID to check
+   * @param state replica state reported by a DataNode
    */
   private void checkContainerStateAndUpdate(ContainerID containerID,
-                                            ContainerReplicaProto.State 
replicaState)
+                                            ContainerReplicaProto.State state)
       throws IOException, InvalidStateTransitionException {
     ContainerInfo containerInfo = getContainer(containerID);
     HddsProtos.LifeCycleState reconState = containerInfo.getState();
 
-    if (reconState != HddsProtos.LifeCycleState.OPEN
-        || replicaState == ContainerReplicaProto.State.OPEN
-        || !isHealthy(replicaState)) {
+    if (isClosedReplicaState(state)) {
+      reconcileContainerFromScm(containerID, containerInfo, reconState, state);
+      return;
+    }
+
+    if (reconState == HddsProtos.LifeCycleState.DELETED) {
+      recoverDeletedContainerFromScm(containerID, state);
       return;
     }
 
-    LOG.info("Container {} is OPEN in Recon but DN reports replica state {}. "
-        + "Moving to CLOSING.", containerID, replicaState);
-    transitionOpenToClosing(containerID, containerInfo);
+    if (reconState == HddsProtos.LifeCycleState.OPEN
+        && state != ContainerReplicaProto.State.OPEN && isHealthy(state)) {
+      LOG.info("Container {} has state OPEN, but given state is {}.",
+          containerID, state);
+      transitionOpenToClosing(containerID, containerInfo);
+    }
+  }
+
+  private void reconcileContainerFromScm(ContainerID containerID,
+      ContainerInfo containerInfo, HddsProtos.LifeCycleState reconState,
+      ContainerReplicaProto.State replicaState)
+      throws IOException, InvalidStateTransitionException {
+    ContainerWithPipeline scmContainer =
+        scmClient.getContainerWithPipeline(containerID.getId());
+    HddsProtos.LifeCycleState scmState =
+        scmContainer.getContainerInfo().getState();
+
+    if (reconState == HddsProtos.LifeCycleState.DELETED) {
+      recoverDeletedContainerFromScm(containerID, replicaState, scmContainer);
+      return;
+    }
+
+    if (scmState == HddsProtos.LifeCycleState.QUASI_CLOSED) {
+      reconcileToQuasiClosed(containerID, containerInfo, reconState);
+    } else if (scmState == HddsProtos.LifeCycleState.CLOSED) {
+      reconcileToClosed(containerID, containerInfo, reconState);
+    } else if (scmState == HddsProtos.LifeCycleState.DELETING
+        || scmState == HddsProtos.LifeCycleState.DELETED) {
+      reconcileToDeleted(containerID, containerInfo, reconState, scmState);
+    }
+  }
+
+  private void reconcileToQuasiClosed(ContainerID containerID,
+      ContainerInfo containerInfo, HddsProtos.LifeCycleState reconState)
+      throws IOException, InvalidStateTransitionException {
+    if (reconState == HddsProtos.LifeCycleState.OPEN) {
+      transitionOpenToClosing(containerID, containerInfo);
+      reconState = HddsProtos.LifeCycleState.CLOSING;
+    }
+    if (reconState == HddsProtos.LifeCycleState.CLOSING) {
+      updateContainerState(containerID, QUASI_CLOSE);
+      LOG.info("Container {} advanced to QUASI_CLOSED in Recon based on "
+          + "SCM state.", containerID);
+    }
+  }
+
+  private void reconcileToClosed(ContainerID containerID,
+      ContainerInfo containerInfo, HddsProtos.LifeCycleState reconState)
+      throws IOException, InvalidStateTransitionException {
+    if (reconState == HddsProtos.LifeCycleState.OPEN) {
+      transitionOpenToClosing(containerID, containerInfo);
+      reconState = HddsProtos.LifeCycleState.CLOSING;
+    }
+    if (reconState == HddsProtos.LifeCycleState.CLOSING) {
+      updateContainerState(containerID, CLOSE);
+      LOG.info("Container {} advanced to CLOSED in Recon based on SCM state.",
+          containerID);
+    } else if (reconState == HddsProtos.LifeCycleState.QUASI_CLOSED) {
+      updateContainerState(containerID, FORCE_CLOSE);
+      LOG.info("Container {} advanced from QUASI_CLOSED to CLOSED in Recon "
+          + "based on SCM state.", containerID);
+    }
+  }
+
+  private void reconcileToDeleted(ContainerID containerID,
+      ContainerInfo containerInfo, HddsProtos.LifeCycleState reconState,
+      HddsProtos.LifeCycleState scmState)
+      throws IOException, InvalidStateTransitionException {
+    if (reconState == HddsProtos.LifeCycleState.OPEN) {
+      transitionOpenToClosing(containerID, containerInfo);
+      reconState = HddsProtos.LifeCycleState.CLOSING;
+    }
+    if (reconState == HddsProtos.LifeCycleState.CLOSING) {
+      updateContainerState(containerID, CLOSE);
+      reconState = HddsProtos.LifeCycleState.CLOSED;
+    }
+    if (reconState == HddsProtos.LifeCycleState.CLOSED
+        || reconState == HddsProtos.LifeCycleState.QUASI_CLOSED) {
+      updateContainerState(containerID, DELETE);
+      reconState = HddsProtos.LifeCycleState.DELETING;
+    }
+    if (scmState == HddsProtos.LifeCycleState.DELETED
+        && reconState == HddsProtos.LifeCycleState.DELETING) {
+      updateContainerState(containerID, CLEANUP);
+      LOG.info("Container {} advanced to {} in Recon based on SCM state.",
+          containerID, scmState);
+    }
+  }
+
+  private void recoverDeletedContainerFromScm(ContainerID containerID,
+      ContainerReplicaProto.State replicaState) throws IOException {
+    if (replicaState == ContainerReplicaProto.State.DELETED) {
+      return;
+    }
+
+    ContainerWithPipeline scmContainer =
+        scmClient.getContainerWithPipeline(containerID.getId());
+    recoverDeletedContainerFromScm(containerID, replicaState, scmContainer);
+  }
+
+  private void recoverDeletedContainerFromScm(ContainerID containerID,

Review Comment:
   Removed that heavy sync which was not necessary. Reusing same from SCM as 
earlier.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to