DaanHoogland commented on code in PR #9885:
URL: https://github.com/apache/cloudstack/pull/9885#discussion_r1837748547


##########
engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql:
##########
@@ -425,3 +425,10 @@ INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid, 
hypervisor_type, hypervi
 
 CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.vm_instance', 'delete_protection', 
'boolean DEFAULT FALSE COMMENT "delete protection for vm" ');
 CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.volumes', 'delete_protection', 
'boolean DEFAULT FALSE COMMENT "delete protection for volumes" ');
+
+-- Modify index for mshost_peer
+DELETE FROM `cloud`.`mshost_peer`;
+CALL 
`cloud`.`IDEMPOTENT_DROP_FOREIGN_KEY`('cloud.mshost_peer','fk_mshost_peer__owner_mshost');
+CALL 
`cloud`.`IDEMPOTENT_DROP_INDEX`('i_mshost_peer__owner_peer_runid','mshost_peer');
+CALL `cloud`.`IDEMPOTENT_ADD_UNIQUE_KEY`('cloud.mshost_peer', 
'i_mshost_peer__owner_peer', '(owner_mshost, peer_mshost)');
+CALL `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY`('cloud.mshost_peer', 
'fk_mshost_peer__owner_mshost', '(owner_mshost)', '`mshost`(`id`)');

Review Comment:
   doesn't this mean it's going to be 21 instead of 20.1?
   in any case not in this file, unless we add it to 4.20.



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -898,6 +903,44 @@ private void peerScan() throws ActiveFencingException {
 
         final Profiler profilerInvalidatedNodeList = new Profiler();
         profilerInvalidatedNodeList.start();
+        processInvalidatedNodes(invalidatedNodeList);
+        profilerInvalidatedNodeList.stop();
+
+        final Profiler profilerRemovedList = new Profiler();
+        profilerRemovedList.start();
+        processRemovedNodes(cutTime, removedNodeList);
+        profilerRemovedList.stop();
+
+        final Profiler profilerNewList = new Profiler();
+        profilerNewList.start();
+        processNewNodes(cutTime, currentList);
+        profilerNewList.stop();
+
+        final Profiler profilerInactiveList = new Profiler();
+        profilerInactiveList.start();
+        processInactiveNodes(cutTime);
+        profilerInactiveList.stop();
+
+        profiler.stop();
+
+        logger.debug(String.format("Peer scan is finished. profiler: %s , 
profilerQueryActiveList: %s, " +
+                        ", profilerSyncClusterInfo: %s, 
profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
+                        ", profilerNewList: %s, profilerInactiveList: %s",
+                profiler, profilerQueryActiveList, profilerSyncClusterInfo, 
profilerInvalidatedNodeList, profilerRemovedList,
+                profilerNewList, profilerInactiveList));
+
+        if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
+            if (logger.isDebugEnabled()) {
+                logger.debug(String.format("Peer scan takes too long to 
finish. profiler: %s , profilerQueryActiveList: %s, " +
+                        ", profilerSyncClusterInfo: %s, 
profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
+                        ", profilerNewList: %s, profilerInactiveList: %s",
+                        profiler, profilerQueryActiveList, 
profilerSyncClusterInfo, profilerInvalidatedNodeList, profilerRemovedList,
+                        profilerNewList, profilerInactiveList));
+            }
+        }

Review Comment:
   ```suggestion
           if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
               if (logger.isDebugEnabled()) {
                   logger.debug("Peer scan takes too long to finish. profiler: 
{} , profilerQueryActiveList: {}, " +
                           ", profilerSyncClusterInfo: {}, 
profilerInvalidatedNodeList: {}, profilerRemovedList: {}," +
                           ", profilerNewList: {}, profilerInactiveList: {}",
                           profiler, profilerQueryActiveList,
                           profilerSyncClusterInfo, 
profilerInvalidatedNodeList, profilerRemovedList,
                           profilerNewList, profilerInactiveList);
               }
           }
   ```



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -898,6 +903,44 @@ private void peerScan() throws ActiveFencingException {
 
         final Profiler profilerInvalidatedNodeList = new Profiler();
         profilerInvalidatedNodeList.start();
+        processInvalidatedNodes(invalidatedNodeList);
+        profilerInvalidatedNodeList.stop();
+
+        final Profiler profilerRemovedList = new Profiler();
+        profilerRemovedList.start();
+        processRemovedNodes(cutTime, removedNodeList);
+        profilerRemovedList.stop();
+
+        final Profiler profilerNewList = new Profiler();
+        profilerNewList.start();
+        processNewNodes(cutTime, currentList);
+        profilerNewList.stop();
+
+        final Profiler profilerInactiveList = new Profiler();
+        profilerInactiveList.start();
+        processInactiveNodes(cutTime);
+        profilerInactiveList.stop();
+
+        profiler.stop();
+
+        logger.debug(String.format("Peer scan is finished. profiler: %s , 
profilerQueryActiveList: %s, " +
+                        ", profilerSyncClusterInfo: %s, 
profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
+                        ", profilerNewList: %s, profilerInactiveList: %s",
+                profiler, profilerQueryActiveList, profilerSyncClusterInfo, 
profilerInvalidatedNodeList, profilerRemovedList,
+                profilerNewList, profilerInactiveList));
+
+        if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
+            if (logger.isDebugEnabled()) {
+                logger.debug(String.format("Peer scan takes too long to 
finish. profiler: %s , profilerQueryActiveList: %s, " +
+                        ", profilerSyncClusterInfo: %s, 
profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
+                        ", profilerNewList: %s, profilerInactiveList: %s",
+                        profiler, profilerQueryActiveList, 
profilerSyncClusterInfo, profilerInvalidatedNodeList, profilerRemovedList,
+                        profilerNewList, profilerInactiveList));
+            }
+        }
+    }
+
+    private void processInvalidatedNodes(List<ManagementServerHostVO> 
invalidatedNodeList) {

Review Comment:
   :+1: 



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -911,33 +954,34 @@ private void peerScan() throws ActiveFencingException {
 
             queueNotification(new 
ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeRemoved, 
invalidatedNodeList));
         }
-        profilerInvalidatedNodeList.stop();
+    }
 
-        final Profiler profilerRemovedList = new Profiler();
-        profilerRemovedList.start();
+    private void processRemovedNodes(Date cutTime, 
List<ManagementServerHostVO> removedNodeList) {
         // process removed node list
         final Iterator<ManagementServerHostVO> it = removedNodeList.iterator();
         while (it.hasNext()) {
             final ManagementServerHostVO mshost = it.next();
-            if (!pingManagementNode(mshost)) {
-                logger.warn("Management node " + mshost.getId() + " is 
detected inactive by timestamp and also not pingable");
+            // Check if peer state is Up in the period
+            if (!_mshostPeerDao.isPeerUpState(_mshostId, mshost.getId(), new 
Date(cutTime.getTime() - HeartbeatThreshold.value()))) {
+                logger.warn("Management node " + mshost.getId() + " is 
detected inactive by timestamp and did not send node status to this node");

Review Comment:
   ```suggestion
                   logger.warn("Management node {} is detected inactive by 
timestamp and did not send node status to this node",  mshost.getId());
   ```



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -959,18 +1003,31 @@ private void peerScan() throws ActiveFencingException {
         if (newNodeList.size() > 0) {
             queueNotification(new 
ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeAdded, 
newNodeList));
         }
+    }
 
-        profiler.stop();
-
-        if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
-            if (logger.isDebugEnabled()) {
-                logger.debug("Peer scan takes too long to finish. profiler: " 
+ profiler.toString() + ", profilerQueryActiveList: " +
-                        profilerQueryActiveList.toString() + ", 
profilerSyncClusterInfo: " + profilerSyncClusterInfo.toString() + ", 
profilerInvalidatedNodeList: " +
-                        profilerInvalidatedNodeList.toString() + ", 
profilerRemovedList: " + profilerRemovedList.toString());
+    private void processInactiveNodes(Date cutTime) {
+        final List<ManagementServerHostVO> inactiveList = 
_mshostDao.getInactiveList(new Date(cutTime.getTime() - 
HeartbeatThreshold.value()));
+        if (inactiveList.size() > 0) {
+            if (logger.isInfoEnabled()) {
+                logger.info(String.format("Found %s inactive management server 
node based on timestamp", inactiveList.size()));

Review Comment:
   ```suggestion
                   logger.info("Found {} inactive management server node based 
on timestamp", inactiveList.size());
   ```



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -911,33 +954,34 @@ private void peerScan() throws ActiveFencingException {
 
             queueNotification(new 
ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeRemoved, 
invalidatedNodeList));
         }
-        profilerInvalidatedNodeList.stop();
+    }
 
-        final Profiler profilerRemovedList = new Profiler();
-        profilerRemovedList.start();
+    private void processRemovedNodes(Date cutTime, 
List<ManagementServerHostVO> removedNodeList) {
         // process removed node list
         final Iterator<ManagementServerHostVO> it = removedNodeList.iterator();
         while (it.hasNext()) {
             final ManagementServerHostVO mshost = it.next();
-            if (!pingManagementNode(mshost)) {
-                logger.warn("Management node " + mshost.getId() + " is 
detected inactive by timestamp and also not pingable");
+            // Check if peer state is Up in the period
+            if (!_mshostPeerDao.isPeerUpState(_mshostId, mshost.getId(), new 
Date(cutTime.getTime() - HeartbeatThreshold.value()))) {
+                logger.warn("Management node " + mshost.getId() + " is 
detected inactive by timestamp and did not send node status to this node");
                 _activePeers.remove(mshost.getId());
                 try {
                     JmxUtil.unregisterMBean("ClusterManager", "Node " + 
mshost.getId());
                 } catch (final Exception e) {
                     logger.warn("Unable to deregiester cluster node from JMX 
monitoring due to exception " + e.toString());
                 }
             } else {
-                logger.info("Management node " + mshost.getId() + " is 
detected inactive by timestamp but is pingable");
+                logger.info("Management node " + mshost.getId() + " is 
detected inactive by timestamp but sent node status to this node");

Review Comment:
   ```suggestion
                   logger.info("Management node {} is detected inactive by 
timestamp but sent node status to this node", mshost.getId());
   ```



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -959,18 +1003,31 @@ private void peerScan() throws ActiveFencingException {
         if (newNodeList.size() > 0) {
             queueNotification(new 
ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeAdded, 
newNodeList));
         }
+    }
 
-        profiler.stop();
-
-        if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
-            if (logger.isDebugEnabled()) {
-                logger.debug("Peer scan takes too long to finish. profiler: " 
+ profiler.toString() + ", profilerQueryActiveList: " +
-                        profilerQueryActiveList.toString() + ", 
profilerSyncClusterInfo: " + profilerSyncClusterInfo.toString() + ", 
profilerInvalidatedNodeList: " +
-                        profilerInvalidatedNodeList.toString() + ", 
profilerRemovedList: " + profilerRemovedList.toString());
+    private void processInactiveNodes(Date cutTime) {
+        final List<ManagementServerHostVO> inactiveList = 
_mshostDao.getInactiveList(new Date(cutTime.getTime() - 
HeartbeatThreshold.value()));
+        if (inactiveList.size() > 0) {
+            if (logger.isInfoEnabled()) {
+                logger.info(String.format("Found %s inactive management server 
node based on timestamp", inactiveList.size()));
             }
+            for (final ManagementServerHostVO host : inactiveList) {
+                logger.info(String.format("management server node msid: %s, 
name: %s, service ip: %s, version: %s",
+                        host.getMsid(),  host.getName(), host.getServiceIP(), 
host.getVersion()));

Review Comment:
   ```suggestion
                   logger.info("management server node msid: {}, name: {}, 
service ip: {}, version: {}",
                           host.getMsid(),  host.getName(), 
host.getServiceIP(), host.getVersion());
   ```



##########
server/src/main/java/com/cloud/server/StatsCollector.java:
##########
@@ -1153,6 +1157,17 @@ public String newStatus(ClusterServicePdu pdu) {
             try {
                 hostStatsEntry = gson.fromJson(pdu.getJsonPackage(),new 
TypeToken<ManagementServerHostStatsEntry>(){}.getType());
                 
managementServerHostStats.put(hostStatsEntry.getManagementServerHostUuid(), 
hostStatsEntry);
+
+                // Update peer state to Up in mshost_peer
+                ManagementServerHostVO mgmtServerVo = 
managementServerHostDao.findByMsid(managementServerNodeId);
+                if (mgmtServerVo != null) {
+                    msId = mgmtServerVo.getId();
+                    if (msId != hostStatsEntry.getManagementServerHostId()) {
+                        managementServerHostPeerDao.updatePeerInfo(msId, 
hostStatsEntry.getManagementServerHostId(), 
hostStatsEntry.getManagementServerRunId(), ManagementServerHost.State.Up);
+                    }
+                } else {
+                    logger.warn(String.format("Cannot find management server 
with msid [%s]. Therefore, do not update peer info.", managementServerNodeId));
+                }

Review Comment:
   new method?



##########
framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java:
##########
@@ -959,18 +1003,31 @@ private void peerScan() throws ActiveFencingException {
         if (newNodeList.size() > 0) {
             queueNotification(new 
ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeAdded, 
newNodeList));
         }
+    }
 
-        profiler.stop();
-
-        if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
-            if (logger.isDebugEnabled()) {
-                logger.debug("Peer scan takes too long to finish. profiler: " 
+ profiler.toString() + ", profilerQueryActiveList: " +
-                        profilerQueryActiveList.toString() + ", 
profilerSyncClusterInfo: " + profilerSyncClusterInfo.toString() + ", 
profilerInvalidatedNodeList: " +
-                        profilerInvalidatedNodeList.toString() + ", 
profilerRemovedList: " + profilerRemovedList.toString());
+    private void processInactiveNodes(Date cutTime) {
+        final List<ManagementServerHostVO> inactiveList = 
_mshostDao.getInactiveList(new Date(cutTime.getTime() - 
HeartbeatThreshold.value()));
+        if (inactiveList.size() > 0) {
+            if (logger.isInfoEnabled()) {
+                logger.info(String.format("Found %s inactive management server 
node based on timestamp", inactiveList.size()));
             }
+            for (final ManagementServerHostVO host : inactiveList) {
+                logger.info(String.format("management server node msid: %s, 
name: %s, service ip: %s, version: %s",
+                        host.getMsid(),  host.getName(), host.getServiceIP(), 
host.getVersion()));
+                // Check if any peer state is Up in the period
+                if (ManagementServerHost.State.Up.equals(host.getState()) &&
+                        !_mshostPeerDao.isPeerUpState(host.getId(), new 
Date(cutTime.getTime() - HeartbeatThreshold.value()))) {
+                    logger.warn("Management node " + host.getId() + " is 
detected inactive by timestamp and did not send node status to all other 
nodes");

Review Comment:
   ```suggestion
                       logger.warn("Management node {} is detected inactive by 
timestamp and did not send node status to all other nodes", host.getId());
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to