anuragaw commented on a change in pull request #3575: [WIP DO NOT MERGE] Health 
check feature for virtual router
URL: https://github.com/apache/cloudstack/pull/3575#discussion_r357993085
 
 

 ##########
 File path: 
server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
 ##########
 @@ -1186,6 +1219,290 @@ protected void pushToUpdateQueue(final List<NetworkVO> 
networks) throws Interrup
         }
     }
 
+    protected class AnalyseRouterMonitorResultsTask extends 
ManagedContextRunnable {
+        public AnalyseRouterMonitorResultsTask() {
+        }
+
+        @Override
+        protected void runInContext() {
+            try {
+                final List<DomainRouterVO> routers = 
_routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, 
mgmtSrvrId);
+                s_logger.debug("Found " + routers.size() + " running routers. 
");
+
+                for (final DomainRouterVO router : routers) {
+                    GetRouterMonitorResultsAnswer answer = 
getMonitorResults(router, false);
+                    String checkFailsToRestartVr = 
RouterHealthChecksFailuresToRestartVr.valueIn(router.getDataCenterId());
+                    if (answer != null && answer.getFailingChecks().size() > 0 
&& StringUtils.isNotBlank(checkFailsToRestartVr)) {
+                        for (String failedCheck : answer.getFailingChecks()) {
+                            if (checkFailsToRestartVr.contains(failedCheck)) {
+                                rebootRouter(router.getId(), true);
+                            }
+                        }
+                    }
+                }
+            } catch (final Exception ex) {
+                s_logger.error("Fail to complete the 
AnalyseRouterMonitorResultsTask! ", ex);
+            }
+        }
+    }
+
+    /**
+     * Sends a GetRouterMonitorResultsCommand to the host of the given router and returns
+     * the answer.
+     *
+     * @param router             router whose health check results are requested
+     * @param performFreshChecks value handed to the GetRouterMonitorResultsCommand constructor
+     * @return the GetRouterMonitorResultsAnswer received from the agent; a new
+     *         GetRouterMonitorResultsAnswer built with {@code false} and the details of the
+     *         received answer when the agent replied with another answer type; or
+     *         {@code null} when health checks are disabled for the router's data center,
+     *         the control IP is blank or "0.0.0.0", no answer was received, or sending
+     *         the command threw an exception
+     */
+    private GetRouterMonitorResultsAnswer getMonitorResults(DomainRouterVO router,
+            boolean performFreshChecks) {
+        if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+            return null;
+        }
+
+        final String controlIP = getRouterControlIP(router);
+        final boolean usableControlIp =
+                StringUtils.isNotBlank(controlIP) && !controlIP.equals("0.0.0.0");
+        if (!usableControlIp) {
+            s_logger.debug("Skipping fetching health check results from router "
+                    + router.getUuid() + " because controlIp is not correct.");
+            return null;
+        }
+
+        final GetRouterMonitorResultsCommand command =
+                new GetRouterMonitorResultsCommand(performFreshChecks);
+        command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlIP);
+        command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
+        try {
+            final Answer answer = _agentMgr.easySend(router.getHostId(), command);
+
+            if (answer == null) {
+                s_logger.warn("Unable to fetch monitoring results data from router "
+                        + router.getHostName());
+                return null;
+            }
+            if (answer instanceof GetRouterMonitorResultsAnswer) {
+                return (GetRouterMonitorResultsAnswer) answer;
+            }
+
+            // Unexpected answer type: hand its details back to the caller.
+            s_logger.warn("Unable to fetch health checks results from router "
+                    + router.getHostName() + " Received answer " + answer.getDetails());
+            return new GetRouterMonitorResultsAnswer(command, false, null, answer.getDetails());
+        } catch (final Exception e) {
+            s_logger.warn("Error while fetching health check results from router: "
+                    + router.getInstanceName(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Returns the health check results of a router as a two-entry map.
+     *
+     * @param routerId  id used to look the router up through {@code _routerDao.findById}
+     * @param runChecks when {@code true} the health check data is first pushed to the
+     *                  router via updateRouterHealthCheckData, and the results are
+     *                  requested with performFreshChecks set to {@code true}
+     * @return a map with the keys "success" and "message"; "success" is "False" when the
+     *         router is not found, health checks are disabled for its data center, or no
+     *         answer could be obtained; otherwise both values come from the answer
+     */
+    @Override
+    public Map<String, String> getRouterHealthCheckResults(long routerId, boolean runChecks) {
+        final DomainRouterVO router = _routerDao.findById(routerId);
+        final Map<String, String> result = new HashMap<>();
+
+        if (router == null) {
+            result.put("success", "False");
+            result.put("message", "Router not found");
+            return result;
+        }
+
+        if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+            result.put("success", "False");
+            result.put("message",
+                    "Router id not valid. Health checks are disabled in router's zone.");
+            return result;
+        }
+
+        s_logger.info("Getting router health check results for router " + router.getUuid());
+
+        if (runChecks) {
+            final boolean successfullyUpdatedData = updateRouterHealthCheckData(router);
+            s_logger.info("Updating health check data for fresh run successfully: "
+                    + successfullyUpdatedData);
+            s_logger.info("Retrieving results for fresh health check execution for router "
+                    + router.getUuid());
+        } else {
+            // Only claim a fresh execution in the log when one was actually requested.
+            s_logger.info("Retrieving health check results without a fresh execution for router "
+                    + router.getUuid());
+        }
+
+        final GetRouterMonitorResultsAnswer answer = getMonitorResults(router, runChecks);
+        if (answer == null) {
+            result.put("success", "False");
+            result.put("message", "Router is unreachable.");
+            return result;
+        }
+
+        result.put("success", String.valueOf(answer.getResult()));
+        result.put("message", answer.getDetails());
+
+        return result;
+    }
+
+    protected class UpdateRouterHealthChecksConfigDataTask extends 
ManagedContextRunnable {
+        public UpdateRouterHealthChecksConfigDataTask() {
+        }
+
+        @Override
+        protected void runInContext() {
+            try {
+                final List<DomainRouterVO> routers = 
_routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, 
mgmtSrvrId);
+                s_logger.debug("Found " + routers.size() + " running routers. 
");
+
+                for (final DomainRouterVO router : routers) {
+                    updateRouterHealthCheckData(router);
+                }
+            } catch (final Exception ex) {
+                s_logger.error("Fail to complete the 
UpdateRouterHealthChecksConfigDataTask! ", ex);
+            }
+        }
+    }
+
+    /**
+     * Sends a SetMonitorServiceCommand carrying the router health check settings to the
+     * host of the given router.
+     *
+     * @param router router that should receive the health check data
+     * @return the result of the GroupAnswer received from the agent; {@code false} when
+     *         health checks are disabled for the router's data center, the control IP is
+     *         blank or "0.0.0.0", sending threw an exception, no answer was received, or
+     *         the answer is not a GroupAnswer
+     */
+    private boolean updateRouterHealthCheckData(DomainRouterVO router) {
+        if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+            return false;
+        }
+
+        final String controlIP = getRouterControlIP(router);
+        final boolean usableControlIp =
+                StringUtils.isNotBlank(controlIP) && !controlIP.equals("0.0.0.0");
+        if (!usableControlIp) {
+            s_logger.debug("Skipping update data on router " + router.getUuid()
+                    + " because controlIp is not correct.");
+            return false;
+        }
+
+        s_logger.info("Updating data for router health checks for router " + router.getUuid());
+        final SetMonitorServiceCommand command = new SetMonitorServiceCommand();
+        // Reuse the control IP validated above instead of resolving it a second time.
+        command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlIP);
+        command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
+        command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED,
+                RouterHealthChecksEnabled.valueIn(router.getDataCenterId()).toString());
+        command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL,
+                RouterHealthChecksBasicInterval.value().toString());
+        command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL,
+                RouterHealthChecksAdvancedInterval.value().toString());
+        command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED,
+                RouterHealthChecksToExclude.valueIn(router.getDataCenterId()));
+        command.setAdditionalData(getAdditionalDataForRouterHealthChecks(router));
+        command.setReconfigureAfterUpdate(true);
+
+        final Answer origAnswer;
+        try {
+            origAnswer = _agentMgr.easySend(router.getHostId(), command);
+        } catch (final Exception e) {
+            s_logger.warn("Error while updating health checks data on router: "
+                    + router.getInstanceName(), e);
+            return false;
+        }
+
+        if (origAnswer == null) {
+            s_logger.warn("Unable to update health checks data to router "
+                    + router.getHostName());
+            return false;
+        }
+
+        if (!(origAnswer instanceof GroupAnswer)) {
+            s_logger.warn("Unable to update health checks data to router "
+                    + router.getHostName() + " Received answer " + origAnswer.getDetails());
+            return false;
+        }
+
+        final GroupAnswer answer = (GroupAnswer) origAnswer;
+        if (!answer.getResult()) {
+            s_logger.warn("Unable to update health checks data to router "
+                    + router.getHostName() + ", details : " + answer.getDetails());
+        }
+
+        return answer.getResult();
+    }
+
+    private Map<String, String> getAdditionalDataForRouterHealthChecks(final 
DomainRouterVO router) {
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to