Eli Mesika has uploaded a new change for review.

Change subject: core: start PM enabled hosts after engine restart
......................................................................

core: start PM enabled hosts after engine restart

This patch handles the following scenario
host non-responding->Restart->Stop->off->engine restart
In this case the host will not be started again after the engine is up.
This patch handles such scenarios by searching for hosts that both have
PM enabled and are in 'Reboot' status after the
DisableFenceAtStartupInSec quiet time in which PM operations are
skipped.
If such hosts are found after the quiet time, an attempt to start the
host via its PM card is executed.
Since the status of a Host is moved to 'Reboot' when it is restarted
just after the Stop command (Reboot=stop->wait for off->start->wait for
on), if the engine fails or is stopped at this point the Host will be left
in the Reboot status.

Change-Id: Ibbfe83535364556df72eaac348e58d4b7146156e
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1005756
Signed-off-by: Eli Mesika <[email protected]>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitBackendServicesOnStartupBean.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsCommand.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/pm/PmHealthCheckManager.java
3 files changed, 68 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/05/28305/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitBackendServicesOnStartupBean.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitBackendServicesOnStartupBean.java
index a51fd4c..dd4b16b 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitBackendServicesOnStartupBean.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitBackendServicesOnStartupBean.java
@@ -20,11 +20,19 @@
 import org.ovirt.engine.core.bll.storage.StoragePoolStatusHandler;
 import org.ovirt.engine.core.common.action.MigrateVmParameters;
 import org.ovirt.engine.core.common.action.VdcActionType;
+import org.ovirt.engine.core.common.businessentities.VDS;
+import org.ovirt.engine.core.common.businessentities.VDSStatus;
+import org.ovirt.engine.core.common.config.Config;
+import org.ovirt.engine.core.common.config.ConfigValues;
 import org.ovirt.engine.core.common.utils.customprop.VmPropertiesUtils;
 import org.ovirt.engine.core.common.utils.exceptions.InitializationException;
 import org.ovirt.engine.core.compat.Guid;
+import org.ovirt.engine.core.dal.dbbroker.DbFacade;
+import org.ovirt.engine.core.utils.ThreadUtils;
 import org.ovirt.engine.core.utils.customprop.DevicePropertiesUtils;
 import org.ovirt.engine.core.utils.extensionsmgr.EngineExtensionsManager;
+import org.ovirt.engine.core.utils.linq.LinqUtils;
+import org.ovirt.engine.core.utils.linq.Predicate;
 import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil;
 import org.ovirt.engine.core.vdsbroker.ResourceManager;
 import org.slf4j.Logger;
@@ -40,8 +48,9 @@
 public class InitBackendServicesOnStartupBean implements 
InitBackendServicesOnStartup{
 
     private static final Logger log = 
LoggerFactory.getLogger(InitBackendServicesOnStartupBean.class);
+    private List<VDS> hostsWithPMInReboot;
 
-    /**
+     /**
      * This method is called upon the bean creation as part
      * of the management Service bean life cycle.
      */
@@ -50,6 +59,9 @@
     public void create() {
 
         try {
+            // This must be done before starting to sample the hosts status 
from VDSM since the sampling will turn such host from Reboot to NonResponsive
+            setHostsWithPMInReboot();
+
             // Create authentication profiles for all the domains that exist 
in the database:
             // TODO: remove this later, and rely only on the custom and built 
in extensions directories configuration
 
@@ -59,7 +71,6 @@
             AsyncTaskManager.getInstance().initAsyncTaskManager();
             ResourceManager.getInstance().init();
             OvfDataUpdater.getInstance().initOvfDataUpdater();
-
             SchedulingManager.getInstance().setMigrationHandler(new 
MigrationHandler() {
 
                 @Override
@@ -104,11 +115,35 @@
 
             // Initialize Power Management Health Check
             PmHealthCheckManager.getInstance().initialize();
+            // Start hosts that were fenced in the middle of an engine restart 
(if exists)
+            startHostsWithPMInReboot();
         } catch (Exception ex) {
             log.error("Failed to initialize backend", ex);
             throw ex;
         }
     }
 
+    private void startHostsWithPMInReboot() {
+        if (hostsWithPMInReboot.size() > 0) {
+            ThreadPoolUtil.execute(new Runnable() {
+                @Override
+                public void run() {
+                    // wait the quiet time from engine start in which we skip 
fencing operations
+                    int mSecToWait = 
Config.<Integer>getValue(ConfigValues.DisableFenceAtStartupInSec) * 1000;
+                    ThreadUtils.sleep(mSecToWait);
+                    
PmHealthCheckManager.getInstance().startHostsWithPMInReboot(hostsWithPMInReboot);
+                }
+            });
+        }
+    }
 
+    private void setHostsWithPMInReboot() {
+        hostsWithPMInReboot = 
LinqUtils.filter(DbFacade.getInstance().getVdsDao().getAll(),
+                new Predicate<VDS> () {
+                    @Override
+                    public boolean eval(VDS host) {
+                        return (host.getpm_enabled() && host.getStatus() == 
VDSStatus.Reboot);
+                    }
+                });
+    }
 }
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsCommand.java
index 4ff10c9..7a287e6 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsCommand.java
@@ -312,7 +312,7 @@
      * @return
      *              boolean
      */
-    protected boolean isPmReportsStatusDown() {
+    public boolean isPmReportsStatusDown() {
         boolean result = false;
         VDS vds = getVds();
         VDSReturnValue returnValue=null;
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/pm/PmHealthCheckManager.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/pm/PmHealthCheckManager.java
index 86b1cde..7850b84 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/pm/PmHealthCheckManager.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/pm/PmHealthCheckManager.java
@@ -1,12 +1,18 @@
 package org.ovirt.engine.core.bll.pm;
 
+import org.ovirt.engine.core.bll.Backend;
 import org.ovirt.engine.core.bll.FenceExecutor;
+import org.ovirt.engine.core.bll.RestartVdsCommand;
 import org.ovirt.engine.core.common.AuditLogType;
+import org.ovirt.engine.core.common.action.FenceVdsActionParameters;
+import org.ovirt.engine.core.common.action.VdcActionType;
+import org.ovirt.engine.core.common.action.VdcReturnValueBase;
 import org.ovirt.engine.core.common.businessentities.FenceActionType;
 import org.ovirt.engine.core.common.businessentities.FenceAgentOrder;
 import org.ovirt.engine.core.common.businessentities.VDS;
 import org.ovirt.engine.core.common.config.Config;
 import org.ovirt.engine.core.common.config.ConfigValues;
+import org.ovirt.engine.core.common.vdscommands.VDSReturnValue;
 import org.ovirt.engine.core.compat.Guid;
 import org.ovirt.engine.core.dal.dbbroker.DbFacade;
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AlertDirector;
@@ -105,6 +111,30 @@
         }
     }
 
+    /**
+     * This method starts hosts that remained powered off because of the 
following flow:
+     * non-responding -> stop -> wait -> off -> engine restart
+     * Such hosts stay down while their status still shows Reboot.
+     * We should try to catch such hosts and attempt to restart them.
+     * @param hostWithPMInStatusReboot
+     */
+    public void startHostsWithPMInReboot(List<VDS> hostWithPMInStatusReboot) {
+        VDSReturnValue returnValue = null;
+        for (VDS host : hostWithPMInStatusReboot) {
+            RestartVdsCommand restartVdsCommand = new RestartVdsCommand(new
+                    FenceVdsActionParameters(host.getId(), 
FenceActionType.Status));
+            if (restartVdsCommand.isPmReportsStatusDown()) {
+                VdcReturnValueBase retValue = 
Backend.getInstance().runInternalAction(VdcActionType.RestartVds, 
restartVdsCommand.getParameters());
+                if (retValue!= null && retValue.getSucceeded()) {
+                    log.infoFormat("Host {0} was started successfully by PM 
Health Check Manager",
+                            host.getName());
+                }
+                else {
+                    log.infoFormat("PM Health Check Manager failed to start 
Host {0}", host.getName());
+                }
+            }
+        }
+    }
     public static PmHealthCheckManager getInstance() {
         return instance;
     }


-- 
To view, visit http://gerrit.ovirt.org/28305
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibbfe83535364556df72eaac348e58d4b7146156e
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Eli Mesika <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to