Let VMSync be aware of HA take-over on VM state management.

Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/a5f418dd
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/a5f418dd
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/a5f418dd

Branch: refs/heads/4.4
Commit: a5f418dd022ea64e56690c7b52ec68752a083e60
Parents: 0043a8f
Author: Kelven Yang <[email protected]>
Authored: Sun Mar 16 12:34:17 2014 -0700
Committer: Kelven Yang <[email protected]>
Committed: Tue Mar 18 17:34:36 2014 -0700

----------------------------------------------------------------------
 .../com/cloud/vm/VirtualMachineManagerImpl.java | 17 ++++-
 .../schema/src/com/cloud/vm/VMInstanceVO.java   |  2 +-
 .../src/com/cloud/vm/dao/VMInstanceDaoImpl.java | 70 ++++++++++----------
 .../cloud/ha/HighAvailabilityManagerImpl.java   | 42 +++---------
 4 files changed, 58 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
----------------------------------------------------------------------
diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
index a31172c..bdc0f34 100755
--- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
+++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
@@ -4221,6 +4221,16 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
         case Stopped:
         case Migrating:
             s_logger.info("VM " + vm.getInstanceName() + " is at " + 
vm.getState() + " and we received a power-off report while there is no pending 
jobs on it");
+            if(vm.isHaEnabled() && vm.getState() == State.Running && 
vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != 
HypervisorType.Hyperv) {
+                s_logger.info("Detected out-of-band stop of a HA enabled VM " 
+ vm.getInstanceName() + ", will schedule restart");
+                if(!_haMgr.hasPendingHaWork(vm.getId()))
+                       _haMgr.scheduleRestart(vm, true);
+                else
+                    s_logger.info("VM " + vm.getInstanceName() + " already has 
an pending HA task working on it");
+                
+                return;
+            }
+            
             VirtualMachineGuru vmGuru = getVmGuru(vm);
             VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm);
             sendStop(vmGuru, profile, true);
@@ -4406,10 +4416,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
             super(VirtualMachine.class, job, VmJobCheckInterval.value(), new 
Predicate() {
                 @Override
                 public boolean checkCondition() {
-                    VMInstanceVO instance = _vmDao.findById(vmId);
-                    if ((instance.getPowerState() == desiredPowerState && 
srcHostIdForMigration == null) ||
-                            (instance.getPowerState() == desiredPowerState && 
(srcHostIdForMigration != null && instance.getPowerHostId() != 
srcHostIdForMigration)))
+                    AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, 
job.getId());
+                    assert (jobVo != null);
+                    if (jobVo == null || jobVo.getStatus() != 
JobInfo.Status.IN_PROGRESS)
                         return true;
+                    
                     return false;
                 }
             }, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE);

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/schema/src/com/cloud/vm/VMInstanceVO.java
----------------------------------------------------------------------
diff --git a/engine/schema/src/com/cloud/vm/VMInstanceVO.java b/engine/schema/src/com/cloud/vm/VMInstanceVO.java
index 9ebade1..376b835 100644
--- a/engine/schema/src/com/cloud/vm/VMInstanceVO.java
+++ b/engine/schema/src/com/cloud/vm/VMInstanceVO.java
@@ -465,7 +465,7 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject<State, Vi
     @Override
     public String toString() {
         if (toString == null) {
-            toString = new 
StringBuilder("VM[").append(type.toString()).append("|").append(hostName).append("]").toString();
+            toString = new 
StringBuilder("VM[").append(type.toString()).append("|").append(getInstanceName()).append("]").toString();
         }
         return toString;
     }

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
----------------------------------------------------------------------
diff --git a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
index 0c13ae7..cc05f53 100644
--- a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
+++ b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java
@@ -417,44 +417,44 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
 
     @Override
     public boolean updateState(State oldState, Event event, State newState, 
VirtualMachine vm, Object opaque) {
-       if (newState == null) {
-               if (s_logger.isDebugEnabled()) {
-                       s_logger.debug("There's no way to transition from old 
state: " + oldState.toString() + " event: " + event.toString());
-               }
-               return false;
-       }
-
-       @SuppressWarnings("unchecked")
-               Pair<Long, Long> hosts = (Pair<Long,Long>)opaque;
-               Long newHostId = hosts.second();
-
-       VMInstanceVO vmi = (VMInstanceVO)vm;
-       Long oldHostId = vmi.getHostId();
-       Long oldUpdated = vmi.getUpdated();
-       Date oldUpdateDate = vmi.getUpdateTime();
-       if ( newState.equals(oldState) && newHostId != null && 
newHostId.equals(oldHostId) ) {
-           // state is same, don't need to update
-           return true;
-       }
+        if (newState == null) {
+            if (s_logger.isDebugEnabled()) {
+                s_logger.debug("There's no way to transition from old state: " 
+ oldState.toString() + " event: " + event.toString());
+            }
+            return false;
+        }
+
+        @SuppressWarnings("unchecked")
+        Pair<Long, Long> hosts = (Pair<Long, Long>)opaque;
+        Long newHostId = hosts.second();
+
+        VMInstanceVO vmi = (VMInstanceVO)vm;
+        Long oldHostId = vmi.getHostId();
+        Long oldUpdated = vmi.getUpdated();
+        Date oldUpdateDate = vmi.getUpdateTime();
+        if (newState.equals(oldState) && newHostId != null && 
newHostId.equals(oldHostId)) {
+            // state is same, don't need to update
+            return true;
+        }
 
         // lock the target row at beginning to avoid lock-promotion caused 
deadlock
         lockRow(vm.getId(), true);
-       
-       SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create();
-       sc.setParameters("id", vmi.getId());
-       sc.setParameters("states", oldState);
-       sc.setParameters("host", vmi.getHostId());
-       sc.setParameters("update", vmi.getUpdated());
-
-       vmi.incrUpdated();
-       UpdateBuilder ub = getUpdateBuilder(vmi);
-
-       ub.set(vmi, "state", newState);
-       ub.set(vmi, "hostId", newHostId);
-       ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn());
-       ub.set(vmi, _updateTimeAttr, new Date());
-
-       int result = update(vmi, sc);
+
+        SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create();
+        sc.setParameters("id", vmi.getId());
+        sc.setParameters("states", oldState);
+        sc.setParameters("host", vmi.getHostId());
+        sc.setParameters("update", vmi.getUpdated());
+
+        vmi.incrUpdated();
+        UpdateBuilder ub = getUpdateBuilder(vmi);
+
+        ub.set(vmi, "state", newState);
+        ub.set(vmi, "hostId", newHostId);
+        ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn());
+        ub.set(vmi, _updateTimeAttr, new Date());
+
+        int result = update(vmi, sc);
         if (result == 0) {
             VMInstanceVO vo = findByIdIncludingRemoved(vm.getId());
 

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
index 48d998a..f7ab552 100755
--- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
+++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
@@ -72,7 +72,6 @@ import com.cloud.utils.NumbersUtil;
 import com.cloud.utils.component.ManagerBase;
 import com.cloud.utils.concurrency.NamedThreadFactory;
 import com.cloud.utils.exception.CloudRuntimeException;
-import com.cloud.utils.fsm.StateListener;
 import com.cloud.vm.VMInstanceVO;
 import com.cloud.vm.VirtualMachine;
 import com.cloud.vm.VirtualMachine.State;
@@ -103,8 +102,7 @@ import com.cloud.vm.dao.VMInstanceDao;
  *         before retrying the stop | seconds | 120 || * }
  **/
 @Local(value = { HighAvailabilityManager.class })
-public class HighAvailabilityManagerImpl extends ManagerBase implements 
HighAvailabilityManager, ClusterManagerListener,
-        StateListener<State, VirtualMachine.Event, VirtualMachine> {
+public class HighAvailabilityManagerImpl extends ManagerBase implements 
HighAvailabilityManager, ClusterManagerListener {
 
     protected static final Logger s_logger = 
Logger.getLogger(HighAvailabilityManagerImpl.class);
     WorkerThread[] _workers;
@@ -236,7 +234,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
             return;
         }
 
-        s_logger.warn("Scheduling restart for VMs on host " + host.getId());
+        s_logger.warn("Scheduling restart for VMs on host " + host.getId() + 
"-" + host.getName());
 
         final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId());
         final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
@@ -806,7 +804,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
         _stopped = true;
 
         _executor = Executors.newScheduledThreadPool(count, new 
NamedThreadFactory("HA"));
-        VirtualMachine.State.getStateMachine().registerListener(this);
 
         return true;
     }
@@ -921,6 +918,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
                     work.setTimeToTry(nextTime);
                     work.setServerId(null);
                     work.setDateTaken(null);
+
+                    // if the restart failed midway due to an exception, the VM state may have been changed;
+                    // recapture it into the HA work item so that it can really continue on its next turn
+                    VMInstanceVO vm = 
_instanceDao.findById(work.getInstanceId());
+                    work.setUpdateTime(vm.getUpdated());
+                    work.setPreviousState(vm.getState());
                 }
                 _haDao.update(work.getId(), work);
             } catch (final Throwable th) {
@@ -963,35 +966,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
     }
 
     @Override
-    public boolean preStateTransitionEvent(State oldState, 
VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, 
Object opaque) {
-        return true;
-    }
-
-    @Override
-    public boolean postStateTransitionEvent(State oldState, 
VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, 
Object opaque) {
-        if (oldState == State.Running && event == 
VirtualMachine.Event.FollowAgentPowerOffReport && newState == State.Stopped) {
-            final VMInstanceVO vm = _instanceDao.findById(vo.getId());
-            if (vm.isHaEnabled()) {
-                if (vm.getState() != State.Stopped)
-                    s_logger.warn("Sanity check failed. 
postStateTransitionEvent reports transited to Stopped but VM " + vm + " is 
still at state " + vm.getState());
-
-                s_logger.info("Detected out-of-band stop of a HA enabled VM " 
+ vm.getInstanceName() + ", will schedule restart");
-                _executor.submit(new ManagedContextRunnable() {
-                    @Override
-                    protected void runInContext() {
-                        try {
-                            scheduleRestart(vm, false);
-                        } catch (Exception e) {
-                            s_logger.warn("Unexpected exception when 
scheduling a HA restart", e);
-                        }
-                    }
-                });
-            }
-        }
-        return true;
-    }
-
-    @Override
     public boolean hasPendingHaWork(long vmId) {
         List<HaWorkVO> haWorks = _haDao.listRunningHaWorkForVm(vmId);
         return haWorks.size() > 0;

Reply via email to