Let VMSync be aware of HA take-over on VM state management.
Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/a5f418dd
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/a5f418dd
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/a5f418dd

Branch: refs/heads/4.4
Commit: a5f418dd022ea64e56690c7b52ec68752a083e60
Parents: 0043a8f
Author: Kelven Yang <[email protected]>
Authored: Sun Mar 16 12:34:17 2014 -0700
Committer: Kelven Yang <[email protected]>
Committed: Tue Mar 18 17:34:36 2014 -0700

----------------------------------------------------------------------
 .../com/cloud/vm/VirtualMachineManagerImpl.java | 17 ++++-
 .../schema/src/com/cloud/vm/VMInstanceVO.java   |  2 +-
 .../src/com/cloud/vm/dao/VMInstanceDaoImpl.java | 70 ++++++++++----------
 .../cloud/ha/HighAvailabilityManagerImpl.java   | 42 +++---------
 4 files changed, 58 insertions(+), 73 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java
----------------------------------------------------------------------
diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index a31172c..bdc0f34 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -4221,6 +4221,16 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac case Stopped: case Migrating: s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); + if(vm.isHaEnabled() && vm.getState() == State.Running && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) { + s_logger.info("Detected out-of-band 
stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); + if(!_haMgr.hasPendingHaWork(vm.getId())) + _haMgr.scheduleRestart(vm, true); + else + s_logger.info("VM " + vm.getInstanceName() + " already has an pending HA task working on it"); + + return; + } + VirtualMachineGuru vmGuru = getVmGuru(vm); VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); sendStop(vmGuru, profile, true); @@ -4406,10 +4416,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac super(VirtualMachine.class, job, VmJobCheckInterval.value(), new Predicate() { @Override public boolean checkCondition() { - VMInstanceVO instance = _vmDao.findById(vmId); - if ((instance.getPowerState() == desiredPowerState && srcHostIdForMigration == null) || - (instance.getPowerState() == desiredPowerState && (srcHostIdForMigration != null && instance.getPowerHostId() != srcHostIdForMigration))) + AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, job.getId()); + assert (jobVo != null); + if (jobVo == null || jobVo.getStatus() != JobInfo.Status.IN_PROGRESS) return true; + return false; } }, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/schema/src/com/cloud/vm/VMInstanceVO.java ---------------------------------------------------------------------- diff --git a/engine/schema/src/com/cloud/vm/VMInstanceVO.java b/engine/schema/src/com/cloud/vm/VMInstanceVO.java index 9ebade1..376b835 100644 --- a/engine/schema/src/com/cloud/vm/VMInstanceVO.java +++ b/engine/schema/src/com/cloud/vm/VMInstanceVO.java @@ -465,7 +465,7 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject<State, Vi @Override public String toString() { if (toString == null) { - toString = new StringBuilder("VM[").append(type.toString()).append("|").append(hostName).append("]").toString(); + toString = new 
StringBuilder("VM[").append(type.toString()).append("|").append(getInstanceName()).append("]").toString(); } return toString; } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java ---------------------------------------------------------------------- diff --git a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java index 0c13ae7..cc05f53 100644 --- a/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -417,44 +417,44 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem @Override public boolean updateState(State oldState, Event event, State newState, VirtualMachine vm, Object opaque) { - if (newState == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("There's no way to transition from old state: " + oldState.toString() + " event: " + event.toString()); - } - return false; - } - - @SuppressWarnings("unchecked") - Pair<Long, Long> hosts = (Pair<Long,Long>)opaque; - Long newHostId = hosts.second(); - - VMInstanceVO vmi = (VMInstanceVO)vm; - Long oldHostId = vmi.getHostId(); - Long oldUpdated = vmi.getUpdated(); - Date oldUpdateDate = vmi.getUpdateTime(); - if ( newState.equals(oldState) && newHostId != null && newHostId.equals(oldHostId) ) { - // state is same, don't need to update - return true; - } + if (newState == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("There's no way to transition from old state: " + oldState.toString() + " event: " + event.toString()); + } + return false; + } + + @SuppressWarnings("unchecked") + Pair<Long, Long> hosts = (Pair<Long, Long>)opaque; + Long newHostId = hosts.second(); + + VMInstanceVO vmi = (VMInstanceVO)vm; + Long oldHostId = vmi.getHostId(); + Long oldUpdated = vmi.getUpdated(); + Date oldUpdateDate = vmi.getUpdateTime(); + if (newState.equals(oldState) && newHostId 
!= null && newHostId.equals(oldHostId)) { + // state is same, don't need to update + return true; + } // lock the target row at beginning to avoid lock-promotion caused deadlock lockRow(vm.getId(), true); - - SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create(); - sc.setParameters("id", vmi.getId()); - sc.setParameters("states", oldState); - sc.setParameters("host", vmi.getHostId()); - sc.setParameters("update", vmi.getUpdated()); - - vmi.incrUpdated(); - UpdateBuilder ub = getUpdateBuilder(vmi); - - ub.set(vmi, "state", newState); - ub.set(vmi, "hostId", newHostId); - ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn()); - ub.set(vmi, _updateTimeAttr, new Date()); - - int result = update(vmi, sc); + + SearchCriteria<VMInstanceVO> sc = StateChangeSearch.create(); + sc.setParameters("id", vmi.getId()); + sc.setParameters("states", oldState); + sc.setParameters("host", vmi.getHostId()); + sc.setParameters("update", vmi.getUpdated()); + + vmi.incrUpdated(); + UpdateBuilder ub = getUpdateBuilder(vmi); + + ub.set(vmi, "state", newState); + ub.set(vmi, "hostId", newHostId); + ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn()); + ub.set(vmi, _updateTimeAttr, new Date()); + + int result = update(vmi, sc); if (result == 0) { VMInstanceVO vo = findByIdIncludingRemoved(vm.getId()); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/a5f418dd/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java index 48d998a..f7ab552 100755 --- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -72,7 +72,6 @@ import com.cloud.utils.NumbersUtil; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.exception.CloudRuntimeException; 
-import com.cloud.utils.fsm.StateListener; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; @@ -103,8 +102,7 @@ import com.cloud.vm.dao.VMInstanceDao; * before retrying the stop | seconds | 120 || * } **/ @Local(value = { HighAvailabilityManager.class }) -public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener, - StateListener<State, VirtualMachine.Event, VirtualMachine> { +public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener { protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class); WorkerThread[] _workers; @@ -236,7 +234,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai return; } - s_logger.warn("Scheduling restart for VMs on host " + host.getId()); + s_logger.warn("Scheduling restart for VMs on host " + host.getId() + "-" + host.getName()); final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId()); final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); @@ -806,7 +804,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _stopped = true; _executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA")); - VirtualMachine.State.getStateMachine().registerListener(this); return true; } @@ -921,6 +918,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai work.setTimeToTry(nextTime); work.setServerId(null); work.setDateTaken(null); + + // if restart failed in the middle due to exception, VM state may has been changed + // recapture into the HA worker so that it can really continue in it next turn + VMInstanceVO vm = _instanceDao.findById(work.getInstanceId()); + work.setUpdateTime(vm.getUpdated()); + work.setPreviousState(vm.getState()); } _haDao.update(work.getId(), work); } catch (final Throwable th) 
{ @@ -963,35 +966,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai } @Override - public boolean preStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, Object opaque) { - return true; - } - - @Override - public boolean postStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, Object opaque) { - if (oldState == State.Running && event == VirtualMachine.Event.FollowAgentPowerOffReport && newState == State.Stopped) { - final VMInstanceVO vm = _instanceDao.findById(vo.getId()); - if (vm.isHaEnabled()) { - if (vm.getState() != State.Stopped) - s_logger.warn("Sanity check failed. postStateTransitionEvent reports transited to Stopped but VM " + vm + " is still at state " + vm.getState()); - - s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); - _executor.submit(new ManagedContextRunnable() { - @Override - protected void runInContext() { - try { - scheduleRestart(vm, false); - } catch (Exception e) { - s_logger.warn("Unexpected exception when scheduling a HA restart", e); - } - } - }); - } - } - return true; - } - - @Override public boolean hasPendingHaWork(long vmId) { List<HaWorkVO> haWorks = _haDao.listRunningHaWorkForVm(vmId); return haWorks.size() > 0;
