Adam Litke has uploaded a new change for review. Change subject: Live Merge: Mark active layer ILLEGAL before pivoting ......................................................................
Live Merge: Mark active layer ILLEGAL before pivoting A block commit of the active layer takes place in two phases. First, data is copied from the current leaf to its parent. During this time, all writes to the disk are mirrored to both volumes. When this process converges, the two volumes are and will remain identical due to ongoing drive mirroring by qemu. At this point we instruct libvirt to perform a pivot from the current leaf to the new leaf. Once this is complete, we synchronize our metadata and end the job. If we are catastrophically interrupted (i.e. we lose contact with the host) between the pivot request and the final metadata synchronization, there is currently no way to know whether the pivot actually happened. This could be dangerous because restarting the VM on another host with the wrong leaf volume could lead to data corruption. The solution is to mark the current leaf ILLEGAL before requesting a pivot. Since the volumes are identical at this point, it will always be safe to rerun the VM using the new leaf. A subsequent patch will take advantage of this ILLEGAL marker to provide the engine with a reliable and correct volume chain for this recovery scenario. 
Change-Id: I97ce8fbdfad7c81181a206f160ffdd647c552d4d Signed-off-by: Adam Litke <[email protected]> --- M vdsm/storage/hsm.py M vdsm/virt/vm.py 2 files changed, 56 insertions(+), 27 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/87/31787/1 diff --git a/vdsm/storage/hsm.py b/vdsm/storage/hsm.py index 6a53ca7..fc7681c 100644 --- a/vdsm/storage/hsm.py +++ b/vdsm/storage/hsm.py @@ -1850,9 +1850,14 @@ return dstParent = sdDom.produceVolume(imgUUID, subChain[0]).getParent() - children = sdDom.produceVolume(imgUUID, subChain[-1]).getChildren() + subChainTailVol = sdDom.produceVolume(imgUUID, subChain[-1]) + children = subChainTailVol.getChildren() for childID in children: sdDom.produceVolume(imgUUID, childID).setParentMeta(dstParent) + if not children: + self.log.debug("Leaf volume is being removed from the chain. " + "Marking it ILLEGAL to prevent data corruption") + subChainTailVol.setLegality(volume.ILLEGAL_VOL) @public def mergeSnapshots(self, sdUUID, spUUID, vmUUID, imgUUID, ancestor, diff --git a/vdsm/virt/vm.py b/vdsm/virt/vm.py index a3abde8..dacdbf6 100644 --- a/vdsm/virt/vm.py +++ b/vdsm/virt/vm.py @@ -5351,8 +5351,8 @@ return False def queryBlockJobs(self): - def startCleanup(job, drive): - t = LiveMergeCleanupThread(self, job['jobID'], drive) + def startCleanup(job, drive, mode): + t = LiveMergeCleanupThread(self, job['jobID'], drive, mode) t.start() self._liveMergeCleanupThreads[job['jobID']] = t @@ -5364,10 +5364,10 @@ for storedJob in self.conf['_blockJobs'].values(): jobID = storedJob['jobID'] cleanThread = self._liveMergeCleanupThreads.get(jobID) - if cleanThread and cleanThread.isSuccessful(): - # Handle successfully cleaned jobs early because the job - # just needs to be untracked and the stored disk info might - # be stale anyway (ie. after active layer commit). 
+ if cleanThread and cleanThread.isFinalized(): + # Handle finalized jobs early because the job just needs + # to be untracked and the stored disk info might be stale + # anyway (ie. after active layer commit). self.untrackBlockJob(jobID) continue @@ -5386,32 +5386,32 @@ jobsRet[jobID] = entry continue + mode = None if liveInfo: entry['bandwidth'] = liveInfo['bandwidth'] entry['cur'] = str(liveInfo['cur']) entry['end'] = str(liveInfo['end']) if self._activeLayerCommitReady(liveInfo): - try: - self.handleBlockJobEvent(jobID, drive, 'pivot') - except Exception: - # Just log it. We will retry next time - self.log.error("Pivot failed for job %s", jobID) + mode = LiveMergeCleanupThread.MODE_PIVOT else: # Libvirt has stopped reporting this job so we know it will # never report it again. + mode = LiveMergeCleanupThread.MODE_CLEANUP storedJob['gone'] = True + if mode: if not cleanThread: # There is no cleanup thread so the job must have just # ended. Spawn an async cleanup. - startCleanup(storedJob, drive) + startCleanup(storedJob, drive, mode) elif cleanThread.isAlive(): # Let previously started cleanup thread continue self.log.debug("Still waiting for block job %s to be " - "cleaned up", jobID) - elif not cleanThread.isSuccessful(): + "synchronized", jobID) + elif not cleanThread.isFinalized(): # At this point we know the thread is not alive and the - # cleanup failed. Retry it with a new thread. - startCleanup(storedJob, drive) + # cleanup failed or we have another step to complete. + # Continue or retry it with a new thread. 
+ startCleanup(storedJob, drive, mode) jobsRet[jobID] = entry return jobsRet @@ -5616,11 +5616,21 @@ device['volumeChain'] = drive.volumeChain = newChain def handleBlockJobEvent(self, jobID, drive, mode): - if mode == 'finished': + if mode == LiveMergeCleanupThread.MODE_CLEANUP: self.log.info("Live merge job completed (job %s)", jobID) self._syncVolumeChain(drive) self.startDisksStatsCollection() - elif mode == 'pivot': + elif mode == LiveMergeCleanupThread.MODE_PIVOT: + # We call imageSyncVolumeChain which will mark the current leaf + # ILLEGAL. We do this before requesting a pivot so that we can + # properly recover the VM in case we crash. At this point the + # active layer contains the same data as its parent so the ILLEGAL + # flag indicates that the VM should be restarted using the parent. + newVols = [vol['volumeID'] for vol in drive.volumeChain + if vol['volumeID'] != drive.volumeID] + self.cif.irs.imageSyncVolumeChain(drive.domainID, drive.imageID, + drive['volumeID'], newVols) + # A pivot changes the top volume being used for the VM Disk. Until # we can correct our metadata following the pivot we should not # attempt to collect disk stats. 
@@ -5669,32 +5679,46 @@ class LiveMergeCleanupThread(threading.Thread): - def __init__(self, vm, jobId, drive): + MODE_CLEANUP = 'cleanup' + MODE_PIVOT = 'pivot' + + def __init__(self, vm, jobId, drive, mode): threading.Thread.__init__(self) self.setDaemon(True) self.vm = vm self.jobId = jobId self.drive = drive + self.mode = mode self.success = False @utils.traceback() def run(self): - self.vm.log.info("Starting live merge cleanup for job %s", - self.jobId) + self.vm.log.info("Starting live merge %s for job %s", + self.mode, self.jobId) try: - self.vm.handleBlockJobEvent(self.jobId, self.drive, 'finished') + self.vm.handleBlockJobEvent(self.jobId, self.drive, self.mode) except Exception: - self.vm.log.warning("Cleanup failed for live merge job %s", - self.jobId) + self.vm.log.warning("%s failed for live merge job %s", + self.mode, self.jobId) raise else: self.success = True - self.vm.log.info("Cleanup completed for live merge job %s", - self.jobId) + self.vm.log.info("%s completed for live merge job %s", + self.mode, self.jobId) def isSuccessful(self): + """ + Returns True if this phase completed successfully. + """ return self.success + def isFinalized(self): + """ + Returns True if the last cleanup phase succeeded and the job can be + removed. + """ + return self.success and self.mode == self.MODE_CLEANUP + def _devicesWithAlias(domXML): return vmxml.filter_devices_with_alias(vmxml.all_devices(domXML)) -- To view, visit http://gerrit.ovirt.org/31787 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I97ce8fbdfad7c81181a206f160ffdd647c552d4d Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: master Gerrit-Owner: Adam Litke <[email protected]> _______________________________________________ vdsm-patches mailing list [email protected] https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches
