Before, when doing kvm live migrations we use to accept an "unknown status" but to reject anything that didn't match our regexp. Since we've seen "info migrate" return a completely empty answer, we'll be more tolerant of completely unknown results (while still logging them) and at the same time we'll limit the number of them which we're willing to accept in a row.
Signed-off-by: Guido Trotter <ultrot...@google.com> --- lib/hypervisor/hv_kvm.py | 17 ++++++++++++++--- 1 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py index 92a2854..40b5ef0 100644 --- a/lib/hypervisor/hv_kvm.py +++ b/lib/hypervisor/hv_kvm.py @@ -675,17 +675,25 @@ class KVMHypervisor(hv_base.BaseHypervisor): info_command = 'info migrate' done = False + broken_answers = 0 while not done: result = self._CallMonitorCommand(instance_name, info_command) match = self._MIGRATION_STATUS_RE.search(result.stdout) if not match: - raise errors.HypervisorError("Unknown 'info migrate' result: %s" % - result.stdout) + broken_answers += 1 + if not result.stdout: + logging.info("KVM: empty 'info migrate' result") + else: + logging.warning("KVM: unknown 'info migrate' result: %s" % + result.stdout) + time.sleep(2) else: status = match.group(1) if status == 'completed': done = True elif status == 'active': + # reset the broken answers count + broken_answers = 0 time.sleep(2) elif status == 'failed' or status == 'cancelled': if not live: @@ -693,8 +701,11 @@ class KVMHypervisor(hv_base.BaseHypervisor): raise errors.HypervisorError("Migration %s at the kvm level" % status) else: - logging.info("KVM: unknown migration status '%s'", status) + logging.warning("KVM: unknown migration status '%s'", status) + broken_answers += 1 time.sleep(2) + if broken_answers >= 5: + raise errors.HypervisorError("Too many 'info migrate' broken answers") utils.KillProcess(pid) self._RemoveInstanceRuntimeFiles(pidfile, instance_name) -- 1.6.5