Previously, when doing KVM live migrations, we used to accept an "unknown
status" but rejected anything that didn't match our regexp. Since we've
seen "info migrate" return a completely empty answer, we'll now be more
tolerant of completely unknown results (while still logging them), and at
the same time we'll limit the number of such answers we're willing to
accept in a row.

Signed-off-by: Guido Trotter <ultrot...@google.com>
---
 lib/hypervisor/hv_kvm.py |   17 ++++++++++++++---
 1 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py
index 92a2854..40b5ef0 100644
--- a/lib/hypervisor/hv_kvm.py
+++ b/lib/hypervisor/hv_kvm.py
@@ -675,17 +675,25 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     info_command = 'info migrate'
     done = False
+    broken_answers = 0
     while not done:
       result = self._CallMonitorCommand(instance_name, info_command)
       match = self._MIGRATION_STATUS_RE.search(result.stdout)
       if not match:
-        raise errors.HypervisorError("Unknown 'info migrate' result: %s" %
-                                     result.stdout)
+        broken_answers += 1
+        if not result.stdout:
+          logging.info("KVM: empty 'info migrate' result")
+        else:
+          logging.warning("KVM: unknown 'info migrate' result: %s" %
+                          result.stdout)
+        time.sleep(2)
       else:
         status = match.group(1)
         if status == 'completed':
           done = True
         elif status == 'active':
+          # reset the broken answers count
+          broken_answers = 0
           time.sleep(2)
         elif status == 'failed' or status == 'cancelled':
           if not live:
@@ -693,8 +701,11 @@ class KVMHypervisor(hv_base.BaseHypervisor):
           raise errors.HypervisorError("Migration %s at the kvm level" %
                                        status)
         else:
-          logging.info("KVM: unknown migration status '%s'", status)
+          logging.warning("KVM: unknown migration status '%s'", status)
+          broken_answers += 1
           time.sleep(2)
+      if broken_answers >= 5:
+        raise errors.HypervisorError("Too many 'info migrate' broken answers")
 
     utils.KillProcess(pid)
     self._RemoveInstanceRuntimeFiles(pidfile, instance_name)
-- 
1.6.5

Reply via email to