On Thu, Jan 19, 2017 at 02:06:30PM +0000, Iain Lane wrote:
> Package: autopkgtest
> Version: 4.3
> Severity: minor
> Tags: upstream
> 
> I was attempting to debug/fix LP: #1630578, so I wrote the attached
> package to trigger a panic manually.
> 
> Seems that the qemu runner hangs when the panic happens. (Hopefully it's
> going to time out eventually.)
> 
> I thought about solving this by slightly refactoring the auxverb to use
> asyncio when waiting for the command to exit, and then also for reading
> from ttyS0 for occurrences of 'Kernel panic'.

Hmm.

It was fun learning a bit about asyncio to implement this on the train
back from FOSDEM. What I have currently is attached, but it's not fully
ready. I explained a problem in the commit message - if the machine is
dead due to a panic, how can we execute copyup and other commands that
autopkgtest wants to run after a failure?

Cheers,

-- 
Iain Lane                                  [ i...@orangesquash.org.uk ]
Debian Developer                                   [ la...@debian.org ]
Ubuntu Developer                                   [ la...@ubuntu.com ]
From 4a0a75c6925b9d91f314760847855659b9429e45 Mon Sep 17 00:00:00 2001
From: Iain Lane <iain.l...@canonical.com>
Date: Mon, 6 Feb 2017 10:49:42 +0000
Subject: [PATCH] WIP: autopkgtest-virt-qemu: Record kernel panics as failures

In the auxverb, look for either the exit file being created or a kernel
panic being printed to the console to know when the command has finished
running.

This is WIP because we can't run later commands (e.g. copyup) even if we
manage to detect a panic.

Closes: #851870
---
 virt/autopkgtest-virt-qemu | 61 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/virt/autopkgtest-virt-qemu b/virt/autopkgtest-virt-qemu
index 35c4990..432cce6 100755
--- a/virt/autopkgtest-virt-qemu
+++ b/virt/autopkgtest-virt-qemu
@@ -320,9 +320,12 @@ def make_auxverb(shared_dir):
     auxverb = os.path.join(workdir, 'runcmd')
     with open(auxverb, 'w') as f:
         f.write('''#!%(py)s
+import asyncio, functools
 import sys, os, tempfile, threading, time, atexit, shutil, fcntl, errno, pipes
 import socket
 
+from concurrent.futures import CancelledError
+
 dir_host = '%(dir)s'
 job_host = tempfile.mkdtemp(prefix='job.', dir=dir_host)
 atexit.register(shutil.rmtree, job_host)
@@ -389,10 +392,49 @@ cmd = 'PYTHONHASHSEED=0 /tmp/eofcat %%(d)s/stdin_eof %%(d)s/exit.tmp < %%(d)s/st
 s.send(cmd.encode())
 
 # wait until command has exited
-path_exit = os.path.join(job_host, 'exit')
-while not os.path.exists(path_exit) or os.path.getsize(path_exit) == 0:
-    time.sleep(0.2)
-running = False
+@asyncio.coroutine
+def should_exit(data_future):
+    global running
+    path_exit = os.path.join(job_host, 'exit')
+    while not os.path.exists(path_exit) or os.path.getsize(path_exit) == 0:
+        yield from asyncio.sleep(0.2)
+    else:
+        running = False
+        # stop looking at ttyS0
+        data_future.cancel()
+
+# also quit early if the guest kernel panics
+@asyncio.coroutine
+def wait_for_data():
+    global running
+    # Register the open socket to wait for data
+    reader, writer = yield from asyncio.open_unix_connection(path="%(console)s")
+
+    # Wait for data
+    while True:
+        data = yield from reader.readline()
+
+        if b'Kernel panic - not syncing' in data:
+            running = False
+            return
+
+def future_done(cancelme, future):
+    # One of the futures finished
+    cancelme.cancel()
+
+try:
+    loop = asyncio.get_event_loop()
+
+    data_future = asyncio.async(wait_for_data())
+    exitfile = asyncio.async(should_exit(data_future))
+
+    g = asyncio.gather(data_future, exitfile, loop=loop)
+    data_future.add_done_callback(functools.partial(future_done, g))
+    exitfile.add_done_callback(functools.partial(future_done, g))
+
+    loop.run_until_complete(g)
+except CancelledError:
+    pass
 
 # mop up terminal response
 while True:
@@ -405,15 +447,20 @@ while True:
     time.sleep(0.05)
 s.close()
 
-with open(path_exit) as f:
-    rc = int(f.read().strip())
+try:
+    with open(path_exit) as f:
+        rc = int(f.read().strip())
+except FileNotFoundError:
+    # this will happen in the case of a kernel panic (wait_for_data), which is
+    # a failure
+    rc = 1
 
 t_stdin.join()
 t_stdout.join()
 t_stderr.join()
 # code 255 means that the auxverb itself failed, so translate
 sys.exit(rc == 255 and 253 or rc)
-''' % {'py': sys.executable, 'tty': os.path.join(workdir, 'ttyS1'), 'dir': shared_dir})
+''' % {'py': sys.executable, 'console': os.path.join(workdir, 'ttyS0'), 'tty': os.path.join(workdir, 'ttyS1'), 'dir': shared_dir})
 
     os.chmod(auxverb, 0o755)
 
-- 
2.10.2

Attachment: signature.asc
Description: PGP signature

Reply via email to