Am 02.06.2020 um 21:48 hat John Snow geschrieben: > This is done primarily to avoid the 'bare except' pattern, which > suppresses ALL exceptions and not just ones that we are anticipating to > see. > > Replace this with a pattern that isolates the different kind of shutdown > paradigms and a new fallback shutdown handler that gracefully attempts > one before the other. > > Move all of the post-shutdown code into _post_shutdown, and adjust the > main shutdown() function to ALWAYS call the post_shutdown logic no > matter what kind of error we encountered: An expected one (subprocess > wait timeout) or an unexpected one (Everything else). > > In cases where we encounter an expected error in the graceful shutdown > timeout, we will not re-raise an exception above shutdown(). Otherwise, > after post_shutdown cleanup, we will. > > I anticipate that this WILL lead to additional bug reports filed against > this module, but that is unfortunately somewhat the point: This code > shouldn't be hiding failures that exist elsewhere within the python > code. > > Signed-off-by: John Snow <js...@redhat.com> > --- > python/qemu/machine.py | 93 +++++++++++++++++++++++++++++------------- > 1 file changed, 65 insertions(+), 28 deletions(-) > > diff --git a/python/qemu/machine.py b/python/qemu/machine.py > index 041c615052e..a2abd2c35e3 100644 > --- a/python/qemu/machine.py > +++ b/python/qemu/machine.py > @@ -283,6 +283,8 @@ def _post_launch(self): > self._qmp.accept() > > def _post_shutdown(self): > + self._load_io_log() > + > if self._qemu_log_file is not None: > self._qemu_log_file.close() > self._qemu_log_file = None > @@ -296,6 +298,17 @@ def _post_shutdown(self): > while len(self._remove_files) > 0: > self._remove_if_exists(self._remove_files.pop()) > > + exitcode = self.exitcode() > + if exitcode is not None and exitcode < 0: > + msg = 'qemu received signal %i; command: "%s"' > + if self._qemu_full_args: > + command = ' '.join(self._qemu_full_args) > + else: > + command = '' > + LOG.warning(msg, -int(exitcode), command) > + > + self._launched = False
It would have been a little easier to review if this code motion to _post_shutdown() were a separate patch. It looks mostly independent of the other changes. The big change here is that we now log warnings for exit due to a signal for QEMUMachine.wait(), too. I assume this is okay. > def launch(self): > """ > Launch the VM and make sure we cleanup and expose the > @@ -344,12 +357,53 @@ def wait(self): > self._popen.wait() > if self._qmp: > self._qmp.close() > - self._load_io_log() > self._post_shutdown() > > - def shutdown(self, has_quit=False, hard=False): > + def _hard_shutdown(self) -> None: > """ > - Terminate the VM and clean up > + Kill the VM if it is running. > + """ > + if not self.is_running(): > + return > + > + self._popen.kill() > + self._popen.wait(timeout=60) > + > + def _soft_shutdown(self, has_quit: bool = False, timeout: int = 3) -> > None: > + """ > + Attempt to shutdown the VM gracefully if it is running. > + > + :param has_quit: When True, don't attempt to issue 'quit' QMP command > + :param timeout: Timeout for graceful shutdown. Default 3 seconds. > + """ > + if not self.is_running(): > + return > + > + if self._qmp is not None: > + if not has_quit: > + self._qmp.cmd('quit') > + self._qmp.close() > + > + self._popen.wait(timeout=timeout) > + > + def _do_shutdown(self, has_quit: bool = False, timeout: int = 3) -> None: > + """ > + Attempt to shutdown the VM gracefully; fallback to a hard shutdown. > + > + :param has_quit: When True, don't attempt to issue 'quit' QMP command > + :param timeout: Timeout for graceful shutdown. Default 3 seconds. > + """ > + try: > + self._soft_shutdown(has_quit, timeout) > + except subprocess.TimeoutExpired: > + self._hard_shutdown() > + except: > + self._hard_shutdown() > + raise > + > + def shutdown(self, has_quit: bool = False, hard: bool = False) -> None: > + """ > + Terminate the VM (gracefully if possible) and perform cleanup. > """ > # If we keep the console socket open, we may deadlock waiting > # for QEMU to exit, while QEMU is waiting for the socket to > @@ -358,35 +412,18 @@ def shutdown(self, has_quit=False, hard=False): > self._console_socket.close() > self._console_socket = None > > - if self.is_running(): > + try: > if hard: > - self._popen.kill() > - elif self._qmp: > - try: > - if not has_quit: > - self._qmp.cmd('quit') > - self._qmp.close() > - self._popen.wait(timeout=3) > - except: > - self._popen.kill() > - self._popen.wait() > - > - self._load_io_log() > - self._post_shutdown() > - > - exitcode = self.exitcode() > - if exitcode is not None and exitcode < 0 and \ > - not (exitcode == -9 and hard): The condition in the second line went away without a replacement. This means that previously, intentionally killing a VM with hard=True would not log a warning (which makes sense to me), but now it does. On the other hand, I couldn't find a single user of hard=True or the kill() method in the whole tree. So I guess it doesn't matter anyway? > - msg = 'qemu received signal %i: %s' > - if self._qemu_full_args: > - command = ' '.join(self._qemu_full_args) > + self._hard_shutdown() > else: > - command = '' > - LOG.warning(msg, -int(exitcode), command) > - > - self._launched = False > + self._do_shutdown(has_quit) > + finally: > + self._post_shutdown() > > def kill(self): > + """ > + Terminate the VM forcefully and perform cleanup. > + """ > self.shutdown(hard=True) > > def set_qmp_monitor(self, enabled=True): In the end, while the new behaviour is a bit different in details, it's not unreasonable, and if it does cause any problems, we can still change it back. Reviewed-by: Kevin Wolf <kw...@redhat.com>