On Tue, Feb 28, 2012 at 12:39:18PM +0500, Timur Irmatov wrote: > Hi, > > I can also add that same thing (apt hanging as zombie and run-parts not > collecting its status) happened during mdadm upgrade. After restarting > mdadm by hand, apt and run-parts finish as they should.
Thanks for your bug report, and thanks to Santiago Garcia for the detailed analysis in the previous mail too! Could you please try the attached patch (you can ignore the diff in the test_*.py file), which hopefully fixes the leaking of the fds and the subsequent issues? This probably needs to be pushed up to python-apt/libapt too. I did some light testing with it and it seems to be fine here, but please be careful and let me know how it goes. Review of the patch itself is of course welcome too! Thanks! Michael
=== modified file 'debian/changelog' --- debian/changelog 2012-01-02 13:14:02 +0000 +++ debian/changelog 2012-02-28 10:48:18 +0000 @@ -5,6 +5,8 @@ * test improvements * fix mispelled "Unattended-Upgrade::MinimalSteps" (and add compat mode) + * unattended-upgrade: + - cleanup FDs to hopefully fix zombies (closes: #646620) -- Michael Vogt <michael.v...@ubuntu.com> Mon, 02 Jan 2012 14:00:03 +0100 === modified file 'test/test_against_real_archive.py' --- test/test_against_real_archive.py 2011-11-18 10:46:15 +0000 +++ test/test_against_real_archive.py 2012-02-28 10:48:18 +0000 @@ -39,10 +39,12 @@ logfile = os.path.join(logdir, "unattended-upgrades.log") apt_pkg.config.set("APT::UnattendedUpgrades::LogDir", logdir) unattended_upgrade.DISTRO_CODENAME = "lucid" - unattended_upgrade.main(options, os.path.abspath("./aptroot")) + res = unattended_upgrade.main(options, os.path.abspath("./aptroot")) # check if the log file exists self.assertTrue(os.path.exists(logfile)) log = open(logfile).read() + # check that stuff worked + self.assertFalse(" ERROR " in log) # check if we actually have the expected ugprade in it self.assertTrue( re.search("INFO Packages that are upgraded:.*awstats", log)) === modified file 'unattended-upgrade' --- unattended-upgrade 2012-01-02 13:14:02 +0000 +++ unattended-upgrade 2012-02-28 10:48:18 +0000 @@ -27,6 +27,7 @@ import ConfigParser import copy import datetime +import fcntl import re import os import string @@ -106,6 +107,45 @@ f.write(_("Progress: %s %% (%s)") % (percent, pkg)) f.close() + def _fixup_fds(self): + required_fds = [ 0, 1, 2, # stdin, stdout, stderr + self.writefd, + self.write_stream.fileno(), + self.statusfd, + self.status_stream.fileno() + ] + # ensure that our required fds close on exec + for fd in required_fds[3:]: + old_flags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC) + # close all fds + proc_fd = "/proc/self/fd" + if os.path.exists(proc_fd): + error_count = 0 + for fdname in 
os.listdir(proc_fd): + try: + fd = int(fdname) + except Exception as e: + print "ERROR: can not get fd for '%s'" % fdname + if fd in required_fds: + continue + try: + os.close(fd) + #print "closed: ", fd + except OSError as e: + # there will be one fd that can not be closed + # as its the fd from pythons internal diropen() + # so its ok to ignore one close error + error_count += 1 + if error_count > 1: + print "ERROR: os.close(%s): %s" % (fd, e) + + def fork(self): + pid = os.fork() + if pid == 0: + self._fixup_fds() + return pid + class Unlocked: """ context manager for unlocking the apt lock while cache.commit()