On Tue, Feb 28, 2012 at 12:39:18PM +0500, Timur Irmatov wrote:
> Hi,
> 
> I can also add that same thing (apt hanging as zombie and run-parts not
> collecting its status) happened during mdadm upgrade. After restarting
> mdadm by hand, apt and run-parts finish as they should.

Thanks for your bug report, and thanks to Santiago Garcia for the
detailed analysis in the previous mail too!

Could you please try the attached patch (you can ignore the diff in
the test_*.py file), which hopefully fixes the leaked file descriptors
(fds) and the subsequent issues? This probably needs to be pushed up to
python-apt/libapt too.

I did some light testing with it and it seems to be fine here, but
please be careful and let me know how it goes. Review of the patch
itself is of course welcome too!

Thanks!
 Michael
=== modified file 'debian/changelog'
--- debian/changelog	2012-01-02 13:14:02 +0000
+++ debian/changelog	2012-02-28 10:48:18 +0000
@@ -5,6 +5,8 @@
   * test improvements
   * fix mispelled "Unattended-Upgrade::MinimalSteps" (and add compat
     mode)
+  * unattended-upgrade:
+    - cleanup FDs to hopefully fix zombies (closes: #646620)
 
  -- Michael Vogt <michael.v...@ubuntu.com>  Mon, 02 Jan 2012 14:00:03 +0100
 

=== modified file 'test/test_against_real_archive.py'
--- test/test_against_real_archive.py	2011-11-18 10:46:15 +0000
+++ test/test_against_real_archive.py	2012-02-28 10:48:18 +0000
@@ -39,10 +39,12 @@
         logfile = os.path.join(logdir, "unattended-upgrades.log")
         apt_pkg.config.set("APT::UnattendedUpgrades::LogDir", logdir)
         unattended_upgrade.DISTRO_CODENAME = "lucid"
-        unattended_upgrade.main(options, os.path.abspath("./aptroot"))
+        res = unattended_upgrade.main(options, os.path.abspath("./aptroot"))
         # check if the log file exists
         self.assertTrue(os.path.exists(logfile))
         log = open(logfile).read()
+        # check that stuff worked
+        self.assertFalse(" ERROR " in log)
         # check if we actually have the expected ugprade in it
         self.assertTrue(
             re.search("INFO Packages that are upgraded:.*awstats", log))

=== modified file 'unattended-upgrade'
--- unattended-upgrade	2012-01-02 13:14:02 +0000
+++ unattended-upgrade	2012-02-28 10:48:18 +0000
@@ -27,6 +27,7 @@
 import ConfigParser
 import copy
 import datetime
+import fcntl
 import re
 import os
 import string
@@ -106,6 +107,45 @@
         f.write(_("Progress: %s %% (%s)") % (percent, pkg))
         f.close()
 
+    def _fixup_fds(self):
+        required_fds = [ 0, 1, 2, # stdin, stdout, stderr
+                         self.writefd,
+                         self.write_stream.fileno(),
+                         self.statusfd,
+                         self.status_stream.fileno()
+                       ]
+        # ensure that our required fds close on exec
+        for fd in required_fds[3:]:
+            old_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+            fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC)
+        # close all fds
+        proc_fd = "/proc/self/fd"
+        if os.path.exists(proc_fd):
+            error_count = 0
+            for fdname in os.listdir(proc_fd):
+                try:
+                    fd = int(fdname)
+                except Exception as e:
+                    print "ERROR: can not get fd for '%s'" % fdname
+                if fd in required_fds:
+                    continue
+                try:
+                    os.close(fd)
+                    #print "closed: ", fd
+                except OSError as e:
+                    # there will be one fd that can not be closed
+                    # as its the fd from pythons internal diropen()
+                    # so its ok to ignore one close error
+                    error_count += 1
+                    if error_count > 1:
+                        print "ERROR: os.close(%s): %s" % (fd, e)
+
+    def fork(self):
+        pid = os.fork()
+        if pid == 0:
+            self._fixup_fds()
+        return pid
+
 
 class Unlocked:
     """ context manager for unlocking the apt lock while cache.commit()

Reply via email to