Package: debian-goodies Version: 0.64 lsof takes a long time to return on systems with a large number of open files, causing checkrestart to take several minutes to run. lsof also uses a lot of CPU resources on such systems.
# time checkrestart -p Found 1 processes using old versions of upgraded files (1 distinct program) (1 distinct packages) These processes do not seem to have an associated init script to restart them: ruby1.9.1: 8408 /usr/bin/ruby1.9.1 real 1m3.366s user 0m33.206s sys 0m31.298s lsof reads `/proc/locks` at startup. This file can take a long time to read when there is a large number of open files on the system. lsof had still not completed after 5 minutes on a system that has the following sysctl variables set to allow a large number of open files: fs.nr_open=2097152 fs.aio-max-nr=262144 Reading `/proc/locks` does return data but takes a long time to complete (5+ minutes on this machine). The attached patch is a proof of concept that removes checkrestart's dependency on lsof by using /proc/<pid>/fd to find open files natively in Python and pmap to find mmap'd files in processes. This significantly reduces the processing time and resource requirements for checkrestart. # time /home/luser/checkrestart -p Found 1 processes using old versions of upgraded files (1 distinct program) (1 distinct packages) These processes do not seem to have an associated init script to restart them: ruby1.9.1: 8408 /usr/bin/ruby1.9.1 real 0m0.463s user 0m0.136s sys 0m0.144s I've also added an option to ignore specific PIDs during the check. Our MySQL servers have a ton of open files, and we can reduce processing time further by ignoring the mysql daemon completely: /home/luser/checkrestart -p -e`cat /var/run/mysqld/mysqld.pid` Based on this pull request: https://github.com/tpo/debian-goodies/pull/2
From 4657964213ac113d62a269841bd0786e2891f38f Mon Sep 17 00:00:00 2001 From: Ian Bissett <ian.biss...@bigcommerce.com> Date: Fri, 16 Jan 2015 15:17:22 +1100 Subject: [PATCH] Replace lsof check and add option to ignore specific process ids --- checkrestart | 78 +++++++++++++++++++++++++++++----------------------------- checkrestart.1 | 16 +++++++++++- debian/control | 1 - 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/checkrestart b/checkrestart index 7f27b03..92a4a9f 100755 --- a/checkrestart +++ b/checkrestart @@ -60,7 +60,7 @@ def find_cmd(cmd): return 1 def usage(): - sys.stderr.write('usage: checkrestart [-vhpa] [-bblacklist] [-iignore]\n') + sys.stderr.write('usage: checkrestart [-vhpa] [-bblacklist] [-iignore] [-eexcludepid]\n') def main(): global lc_all_c_env, file_query_check @@ -72,7 +72,8 @@ def main(): file_query_check = {} blacklistFiles = [] blacklist = [] - ignorelist = [ 'screen' ] + ignorelist = [ 'util-linux', 'screen' ] + excludepidlist = [] # Process options try: @@ -102,6 +103,8 @@ def main(): elif o in ("-a", "--all"): allFiles = True onlyPackageFiles = False + elif o in ("-e", "--excludepid"): + excludepidlist.append(a) elif o in ("-b", "--blacklist"): blacklistFiles.append(a) onlyPackageFiles = False @@ -120,18 +123,7 @@ def main(): # Start checking - if find_cmd('lsof') == 1: - sys.stderr.write('ERROR: This program needs lsof in order to run.\n') - sys.stderr.write('Please install the lsof package in your system.\n') - sys.exit(1) -# Check if we have lsof, if not, use psdel -# if find_cmd('lsof'): -# toRestart = lsofcheck() -# else: -# TODO - This does not work yet: -# toRestart = psdelcheck() - - toRestart = lsofcheck(blacklist = blacklist) + toRestart = deletedfilescheck(blacklist = blacklist, excludepidlist = excludepidlist) print "Found %d processes using old versions of upgraded files" % len(toRestart) @@ -277,33 +269,41 @@ def main(): for process in package.processes: print "\t%s\t%s" % (process.pid,process.program) 
-def lsofcheck(blacklist = None): +def deletedfilescheck(blacklist = None, excludepidlist = None): processes = {} - for line in os.popen('lsof +XL -F nf').readlines(): - field, data = line[0], line[1:-1] - - if field == 'p': - process = processes.setdefault(data,Process(int(data))) - elif field == 'k': - process.links.append(data) - elif field == 'n': - # Remove the previous entry to check if this is something we should use - if data.startswith('/SYSV'): - # If we find SYSV we discard the previous descriptor - last = process.descriptors.pop() - elif data.startswith('/'): - last = process.descriptors.pop() - # Add it to the list of deleted files if the previous descriptor - # was DEL or lsof marks it as deleted - if re.compile("DEL").search(last) or re.compile("deleted").search(data) or re.compile("\(path inode=[0-9]+\)$").search(data): - process.files.append(data) - else: - # We discard the previous descriptors and drop it - last = process.descriptors.pop() - elif field == 'f': - # Save the descriptor for later comparison - process.descriptors.append(data) + # Get a list of running processes + pids = [pid for pid in os.listdir('/proc') if pid.isdigit()] + + for pid in pids: + if pid in excludepidlist: + continue + + # Get the list of open files for this process from /proc + # We can ignore failures over this block as links will + # disappear as we run them + foundfiles = [] + try: + for fd in os.listdir('/proc/' + pid + '/fd'): + if os.path.islink('/proc/' + pid + '/fd/' + fd): + fname = os.readlink('/proc/' + pid + '/fd/' + fd) + if re.compile("\s\(deleted\)$").search(fname): + foundfiles.append(fname) + except: + continue + + # Get the list of memory mapped files using system pmap + for output in os.popen('pmap ' + pid).readlines(): + data = re.split('\s+', output.strip('\n'), 3) + if len(data) == 4: + f = data[3] + if re.compile("\s\(deleted\)$").search(f): + foundfiles.append(f) + + if len(foundfiles) > 1: + process = processes.setdefault(pid,Process(int(pid))) 
+ # print pid + ': ' + ', '.join(foundfiles) + process.files = foundfiles toRestart = filter(lambda process: process.needsRestart(blacklist), processes.values()) diff --git a/checkrestart.1 b/checkrestart.1 index b7d1df8..7b69f1b 100644 --- a/checkrestart.1 +++ b/checkrestart.1 @@ -6,7 +6,7 @@ .SH NAME checkrestart \- check which processes need to be restarted after an upgrade .SH SYNOPSIS -.B checkrestart [ -hvpa ] [ -b blacklist_file ] [ -i package_name ] +.B checkrestart [ -hvpa ] [ -b blacklist_file ] [ -i package_name ] [ -e pid ] .SH DESCRIPTION The .B checkrestart @@ -67,6 +67,20 @@ Any files matching the patterns will be ignored. .BI -i\ name, --ignore=name Ignore services that are associated to the package name provided in .I name. +.TP + +.BI -e\ pid, --excludepid=pid +Exclude processes running with +.I pid +when looking for open files. This can speed up checkrestart on systems with a +large number of open files +.TP + +.TP +.BI -e\ pid, --excludepid=pid +Exclude process with +.I pid +when searching for open files .SH EXIT STATUS diff --git a/debian/control b/debian/control index 60ff7b7..862f494 100644 --- a/debian/control +++ b/debian/control @@ -17,7 +17,6 @@ Depends: curl, python (>= 2.4), whiptail | dialog, ${misc:Depends} -Recommends: lsof Suggests: popularity-contest, xdg-utils, zenity