* import-gnulib.config (gnulib_version): Update to more recent version of gnulib. Import the cloexec module too. * lib/fdleak.c: New file; a module for detecting file descriptor leaks. Also define an open_cloexec() function. * lib/fdleak.h: Declare the global functions defined in fdleak.c. * lib/Makefile.am (libfind_a_SOURCES): Add fdleak.c. * find/find.c (main): Call remember_non_cloexec_fds in order to detect file descriptor leaks. (main): set FD_CLOEXEC on starting_desc. (safely_chdir_lstat): Likewise for dotfd. (safely_chdir_nofollow): Likewise for fd. * find/ftsfind.c (inside_dir): set FD_CLOEXEC on curr_fd. (main): Call remember_non_cloexec_fds in order to detect file descriptor leaks. (main): set FD_CLOEXEC on starting_desc. * find/pred.c (launch): before we exec the child, call complain_about_leaky_fds in order to complain about non-FD_CLOEXEC file descriptors that weren't open when the program was run. * find/sharefile.c (sharefile_fopen): Set FD_CLOEXEC on the files we open. * gnulib-local/lib/save-cwd.c.diff: Patch gnulib to set FD_CLOEXEC on the file descriptors opened by save_cwd. * gnulib-local/modules/save-cwd.diff: Hence the save-cwd module depends on the cloexec module. * import-gnulib.sh (run_gnulib_tool): Pass the --local-dir option in order to apply these patches.
Signed-off-by: James Youngman <[email protected]> --- ChangeLog | 31 ++++ NEWS | 2 + find/find.c | 19 ++- find/ftsfind.c | 37 ++--- find/pred.c | 6 + find/sharefile.c | 2 + find/util.c | 3 + gnulib-local/lib/save-cwd.c.diff | 21 +++ gnulib-local/modules/save-cwd.diff | 15 ++ import-gnulib.config | 3 +- import-gnulib.sh | 2 +- lib/Makefile.am | 2 +- lib/dircallback.c | 2 +- lib/fdleak.c | 346 ++++++++++++++++++++++++++++++++++++ lib/fdleak.h | 23 +++ 15 files changed, 479 insertions(+), 35 deletions(-) create mode 100644 gnulib-local/lib/save-cwd.c.diff create mode 100644 gnulib-local/modules/save-cwd.diff create mode 100644 lib/fdleak.c create mode 100644 lib/fdleak.h diff --git a/ChangeLog b/ChangeLog index 03fffbc..8409fe4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +2010-03-29 James Youngman <[email protected]> + + Fix file descriptor leaks. This fixes Savannah bug bug #27375. + * import-gnulib.config (gnulib_version): Update to more recent + version of gnulib. Import the cloexec module too. + * lib/fdleak.c: New file; a module for detecting file descriptor + leaks. Also define an open_cloexec() function. + * lib/fdleak.h: Declare the global functions defined in fdleak.c. + * lib/Makefile.am (libfind_a_SOURCES): Add fdleak.c. + * find/find.c (main): Call remember_non_cloexec_fds in order to + detect file descriptor leaks. + (main): set FD_CLOEXEC on starting_desc. + (safely_chdir_lstat): Likewise for dotfd. + (safely_chdir_nofollow): Likewise for fd. + * find/ftsfind.c (inside_dir): set FD_CLOEXEC on curr_fd. + (main): Call remember_non_cloexec_fds in order to detect file + descriptor leaks. + (main): set FD_CLOEXEC on starting_desc. + * find/pred.c (launch): before we exec the child, call + complain_about_leaky_fds in order to complain about non-FD_CLOEXEC + file descriptors that weren't open when the program was run. + * find/sharefile.c (sharefile_fopen): Set FD_CLOEXEC on the files + we open. + * gnulib-local/lib/save-cwd.c.diff: Patch gnulib to set FD_CLOEXEC + on the file descriptors opened by save_cwd. + * gnulib-local/modules/save-cwd.diff: Hence the save-cwd module + depends on the cloexec module. + * import-gnulib.sh (run_gnulib_tool): Pass the --local-dir option + in order to apply these patches. + * NEWS: Mention this change. + 2010-02-25 James Youngman <[email protected]> Explain the problems with "-name a/b" and "-path foo/". diff --git a/NEWS b/NEWS index 698d2df..29b78f9 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout) #27846: Assertion failure in xargs.c on AIX. +#27375: Open file descriptors leak into child processes. + #27017: find -D opt / -fstype ext3 -print , -quit coredumps #27328: segfault if the initial exec for "find -exec" fails. diff --git a/find/find.c b/find/find.c index 0623680..d07bcb7 100644 --- a/find/find.c +++ b/find/find.c @@ -51,6 +51,7 @@ #include "quotearg.h" #include "xgetcwd.h" #include "error.h" +#include "fdleak.h" #ifdef HAVE_LOCALE_H #include <locale.h> @@ -130,6 +131,8 @@ main (int argc, char **argv) program_name = argv[0]; state.exit_status = 0; + remember_non_cloexec_fds (); + state.shared_files = sharefile_init("w"); if (NULL == state.shared_files) { @@ -188,11 +191,11 @@ main (int argc, char **argv) } - starting_desc = open (".", O_RDONLY + starting_desc = open_cloexec (".", O_RDONLY #if defined O_LARGEFILE - |O_LARGEFILE + |O_LARGEFILE #endif - ); + ); if (0 <= starting_desc && fchdir (starting_desc) != 0) { close (starting_desc); @@ -205,6 +208,7 @@ main (int argc, char **argv) if (! starting_dir) error (1, errno, _("cannot get current directory")); } + set_stat_placeholders(&starting_stat_buf); if ((*options.xstat) (".", &starting_stat_buf) != 0) error (1, errno, _("cannot stat current directory")); @@ -583,11 +587,11 @@ safely_chdir_lstat(const char *dest, saved_errno = errno = 0; - dotfd = open(".", O_RDONLY + dotfd = open_cloexec (".", O_RDONLY #if defined O_LARGEFILE - |O_LARGEFILE + |O_LARGEFILE #endif - ); + ); /* We jump back to here if wd_sanity_check() * recoverably triggers an alert. @@ -825,6 +829,9 @@ safely_chdir_nofollow(const char *dest, #if defined O_LARGEFILE |O_LARGEFILE #endif +#if defined O_CLOEXEC + |O_CLOEXEC +#endif |extraflags); if (fd < 0) { diff --git a/find/ftsfind.c b/find/ftsfind.c index eb0c8c1..328d03c 100644 --- a/find/ftsfind.c +++ b/find/ftsfind.c @@ -53,6 +53,8 @@ #include "xgetcwd.h" #include "error.h" #include "dircallback.h" +#include "cloexec.h" +#include "fdleak.h" #ifdef HAVE_LOCALE_H #include <locale.h> @@ -74,21 +76,6 @@ #endif -static void set_close_on_exec(int fd) -{ -#if defined F_GETFD && defined FD_CLOEXEC - int flags; - flags = fcntl(fd, F_GETFD); - if (flags >= 0) - { - flags |= FD_CLOEXEC; - fcntl(fd, F_SETFD, flags); - } -#endif -} - - - /* FTS_TIGHT_CYCLE_CHECK tries to work around Savannah bug #17877 * (but actually using it doesn't fix the bug). */ @@ -151,8 +138,7 @@ static void inside_dir(int dir_fd) } else if (dir_fd >= 0) { - curr_fd = dup(dir_fd); - set_close_on_exec(curr_fd); + curr_fd = dup_cloexec (dir_fd); } else { @@ -586,13 +572,12 @@ consider_visiting(FTS *p, FTSENT *ent) static void -find(char *arg) +find (char *arg) { char * arglist[2]; FTS *p; FTSENT *ent; - state.starting_path_length = strlen(arg); inside_dir(AT_FDCWD); @@ -617,7 +602,7 @@ find(char *arg) if (options.stay_on_filesystem) ftsoptions |= FTS_XDEV; - p = fts_open(arglist, ftsoptions, NULL); + p = fts_open (arglist, ftsoptions, NULL); if (NULL == p) { error (0, errno, _("cannot search %s"), @@ -647,7 +632,7 @@ process_all_startpoints(int argc, char *argv[]) for (i = 0; i < argc && !looks_like_expression(argv[i], true); i++) { state.starting_path_length = strlen(argv[i]); /* TODO: is this redundant? */ - find(argv[i]); + find (argv[i]); } if (i == 0) @@ -677,6 +662,9 @@ main (int argc, char **argv) state.execdirs_outstanding = false; state.cwd_dir_fd = AT_FDCWD; + remember_non_cloexec_fds (); + + state.shared_files = sharefile_init("w"); if (NULL == state.shared_files) { @@ -732,11 +720,11 @@ main (int argc, char **argv) } - starting_desc = open (".", O_RDONLY + starting_desc = open_cloexec (".", O_RDONLY #if defined O_LARGEFILE - |O_LARGEFILE + |O_LARGEFILE #endif - ); + ); if (0 <= starting_desc && fchdir (starting_desc) != 0) { close (starting_desc); @@ -749,7 +737,6 @@ main (int argc, char **argv) error (1, errno, _("cannot get current directory")); } - process_all_startpoints(argc-end_of_leading_options, argv+end_of_leading_options); /* If "-exec ... {} +" has been used, there may be some diff --git a/find/pred.c b/find/pred.c index 087d7e2..1511751 100644 --- a/find/pred.c +++ b/find/pred.c @@ -46,6 +46,7 @@ #include "dircallback.h" #include "error.h" #include "verify.h" +#include "fdleak.h" #if ENABLE_NLS # include <libintl.h> @@ -1988,6 +1989,11 @@ launch (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv) { _exit(1); } + else + { + complain_about_leaky_fds (); + } + if (bc_args_exceed_testing_limit (argv)) errno = E2BIG; else diff --git a/find/sharefile.c b/find/sharefile.c index 8b48046..fa329d4 100644 --- a/find/sharefile.c +++ b/find/sharefile.c @@ -28,6 +28,7 @@ #include "stdio-safer.h" #include "hash.h" #include "sharefile.h" +#include "cloexec.h" #include "defs.h" @@ -160,6 +161,7 @@ sharefile_fopen (sharefile_handle h, const char *filename) const int fd = fileno (new_entry->fp); assert (fd >= 0); + set_cloexec_flag (fd, true); if (fstat (fd, &st) < 0) { entry_free (new_entry); diff --git a/find/util.c b/find/util.c index 72220a6..62caeec 100644 --- a/find/util.c +++ b/find/util.c @@ -465,6 +465,9 @@ cleanup (void) if (eval_tree) traverse_tree(eval_tree, undangle_file_pointers); + complain_about_leaky_fds (); + forget_non_cloexec_fds (); + if (fflush (stdout) == EOF) nonfatal_file_error ("standard output"); } diff --git a/gnulib-local/lib/save-cwd.c.diff b/gnulib-local/lib/save-cwd.c.diff new file mode 100644 index 0000000..7cc6ffe --- /dev/null +++ b/gnulib-local/lib/save-cwd.c.diff @@ -0,0 +1,21 @@ +diff --git a/lib/save-cwd.c b/lib/save-cwd.c +index 7394d50..cf43a35 100644 +--- a/lib/save-cwd.c ++++ b/lib/save-cwd.c +@@ -31,6 +31,7 @@ + #include "chdir-long.h" + #include "unistd--.h" + #include "xgetcwd.h" ++#include "cloexec.h" + + #if GNULIB_FCNTL_SAFER + # include "fcntl--.h" +@@ -84,6 +85,7 @@ save_cwd (struct saved_cwd *cwd) + return cwd->name ? 0 : -1; + } + ++ set_cloexec_flag (cwd->desc, true); + return 0; + } + + diff --git a/gnulib-local/modules/save-cwd.diff b/gnulib-local/modules/save-cwd.diff new file mode 100644 index 0000000..f4d8334 --- /dev/null +++ b/gnulib-local/modules/save-cwd.diff @@ -0,0 +1,15 @@ +diff --git a/modules/save-cwd b/modules/save-cwd +index 46a1276..aab5e5e 100644 +--- a/modules/save-cwd ++++ b/modules/save-cwd +@@ -8,0 +8,0 @@ m4/save-cwd.m4 + + Depends-on: + chdir-long ++cloexec + stdbool + unistd-safer + xgetcwd +-- +1.5.6.5 + diff --git a/import-gnulib.config b/import-gnulib.config index 686a645..cb95383 100644 --- a/import-gnulib.config +++ b/import-gnulib.config @@ -1,7 +1,7 @@ # findutils gnulib.config -*- sh -*- # What version of gnulib to use? -gnulib_version="fa073b2cb2db320c5004cde6f473cc8b5fdbd7b3" +gnulib_version="0771f626ab33a517372f630331d1887252521bcf" destdir="gnulib" # Random extra gnulib files needed for findutils. @@ -28,6 +28,7 @@ argmatch assert c-strstr canonicalize +cloexec closein closeout dirname diff --git a/import-gnulib.sh b/import-gnulib.sh index 2200714..1786a75 100755 --- a/import-gnulib.sh +++ b/import-gnulib.sh @@ -131,7 +131,7 @@ run_gnulib_tool() { fi set -x - if "$tool" --import --symlink --with-tests --dir=. --lib=libgnulib --source-base=gnulib/lib --m4-base=gnulib/m4 $modules + if "$tool" --import --symlink --with-tests --dir=. --lib=libgnulib --source-base=gnulib/lib --m4-base=gnulib/m4 --local-dir=gnulib-local $modules then set +x else diff --git a/lib/Makefile.am b/lib/Makefile.am index 879f2ff..ce9f9e8 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -36,7 +36,7 @@ LDADD = ../gnulib/lib/libgnulib.a $(LIBINTL) libfind_a_SOURCES += modetype.h nextelem.h printquoted.h listfile.h \ regextype.h dircallback.h libfind_a_SOURCES += listfile.c nextelem.c extendbuf.c buildcmd.c savedirinfo.c \ - forcefindlib.c qmark.c printquoted.c regextype.c dircallback.c + forcefindlib.c qmark.c printquoted.c regextype.c dircallback.c fdleak.c EXTRA_DIST += waitpid.c forcefindlib.c TESTS_ENVIRONMENT = REGEXPROPS=regexprops$(EXEEXT) diff --git a/lib/dircallback.c b/lib/dircallback.c index aa5ecc5..b7bca0a 100644 --- a/lib/dircallback.c +++ b/lib/dircallback.c @@ -71,7 +71,7 @@ run_in_dir (int dir_fd, int (*callback)(void*), void *usercontext) saved_errno = errno; free_cwd (&saved_cwd); errno = saved_errno; - return -1; + return -1; } err = (*callback)(usercontext); diff --git a/lib/fdleak.c b/lib/fdleak.c new file mode 100644 index 0000000..d211310 --- /dev/null +++ b/lib/fdleak.c @@ -0,0 +1,346 @@ +/* fdleak.c -- detect file descriptor leaks + Copyright (C) 2010, Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> +#include <unistd.h> +#include <poll.h> +#include <fcntl.h> +#include <sys/resource.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "dirent-safer.h" +#include "extendbuf.h" +#include "fdleak.h" +#include "error.h" + +#if ENABLE_NLS +# include <libintl.h> +# define _(Text) gettext (Text) +#else +# define _(Text) Text +#endif +#ifdef gettext_noop +# define N_(String) gettext_noop (String) +#else +/* See locate.c for explanation as to why not use (String) */ +# define N_(String) String +#endif + +/* In order to detect FD leaks, we take a snapshot of the open + * file descriptors which are not FD_CLOEXEC when the program starts. + * When the program exits, we discover if there are any new + * file descriptors which aren't FD_CLOEXEC. + */ +static int *non_cloexec_fds; +static size_t num_cloexec_fds; + + + +/* Determine the value of the largest open fd, on systems that + * offer /proc/self/fd. */ +static int +get_proc_max_fd () +{ + const char *path = "/proc/self/fd"; + int maxfd = -1; + /* We don't use readdir, because we cannot trust pathconf + * to tell us the maximum possible length of a path in + * a given directory (the manpage for readdir_r claims this + * is the approved method, but the manpage for pathconf indicates + * that _PC_NAME_MAX is not an upper limit). */ + DIR *dir = opendir_safer (path); + if (dir) + { + int good = 0; + struct dirent *dent; + + while ((dent=readdir (dir)) != NULL) + { + int fd = -1; + + if (1 == sscanf (dent->d_name, "%d", &fd)) + { + good = 1; + if (fd > maxfd) + maxfd = fd; + } + } + closedir (dir); + if (good) + return maxfd; + } + return -1; +} + + + +/* Estimate the value of the largest possible file descriptor */ +static int +get_max_fd (void) +{ + struct rlimit fd_limit; + long open_max; + + open_max = get_proc_max_fd (); + if (open_max >= 0) + return open_max; + + open_max = sysconf (_SC_OPEN_MAX); + if (open_max == -1) + open_max = _POSIX_OPEN_MAX; /* underestimate */ + + /* There are really only two cases here for the return value, + but we keep the conditions separate because a different thing is + going on in each case. + */ + if (0 == getrlimit (RLIMIT_NOFILE, &fd_limit)) + { + if (fd_limit.rlim_cur == RLIM_SAVED_MAX) + return open_max; + else if (fd_limit.rlim_cur == RLIM_SAVED_CUR) + return open_max; + else if (fd_limit.rlim_cur == RLIM_INFINITY) + return open_max; + else + return (int) fd_limit.rlim_cur; + } + else + { + /* cannot determine the limit's value */ + return open_max; + } +} + + +static int +visit_open_fds(int fd_min, int fd_max, + int (*callback)(int, void*), void *cb_context) +{ + enum { MAX_POLL = 64 }; + struct pollfd pf[MAX_POLL]; + int rv = 0; + + while (fd_min < fd_max) + { + int i; + int limit = fd_max - fd_min; + if (limit > MAX_POLL) + limit = MAX_POLL; + + for (i=0; i<limit; i++) + { + pf[i].events = POLLIN|POLLOUT; + pf[i].revents = 0; + pf[i].fd = fd_min + i; + } + rv = poll(pf, limit, 0); + if (-1 == rv) + { + return -1; + } + else + { + int i; + for (i=0; i<limit; i++) + { + if (pf[i].revents != POLLNVAL) + { + if (0 != (rv = callback (pf[i].fd, cb_context))) + return rv; + } + } + } + fd_min += limit; + } + return 0; +} + +static int +fd_is_cloexec (int fd) +{ + const int flags = fcntl (fd, F_GETFD); + return flags & FD_CLOEXEC; +} + + +/* Faking closures in C is a bit of a pain. */ +struct remember_fd_context +{ + int *buf; + size_t used; + size_t allocated; +}; + + +/* Record FD is it's not FD_CLOEXEC. */ +static int +remember_fd_if_non_cloexec (int fd, void *context) +{ + if (fd_is_cloexec (fd)) + { + return 0; + } + else + { + struct remember_fd_context * const p = context; + void *newbuf = extendbuf (p->buf, + sizeof(p->buf[0])*(p->used+1), + &(p->allocated)); + if (newbuf) + { + p->buf = newbuf; + p->buf[p->used] = fd; + ++p->used; + return 0; + } + else + { + return -1; + } + } +} + +void +remember_non_cloexec_fds (void) +{ + int max_fd = get_max_fd (); + struct remember_fd_context cb_data; + cb_data.buf = NULL; + cb_data.used = cb_data.allocated = 0; + + if (max_fd < INT_MAX) + ++max_fd; + visit_open_fds (3, max_fd, remember_fd_if_non_cloexec, &cb_data); + + non_cloexec_fds = cb_data.buf; + num_cloexec_fds = cb_data.used; +} + + +struct fd_leak_context +{ + const int *prev_buf; + size_t used; + size_t lookup_pos; + int leaked_fd; +}; + +/* FD is open and not close-on-exec. + * If it's not in the list of non-cloexec file descriptors we saw before, it's a leak. + */ +static int +find_first_leak_callback (int fd, void *context) +{ + if (!fd_is_cloexec (fd)) + { + struct fd_leak_context *p = context; + while (p->lookup_pos < p->used) + { + if (p->prev_buf[p->lookup_pos] < fd) + { + ++p->lookup_pos; + } + else if (p->prev_buf[p->lookup_pos] == fd) + { + /* FD was open and still is, it's not a leak. */ + return 0; + } + else + { + break; + } + } + /* We come here if p->prev_buf[p->lookup_pos] > fd, or + if we ran out of items in the lookup table. + Either way, this is a leak. */ + p->leaked_fd = fd; + return -1; /* No more callbacks needed. */ + } + return 0; +} + + +static int +find_first_leaked_fd (const int* prev_non_cloexec_fds, size_t n) +{ + struct fd_leak_context context; + int max_fd = get_max_fd (); + + if (max_fd < INT_MAX) + ++max_fd; + context.prev_buf = prev_non_cloexec_fds; + context.used = n; + context.lookup_pos = 0; + context.leaked_fd = -1; + visit_open_fds (3, max_fd, find_first_leak_callback, &context); + return context.leaked_fd; +} + +int +open_cloexec (const char *path, int flags) +{ + int fd; + + fd = open (path, flags +#if defined O_CLOEXEC + |O_CLOEXEC +#endif + ); + if (fd < 0) + return fd; + +#if !defined O_CLOEXEC + make_fd_cloexec (fd); +#endif + return fd; +} + +void +forget_non_cloexec_fds (void) +{ + free (non_cloexec_fds); + non_cloexec_fds = NULL; + num_cloexec_fds = 0; +} + + +void +complain_about_leaky_fds (void) +{ + const int leaking_fd = find_first_leaked_fd (non_cloexec_fds, num_cloexec_fds); + + if (leaking_fd >= 0) + { + error (0, 0, + _("File descriptor %d will leak; please report this as a bug, " + "remembering to include a detailed description of the simplest " + "way to reproduce this problem."), + leaking_fd); + if (0) + { + char * const args[] = {"/bin/ls", "-l", "/proc/self/fd", + (char*)NULL }; + execv("/bin/ls", args); + perror("exec"); + } + assert (0); + } +} + diff --git a/lib/fdleak.h b/lib/fdleak.h new file mode 100644 index 0000000..47b01cb --- /dev/null +++ b/lib/fdleak.h @@ -0,0 +1,23 @@ +/* fdleak.h -- detect file descriptor leaks + Copyright (C) 2010, Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +void remember_non_cloexec_fds (void); +void forget_non_cloexec_fds (void); +void complain_about_leaky_fds (void); + +int open_cloexec(const char *path, int flags); + -- 1.5.6.5 _______________________________________________ Findutils-patches mailing list [email protected] http://lists.gnu.org/mailman/listinfo/findutils-patches
