2009-11-29 James Youngman <[email protected]> Fix Savannah bug#27328, segfault if the initial exec for "find -exec" fails. * lib/buildcmd.h (struct buildcmd_state): Introduce largest_successful_arg_count and smallest_failed_arg_count in order to avoid an assumption that we understand how the operatingn system interprets ARG_MAX and the assumption that the compile-time ARG_MAX value (or the equivalent from sysconf()) is avtually correct. (struct buildcmd_control): Change the calling convention of the exec callback to allow us to pass the argument list separately. (bc_args_exceed_testing_limit): declare this new funciton. * lib/buildcmd.c (bc_args_complete): New function, NULL-terminates the argv list. We use this instead of passing NULL to bc_push_arg(). (update_limit): New function. Decides how many arguments to pass to the invoked command on the next attempt. (copy_args): Build an argument list containing all the initial arguments plus some of the other args (the number to be used is decided by update_limit). (bc_do_exec): Avoid special-casing the first call to exec. Use update_limit to decide how many arguments to pass and copy_args to build the argument list. The new form of the loop should fix Savannah bug #27328. (bc_push_arg): Drop support for passing NULL as an argument (to terminate the arg list we just pass in a special argument instead). (bc_args_exceed_testing_limit): New function, returns nonzero if the argument list exceeds a testing limit (used for failure injection by tests). (exceeds): New support function, implementing part of bc_args_exceed_testing_limit. (bc_init_state): Initialise largest_successful_arg_count and smallest_failed_arg_count. * find/defs.h (struct exec_val): New member last_child_status. This stores the status of the most recently completed child. * find/pred.c (new_impl_pred_exec): Use last_child_status. (launch): Use the new calling convention for the exec callback. Set last_child_status. Call bc_args_exceed_testing_limit() to do failure injection for unit tests. * find/util.c (do_complete_pending_execdirs): Call bc_do_exec rather than calling launch directly in order to allow for breaking the argument list up if it's too long. * xargs/xargs.c (xargs_do_exec): Update to use the new caling convention for the exec callback.
Signed-off-by: James Youngman <[email protected]> --- ChangeLog | 46 ++++++++ find/defs.h | 4 +- find/pred.c | 59 ++++++----- find/util.c | 5 +- lib/buildcmd.c | 312 ++++++++++++++++++++++++++++++++++++++------------------ lib/buildcmd.h | 7 +- xargs/xargs.c | 22 ++-- 7 files changed, 314 insertions(+), 141 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c88e3d..7be8db2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +2009-11-29 James Youngman <[email protected]> + + Fix Savannah bug#27328, segfault if the initial exec for "find + -exec" fails. + * lib/buildcmd.h (struct buildcmd_state): Introduce + largest_successful_arg_count and smallest_failed_arg_count in + order to avoid an assumption that we understand how the operatingn + system interprets ARG_MAX and the assumption that the compile-time + ARG_MAX value (or the equivalent from sysconf()) is avtually + correct. + (struct buildcmd_control): Change the calling convention of the + exec callback to allow us to pass the argument list separately. + (bc_args_exceed_testing_limit): declare this new funciton. + * lib/buildcmd.c (bc_args_complete): New function, NULL-terminates + the argv list. We use this instead of passing NULL to + bc_push_arg(). + (update_limit): New function. Decides how many arguments to + pass to the invoked command on the next attempt. + (copy_args): Build an argument list containing all the initial + arguments plus some of the other args (the number to be used is + decided by update_limit). + (bc_do_exec): Avoid special-casing the first call to exec. Use + update_limit to decide how many arguments to pass and copy_args to + build the argument list. The new form of the loop should fix + Savannah bug #27328. + (bc_push_arg): Drop support for passing NULL as an argument (to + terminate the arg list we just pass in a special argument instead). + (bc_args_exceed_testing_limit): New function, returns nonzero if + the argument list exceeds a testing limit (used for failure + injection by tests). + (exceeds): New support function, implementing part of + bc_args_exceed_testing_limit. + (bc_init_state): Initialise largest_successful_arg_count and + smallest_failed_arg_count. + * find/defs.h (struct exec_val): New member last_child_status. + This stores the status of the most recently completed child. + * find/pred.c (new_impl_pred_exec): Use last_child_status. + (launch): Use the new calling convention for the exec callback. + Set last_child_status. Call bc_args_exceed_testing_limit() to do + failure injection for unit tests. + * find/util.c (do_complete_pending_execdirs): Call bc_do_exec + rather than calling launch directly in order to allow for breaking + the argument list up if it's too long. + * xargs/xargs.c (xargs_do_exec): Update to use the new caling + convention for the exec callback. + 2009-11-09 Jim Meyering <[email protected]> Adjust two xargs diagnostics. diff --git a/find/defs.h b/find/defs.h index 155927f..f3bf4e2 100644 --- a/find/defs.h +++ b/find/defs.h @@ -198,6 +198,7 @@ struct exec_val boolean use_current_dir; /* If nonzero, don't chdir to start dir */ boolean close_stdin; /* If true, close stdin in the child. */ int dir_fd; /* The directory to do the exec in. */ + int last_child_status; /* Status of the most recent child. */ }; /* The format string for a -printf or -fprintf is chopped into one or @@ -466,8 +467,7 @@ PREDICATEFUNCTION pred_xtype; -int launch PARAMS((struct buildcmd_control *ctl, - struct buildcmd_state *buildstate)); +int launch (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv); char *find_pred_name PARAMS((PRED_FUNC pred_func)); diff --git a/find/pred.c b/find/pred.c index 1b95959..4e59642 100644 --- a/find/pred.c +++ b/find/pred.c @@ -546,8 +546,18 @@ new_impl_pred_exec (int dir_fd, const char *pathname, } /* Actually invoke the command. */ - return execp->ctl.exec_callback(&execp->ctl, - &execp->state); + bc_do_exec (&execp->ctl, &execp->state); + if (WIFEXITED(execp->last_child_status)) + { + if (0 == WEXITSTATUS(execp->last_child_status)) + return true; /* The child succeeded. */ + else + return false; + } + else + { + return false; + } } } @@ -1940,14 +1950,15 @@ prep_child_for_exec (boolean close_stdin, int dir_fd) + + + int -launch (struct buildcmd_control *ctl, - struct buildcmd_state *buildstate) +launch (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv) { - int wait_status; pid_t child_pid; static int first_time = 1; - const struct exec_val *execp = buildstate->usercontext; + struct exec_val *execp = usercontext; if (!execp->use_current_dir) { @@ -1955,10 +1966,6 @@ launch (struct buildcmd_control *ctl, assert (execp->dir_fd == starting_desc); } - - /* Null terminate the arg list. */ - bc_push_arg (ctl, buildstate, (char *) NULL, 0, NULL, 0, false); - /* Make sure output of command doesn't get mixed with find output. */ fflush (stdout); fflush (stderr); @@ -1981,35 +1988,32 @@ launch (struct buildcmd_control *ctl, { _exit(1); } - - execvp (buildstate->cmd_argv[0], buildstate->cmd_argv); + if (bc_args_exceed_testing_limit (argv)) + errno = E2BIG; + else + execvp (argv[0], argv); + /* TODO: use a pipe to pass back the errno value, like xargs does */ error (0, errno, "%s", - safely_quote_err_filename(0, buildstate->cmd_argv[0])); + safely_quote_err_filename(0, argv[0])); _exit (1); } - - /* In parent; set up for next time. */ - bc_clear_args(ctl, buildstate); - - - while (waitpid (child_pid, &wait_status, 0) == (pid_t) -1) + while (waitpid (child_pid, &(execp->last_child_status), 0) == (pid_t) -1) { if (errno != EINTR) { error (0, errno, _("error waiting for %s"), - safely_quote_err_filename(0, buildstate->cmd_argv[0])); + safely_quote_err_filename(0, argv[0])); state.exit_status = 1; return 0; /* FAIL */ } } - if (WIFSIGNALED (wait_status)) + if (WIFSIGNALED (execp->last_child_status)) { error (0, 0, _("%s terminated by signal %d"), - quotearg_n_style(0, options.err_quoting_style, - buildstate->cmd_argv[0]), - WTERMSIG (wait_status)); + quotearg_n_style(0, options.err_quoting_style, argv[0]), + WTERMSIG (execp->last_child_status)); if (execp->multiple) { @@ -2023,7 +2027,7 @@ launch (struct buildcmd_control *ctl, return 1; /* OK */ } - if (0 == WEXITSTATUS (wait_status)) + if (0 == WEXITSTATUS (execp->last_child_status)) { return 1; /* OK */ } @@ -2037,7 +2041,10 @@ launch (struct buildcmd_control *ctl, */ state.exit_status = 1; } - return 0; /* FAIL */ + /* The child failed, but this is the exec callback. We + * don't want to run the child again in this case anwyay. + */ + return 1; /* FAIL (but don't try again) */ } } diff --git a/find/util.c b/find/util.c index 4a0bb9b..585d56e 100644 --- a/find/util.c +++ b/find/util.c @@ -360,7 +360,7 @@ do_complete_pending_execdirs(struct predicate *p, int dir_fd) if (execp->state.todo) { /* There are not-yet-executed arguments. */ - launch (&execp->ctl, &execp->state); + bc_do_exec (&execp->ctl, &execp->state); } } } @@ -408,7 +408,7 @@ complete_pending_execs(struct predicate *p) if (execp->state.todo) { /* There are not-yet-executed arguments. */ - launch (&execp->ctl, &execp->state); + bc_do_exec (&execp->ctl, &execp->state); } } @@ -800,6 +800,7 @@ process_debug_options(char *arg) exit(0); } } + static void process_optimisation_option(const char *arg) diff --git a/lib/buildcmd.c b/lib/buildcmd.c index fd28d2d..3f35ab4 100644 --- a/lib/buildcmd.c +++ b/lib/buildcmd.c @@ -71,14 +71,28 @@ #include <xalloc.h> +#include <errno.h> #include <error.h> #include <openat.h> +#include "xstrtol.h" #include "buildcmd.h" extern char **environ; +static char *special_terminating_arg = "do_not_care"; + + + +/* Add a terminator to the argument list. */ +static void +bc_args_complete (struct buildcmd_control *ctl, + struct buildcmd_state *state) +{ + bc_push_arg (ctl, state, special_terminating_arg, 0, NULL, 0, 0); +} + /* Replace all instances of `replace_pat' in ARG with `linebuf', and add the resulting string to the list of arguments for the command @@ -168,105 +182,152 @@ bc_do_insert (struct buildcmd_control *ctl, initial_args); } -/* get the length of a zero-terminated string array */ -static unsigned int -get_stringv_len(char** sv) -{ - char** p = sv; - - while (*p++); - - return (p - sv - 1); -} - -void -bc_do_exec(struct buildcmd_control *ctl, - struct buildcmd_state *state) +/* Update our best guess as to how many arguments we should pass to the next + * invocation of the command. + */ +static size_t +update_limit (struct buildcmd_control *ctl, + struct buildcmd_state *state, + bool success, + size_t limit) { - int argc_orig; - char** argv_orig; - char** initial_args; - int i, pos, done, argc_current; - - bc_push_arg (ctl, state, - (char *) NULL, 0, - NULL, 0, - false); /* Null terminate the arg list. */ - - /* save original argv data so we can free the memory later */ - argc_orig = state->cmd_argc; - argv_orig = state->cmd_argv; - - if ((ctl->exec_callback)(ctl, state)) - goto fin; + if (success) + { + if (limit > state->largest_successful_arg_count) + state->largest_successful_arg_count = limit; + } + else + { + if (limit < state->smallest_failed_arg_count + || (0 == state->smallest_failed_arg_count)) + state->smallest_failed_arg_count = limit; + } - /* got E2BIG, adjust arguments */ - initial_args = xmalloc(ctl->initial_argc * sizeof(char*)); - for (i=0; i<ctl->initial_argc; ++i) - initial_args[i] = argv_orig[i]; + if (0 == (state->largest_successful_arg_count) + || (state->smallest_failed_arg_count <= state->largest_successful_arg_count)) + { + /* No success yet, or running on a system which has + limits on total argv length, but not arg count. */ + if (success) + { + if (limit < SIZE_MAX) + ++limit; + } + else + { + limit /= 2; + } + } + else /* We can use bisection. */ + { + const size_t shift = (state->smallest_failed_arg_count + - state->largest_successful_arg_count) / 2; + if (success) + { + if (shift) + limit += shift; + else + ++limit; + } + else + { + if (shift) + limit -= shift; + else + --limit; + } + } - state->cmd_argc -= ctl->initial_argc; - state->cmd_argv += ctl->initial_argc; + /* Make sure the returned value is such that progress is + * actually possible. + */ + if (ctl->initial_argc && (limit <= ctl->initial_argc + 1u)) + limit = ctl->initial_argc + 1u; + if (0 == limit) + limit = 1u; + return limit; +} - done = 0; /* number of argv elements we have relayed successfully */ - argc_current = state->cmd_argc; - pos = -1; /* array offset from the right end */ +/* Copy some of the program arguments into an argv list. Copy all the + * initial arguments, plus up to LIMIT additional arguments. + */ +static size_t +copy_args (struct buildcmd_control *ctl, + struct buildcmd_state *state, + char** working_args, size_t limit, size_t done) +{ + size_t dst_pos = 0; + size_t src_pos = 0; - for (;;) + while (src_pos < ctl->initial_argc) { - int r; - int divider = argc_current+pos; - char* swapped_out = state->cmd_argv[divider]; - state->cmd_argv[divider] = NULL; - state->cmd_argc = get_stringv_len (state->cmd_argv); - - if (state->cmd_argc < 1) - error(1, 0, "can't call exec() due to argument size restrictions"); - - /* prepend initial args */ - state->cmd_argv -= ctl->initial_argc; - state->cmd_argc += ctl->initial_argc; - for (i=0; i<ctl->initial_argc; ++i) - state->cmd_argv[i] = initial_args[i]; - - ++state->cmd_argc; /* include trailing NULL */ - r = (ctl->exec_callback)(ctl, state); - state->cmd_argv += ctl->initial_argc; - state->cmd_argc -= ctl->initial_argc; - --state->cmd_argc; /* exclude trailing NULL again */ - - if (r) - { - /* success */ - done += state->cmd_argc; - - /* check whether we have any left */ - if (argc_orig - done > ctl->initial_argc) - { - state->cmd_argv[divider] = swapped_out; - state->cmd_argv += divider; - - argc_current -= state->cmd_argc; - pos = -1; - } - else - goto fin; - } - else - { - /* failure, make smaller */ - state->cmd_argv[divider] = swapped_out; - --pos; - } + working_args[dst_pos++] = state->cmd_argv[src_pos++]; } + src_pos += done; + while (src_pos < state->cmd_argc && dst_pos < limit) + { + working_args[dst_pos++] = state->cmd_argv[src_pos++]; + } + assert (dst_pos >= ctl->initial_argc); + working_args[dst_pos] = NULL; + return dst_pos; +} + -fin: - state->cmd_argv = argv_orig; +/* Execute the program with the currently-built list of arguments. */ +void +bc_do_exec(struct buildcmd_control *ctl, + struct buildcmd_state *state) +{ + char** working_args; + size_t limit, done; + + /* Terminate the args. */ + bc_args_complete (ctl, state); + /* Verify that the argument list is terminated. */ + assert (state->cmd_argc > 0); + assert (state->cmd_argv[state->cmd_argc-1] == NULL); + + working_args = xmalloc((1+state->cmd_argc) * sizeof(char*)); + done = 0; + limit = state->cmd_argc; + + do + { + const size_t dst_pos = copy_args (ctl, state, working_args, + limit, done); + if (ctl->exec_callback (ctl, state->usercontext, dst_pos, working_args)) + { + limit = update_limit (ctl, state, true, limit); + done += (dst_pos - ctl->initial_argc); + } + else /* got E2BIG, adjust arguments */ + { + if (limit <= ctl->initial_argc + 1) + { + /* No room to reduce the length of the argument list. + Issue an error message and give up. */ + error(1, 0, + _("can't call exec() due to argument size restrictions")); + } + else + { + /* Try fewer arguments. */ + limit = update_limit (ctl, state, false, limit); + } + } + } + while ((done + 1) < (state->cmd_argc - ctl->initial_argc)); + /* (state->cmd_argc - ctl->initial_argc) includes the terminating NULL, + * which is why we add 1 to done in the test above. */ + + if (working_args) + free (working_args); bc_clear_args(ctl, state); } @@ -301,11 +362,6 @@ bc_argc_limit_reached(int initial_args, LEN is the length of ARG, including the terminating null. If this brings the list up to its maximum size, execute the command. */ -/* XXX: sometimes this function is called (internally) - * just to push a NULL onto the and of the arg list. - * We should probably do that with a separate function - * for greater clarity. - */ void bc_push_arg (struct buildcmd_control *ctl, struct buildcmd_state *state, @@ -313,12 +369,16 @@ bc_push_arg (struct buildcmd_control *ctl, const char *prefix, size_t pfxlen, int initial_args) { + const int terminate = (arg == special_terminating_arg); + + assert (arg != NULL); + if (!initial_args) { state->todo = 1; } - if (arg) + if (!terminate) { if (state->cmd_argv_chars + len > ctl->arg_max) { @@ -351,7 +411,7 @@ bc_push_arg (struct buildcmd_control *ctl, } } - if (!arg) + if (terminate) state->cmd_argv[state->cmd_argc++] = NULL; else { @@ -366,12 +426,12 @@ bc_push_arg (struct buildcmd_control *ctl, state->cmd_argv_chars += len; /* If we have now collected enough arguments, - * do the exec immediately. This must be - * conditional on arg!=NULL, since do_exec() - * actually calls bc_push_arg(ctl, state, NULL, 0, false). + * do the exec immediately. */ if (bc_argc_limit_reached(initial_args, ctl, state)) - bc_do_exec (ctl, state); + { + bc_do_exec (ctl, state); + } } /* If this is an initial argument, set the high-water mark. */ @@ -450,12 +510,17 @@ bc_get_arg_max(void) } -static int cb_exec_noop(struct buildcmd_control *ctl, - struct buildcmd_state *state) +static int +cb_exec_noop(struct buildcmd_control * ctl, + void *usercontext, + int argc, + char **argv) { /* does nothing. */ (void) ctl; - (void) state; + (void) usercontext; + (void) argc; + (void) argv; return 0; } @@ -558,6 +623,8 @@ bc_init_state(const struct buildcmd_control *ctl, state->cmd_argv_chars = 0; state->cmd_argv = NULL; state->cmd_argv_alloc = 0; + state->largest_successful_arg_count = 0; + state->smallest_failed_arg_count = 0; /* XXX: the following memory allocation is inadvisable on systems * with no ARG_MAX, because ctl->arg_max may actually be close to @@ -582,3 +649,48 @@ bc_clear_args(const struct buildcmd_control *ctl, state->todo = 0; state->dir_fd = -1; } + + +/* Return nonzero if the value stored in the environment variable ENV_VAR_NAME + * exceeds QUANTITY. + */ +static int +exceeds (const char *env_var_name, size_t quantity) +{ + const char *val = getenv (env_var_name); + if (val) + { + char *tmp; + unsigned long limit; + + if (xstrtoul (val, &tmp, 10, &limit, NULL) == LONGINT_OK) + { + if (quantity > limit) + return 1; + } + else + { + error (1, errno, "Environment variable %s " + "is not set to a valid decimal number", + env_var_name); + return 0; + } + } + return 0; +} + +/* Return nonzero if the indicated argument list exceeds a testing limit. */ +int +bc_args_exceed_testing_limit (const char **argv) +{ + size_t chars, args; + + for (chars=args=0; *argv; ++argv) + { + ++args; + chars += strlen(*argv); + } + + return (exceeds ("__GNU_FINDUTILS_EXEC_ARG_COUNT_LIMIT", args) || + exceeds ("__GNU_FINDUTILS_EXEC_ARG_LENGTH_LIMIT", chars)); +} diff --git a/lib/buildcmd.h b/lib/buildcmd.h index 8f37a7e..3828cf1 100644 --- a/lib/buildcmd.h +++ b/lib/buildcmd.h @@ -48,6 +48,10 @@ struct buildcmd_state /* Directory in which to perform the exec. */ int dir_fd; + + /* Summary of what we think the argv limits are. */ + int largest_successful_arg_count; + int smallest_failed_arg_count; }; struct buildcmd_control @@ -87,7 +91,7 @@ struct buildcmd_control int initial_argc; /* 0 */ /* exec callback. */ - int (*exec_callback)(struct buildcmd_control *, struct buildcmd_state *); + int (*exec_callback)(struct buildcmd_control *, void *usercontext, int argc, char **argv); /* If nonzero, the maximum number of nonblank lines from stdin to use per command line. */ @@ -132,6 +136,7 @@ extern size_t bc_get_arg_max(void); extern void bc_use_sensible_arg_max(struct buildcmd_control *ctl); extern void bc_clear_args(const struct buildcmd_control *ctl, struct buildcmd_state *state); +extern int bc_args_exceed_testing_limit(const char **argv); #endif diff --git a/xargs/xargs.c b/xargs/xargs.c index 4bf61d2..4d5ad11 100644 --- a/xargs/xargs.c +++ b/xargs/xargs.c @@ -236,7 +236,7 @@ static int read_line PARAMS ((void)); static int read_string PARAMS ((void)); static boolean print_args PARAMS ((boolean ask)); /* static void do_exec PARAMS ((void)); */ -static int xargs_do_exec (struct buildcmd_control *cl, struct buildcmd_state *state); +static int xargs_do_exec (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv); static void exec_if_possible PARAMS ((void)); static void add_proc PARAMS ((pid_t pid)); static void wait_for_proc PARAMS ((boolean all, unsigned int minreap)); @@ -1057,7 +1057,7 @@ prep_child_for_exec (void) as an atexit() function. */ static int -xargs_do_exec (struct buildcmd_control *ctl, struct buildcmd_state *state) +xargs_do_exec (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv) { pid_t child; int fd[2]; @@ -1065,7 +1065,6 @@ xargs_do_exec (struct buildcmd_control *ctl, struct buildcmd_state *state) int r; (void) ctl; - (void) state; if (!query_before_executing || print_args (true)) { @@ -1110,7 +1109,10 @@ xargs_do_exec (struct buildcmd_control *ctl, struct buildcmd_state *state) prep_child_for_exec(); - execvp (bc_state.cmd_argv[0], bc_state.cmd_argv); + if (bc_args_exceed_testing_limit (argv)) + errno = E2BIG; + else + execvp (argv[0], argv); if (errno) { /* Write errno value to parent. We do this even if @@ -1131,13 +1133,13 @@ xargs_do_exec (struct buildcmd_control *ctl, struct buildcmd_state *state) close(fd[1]); if (E2BIG != errno) { - error (0, errno, "%s", bc_state.cmd_argv[0]); - /* The actual value returned here should be irrelevant, - * because the parent will test our value of errno. - */ - _exit (errno == ENOENT ? 127 : 126); - + error (0, errno, "%s", argv[0]); } + /* The actual value returned here should be irrelevant, + * because the parent will test our value of errno. + */ + _exit (errno == ENOENT ? 127 : 126); + /*NOTREACHED*/ } /* child */ -- 1.5.6.5
