diff -x CVS -urpN 2009-01-17/ChangeLog 2009-01-18/ChangeLog --- 2009-01-17/ChangeLog 2009-01-14 15:29:45.000000000 +0100 +++ 2009-01-18/ChangeLog 2009-01-17 02:52:54.000000000 +0100 @@ -1,3 +1,47 @@ +2009-01-17 Denys Vlasenko <[email protected]> + + Two cleanups: tcb table expansion failure is not really a survivable + event, we do not have any viable way to continue. No wonder most + places where that is detected have FIXMEs. + It's way simpler to treat as fatal failure, and handle it inside + tcb table expansion finctions. + Second cleanup: tidy up haphazard locations of a few externs. + + * defs.h: Change return type of expand_tcbtab() to void. + Declare change_syscall(). + * process.c: Change all callsites of alloctcb(), alloc_tcb() and + fork_tcb(), removing now-redundant error checks. + (fork_tcb): Change return type to void - it can't fail now. + * strace.c: Move extern declarations out of function bodies. + Change all callsites of alloctcb(), alloc_tcb() and + fork_tcb(), removing now-redundant error checks. + (expand_tcbtab): Change return type to void - it can't fail now. + On failure to expand, print a message, clean up, and exit. + (alloc_tcb): On failure to expand, print a message, clean up, and exit. + * util.c (setbpt): Remove extern declaration from function body. + +2009-01-17 Denys Vlasenko <[email protected]> + + * defs.h: Update a comment. No code changes. + * strace.c (handle_stopped_tcbs): Discard all execve stops + and clear TCB_WAITEXECVE bit. + * syscall.c (get_scno): Add the code to not mistakenly + treat ptrace stop as execve stop (execve stops can be blocked + by traced program). + Fixes RH#477775 "strace hangs if the target process blocks SIGTRAP". + +2009-01-17 Denys Vlasenko <[email protected]> + + * process.c: Add a comment. No code changes. + * strace.c (collect_stopped_tcbs): Stop reversing list of stopped + tcp's. I'm not totally convinced it is crucial, but this is surely + fits the concept of "least surprise". + Do not collect TCB_SUSPENDED tcp's (this is closer to how + it was before). + (handle_stopped_tcbs): Remove the code to reject TCB_SUSPENDED tcp's, + it's done earlier now. In an unobvious way, this was causing + SIGSTOPs from freshly attached children to be misinterpreted. + 2009-01-14 Denys Vlasenko <[email protected]> * linux/bfin/syscallent.h: sys_futex has 6 parameters, not 5. diff -x CVS -urpN 2009-01-17/defs.h 2009-01-18/defs.h --- 2009-01-17/defs.h 2009-01-13 19:30:55.000000000 +0100 +++ 2009-01-18/defs.h 2009-01-17 02:52:54.000000000 +0100 @@ -26,7 +26,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: defs.h,v 1.94 2009/01/13 18:30:55 vda_linux Exp $ + * $Id: defs.h,v 1.96 2009/01/17 01:52:54 vda_linux Exp $ */ #ifdef HAVE_CONFIG_H @@ -363,9 +363,17 @@ struct tcb { #define TCB_FOLLOWFORK 00400 /* Process should have forks followed */ #define TCB_REPRINT 01000 /* We should reprint this syscall on exit */ #ifdef LINUX -/* x86 does not need TCB_WAITEXECVE. +/* TCB_WAITEXECVE bit means "ignore next SIGTRAP, it's execve exit stop". + * it is not reliable if traced program masks SIGTRAP. + * + * x86 does not need TCB_WAITEXECVE. * It can detect execve's SIGTRAP by looking at eax/rax. * See "stray syscall exit: eax = " message in syscall_fixup(). + * + * Note that on newer kernels, we use ptrace options and therefore + * can filter out execve stops reliably on any architecture, + * without using TCB_WAITEXECVE flag. + * I guess we can remove it from the source somewhere around year 2010 :) */ # if defined(ALPHA) || defined(SPARC) || defined(SPARC64) || defined(POWERPC) || defined(IA64) || defined(HPPA) || defined(SH) || defined(SH64) || defined(S390) || defined(S390X) || defined(ARM) || defined(MIPS) || defined(BFIN) # define TCB_WAITEXECVE 02000 /* ignore SIGTRAP after exceve */ @@ -473,7 +481,7 @@ extern const char *xlookup P((const stru extern struct tcb *alloc_tcb P((int, int)); extern struct tcb *pid2tcb P((int)); extern void droptcb P((struct tcb *)); -extern int expand_tcbtab P((void)); +extern void expand_tcbtab P((void)); #define alloctcb(pid) alloc_tcb((pid), 1) @@ -527,6 +535,7 @@ extern void tprint_iov P((struct tcb *, extern void tprint_open_modes P((struct tcb *, mode_t)); extern int is_restart_error P((struct tcb *)); +extern int change_syscall P((struct tcb *, int)); #ifdef LINUX extern int internal_clone P((struct tcb *)); #endif diff -x CVS -urpN 2009-01-17/process.c 2009-01-18/process.c --- 2009-01-17/process.c 2009-01-13 19:30:55.000000000 +0100 +++ 2009-01-18/process.c 2009-01-17 02:52:54.000000000 +0100 @@ -34,7 +34,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: process.c,v 1.123 2009/01/13 18:30:55 vda_linux Exp $ + * $Id: process.c,v 1.125 2009/01/17 01:52:54 vda_linux Exp $ */ #include "defs.h" @@ -487,25 +487,19 @@ struct tcb *tcp; /* TCP is creating a child we want to follow. If there will be space in tcbtab for it, set TCB_FOLLOWFORK and return 0. If not, clear TCB_FOLLOWFORK, print an error, and return 1. */ -static int +static void fork_tcb(struct tcb *tcp) { - if (nprocs == tcbtabsize) { - if (expand_tcbtab()) { - tcp->flags &= ~TCB_FOLLOWFORK; - return 1; - } - } + if (nprocs == tcbtabsize) + expand_tcbtab(); tcp->flags |= TCB_FOLLOWFORK; - return 0; } #ifdef USE_PROCFS int -sys_fork(tcp) -struct tcb *tcp; +sys_fork(struct tcb *tcp) { if (exiting(tcp) && !syserror(tcp)) { if (getrval2(tcp)) { @@ -551,12 +545,10 @@ struct tcb *tcp; return 0; if (!followfork) return 0; - if (fork_tcb(tcp)) - return 0; + fork_tcb(tcp); if (syserror(tcp)) return 0; - if ((tcpchild = alloctcb(tcp->u_rval)) == NULL) - return 0; + tcpchild = alloctcb(tcp->u_rval); if (proc_open(tcpchild, 2) < 0) droptcb(tcpchild); } @@ -706,9 +698,7 @@ struct tcb *tcp; } int -change_syscall(tcp, new) -struct tcb *tcp; -int new; +change_syscall(struct tcb *tcp, int new) { #if defined(LINUX) #if defined(I386) @@ -758,9 +748,12 @@ int new; #elif defined(IA64) if (ia32) { switch (new) { - case 2: break; /* x86 SYS_fork */ - case SYS_clone: new = 120; break; - default: + case 2: + break; /* x86 SYS_fork */ + case SYS_clone: + new = 120; + break; + default: fprintf(stderr, "%s: unexpected syscall %d\n", __FUNCTION__, new); return -1; @@ -892,8 +885,7 @@ struct tcb *tcp; if (entering(tcp)) { if (!followfork) return 0; - if (fork_tcb(tcp)) - return 0; + fork_tcb(tcp); if (setbpt(tcp) < 0) return 0; } else { @@ -924,12 +916,8 @@ struct tcb *tcp; } else #endif - if (fork_tcb(tcp) || (tcpchild = alloctcb(pid)) == NULL) { - if (bpt) - clearbpt(tcp); - kill(pid, SIGKILL); /* XXX */ - return 0; - } + fork_tcb(tcp); + tcpchild = alloctcb(pid); #ifndef CLONE_PTRACE /* Attach to the new child */ @@ -963,6 +951,9 @@ struct tcb *tcp; clearbpt(tcpchild); tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP); + /* TCB_SUSPENDED tasks are not collected by waitpid + * loop, and left stopped. Restart it: + */ if (ptrace_restart(PTRACE_SYSCALL, tcpchild, 0) < 0) return -1; @@ -1039,8 +1030,7 @@ struct tcb *tcp; if (entering(tcp)) { if (!followfork || dont_follow) return 0; - if (fork_tcb(tcp)) - return 0; + fork_tcb(tcp); if (setbpt(tcp) < 0) return 0; } @@ -1056,10 +1046,8 @@ struct tcb *tcp; return 0; pid = tcp->u_rval; - if (fork_tcb(tcp) || (tcpchild = alloctcb(pid)) == NULL) { - kill(pid, SIGKILL); /* XXX */ - return 0; - } + fork_tcb(tcp); + tcpchild = alloctcb(pid); #ifdef LINUX #ifdef HPPA /* The child must have run before it can be attached. */ diff -x CVS -urpN 2009-01-17/strace.c 2009-01-18/strace.c --- 2009-01-17/strace.c 2009-01-13 19:30:55.000000000 +0100 +++ 2009-01-18/strace.c 2009-01-17 02:52:54.000000000 +0100 @@ -27,7 +27,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: strace.c,v 1.99 2009/01/13 18:30:55 vda_linux Exp $ + * $Id: strace.c,v 1.102 2009/01/17 01:52:54 vda_linux Exp $ */ #include "defs.h" @@ -78,6 +78,8 @@ #endif #endif extern char **environ; +extern int optind; +extern char *optarg; int debug = 0, followfork = 0; @@ -436,13 +438,7 @@ startup_attach(void) (char *) 1, 0) < 0) ++nerr; else if (tid != tcbtab[tcbi]->pid) { - if (nprocs == tcbtabsize && - expand_tcbtab()) - tcp = NULL; - else - tcp = alloctcb(tid); - if (tcp == NULL) - exit(1); + tcp = alloctcb(tid); tcp->flags |= TCB_ATTACHED|TCB_CLONE_THREAD|TCB_CLONE_DETACHED|TCB_FOLLOWFORK; tcbtab[tcbi]->nchildren++; tcbtab[tcbi]->nclone_threads++; @@ -675,10 +671,6 @@ startup_child (char **argv) /* We are the tracer. */ tcp = alloctcb(daemonized_tracer ? getppid() : pid); - if (tcp == NULL) { - cleanup(); - exit(1); - } if (daemonized_tracer) { /* We want subsequent startup_attach() to attach to it. */ tcp->flags |= TCB_ATTACHED; @@ -695,8 +687,6 @@ startup_child (char **argv) int main(int argc, char *argv[]) { - extern int optind; - extern char *optarg; struct tcb *tcp; int c, pid = 0; int optF = 0; @@ -708,11 +698,11 @@ main(int argc, char *argv[]) /* Allocate the initial tcbtab. */ tcbtabsize = argc; /* Surely enough for all -p args. */ - if ((tcbtab = calloc (tcbtabsize, sizeof tcbtab[0])) == NULL) { + if ((tcbtab = calloc(tcbtabsize, sizeof tcbtab[0])) == NULL) { fprintf(stderr, "%s: out of memory\n", progname); exit(1); } - if ((tcbtab[0] = calloc (tcbtabsize, sizeof tcbtab[0][0])) == NULL) { + if ((tcbtab[0] = calloc(tcbtabsize, sizeof tcbtab[0][0])) == NULL) { fprintf(stderr, "%s: out of memory\n", progname); exit(1); } @@ -807,11 +797,7 @@ main(int argc, char *argv[]) fprintf(stderr, "%s: I'm sorry, I can't let you do that, Dave.\n", progname); break; } - if ((tcp = alloc_tcb(pid, 0)) == NULL) { - fprintf(stderr, "%s: out of memory\n", - progname); - exit(1); - } + tcp = alloc_tcb(pid, 0); tcp->flags |= TCB_ATTACHED; pflag_seen++; break; @@ -979,8 +965,8 @@ main(int argc, char *argv[]) exit(exit_code); } -int -expand_tcbtab() +void +expand_tcbtab(void) { /* Allocate some more TCBs and expand the table. We don't want to relocate the TCBs because our @@ -993,27 +979,26 @@ expand_tcbtab() sizeof *newtcbs); int i; if (newtab == NULL || newtcbs == NULL) { - if (newtab != NULL) - free(newtab); fprintf(stderr, "%s: expand_tcbtab: out of memory\n", progname); - return 1; + cleanup(); + exit(1); } for (i = tcbtabsize; i < 2 * tcbtabsize; ++i) newtab[i] = &newtcbs[i - tcbtabsize]; tcbtabsize *= 2; tcbtab = newtab; - - return 0; } - struct tcb * alloc_tcb(int pid, int command_options_parsed) { int i; struct tcb *tcp; + if (nprocs == tcbtabsize) + expand_tcbtab(); + for (i = 0; i < tcbtabsize; i++) { tcp = tcbtab[i]; if ((tcp->flags & TCB_INUSE) == 0) { @@ -1036,15 +1021,14 @@ alloc_tcb(int pid, int command_options_p return tcp; } } - fprintf(stderr, "%s: alloc_tcb: tcb table full\n", progname); - return NULL; + fprintf(stderr, "%s: bug in alloc_tcb\n", progname); + cleanup(); + exit(1); } #ifdef USE_PROCFS int -proc_open(tcp, attaching) -struct tcb *tcp; -int attaching; +proc_open(struct tcb *tcp, int attaching) { char proc[32]; long arg; @@ -2279,7 +2263,8 @@ collect_stopped_tcbs(void) struct rusage ru; struct rusage* ru_ptr = cflag ? &ru : NULL; int wnohang = 0; - struct tcb *found_tcps = NULL; + struct tcb *found_tcps; + struct tcb **nextp = &found_tcps; #ifdef __WALL int wait4_options = __WALL; #endif @@ -2368,15 +2353,7 @@ collect_stopped_tcbs(void) will we have the association of parent and child so that we know how to do clearbpt in the child. */ - if (nprocs == tcbtabsize && - expand_tcbtab()) - tcp = NULL; - else - tcp = alloctcb(pid); - if (tcp == NULL) { - kill(pid, SIGKILL); /* XXX */ - return NULL; - } + tcp = alloctcb(pid); tcp->flags |= TCB_ATTACHED | TCB_SUSPENDED; if (!qflag) fprintf(stderr, "\ @@ -2401,10 +2378,28 @@ Process %d attached (waiting for parent) tcp->stime = ru.ru_stime; #endif } + if (tcp->flags & TCB_SUSPENDED) { + /* + * Apparently, doing any ptrace() call on a stopped + * process, provokes the kernel to report the process + * status again on a subsequent wait(), even if the + * process has not been actually restarted. + * Since we have inspected the arguments of suspended + * processes we end up here testing for this case. + */ + continue; + } + tcp->wait_status = status; #ifdef LINUX - tcp->next_need_service = found_tcps; - found_tcps = tcp; + /* It is important to not invert the order of tasks + * to process. For one, alloc_tcb() above picks newly forked + * threads in some order, processing of them and their parent + * should be in the same order, otherwise bad things happen + * (misinterpreted SIGSTOPs and such). + */ + *nextp = tcp; + nextp = &tcp->next_need_service; wnohang = WNOHANG; #endif #ifdef SUNOS4 @@ -2413,9 +2408,10 @@ Process %d attached (waiting for parent) */ break; #endif - } /* while (1) - collecting all stopped/exited tracees */ + } - return tcp; + *nextp = NULL; + return found_tcps; } static int @@ -2425,18 +2421,6 @@ handle_stopped_tcbs(struct tcb *tcp) int pid; int status; - if (tcp->flags & TCB_SUSPENDED) { - /* - * Apparently, doing any ptrace() call on a stopped - * process, provokes the kernel to report the process - * status again on a subsequent wait(), even if the - * process has not been actually restarted. - * Since we have inspected the arguments of suspended - * processes we end up here testing for this case. - */ - continue; - } - outf = tcp->outf; status = tcp->wait_status; pid = tcp->pid; @@ -2568,12 +2552,17 @@ handle_stopped_tcbs(struct tcb *tcp) * but be paranoid about it. */ if (((unsigned)status >> 16) == PTRACE_EVENT_EXEC) { - /* It's post-exec ptrace stop. */ - /* Set WSTOPSIG(status) = (SIGTRAP | 0x80). */ - status |= 0x8000; + /* It's post-exec ptrace stop. Ignore it, + * we will get syscall exit ptrace stop later. + */ +#ifdef TCB_WAITEXECVE + tcp->flags &= ~TCB_WAITEXECVE; +#endif + goto tracing; } else { /* Take a better look... */ siginfo_t si; + si.si_signo = 0; ptrace(PTRACE_GETSIGINFO, pid, (void*) 0, (void*) &si); /* * Check some fields to make sure we see diff -x CVS -urpN 2009-01-17/syscall.c 2009-01-18/syscall.c --- 2009-01-17/syscall.c 2009-01-06 22:45:06.000000000 +0100 +++ 2009-01-18/syscall.c 2009-01-17 02:06:18.000000000 +0100 @@ -30,7 +30,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: syscall.c,v 1.106 2009/01/06 21:45:06 vda_linux Exp $ + * $Id: syscall.c,v 1.107 2009/01/17 01:06:18 vda_linux Exp $ */ #include "defs.h" @@ -976,26 +976,47 @@ struct tcb *tcp; } #elif defined(IA64) # define IA64_PSR_IS ((long)1 << 34) - if (upeek (tcp, PT_CR_IPSR, &psr) >= 0) + if (upeek(tcp, PT_CR_IPSR, &psr) >= 0) ia32 = (psr & IA64_PSR_IS) != 0; if (!(tcp->flags & TCB_INSYSCALL)) { if (ia32) { if (upeek(tcp, PT_R1, &scno) < 0) /* orig eax */ return -1; } else { - if (upeek (tcp, PT_R15, &scno) < 0) + if (upeek(tcp, PT_R15, &scno) < 0) return -1; } /* Check if we return from execve. */ if (tcp->flags & TCB_WAITEXECVE) { +#if defined PTRACE_GETSIGINFO + siginfo_t si; + + tcp->flags &= ~TCB_WAITEXECVE; + /* If SIGTRAP is masked, execve's magic SIGTRAP + * is not delivered. We end up here on a subsequent + * ptrace stop instead. Luckily, we can check + * for the type of this SIGTRAP. execve's magic one + * has 0 (SI_USER) in si.si_code, ptrace stop has 5. + * (I don't know why 5). + */ + si.si_code = SI_USER; + /* If PTRACE_GETSIGINFO fails, we assume it's + * magic SIGTRAP. Moot anyway, PTRACE_GETSIGINFO + * doesn't fail. + */ + ptrace(PTRACE_GETSIGINFO, pid, (void*) 0, (void*) &si); + if (si.si_code == SI_USER) + return 0; +#else tcp->flags &= ~TCB_WAITEXECVE; return 0; +#endif } } else { /* syscall in progress */ - if (upeek (tcp, PT_R8, &r8) < 0) + if (upeek(tcp, PT_R8, &r8) < 0) return -1; - if (upeek (tcp, PT_R10, &r10) < 0) + if (upeek(tcp, PT_R10, &r10) < 0) return -1; } #elif defined (ARM) diff -x CVS -urpN 2009-01-17/util.c 2009-01-18/util.c --- 2009-01-17/util.c 2009-01-13 19:30:55.000000000 +0100 +++ 2009-01-18/util.c 2009-01-17 02:52:54.000000000 +0100 @@ -30,7 +30,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: util.c,v 1.86 2009/01/13 18:30:55 vda_linux Exp $ + * $Id: util.c,v 1.87 2009/01/17 01:52:54 vda_linux Exp $ */ #include "defs.h" @@ -1593,11 +1593,9 @@ set_arg1 (struct tcb *tcp, void *cookie, #endif int -setbpt(tcp) -struct tcb *tcp; +setbpt(struct tcb *tcp) { static int clone_scno[SUPPORTED_PERSONALITIES] = { SYS_clone }; - extern int change_syscall(struct tcb *, int); arg_setup_state state; if (tcp->flags & TCB_BPTSET) {
------------------------------------------------------------------------------ Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA -OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise -Strategies to boost innovation and cut costs with open source participation -Receive a $600 discount off the registration fee with the source code: SFAD http://p.sf.net/sfu/XcvMzF8H _______________________________________________ Strace-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/strace-devel
