> Could you please re-test with this patch applied? It turned out that this patch did not make much difference: the step-simple test still fails with the patch applied. The failure could be reproduced a few times after a fresh reboot. The test exited with status 1 here:
/* Known bug in 2.6.28-rc7 + utrace patch: * child was left to run freely, and exited * Deterministic (happens even with NUM_SINGLESTEPS = 1) */ if (WIFEXITED (status)) { VERBOSE("PTRACE_SINGLESTEP did not stop (step #%d)\n", i+1); assert (WEXITSTATUS (status) == 42); exit (1); } Here was the strace output when failure. # strace ./step-simple execve("./step-simple", ["./step-simple"], [/* 28 vars */]) = 0 brk(0) = 0x80003000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000027000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=44711, ...}) = 0 mmap(NULL, 44711, PROT_READ, MAP_PRIVATE, 3, 0) = 0x20000028000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\2\1\0\0\0\0\0\0\0\0\0\0\3\0\26\0\0\0\1\0\0\0\0\0\2\10\364\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=2703224, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x20000033000 mmap(NULL, 1729920, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x20000034000 mmap(0x200001d1000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19c000) = 0x200001d1000 mmap(0x200001d6000, 17792, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x200001d6000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x200001db000 mprotect(0x200001d1000, 16384, PROT_READ) = 0 mprotect(0x20000022000, 4096, PROT_READ) = 0 munmap(0x20000028000, 44711) = 0 rt_sigaction(SIGABRT, {0x80000e7c, [ABRT], SA_RESTART}, {SIG_DFL, [], 0}, 8) = 0 rt_sigaction(SIGINT, {0x80000e7c, [INT], SA_RESTART}, {SIG_DFL, [], 0}, 8) = 0 rt_sigaction(SIGALRM, {0x80000e7c, [ALRM], SA_RESTART}, {SIG_DFL, [], 0}, 8) = 0 alarm(5) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x200001db7c0) = 2026 wait4(2026, [{WIFSTOPPED(s) && WSTOPSIG(s) == 
SIGSTOP}], 0, NULL) = 2026 --- SIGCHLD (Child exited) @ 0 (0) --- ptrace(PTRACE_SINGLESTEP, 2026, 0, SIG_0) = 0 wait4(2026, [{WIFEXITED(s) && WEXITSTATUS(s) == 42}], 0, NULL) = 2026 --- SIGCHLD (Child exited) @ 0 (0) --- kill(2026, SIGKILL) = -1 ESRCH (No such process) wait4(-1, NULL, __WALL, NULL) = -1 ECHILD (No child processes) exit_group(1) Also, I could not reproduce the problem in kernels without utrace. Thanks, CAI Qian