PTRACE_SYSEMU_SINGLESTEP works, but we should avoid an extra send_sigtrap() from syscall_trace_leave().
Change the code under "if (->sysemu)" in ptrace_report_syscall_entry() to clear TIF_SINGLESTEP and return "| UTRACE_REPORT" instead of UTRACE_STOP. This way we report PTRACE_EVENT_SYSCALL_ENTRY but the tracee stops later, after the syscall_trace_leave() path. Test case: #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <signal.h> #include <errno.h> #include <sys/wait.h> #include <sys/ptrace.h> #include <assert.h> #include <sys/user.h> #include <sys/debugreg.h> #include <sys/syscall.h> #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 #define WEVENT(s) ((s & 0xFF0000) >> 16) static int verbose; #define d_printf(fmt, ...) do { if (verbose) printf(fmt, ##__VA_ARGS__); } while (0) static struct user_regs_struct regs; static void resume(int pid, int req, int ck_stat) { int stat; assert(0 == ptrace(req, pid, 0, 0)); assert(waitpid(pid, &stat, __WALL) == pid); //printf("===> %06X\n %06X\n", ck_stat, stat); assert(stat == ck_stat); assert(0 == ptrace(PTRACE_GETREGS, pid, NULL, &regs)); } int main(int argc, const char *argv[]) { int pid, stat; long rip; if (getpid() == __NR_getppid) { printf("sorry, restart\n"); return 0; } verbose = argc > 1; pid = fork(); if (!pid) { assert(0 == ptrace(PTRACE_TRACEME, 0,0,0)); kill(getpid(), SIGSTOP); getppid(); getppid(); getppid(); assert(0); } assert(wait(&stat) == pid); assert(WIFSTOPPED(stat) && WSTOPSIG(stat) == SIGSTOP); assert(0 == ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESYSGOOD)); //----------------------------------------------------------------------------- d_printf("1: syscall enter\n"); resume(pid, PTRACE_SYSCALL, 0x857F); assert(regs.orig_rax == __NR_getppid); assert(regs.rax == -ENOSYS); rip = regs.rip - 2; //-------------------------------------------------------------------------------- d_printf("2: sysemu\n"); resume(pid, PTRACE_SYSEMU, 0x857F); assert(regs.orig_rax == __NR_getppid); assert(regs.rax == -ENOSYS); assert(regs.rip == rip + 2); 
//-------------------------------------------------------------------------------- d_printf("3: stop before syscall insn\n"); do { long cur = regs.rip; resume(pid, PTRACE_SYSEMU_SINGLESTEP, 0x57F); assert(regs.rip != cur); } while (regs.rip != rip); assert(regs.rax == __NR_getppid); d_printf("3: step into syscall\n"); resume(pid, PTRACE_SYSEMU_SINGLESTEP, 0x857F); assert(regs.rip == rip + 2); d_printf("3: step next\n"); resume(pid, PTRACE_SYSEMU_SINGLESTEP, 0x57F); assert(regs.rip != rip + 2); //-------------------------------------------------------------------------------- kill(pid, SIGKILL); return 0; } However, I still do not understand what SYSEMU should really do. Afaics, only uml needs it... I don't see how we can implement the same behaviour without using TIF_SYSCALL_EMU, but perhaps we can ignore the behaviour difference? - The tracer does PTRACE_SYSEMU or PTRACE_SYSEMU_SINGLESTEP, the tracee stops. The tracer does PTRACE_SYSCALL. Should we report SYSCALL_EXIT? This is easy to do. But the next case is nasty, - The tracer does PTRACE_SYSCALL, the tracee reports SYSCALL_ENTER. The tracer does PTRACE_SYSEMU_SINGLESTEP. How can we suppress syscall_trace_leave()->send_sigtrap() without using TIF_SYSCALL_EMU? Of course, we can. But this needs such nasty complications :/ Can't we ignore this incompatibility? --- kernel/ptrace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) --- PU/kernel/ptrace.c~105_PTRACE_SYSEMU_STEP 2009-10-19 18:46:50.000000000 +0200 +++ PU/kernel/ptrace.c 2009-10-20 15:58:19.000000000 +0200 @@ -252,8 +252,11 @@ static u32 ptrace_report_syscall_entry(u set_syscall_code(context, PTRACE_EVENT_SYSCALL_ENTRY); - if (unlikely(context->sysemu)) - return UTRACE_SYSCALL_ABORT | UTRACE_STOP; + if (unlikely(context->sysemu)) { + if (test_thread_flag(TIF_SINGLESTEP)) + user_disable_single_step(task); + return UTRACE_SYSCALL_ABORT | UTRACE_REPORT; + } return UTRACE_SYSCALL_RUN | UTRACE_STOP; }