Dear Roland, dear utrace developers, I have update also the second patch (which includes the first). This patch fixes the utrace_stop race condition and implements a consistent model of tracing engine nesting.
renzo On Sat, Feb 14, 2009 at 10:11:55AM +0100, Renzo Davoli wrote: > > This is an updated patch. It solves the race condition + it gives a quick (a > bit dirty) > solution to issues 3&4. > 3- Nesting, is it really useful to run all the reports in a row and > (eventually) stop and the end waiting for all the engines? > The patch waits for each engine to resume before notifying the next > registered engine. > 4- report_syscall_entry engines evaluation order should be reversed > REPORT macros have an extra "reverse" argument. The macros append this string > to the > list_for_each_entry_safe function name. All the macro calls skip this > argument except > the one in report_syscall_entry where it is set to _reverse. > > With this patch it is possible to run nested kmview machines and ptrace works > inside > the virtual machines. > > This patch is "a bit dirty" because variables and sections of code needed to > count and test > the stopped engines are useless here: a task can be kept stopped for at most > one engine at > a time. > > This patch is a proof-of concept to show what I meant in my previous message. > > For what concerns 1&2 (not included in this patch): > 1- Virtual Machines may need to change the system call > THis is just to simplify the implementation of arch. independent virtual > machine. > I have kept the definition of missing functions in the kmview module code. > 2- UTRACE_SYSCALL_ABORT: is it really useful as a return value for > report_syscall_entry? > It is useless for kmview as the decision of aborting the system call is taken > while > the process is stopped, I am currently setting the syscall number to -1 to > skip the syscall. > > For the sake of completeness there is another way to implement the partial > virtual machine > stuff by introducing another "quiescence" state inside the report upcalls. > I mean: when utrace calls a report function (say for example > report_syscall_entry), the function > in the module puts the process in a stopped state (maybe its TASK_TRACED and > calls the schedule). > >From utrace's point of view the report function does not return until all > >the changes in > the task state have been completed and the decision > UTRACE_RESUME/UTRACE_SYSCALL_ABORT has been taken. > In this way UTRACE_STOP is never used because the module has to implement > another feature > similar to UTRACE_STOP on its own. So what is UTRACE_STOP for? > > ciao > renzo --- --- kernel/utrace.c.mcgrath 2009-03-05 15:09:57.000000000 +0100 +++ kernel/utrace.c 2009-03-06 11:49:15.000000000 +0100 @@ -369,6 +369,13 @@ return killed; } +static void mark_engine_wants_stop(struct utrace_engine *engine); +static void clear_engine_wants_stop(struct utrace_engine *engine); +static bool engine_wants_stop(struct utrace_engine *engine); +static void mark_engine_wants_resume(struct utrace_engine *engine); +static void clear_engine_wants_resume(struct utrace_engine *engine); +static bool engine_wants_resume(struct utrace_engine *engine); + /* * Perform %UTRACE_STOP, i.e. block in TASK_TRACED until woken up. * @task == current, @utrace == current->utrace, which is not locked. @@ -378,6 +385,7 @@ static bool utrace_stop(struct task_struct *task, struct utrace *utrace) { bool killed; + struct utrace_engine *engine, *next; /* * @utrace->stopped is the flag that says we are safely @@ -399,7 +407,23 @@ return true; } - utrace->stopped = 1; + /* final check: is really needed to stop? */ + list_for_each_entry_safe(engine, next, &utrace->attached, entry) { + if ((engine->ops != &utrace_detached_ops) && engine_wants_stop(engine)) { + if (engine_wants_resume(engine)) { + clear_engine_wants_stop(engine); + clear_engine_wants_resume(engine); + } + else + utrace->stopped = 1; + } + } + if (unlikely(!utrace->stopped)) { + spin_unlock_irq(&task->sighand->siglock); + spin_unlock(&utrace->lock); + return false; + } + __set_current_state(TASK_TRACED); /* @@ -625,6 +649,7 @@ * to record whether the engine is keeping the target thread stopped. */ #define ENGINE_STOP (1UL << _UTRACE_NEVENTS) +#define ENGINE_RESUME (1UL << (_UTRACE_NEVENTS+1)) static void mark_engine_wants_stop(struct utrace_engine *engine) { @@ -641,6 +666,21 @@ return (engine->flags & ENGINE_STOP) != 0; } +static void mark_engine_wants_resume(struct utrace_engine *engine) +{ + engine->flags |= ENGINE_RESUME; +} + +static void clear_engine_wants_resume(struct utrace_engine *engine) +{ + engine->flags &= ~ENGINE_RESUME; +} + +static bool engine_wants_resume(struct utrace_engine *engine) +{ + return (engine->flags & ENGINE_RESUME) != 0; +} + /** * utrace_set_events - choose which event reports a tracing engine gets * @target: thread to affect @@ -891,6 +931,10 @@ list_move(&engine->entry, &detached); } else { flags |= engine->flags | UTRACE_EVENT(REAP); + if (engine_wants_resume(engine)) { + clear_engine_wants_stop(engine); + clear_engine_wants_resume(engine); + } wake = wake && !engine_wants_stop(engine); } } @@ -1110,6 +1154,7 @@ * There might not be another report before it just * resumes, so make sure single-step is not left set. */ + mark_engine_wants_resume(engine); if (likely(resume)) user_disable_single_step(target); break; @@ -1326,6 +1371,7 @@ static bool finish_callback(struct utrace *utrace, struct utrace_report *report, struct utrace_engine *engine, + struct task_struct *task, u32 ret) { enum utrace_resume_action action = utrace_resume_action(ret); @@ -1347,6 +1393,7 @@ spin_lock(&utrace->lock); mark_engine_wants_stop(engine); spin_unlock(&utrace->lock); + utrace_stop(task, utrace); } } else if (engine_wants_stop(engine)) { spin_lock(&utrace->lock); @@ -1401,7 +1448,7 @@ ops = engine->ops; if (want & UTRACE_EVENT(QUIESCE)) { - if (finish_callback(utrace, report, engine, + if (finish_callback(utrace, report, engine, task, (*ops->report_quiesce)(report->action, engine, task, event))) @@ -1430,25 +1477,25 @@ * @callback is the name of the member in the ops vector, and remaining * args are the extras it takes after the standard three args. */ -#define REPORT(task, utrace, report, event, callback, ...) \ +#define REPORT(reverse, task, utrace, report, event, callback, ...) \ do { \ start_report(utrace); \ - REPORT_CALLBACKS(task, utrace, report, event, callback, \ + REPORT_CALLBACKS(reverse, task, utrace, report, event, callback, \ (report)->action, engine, current, \ ## __VA_ARGS__); \ finish_report(report, task, utrace); \ } while (0) -#define REPORT_CALLBACKS(task, utrace, report, event, callback, ...) \ +#define REPORT_CALLBACKS(reverse, task, utrace, report, event, callback, ...) \ do { \ struct utrace_engine *engine, *next; \ const struct utrace_engine_ops *ops; \ - list_for_each_entry_safe(engine, next, \ + list_for_each_entry_safe ## reverse(engine, next, \ &utrace->attached, entry) { \ ops = start_callback(utrace, report, engine, task, \ event); \ if (!ops) \ continue; \ - finish_callback(utrace, report, engine, \ + finish_callback(utrace, report, engine, task, \ (*ops->callback)(__VA_ARGS__)); \ } \ } while (0) @@ -1463,7 +1510,7 @@ struct utrace *utrace = task_utrace_struct(task); INIT_REPORT(report); - REPORT(task, utrace, &report, UTRACE_EVENT(EXEC), + REPORT(,task, utrace, &report, UTRACE_EVENT(EXEC), report_exec, fmt, bprm, regs); } @@ -1478,7 +1525,7 @@ INIT_REPORT(report); start_report(utrace); - REPORT_CALLBACKS(task, utrace, &report, UTRACE_EVENT(SYSCALL_ENTRY), + REPORT_CALLBACKS(_reverse,task, utrace, &report, UTRACE_EVENT(SYSCALL_ENTRY), report_syscall_entry, report.result | report.action, engine, current, regs); finish_report(&report, task, utrace); @@ -1520,7 +1567,7 @@ struct utrace *utrace = task_utrace_struct(task); INIT_REPORT(report); - REPORT(task, utrace, &report, UTRACE_EVENT(SYSCALL_EXIT), + REPORT(,task, utrace, &report, UTRACE_EVENT(SYSCALL_EXIT), report_syscall_exit, regs); } @@ -1536,7 +1583,7 @@ struct utrace *utrace = task_utrace_struct(task); INIT_REPORT(report); - REPORT(task, utrace, &report, UTRACE_EVENT(CLONE), + REPORT(,task, utrace, &report, UTRACE_EVENT(CLONE), report_clone, clone_flags, child); /* @@ -1600,7 +1647,7 @@ utrace->report = 0; spin_unlock(&utrace->lock); - REPORT(task, utrace, &report, UTRACE_EVENT(JCTL), + REPORT(,task, utrace, &report, UTRACE_EVENT(JCTL), report_jctl, was_stopped ? CLD_STOPPED : CLD_CONTINUED, what); if (was_stopped && !task_is_stopped(task)) { @@ -1637,7 +1684,7 @@ INIT_REPORT(report); long orig_code = *exit_code; - REPORT(task, utrace, &report, UTRACE_EVENT(EXIT), + REPORT(,task, utrace, &report, UTRACE_EVENT(EXIT), report_exit, orig_code, exit_code); if (report.action == UTRACE_STOP) @@ -1676,7 +1723,7 @@ utrace->interrupt = 0; spin_unlock(&utrace->lock); - REPORT_CALLBACKS(task, utrace, &report, UTRACE_EVENT(DEATH), + REPORT_CALLBACKS(,task, utrace, &report, UTRACE_EVENT(DEATH), report_death, engine, task, group_dead, signal); spin_lock(&utrace->lock); @@ -2018,7 +2065,7 @@ break; } - finish_callback(utrace, &report, engine, ret); + finish_callback(utrace, &report, engine, task, ret); } /*