Hi Sergey, On Mon, Jun 20, 2016 at 12:28 AM, Sergey Fedorov <sergey.fedo...@linaro.org> wrote: > > From: Sergey Fedorov <serge.f...@gmail.com> > > This patch is based on the ideas found in the work of KONRAD Frederic [1], > Alex Bennée [2], and Alvise Rigo [3]. > > This mechanism allows an operation to be performed safely in a quiescent > state. Quiescent state means: (1) no vCPU is running and (2) BQL in > system-mode or 'exclusive_lock' in user-mode emulation is held while > performing the operation. This functionality is required e.g. for > performing translation buffer flush safely in multi-threaded user-mode > emulation. > > The existing CPU work queue is used to schedule such safe operations. A > new 'safe' flag is added into struct qemu_work_item to designate the > special requirements of the safe work. An operation in a quiescent state > can be scheduled by using the async_safe_run_on_cpu() function, which is > actually the same as async_run_on_cpu() except that it marks the queued > work item with the 'safe' flag set to true. Given this flag is set, > queue_work_on_cpu() atomically increments the 'safe_work_pending' global > counter and kicks all the CPUs instead of just the target CPU as in case > of normal CPU work. This allows forcing other CPUs to exit their > execution loops and wait in the wait_safe_cpu_work() function for the safe > work to finish. When a CPU drains its work queue, if it encounters a > work item marked as safe, it first waits for other CPUs to exit their > execution loops, then calls the work item function, and finally > decrements the 'safe_work_pending' counter, signalling other CPUs to let > them continue execution as soon as all pending safe work items have been > processed. The 'tcg_pending_cpus' counter, protected by 'exclusive_lock' in > user-mode or by 'qemu_global_mutex' in system-mode emulation, is used to > determine whether any CPU is running and to wait for it to exit the execution > loop. 
The fairness of all the CPU work queues is ensured by draining all > the pending safe work items before any CPU can run. > > [1] http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg01128.html > [2] http://lists.nongnu.org/archive/html/qemu-devel/2016-04/msg02531.html > [3] http://lists.nongnu.org/archive/html/qemu-devel/2016-05/msg04792.html > > Signed-off-by: Sergey Fedorov <serge.f...@gmail.com> > Signed-off-by: Sergey Fedorov <sergey.fedo...@linaro.org> > --- > cpu-exec-common.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- > cpus.c | 16 ++++++++++++++++ > include/exec/exec-all.h | 2 ++ > include/qom/cpu.h | 14 ++++++++++++++ > linux-user/main.c | 2 +- > 5 files changed, 77 insertions(+), 2 deletions(-) > > diff --git a/cpu-exec-common.c b/cpu-exec-common.c > index 8184e0662cbd..3056324738f8 100644 > --- a/cpu-exec-common.c > +++ b/cpu-exec-common.c > @@ -25,6 +25,7 @@ > > bool exit_request; > CPUState *tcg_current_cpu; > +int tcg_pending_cpus; > > /* exit the current TB, but without causing any exception to be raised */ > void cpu_loop_exit_noexc(CPUState *cpu) > @@ -78,6 +79,15 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) > siglongjmp(cpu->jmp_env, 1); > } > > +static int safe_work_pending; > + > +void wait_safe_cpu_work(void) > +{ > + while (atomic_mb_read(&safe_work_pending) > 0) { > + wait_cpu_work(); > + } > +} > +
Is this piece of code deadlock-safe once we are in mttcg mode? What happens when two threads call simultaneously async_safe_run_on_cpu? Thank you, alvise > > static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) > { > qemu_mutex_lock(&cpu->work_mutex); > @@ -89,9 +99,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct > qemu_work_item *wi) > cpu->queued_work_last = wi; > wi->next = NULL; > wi->done = false; > + if (wi->safe) { > + atomic_inc(&safe_work_pending); > + } > qemu_mutex_unlock(&cpu->work_mutex); > > - qemu_cpu_kick(cpu); > + if (!wi->safe) { > + qemu_cpu_kick(cpu); > + } else { > + CPU_FOREACH(cpu) { > + qemu_cpu_kick(cpu); > + } > + } > } > > void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > @@ -106,6 +125,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void > *data) > wi.func = func; > wi.data = data; > wi.free = false; > + wi.safe = false; > > queue_work_on_cpu(cpu, &wi); > while (!atomic_mb_read(&wi.done)) { > @@ -129,6 +149,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func > func, void *data) > wi->func = func; > wi->data = data; > wi->free = true; > + wi->safe = false; > + > + queue_work_on_cpu(cpu, wi); > +} > + > +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > +{ > + struct qemu_work_item *wi; > + > + wi = g_malloc0(sizeof(struct qemu_work_item)); > + wi->func = func; > + wi->data = data; > + wi->free = true; > + wi->safe = true; > > queue_work_on_cpu(cpu, wi); > } > @@ -148,9 +182,18 @@ void flush_queued_work(CPUState *cpu) > if (!cpu->queued_work_first) { > cpu->queued_work_last = NULL; > } > + if (wi->safe) { > + while (tcg_pending_cpus) { > + wait_cpu_work(); > + } > + } > qemu_mutex_unlock(&cpu->work_mutex); > wi->func(cpu, wi->data); > qemu_mutex_lock(&cpu->work_mutex); > + if (wi->safe) { > + atomic_dec(&safe_work_pending); > + signal_cpu_work(); > + } > if (wi->free) { > g_free(wi); > } else { > diff --git a/cpus.c b/cpus.c > index 
98f60f6f98f5..bb6bd8615cfc 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -932,6 +932,18 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) > { > } > > +static void tcg_cpu_exec_start(CPUState *cpu) > +{ > + tcg_pending_cpus++; > +} > + > +static void tcg_cpu_exec_end(CPUState *cpu) > +{ > + if (--tcg_pending_cpus) { > + signal_cpu_work(); > + } > +} > + > static void qemu_wait_io_event_common(CPUState *cpu) > { > if (cpu->stop) { > @@ -956,6 +968,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu) > CPU_FOREACH(cpu) { > qemu_wait_io_event_common(cpu); > } > + > + wait_safe_cpu_work(); > } > > static void qemu_kvm_wait_io_event(CPUState *cpu) > @@ -1491,7 +1505,9 @@ static void tcg_exec_all(void) > (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); > > if (cpu_can_run(cpu)) { > + tcg_cpu_exec_start(cpu); > r = tcg_cpu_exec(cpu); > + tcg_cpu_exec_end(cpu); > if (r == EXCP_DEBUG) { > cpu_handle_guest_debug(cpu); > break; > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 23b4b50e0a45..3bc44ed81473 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -405,10 +405,12 @@ extern int singlestep; > > /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */ > extern CPUState *tcg_current_cpu; > +extern int tcg_pending_cpus; > extern bool exit_request; > > void wait_cpu_work(void); > void signal_cpu_work(void); > void flush_queued_work(CPUState *cpu); > +void wait_safe_cpu_work(void); > > #endif > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index 4e688f645b4a..5128fcc1745a 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -231,6 +231,7 @@ struct qemu_work_item { > void *data; > int done; > bool free; > + bool safe; > }; > > /** > @@ -625,6 +626,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, > void *data); > void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data); > > /** > + * async_safe_run_on_cpu: > + * @cpu: The vCPU to run on. > + * @func: The function to be executed. 
> + * @data: Data to pass to the function. > + * > + * Schedules the function @func for execution on the vCPU @cpu asynchronously > + * and in quiescent state. Quiescent state means: (1) all other vCPUs are > + * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or > + * #exclusive_lock in user-mode emulation is held while @func is executing. > + */ > +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data); > + > +/** > * qemu_get_cpu: > * @index: The CPUState@cpu_index value of the CPU to obtain. > * > diff --git a/linux-user/main.c b/linux-user/main.c > index 5a68651159c2..6da3bb32186b 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -113,7 +113,6 @@ static pthread_cond_t exclusive_cond = > PTHREAD_COND_INITIALIZER; > static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; > static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER; > static bool exclusive_pending; > -static int tcg_pending_cpus; > > /* Make sure everything is in a consistent state for calling fork(). */ > void fork_start(void) > @@ -219,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu) > } > exclusive_idle(); > flush_queued_work(cpu); > + wait_safe_cpu_work(); > pthread_mutex_unlock(&exclusive_lock); > } > > -- > 1.9.1 > >