[Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC

2016-07-01 Thread Richard Henderson
When we cannot emulate an atomic operation within a parallel
context, this exception allows us to stop the world and try
again in a serial context.

Signed-off-by: Richard Henderson 
---
 cpu-exec-common.c   |  6 +
 cpu-exec.c  | 23 +++
 cpus.c  |  6 +
 include/exec/cpu-all.h  |  1 +
 include/exec/exec-all.h |  1 +
 include/qemu-common.h   |  1 +
 linux-user/main.c   | 59 -
 tcg/tcg.h   |  1 +
 translate-all.c |  1 +
 9 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 0cb4ae6..767d9c6 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
 }
 siglongjmp(cpu->jmp_env, 1);
 }
+
+void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
+{
+cpu->exception_index = EXCP_ATOMIC;
+cpu_loop_exit_restore(cpu, pc);
+}
diff --git a/cpu-exec.c b/cpu-exec.c
index b840e1d..041f8b7 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
 }
 #endif
 
+void cpu_exec_step(CPUState *cpu)
+{
+CPUArchState *env = (CPUArchState *)cpu->env_ptr;
+TranslationBlock *tb;
+target_ulong cs_base, pc;
+uint32_t flags;
+bool old_tb_flushed;
+
+old_tb_flushed = cpu->tb_flushed;
+cpu->tb_flushed = false;
+
+cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+tb = tb_gen_code(cpu, pc, cs_base, flags,
+ 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
+tb->orig_tb = NULL;
+cpu->tb_flushed |= old_tb_flushed;
+/* execute the generated code */
+trace_exec_tb_nocache(tb, pc);
+cpu_tb_exec(cpu, tb);
+tb_phys_invalidate(tb, -1);
+tb_free(tb);
+}
+
 struct tb_desc {
 target_ulong pc;
 target_ulong cs_base;
diff --git a/cpus.c b/cpus.c
index 84c3520..a01bbbd 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1575,6 +1575,12 @@ static void tcg_exec_all(void)
 if (r == EXCP_DEBUG) {
 cpu_handle_guest_debug(cpu);
 break;
+} else if (r == EXCP_ATOMIC) {
+/* ??? When we begin running cpus in parallel, we should
+   stop all cpus, clear parallel_cpus, and interpret a
+   single insn with cpu_exec_step.  In the meantime,
+   we should never get here.  */
+abort();
 }
 } else if (cpu->stop || cpu->stopped) {
 if (cpu->unplug) {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8007abb..d2aac4b 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -31,6 +31,7 @@
 #define EXCP_DEBUG  0x10002 /* cpu stopped after a breakpoint or singlestep */
 #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */
 #define EXCP_YIELD  0x10004 /* cpu wants to yield timeslice to another */
+#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */
 
 /* some important defines:
  *
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c1f59fa..ec72c5a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -59,6 +59,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 void cpu_exec_init(CPUState *cpu, Error **errp);
 void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
 void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
+void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
 
 #if !defined(CONFIG_USER_ONLY)
 void cpu_reloading_memory_map(void);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 1f2cb94..77e379d 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -76,6 +76,7 @@ void tcg_exec_init(unsigned long tb_size);
 bool tcg_enabled(void);
 
 void cpu_exec_init_all(void);
+void cpu_exec_step(CPUState *cpu);
 
 /**
  * Sends a (part of) iovec down a socket, yielding when the socket is full, or
diff --git a/linux-user/main.c b/linux-user/main.c
index 78d8d04..54df300 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -179,13 +179,25 @@ static inline void start_exclusive(void)
 }
 
 /* Finish an exclusive operation.  */
-static inline void __attribute__((unused)) end_exclusive(void)
+static inline void end_exclusive(void)
 {
 pending_cpus = 0;
 pthread_cond_broadcast(&exclusive_resume);
 pthread_mutex_unlock(&exclusive_lock);
 }
 
+static void step_atomic(CPUState *cpu)
+{
+start_exclusive();
+
+/* Since we got here, we know that parallel_cpus must be true.  */
+parallel_cpus = false;
+cpu_exec_step(cpu);
+parallel_cpus = true;
+
+end_exclusive();
+}
+
 /* Wait for exclusive ops to finish, and begin cpu execution.  */
 static inline void cpu_exec_start(CPUState *cpu)
 {
@@ -437,6 +449,9 @@ void cpu_loop(CPUX86State *env)
   }
 }
 break;
+case EXCP_ATOMIC:
+  

Re: [Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC

2016-09-08 Thread Alex Bennée

Richard Henderson  writes:

> When we cannot emulate an atomic operation within a parallel
> context, this exception allows us to stop the world and try
> again in a serial context.
>
> Signed-off-by: Richard Henderson 
> ---
>  cpu-exec-common.c   |  6 +
>  cpu-exec.c  | 23 +++
>  cpus.c  |  6 +
>  include/exec/cpu-all.h  |  1 +
>  include/exec/exec-all.h |  1 +
>  include/qemu-common.h   |  1 +
>  linux-user/main.c   | 59 -
>  tcg/tcg.h   |  1 +
>  translate-all.c |  1 +
>  9 files changed, 98 insertions(+), 1 deletion(-)
>
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 0cb4ae6..767d9c6 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
> @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
>  }
>  siglongjmp(cpu->jmp_env, 1);
>  }
> +
> +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
> +{
> +cpu->exception_index = EXCP_ATOMIC;
> +cpu_loop_exit_restore(cpu, pc);
> +}
> diff --git a/cpu-exec.c b/cpu-exec.c
> index b840e1d..041f8b7 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>  }
>  #endif
>
> +void cpu_exec_step(CPUState *cpu)
> +{
> +CPUArchState *env = (CPUArchState *)cpu->env_ptr;
> +TranslationBlock *tb;
> +target_ulong cs_base, pc;
> +uint32_t flags;
> +bool old_tb_flushed;
> +
> +old_tb_flushed = cpu->tb_flushed;
> +cpu->tb_flushed = false;
> +
> +cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> +tb = tb_gen_code(cpu, pc, cs_base, flags,
> + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
> +tb->orig_tb = NULL;
> +cpu->tb_flushed |= old_tb_flushed;
> +/* execute the generated code */
> +trace_exec_tb_nocache(tb, pc);
> +cpu_tb_exec(cpu, tb);
> +tb_phys_invalidate(tb, -1);
> +tb_free(tb);
> +}
> +
>  struct tb_desc {
>  target_ulong pc;
>  target_ulong cs_base;
> diff --git a/cpus.c b/cpus.c
> index 84c3520..a01bbbd 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1575,6 +1575,12 @@ static void tcg_exec_all(void)
>  if (r == EXCP_DEBUG) {
>  cpu_handle_guest_debug(cpu);
>  break;
> +} else if (r == EXCP_ATOMIC) {
> +/* ??? When we begin running cpus in parallel, we should
> +   stop all cpus, clear parallel_cpus, and interpret a
> +   single insn with cpu_exec_step.  In the meantime,
> +   we should never get here.  */
> +abort();

Pranith has been hitting this abort in the latest merged tree with MTTCG,
but I'm a little unclear how it got here. So is the plan that the MTTCG
thread function should do a step_atomic à la user mode, and that we'll never
get here in the single-threaded case?

>  }
>  } else if (cpu->stop || cpu->stopped) {
>  if (cpu->unplug) {
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 8007abb..d2aac4b 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -31,6 +31,7 @@
>  #define EXCP_DEBUG  0x10002 /* cpu stopped after a breakpoint or singlestep */
>  #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */
>  #define EXCP_YIELD  0x10004 /* cpu wants to yield timeslice to another */
> +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */
>
>  /* some important defines:
>   *
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index c1f59fa..ec72c5a 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -59,6 +59,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>  void cpu_exec_init(CPUState *cpu, Error **errp);
>  void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
>  void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
> +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
>
>  #if !defined(CONFIG_USER_ONLY)
>  void cpu_reloading_memory_map(void);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index 1f2cb94..77e379d 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -76,6 +76,7 @@ void tcg_exec_init(unsigned long tb_size);
>  bool tcg_enabled(void);
>
>  void cpu_exec_init_all(void);
> +void cpu_exec_step(CPUState *cpu);
>
>  /**
>   * Sends a (part of) iovec down a socket, yielding when the socket is full, or
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 78d8d04..54df300 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -179,13 +179,25 @@ static inline void start_exclusive(void)
>  }
>
>  /* Finish an exclusive operation.  */
> -static inline void __attribute__((unused)) end_exclusive(void)
> +static inline void end_exclusive(void)
>  {
>  pending_cpus = 0;
>  pthread_cond_broadcast(&exclusive_resume);
>  pthread_mutex_unl

Re: [Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC

2016-09-08 Thread Richard Henderson
On 09/08/2016 01:38 AM, Alex Bennée wrote:
>> +} else if (r == EXCP_ATOMIC) {
>> +/* ??? When we begin running cpus in parallel, we should
>> +   stop all cpus, clear parallel_cpus, and interpret a
>> +   single insn with cpu_exec_step.  In the meantime,
>> +   we should never get here.  */
>> +abort();
> Pranith has been hitting this abort in the latest merged tree with MTTCG
> but I'm a little unclear how it got here. So is the plan the MTTCG
> thread function should do a step_atomic a-la user mode but we'll never
> get here in the single threaded case?
> 

Yes, that's the plan.  I guess I could have filled in that blank, but I see
that I haven't even done that in the v3 patchset.


r~