Paolo Bonzini <pbonz...@redhat.com> writes: > On 10/08/2015 17:27, fred.kon...@greensocs.com wrote: >> From: KONRAD Frederic <fred.kon...@greensocs.com> >> >> Instead of doing the jump cache invalidation directly in tb_invalidate delay >> it >> after the exit so we don't have an other CPU trying to execute the code being >> invalidated. >> >> Signed-off-by: KONRAD Frederic <fred.kon...@greensocs.com> >> --- >> translate-all.c | 61 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 59 insertions(+), 2 deletions(-) > > If you take the easy way and avoid the optimizations in patch 7, this is > not necessary: tb_find_fast and tb_add_jump are only called from within > tb_lock, so all of tb_jmp_cache/jmp_first/jmp_next are protected by tb_lock. > > Let's get everything in and then optimize; the order should be: > > - Alvise's LL/SC implementation > > - conversion of atomics to LL/SC for all front-ends > > - the main MTTCG series, reusing the locking already in-place for > user-mode emulation (with some audit...)
- including dropping the cmpxchg fix and including Alvise's MTTCG aware patches that build on top of LL/SC work. > > - any further push-downs of tb_lock > > Paolo > >> diff --git a/translate-all.c b/translate-all.c >> index 954c67a..fc5162a 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -62,6 +62,7 @@ >> #include "translate-all.h" >> #include "qemu/bitmap.h" >> #include "qemu/timer.h" >> +#include "sysemu/cpus.h" >> >> //#define DEBUG_TB_INVALIDATE >> //#define DEBUG_FLUSH >> @@ -967,14 +968,58 @@ static inline void tb_reset_jump(TranslationBlock *tb, >> int n) >> tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + >> tb->tb_next_offset[n])); >> } >> >> +struct CPUDiscardTBParams { >> + CPUState *cpu; >> + TranslationBlock *tb; >> +}; >> + >> +static void cpu_discard_tb_from_jmp_cache(void *opaque) >> +{ >> + unsigned int h; >> + struct CPUDiscardTBParams *params = opaque; >> + >> + h = tb_jmp_cache_hash_func(params->tb->pc); >> + if (params->cpu->tb_jmp_cache[h] == params->tb) { >> + params->cpu->tb_jmp_cache[h] = NULL; >> + } >> + >> + g_free(opaque); >> +} >> + >> +static void tb_invalidate_jmp_remove(void *opaque) >> +{ >> + TranslationBlock *tb = opaque; >> + TranslationBlock *tb1, *tb2; >> + unsigned int n1; >> + >> + /* suppress this TB from the two jump lists */ >> + tb_jmp_remove(tb, 0); >> + tb_jmp_remove(tb, 1); >> + >> + /* suppress any remaining jumps to this TB */ >> + tb1 = tb->jmp_first; >> + for (;;) { >> + n1 = (uintptr_t)tb1 & 3; >> + if (n1 == 2) { >> + break; >> + } >> + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); >> + tb2 = tb1->jmp_next[n1]; >> + tb_reset_jump(tb1, n1); >> + tb1->jmp_next[n1] = NULL; >> + tb1 = tb2; >> + } >> + tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ >> +} >> + >> /* invalidate one TB */ >> void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) >> { >> CPUState *cpu; >> PageDesc *p; >> - unsigned int h, n1; >> + unsigned int h; >> tb_page_addr_t phys_pc; >> - TranslationBlock *tb1, *tb2; >> + struct CPUDiscardTBParams *params; >> >> tb_lock(); >> >> @@ -997,6 +1042,9 @@ void tb_phys_invalidate(TranslationBlock *tb, >> tb_page_addr_t page_addr) >> >> tcg_ctx.tb_ctx.tb_invalidated_flag = 1; >> >> +#if 0 /*MTTCG*/ >> + TranslationBlock *tb1, *tb2; >> + unsigned int n1; >> /* remove the TB from the hash list */ >> h = tb_jmp_cache_hash_func(tb->pc); >> CPU_FOREACH(cpu) { >> @@ -1023,6 +1071,15 @@ void tb_phys_invalidate(TranslationBlock *tb, >> tb_page_addr_t page_addr) >> tb1 = tb2; >> } >> tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ >> +#else >> + CPU_FOREACH(cpu) { >> + params = g_malloc(sizeof(struct CPUDiscardTBParams)); >> + params->cpu = cpu; >> + params->tb = tb; >> + async_run_on_cpu(cpu, cpu_discard_tb_from_jmp_cache, params); >> + } >> + async_run_safe_work_on_cpu(first_cpu, tb_invalidate_jmp_remove, tb); >> +#endif /* MTTCG */ >> >> tcg_ctx.tb_ctx.tb_phys_invalidate_count++; >> tb_unlock(); >> -- Alex Bennée