On 13/08/2015 14:51, Frederic Konrad wrote: >> diff --git a/include/qom/cpu.h b/include/qom/cpu.h >> index 77bbff2..56b1f4d 100644 >> --- a/include/qom/cpu.h >> +++ b/include/qom/cpu.h >> @@ -285,7 +285,10 @@ struct CPUState { >> void *env_ptr; /* CPUArchState */ >> struct TranslationBlock *current_tb; >> + >> + /* Protected by tb_lock. */ >> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > This is temporary as a first step?
Yes, I've now seen that tb_lock has huge contention in tb_find_fast. :) I've now extracted parts of your patch "tcg: protect TBContext with tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock critical section" that also applies to user-mode emulation. That way I get good scalability on Dhrystone, same as with your branch. Do you agree with the first 10 patches as a first step towards upstreaming the MTTCG work? Paolo >> + >> struct GDBRegisterState *gdb_regs; >> int gdb_num_regs; >> int gdb_num_g_regs; >> diff --git a/tcg/tcg.h b/tcg/tcg.h >> index 0ae648f..a2cad31 100644 >> --- a/tcg/tcg.h >> +++ b/tcg/tcg.h >> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) >> /* pool based memory allocation */ >> +/* tb_lock must be held for tcg_malloc_internal. */ >> void *tcg_malloc_internal(TCGContext *s, int size); >> void tcg_pool_reset(TCGContext *s); >> void tcg_pool_delete(TCGContext *s); >> @@ -598,6 +599,7 @@ void tb_lock(void); >> void tb_unlock(void); >> void tb_lock_reset(void); >> +/* Called with tb_lock held. */ >> static inline void *tcg_malloc(int size) >> { >> TCGContext *s = &tcg_ctx; >> diff --git a/translate-all.c b/translate-all.c >> index edb9cb1..17d3cd1 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, >> TranslationBlock *tb, int *gen_code_size_ptr >> } >> /* The cpu state corresponding to 'searched_pc' is restored. >> + * Called with tb_lock held. >> */ >> static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock >> *tb, >> uintptr_t searched_pc) >> @@ -424,6 +425,7 @@ static void page_init(void) >> } >> /* If alloc=1: >> + * Called with tb_lock held for system emulation. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) >> @@ -734,8 +736,12 @@ bool tcg_enabled(void) >> return tcg_ctx.code_gen_buffer != NULL; >> } >> -/* Allocate a new translation block. 
Flush the translation buffer if >> - too many translation blocks or too much generated code. */ >> +/* >> + * Allocate a new translation block. Flush the translation buffer if >> + * too many translation blocks or too much generated code. >> + * >> + * Called with tb_lock held. >> + */ >> static TranslationBlock *tb_alloc(target_ulong pc) >> { > There is the famous tb_flush which needs to be called with tb_lock held > as well. > There are several places where it's called. > >> TranslationBlock *tb; >> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) >> return tb; >> } >> +/* Called with tb_lock held. */ >> void tb_free(TranslationBlock *tb) >> { >> /* In practice this is mostly used for single use temporary TB >> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong >> address) >> } >> } >> -/* verify that all the pages have correct rights for code */ >> +/* verify that all the pages have correct rights for code >> + * >> + * Called with tb_lock held. >> + */ >> static void tb_page_check(void) >> { >> TranslationBlock *tb; >> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock >> *tb, int n) >> tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + >> tb->tb_next_offset[n])); >> } >> -/* invalidate one TB */ >> +/* invalidate one TB >> + * >> + * Called with tb_lock held. >> + */ >> void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) >> { >> CPUState *cpu; >> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) >> } >> #endif >> -/* Called with mmap_lock held for user mode emulation. */ >> +/* Called with tb_lock held, and mmap_lock too for user mode >> emulation. 
*/ >> TranslationBlock *tb_gen_code(CPUState *cpu, >> target_ulong pc, target_ulong cs_base, >> int flags, int cflags) >> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t >> start, int len) >> } >> if (!p->code_bitmap && >> ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { >> - /* build code bitmap */ >> + /* build code bitmap. FIXME: writes should be protected by >> + * tb_lock, reads by tb_lock or RCU. >> + */ >> build_page_bitmap(p); >> } >> if (p->code_bitmap) { >> @@ -1324,6 +1339,7 @@ static void >> tb_invalidate_phys_page(tb_page_addr_t addr, >> /* add the tb in the target page and protect it if necessary >> * >> + * Called with tb_lock held. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static inline void tb_alloc_page(TranslationBlock *tb, >> @@ -1383,6 +1399,7 @@ static inline void >> tb_alloc_page(TranslationBlock *tb, >> /* add a new TB and link it to the physical page tables. phys_page2 is >> * (-1) to indicate that only one page contains the TB. >> * >> + * Called with tb_lock held. >> * Called with mmap_lock held for user-mode emulation. >> */ >> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, >> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, >> tb_page_addr_t phys_pc, >> } >> /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < >> - tb[1].tc_ptr. Return NULL if not found */ >> + * tb[1].tc_ptr. Return NULL if not found >> + * >> + * Called with tb_lock held. >> + */ >> static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >> { >> int m_min, m_max, m; >> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, >> hwaddr addr) >> } >> #endif /* !defined(CONFIG_USER_ONLY) */ >> +/* Called with tb_lock held. */ >> void tb_check_watchpoint(CPUState *cpu) >> { >> TranslationBlock *tb; >