When the tb_flush removes a block and it's recreated, this shouldn't be creating a new block but using the one that is found by:
lookup_result = g_list_find_custom(tb_ctx.tb_statistics, new_stats, statistics_cmp); So the tb_statisticics will be reused and we could just add this regen counter in the if statement: if (lookup_result) isn't that correct? Vanderson M. Rosario On Tue, Jun 25, 2019 at 1:28 PM Alex Bennée <alex.ben...@linaro.org> wrote: > > Alex Bennée <alex.ben...@linaro.org> writes: > > > vandersonmr <vanderson...@gmail.com> writes: > > > >> We want to store statistics for each TB even after flushes. > >> We do not want to modify or grow the TB struct. > >> So we create a new struct to contain this statistics and > >> link it to each TB while they are created. > >> > >> Signed-off-by: Vanderson M. do Rosario <vanderson...@gmail.com> > >> --- > >> accel/tcg/translate-all.c | 40 +++++++++++++++++++++++++++++++++++++++ > >> include/exec/exec-all.h | 21 ++++++++++++++++++++ > >> include/exec/tb-context.h | 1 + > >> include/qemu/log.h | 1 + > >> 4 files changed, 63 insertions(+) > >> > >> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > >> index 5d1e08b169..f7e99f90e0 100644 > >> --- a/accel/tcg/translate-all.c > >> +++ b/accel/tcg/translate-all.c > >> @@ -1118,6 +1118,18 @@ static inline void code_gen_alloc(size_t tb_size) > >> } > >> } > >> > >> +static gint statistics_cmp(gconstpointer p1, gconstpointer p2) > >> +{ > >> + const TBStatistics *a = (TBStatistics *) p1; > >> + const TBStatistics *b = (TBStatistics *) p2; > >> + > >> + return (a->pc == b->pc && > >> + a->cs_base == b->cs_base && > >> + a->flags == b->flags && > >> + a->page_addr[0] == b->page_addr[0] && > >> + a->page_addr[1] == b->page_addr[1]) ? 0 : 1; > >> +} > >> + > > > > There are a bunch of white space errors that need fixing up as you > > probably already know from patchew ;-) > > > >> static bool tb_cmp(const void *ap, const void *bp) > >> { > >> const TranslationBlock *a = ap; > >> @@ -1586,6 +1598,29 @@ static inline void tb_page_add(PageDesc *p, > TranslationBlock *tb, > >> #endif > >> } > >> > >> +static void tb_insert_statistics_structure(TranslationBlock *tb) { > >> + TBStatistics *new_stats = g_new0(TBStatistics, 1); > >> + new_stats->pc = tb->pc; > >> + new_stats->cs_base = tb->cs_base; > >> + new_stats->flags = tb->flags; > >> + new_stats->page_addr[0] = tb->page_addr[0]; > >> + new_stats->page_addr[1] = tb->page_addr[1]; > >> + > >> + GList *lookup_result = g_list_find_custom(tb_ctx.tb_statistics, > new_stats, statistics_cmp); > >> + > >> + if (lookup_result) { > >> + /* If there is already a TBStatistic for this TB from a > previous flush > >> + * then just make the new TB point to the older TBStatistic > >> + */ > >> + free(new_stats); > >> + tb->tb_stats = lookup_result->data; > >> + } else { > >> + /* If not, then points to the new tb_statistics and add it > to the hash */ > >> + tb->tb_stats = new_stats; > >> + tb_ctx.tb_statistics = g_list_prepend(tb_ctx.tb_statistics, > >> new_stats); > > > > This is going to race as nothing prevents two threads attempting to > > insert records at the same time. > > > >> + } > >> +} > >> + > >> /* add a new TB and link it to the physical page tables. phys_page2 is > >> * (-1) to indicate that only one page contains the TB. > >> * > >> @@ -1636,6 +1671,11 @@ tb_link_page(TranslationBlock *tb, > tb_page_addr_t phys_pc, > >> void *existing_tb = NULL; > >> uint32_t h; > >> > >> + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) { > >> + /* create and link to its TB a structure to store > statistics */ > >> + tb_insert_statistics_structure(tb); > >> + } > >> + > >> /* add in the hash table */ > >> h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & > CF_HASH_MASK, > >> tb->trace_vcpu_dstate); > > > > Perhaps we could just have a second qht array which allows for > > "unlocked" insertion of record. You could take advantage of the fact the > > hash would be the same: > > > > h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & > CF_HASH_MASK, > > tb->trace_vcpu_dstate); > > qht_insert(&tb_ctx.htable, tb, h, &existing_tb); > > > > /* remove TB from the page(s) if we couldn't insert it */ > > if (unlikely(existing_tb)) { > > ... > > tb = existing_tb; > > } else if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) { > > TBStatistics *new_stats = g_new0(TBStatistics, 1); > > bool ok; > > new_stats->pc = tb->pc; > > new_stats->cs_base = tb->cs_base; > > new_stats->flags = tb->flags; > > new_stats->page_addr[0] = tb->page_addr[0]; > > new_stats->page_addr[1] = tb->page_addr[1]; > > ok = qht_insert(&tb_ctx.tb_stats, new_stats, h, NULL); > > /* > > * This should never fail as we succeeded in inserting the > > * TB itself which means we haven't seen this combination > yet. > > */ > > g_assert(ok); > > } > > > > It would also avoid the clunkiness of having to allocate and then > > freeing an unused structure. > > > Actually thinking on this we still have to handle it. We may have > tb_flushed away a translation and just be regenerating the same block. > As TBStatistics should transcend tb_flush events we need to re-use it's > structure. > > It would be worth counting the regens as well so we can see blocks that > keep getting re-translated after each flush. > > > > >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > >> index 16034ee651..359100ef3b 100644 > >> --- a/include/exec/exec-all.h > >> +++ b/include/exec/exec-all.h > >> @@ -324,6 +324,24 @@ static inline void > tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, > >> #define CODE_GEN_AVG_BLOCK_SIZE 150 > >> #endif > >> > >> +typedef struct TBStatistics TBStatistics; > >> + > >> +/* > >> + * This struct stores statistics such as execution count of the > TranslationBlocks. > >> + * Each TB has its own TBStatistics. TBStatistics is suppose to live > even after > >> + * flushes. > >> + */ > >> +struct TBStatistics { > >> + target_ulong pc; > >> + target_ulong cs_base; > >> + uint32_t flags; > >> + tb_page_addr_t page_addr[2]; > >> + > >> + // total number of times that the related TB have being executed > > > > No c++ style comments > > > >> + uint32_t exec_count; > >> + uint32_t exec_count_overflows; > >> +}; > >> + > >> /* > >> * Translation Cache-related fields of a TB. > >> * This struct exists just for convenience; we keep track of TB's in a > binary > >> @@ -403,6 +421,9 @@ struct TranslationBlock { > >> uintptr_t jmp_list_head; > >> uintptr_t jmp_list_next[2]; > >> uintptr_t jmp_dest[2]; > >> + > >> + // Pointer to a struct where statistics from the TB is stored > > > > No c++ style comments > > > >> + TBStatistics *tb_stats; > >> }; > >> > >> extern bool parallel_cpus; > >> diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h > >> index feb585e0a7..a78ce92e60 100644 > >> --- a/include/exec/tb-context.h > >> +++ b/include/exec/tb-context.h > >> @@ -35,6 +35,7 @@ struct TBContext { > >> > >> /* statistics */ > >> unsigned tb_flush_count; > >> + GList *tb_statistics; > >> }; > >> > >> extern TBContext tb_ctx; > >> diff --git a/include/qemu/log.h b/include/qemu/log.h > >> index b097a6cae1..2fca65dd01 100644 > >> --- a/include/qemu/log.h > >> +++ b/include/qemu/log.h > >> @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void) > >> /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */ > >> #define CPU_LOG_TB_OP_IND (1 << 16) > >> #define CPU_LOG_TB_FPU (1 << 17) > >> +#define CPU_LOG_HOT_TBS (1 << 18) > >> > >> /* Lock output for a series of related logs. Since this is not needed > >> * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we > > > -- > Alex Bennée >