vandersonmr <vanderson...@gmail.com> writes:
> If a TB has a TBS (TBStatistics) with the TB_JIT_STATS > enabled then we collect statistics of its translation > processes and code translation. To collect the number > of host instructions we used a modified version of the > disas function to pass through the whole code without > printing anything (fake_fprintf) but counting the number > of instructions. > > Signed-off-by: vandersonmr <vanderson...@gmail.com> > --- > accel/tcg/translate-all.c | 18 +++++++ > accel/tcg/translator.c | 5 ++ > disas.c | 108 ++++++++++++++++++++++++++++++++++++++ > include/disas/disas.h | 1 + > include/exec/tb-stats.h | 14 +++++ > include/qemu/log.h | 1 + > tcg/tcg.c | 23 ++++++++ > tcg/tcg.h | 2 + > 8 files changed, 172 insertions(+) > > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 7497dae508..3a47ac6f2c 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1793,6 +1793,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > } > } > > + if (flag & TB_JIT_STATS) { > + tb->tb_stats->stats_enabled |= TB_JIT_STATS; > + atomic_inc(&tb->tb_stats->translations.total); > + } > } else { > tb->tb_stats = NULL; > } > @@ -1870,6 +1874,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > atomic_set(&prof->search_out_len, prof->search_out_len + search_size); > #endif > > + if (tb_stats_enabled(tb, TB_JIT_STATS)) { > + size_t code_size = gen_code_size; > + if (tcg_ctx->data_gen_ptr) { > + code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; > + } > + > + atomic_add(&tb->tb_stats->code.num_host_inst, > + get_num_insts(tb->tc.ptr, code_size)); This is what is causing my crashes. I think invoking the disassembler stuff is too clunky here. Maybe we should just change the counter to num_host_insn_byte, the ratio will still be interesting but it saves the complication of re-counting. Ideally the we'd like the core tcg code to tell us how many host instructions it emitted for each tcg_op but I suspect that involves heavier surgery. > + } > + > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && > qemu_log_in_addr_range(tb->pc)) { > @@ -1927,6 +1942,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > phys_page2 = -1; > if ((pc & TARGET_PAGE_MASK) != virt_page2) { > phys_page2 = get_page_addr_code(env, virt_page2); > + if (tb_stats_enabled(tb, TB_JIT_STATS)) { > + atomic_inc(&tb->tb_stats->translations.spanning); > + } > } > /* > * No explicit memory barrier is required -- tb_link_page() makes the > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c > index 396a11e828..03c00bdb1b 100644 > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -117,6 +117,11 @@ void translator_loop(const TranslatorOps *ops, > DisasContextBase *db, > db->tb->size = db->pc_next - db->pc_first; > db->tb->icount = db->num_insns; > > + if (tb_stats_enabled(tb, TB_JIT_STATS)) { > + atomic_add(&db->tb->tb_stats->code.num_guest_inst, db->num_insns); > + } > + > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) > && qemu_log_in_addr_range(db->pc_first)) { > diff --git a/disas.c b/disas.c > index 3e2bfa572b..5dec754992 100644 > --- a/disas.c > +++ b/disas.c > @@ -475,6 +475,114 @@ void target_disas(FILE *out, CPUState *cpu, > target_ulong code, > } > } > > +static int fprintf_fake(struct _IO_FILE *a, const char *b, ...) > +{ > + return 1; > +} > + > +/* > + * This is a work around to get the number of host instructions with > + * a small effort. It reuses the disas function with a fake printf to > + * print nothing but count the number of instructions. > + * > + */ > +unsigned get_num_insts(void *code, unsigned long size) > +{ > + uintptr_t pc; > + int count; > + CPUDebug s; > + int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL; > + > + INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake); > + s.info.print_address_func = generic_print_host_address; > + > + s.info.buffer = code; > + s.info.buffer_vma = (uintptr_t)code; > + s.info.buffer_length = size; > + s.info.cap_arch = -1; > + s.info.cap_mode = 0; > + s.info.cap_insn_unit = 4; > + s.info.cap_insn_split = 4; > + > +#ifdef HOST_WORDS_BIGENDIAN > + s.info.endian = BFD_ENDIAN_BIG; > +#else > + s.info.endian = BFD_ENDIAN_LITTLE; > +#endif > +#if defined(CONFIG_TCG_INTERPRETER) > + print_insn = print_insn_tci; > +#elif defined(__i386__) > + s.info.mach = bfd_mach_i386_i386; > + print_insn = print_insn_i386; > + s.info.cap_arch = CS_ARCH_X86; > + s.info.cap_mode = CS_MODE_32; > + s.info.cap_insn_unit = 1; > + s.info.cap_insn_split = 8; > +#elif defined(__x86_64__) > + s.info.mach = bfd_mach_x86_64; > + print_insn = print_insn_i386; > + s.info.cap_arch = CS_ARCH_X86; > + s.info.cap_mode = CS_MODE_64; > + s.info.cap_insn_unit = 1; > + s.info.cap_insn_split = 8; > +#elif defined(_ARCH_PPC) > + s.info.disassembler_options = (char *)"any"; > + print_insn = print_insn_ppc; > + s.info.cap_arch = CS_ARCH_PPC; > +# ifdef _ARCH_PPC64 > + s.info.cap_mode = CS_MODE_64; > +# endif > +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) > +#if defined(_ILP32) || (__riscv_xlen == 32) > + print_insn = print_insn_riscv32; > +#elif defined(_LP64) > + print_insn = print_insn_riscv64; > +#else > +#error unsupported RISC-V ABI > +#endif > +#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS) > + print_insn = print_insn_arm_a64; > + s.info.cap_arch = CS_ARCH_ARM64; > +#elif defined(__alpha__) > + print_insn = print_insn_alpha; > +#elif defined(__sparc__) > + print_insn = print_insn_sparc; > + s.info.mach = bfd_mach_sparc_v9b; > +#elif defined(__arm__) > + print_insn = print_insn_arm; > + s.info.cap_arch = CS_ARCH_ARM; > + /* TCG only generates code for arm mode. */ > +#elif defined(__MIPSEB__) > + print_insn = print_insn_big_mips; > +#elif defined(__MIPSEL__) > + print_insn = print_insn_little_mips; > +#elif defined(__m68k__) > + print_insn = print_insn_m68k; > +#elif defined(__s390__) > + print_insn = print_insn_s390; > +#elif defined(__hppa__) > + print_insn = print_insn_hppa; > +#endif > + > + if (print_insn == NULL) { > + print_insn = print_insn_od_host; > + } > + > + s.info.fprintf_func = fprintf_fake; > + unsigned num_insts = 0; > + for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) { > + num_insts++; > + count = print_insn(pc, &s.info); > + if (count < 0) { > + break; > + } > + } > + return num_insts; > +} > + > + > + > + > /* Disassemble this for me please... (debugging). */ > void disas(FILE *out, void *code, unsigned long size) > { > diff --git a/include/disas/disas.h b/include/disas/disas.h > index 15da511f49..9797ae7cfa 100644 > --- a/include/disas/disas.h > +++ b/include/disas/disas.h > @@ -7,6 +7,7 @@ > > /* Disassemble this for me please... (debugging). */ > void disas(FILE *out, void *code, unsigned long size); > +unsigned get_num_insts(void *code, unsigned long size); > void target_disas(FILE *out, CPUState *cpu, target_ulong code, > target_ulong size); > > diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h > index ee1e8de0d3..6584b78c4a 100644 > --- a/include/exec/tb-stats.h > +++ b/include/exec/tb-stats.h > @@ -33,6 +33,20 @@ struct TBStatistics { > unsigned long total; > unsigned long atomic; > } executions; > + > + struct { > + unsigned num_guest_inst; > + unsigned num_host_inst; > + unsigned num_tcg_ops; > + unsigned num_tcg_ops_opt; > + unsigned spills; > + } code; > + > + struct { > + unsigned long total; > + unsigned long uncached; > + unsigned long spanning; > + } translations; > }; > > bool tb_stats_cmp(const void *ap, const void *bp); > diff --git a/include/qemu/log.h b/include/qemu/log.h > index b213411836..8cdfc268ca 100644 > --- a/include/qemu/log.h > +++ b/include/qemu/log.h > @@ -131,6 +131,7 @@ void qemu_log_close(void); > > #define TB_NOTHING 0 > #define TB_EXEC_STATS (1 << 1) > +#define TB_JIT_STATS (1 << 2) > > void enable_collect_tb_stats(void); > void disable_collect_tb_stats(void); > diff --git a/tcg/tcg.c b/tcg/tcg.c > index be2c33c400..446e3d1708 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -3126,6 +3126,11 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, > TCGRegSet allocated_regs, > case TEMP_VAL_REG: > tcg_out_st(s, ts->type, ts->reg, > ts->mem_base->reg, ts->mem_offset); > + > + /* Count number of spills */ > + if (tb_stats_enabled(s->current_tb, TB_JIT_STATS)) { > + atomic_inc(&s->current_tb->tb_stats->code.spills); > + } > break; > > case TEMP_VAL_MEM: > @@ -3997,6 +4002,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > int i, num_insns; > TCGOp *op; > > + s->current_tb = tb; > + > #ifdef CONFIG_PROFILER > { > int n = 0; > @@ -4028,6 +4035,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > } > #endif > > + if (tb_stats_enabled(tb, TB_JIT_STATS)) { > + int n = 0; > + QTAILQ_FOREACH(op, &s->ops, link) { > + n++; > + } > + atomic_add(&tb->tb_stats->code.num_tcg_ops, n); > + } > + > #ifdef CONFIG_DEBUG_TCG > /* Ensure all labels referenced have been emitted. */ > { > @@ -4094,6 +4109,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > } > #endif > > + if (tb_stats_enabled(tb, TB_JIT_STATS)) { > + int n = 0; > + QTAILQ_FOREACH(op, &s->ops, link) { > + n++; > + } > + atomic_add(&tb->tb_stats->code.num_tcg_ops_opt, n); > + } > + > tcg_reg_alloc_start(s); > > s->code_buf = tb->tc.ptr; > diff --git a/tcg/tcg.h b/tcg/tcg.h > index b411e17a28..bf6f3bcba3 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -738,6 +738,8 @@ struct TCGContext { > > uint16_t gen_insn_end_off[TCG_MAX_INSNS]; > target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; > + > + TranslationBlock *current_tb; > }; > > extern TCGContext tcg_init_ctx; Without the disas hack: Reviewed-by: Alex Bennée <alex.ben...@linaro.org> -- Alex Bennée