Re: [Qemu-devel] [PATCH v4 3/7] accel: collecting JIT statistics
vandersonmr writes: > If a TB has a TBS (TBStatistics) with the TB_JIT_STATS > enabled then we collect statistics of its translation > processes and code translation. To collect the number > of host instructions we used a modified version of the > disas function to pass through the whole code without > printing anything (fake_fprintf) but counting the number > of instructions. > > Signed-off-by: vandersonmr > --- > accel/tcg/translate-all.c | 18 +++ > accel/tcg/translator.c| 5 ++ > disas.c | 108 ++ > include/disas/disas.h | 1 + > include/exec/tb-stats.h | 14 + > include/qemu/log.h| 1 + > tcg/tcg.c | 23 > tcg/tcg.h | 2 + > 8 files changed, 172 insertions(+) > > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 7497dae508..3a47ac6f2c 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1793,6 +1793,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > } > } > > +if (flag & TB_JIT_STATS) { > +tb->tb_stats->stats_enabled |= TB_JIT_STATS; > +atomic_inc(>tb_stats->translations.total); > +} > } else { > tb->tb_stats = NULL; > } > @@ -1870,6 +1874,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > atomic_set(>search_out_len, prof->search_out_len + search_size); > #endif > > +if (tb_stats_enabled(tb, TB_JIT_STATS)) { > +size_t code_size = gen_code_size; > +if (tcg_ctx->data_gen_ptr) { > +code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; > +} > + > +atomic_add(>tb_stats->code.num_host_inst, > +get_num_insts(tb->tc.ptr, code_size)); This is what is causing my crashes. I think invoking the disassembler stuff is too clunky here. Maybe we should just change the counter to num_host_insn_byte; the ratio will still be interesting but it saves the complication of re-counting. Ideally we'd like the core tcg code to tell us how many host instructions it emitted for each tcg_op but I suspect that involves heavier surgery. 
> +} > + > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && > qemu_log_in_addr_range(tb->pc)) { > @@ -1927,6 +1942,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > phys_page2 = -1; > if ((pc & TARGET_PAGE_MASK) != virt_page2) { > phys_page2 = get_page_addr_code(env, virt_page2); > +if (tb_stats_enabled(tb, TB_JIT_STATS)) { > +atomic_inc(>tb_stats->translations.spanning); > +} > } > /* > * No explicit memory barrier is required -- tb_link_page() makes the > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c > index 396a11e828..03c00bdb1b 100644 > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -117,6 +117,11 @@ void translator_loop(const TranslatorOps *ops, > DisasContextBase *db, > db->tb->size = db->pc_next - db->pc_first; > db->tb->icount = db->num_insns; > > +if (tb_stats_enabled(tb, TB_JIT_STATS)) { > +atomic_add(>tb->tb_stats->code.num_guest_inst, db->num_insns); > +} > + > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) > && qemu_log_in_addr_range(db->pc_first)) { > diff --git a/disas.c b/disas.c > index 3e2bfa572b..5dec754992 100644 > --- a/disas.c > +++ b/disas.c > @@ -475,6 +475,114 @@ void target_disas(FILE *out, CPUState *cpu, > target_ulong code, > } > } > > +static int fprintf_fake(struct _IO_FILE *a, const char *b, ...) > +{ > +return 1; > +} > + > +/* > + * This is a work around to get the number of host instructions with > + * a small effort. It reuses the disas function with a fake printf to > + * print nothing but count the number of instructions. 
> + * > + */ > +unsigned get_num_insts(void *code, unsigned long size) > +{ > +uintptr_t pc; > +int count; > +CPUDebug s; > +int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL; > + > +INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake); > +s.info.print_address_func = generic_print_host_address; > + > +s.info.buffer = code; > +s.info.buffer_vma = (uintptr_t)code; > +s.info.buffer_length = size; > +s.info.cap_arch = -1; > +s.info.cap_mode = 0; > +s.info.cap_insn_unit = 4; > +s.info.cap_insn_split = 4; > + > +#ifdef HOST_WORDS_BIGENDIAN > +s.info.endian = BFD_ENDIAN_BIG; > +#else > +s.info.endian = BFD_ENDIAN_LITTLE; > +#endif > +#if defined(CONFIG_TCG_INTERPRETER) > +print_insn = print_insn_tci; > +#elif defined(__i386__) > +s.info.mach = bfd_mach_i386_i386; > +print_insn = print_insn_i386; > +s.info.cap_arch = CS_ARCH_X86; > +s.info.cap_mode = CS_MODE_32; > +s.info.cap_insn_unit = 1; > +s.info.cap_insn_split = 8; > +#elif defined(__x86_64__) > +s.info.mach = bfd_mach_x86_64; > +print_insn =
[Qemu-devel] [PATCH v4 3/7] accel: collecting JIT statistics
If a TB has a TBS (TBStatistics) with the TB_JIT_STATS enabled then we collect statistics of its translation processes and code translation. To collect the number of host instructions we used a modified version of the disas function to pass through the whole code without printing anything (fake_fprintf) but counting the number of instructions. Signed-off-by: vandersonmr --- accel/tcg/translate-all.c | 18 +++ accel/tcg/translator.c| 5 ++ disas.c | 108 ++ include/disas/disas.h | 1 + include/exec/tb-stats.h | 14 + include/qemu/log.h| 1 + tcg/tcg.c | 23 tcg/tcg.h | 2 + 8 files changed, 172 insertions(+) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 7497dae508..3a47ac6f2c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1793,6 +1793,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } } +if (flag & TB_JIT_STATS) { +tb->tb_stats->stats_enabled |= TB_JIT_STATS; +atomic_inc(>tb_stats->translations.total); +} } else { tb->tb_stats = NULL; } @@ -1870,6 +1874,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, atomic_set(>search_out_len, prof->search_out_len + search_size); #endif +if (tb_stats_enabled(tb, TB_JIT_STATS)) { +size_t code_size = gen_code_size; +if (tcg_ctx->data_gen_ptr) { +code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; +} + +atomic_add(>tb_stats->code.num_host_inst, +get_num_insts(tb->tc.ptr, code_size)); +} + + #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && qemu_log_in_addr_range(tb->pc)) { @@ -1927,6 +1942,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, phys_page2 = -1; if ((pc & TARGET_PAGE_MASK) != virt_page2) { phys_page2 = get_page_addr_code(env, virt_page2); +if (tb_stats_enabled(tb, TB_JIT_STATS)) { +atomic_inc(>tb_stats->translations.spanning); +} } /* * No explicit memory barrier is required -- tb_link_page() makes the diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 396a11e828..03c00bdb1b 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ 
-117,6 +117,11 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, db->tb->size = db->pc_next - db->pc_first; db->tb->icount = db->num_insns; +if (tb_stats_enabled(tb, TB_JIT_STATS)) { +atomic_add(>tb->tb_stats->code.num_guest_inst, db->num_insns); +} + + #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(db->pc_first)) { diff --git a/disas.c b/disas.c index 3e2bfa572b..5dec754992 100644 --- a/disas.c +++ b/disas.c @@ -475,6 +475,114 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code, } } +static int fprintf_fake(struct _IO_FILE *a, const char *b, ...) +{ +return 1; +} + +/* + * This is a work around to get the number of host instructions with + * a small effort. It reuses the disas function with a fake printf to + * print nothing but count the number of instructions. + * + */ +unsigned get_num_insts(void *code, unsigned long size) +{ +uintptr_t pc; +int count; +CPUDebug s; +int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL; + +INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake); +s.info.print_address_func = generic_print_host_address; + +s.info.buffer = code; +s.info.buffer_vma = (uintptr_t)code; +s.info.buffer_length = size; +s.info.cap_arch = -1; +s.info.cap_mode = 0; +s.info.cap_insn_unit = 4; +s.info.cap_insn_split = 4; + +#ifdef HOST_WORDS_BIGENDIAN +s.info.endian = BFD_ENDIAN_BIG; +#else +s.info.endian = BFD_ENDIAN_LITTLE; +#endif +#if defined(CONFIG_TCG_INTERPRETER) +print_insn = print_insn_tci; +#elif defined(__i386__) +s.info.mach = bfd_mach_i386_i386; +print_insn = print_insn_i386; +s.info.cap_arch = CS_ARCH_X86; +s.info.cap_mode = CS_MODE_32; +s.info.cap_insn_unit = 1; +s.info.cap_insn_split = 8; +#elif defined(__x86_64__) +s.info.mach = bfd_mach_x86_64; +print_insn = print_insn_i386; +s.info.cap_arch = CS_ARCH_X86; +s.info.cap_mode = CS_MODE_64; +s.info.cap_insn_unit = 1; +s.info.cap_insn_split = 8; +#elif defined(_ARCH_PPC) +s.info.disassembler_options = (char *)"any"; 
+print_insn = print_insn_ppc; +s.info.cap_arch = CS_ARCH_PPC; +# ifdef _ARCH_PPC64 +s.info.cap_mode = CS_MODE_64; +# endif +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) +#if defined(_ILP32) || (__riscv_xlen == 32) +print_insn = print_insn_riscv32; +#elif defined(_LP64) +print_insn = print_insn_riscv64; +#else +#error unsupported RISC-V ABI +#endif +#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS) +print_insn