Hello, I am trying to count the number of barrier instructions (dmb) that are executed in a multi-threaded ARM executable. I am running the executable using QEMU user mode with the following patch applied.
Basically, I created two counters in the ARM CPU state and increment them by generating a TCG instruction whenever a barrier instruction is translated. I am doing something similar for counting the total number of instructions executed. The problem I am facing is that this seems to crash when run with a multi-threaded executable. Also, the statistics gathered are not really accurate. Is there something obviously wrong with what I am trying to do? Any help is highly appreciated. Thanks! --- linux-user/main.c | 10 +++++++++- target-arm/cpu.h | 2 ++ target-arm/translate.c | 9 +++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/linux-user/main.c b/linux-user/main.c index b453a39..7984027 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -816,7 +816,15 @@ void cpu_loop(CPUARMState *env) break; } } else { - env->regs[0] = do_syscall(env, + // print stats on exit + if (n == 248) { + FILE *icount_file = fopen("inscount.out", "w"); + unsigned long total = (unsigned long)env->fence_count; + unsigned long icount = (unsigned long)env->insn_count; + fprintf(icount_file, "%lu, %lu, %f\n", icount, total, total * 1000.0/icount); + fclose(icount_file); + } + env->regs[0] = do_syscall(env, n, env->regs[0], env->regs[1], diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 369d472..be38574 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -304,6 +304,8 @@ typedef struct CPUARMState { uint64_t exclusive_test; uint32_t exclusive_info; #endif + uint32_t fence_count; + uint32_t insn_count; /* iwMMXt coprocessor state. */ struct { diff --git a/target-arm/translate.c b/target-arm/translate.c index cf4e767..4d4ceb1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -68,6 +68,8 @@ static TCGv_i64 cpu_exclusive_val; static TCGv_i64 cpu_exclusive_test; static TCGv_i32 cpu_exclusive_info; #endif +static TCGv_i32 cpu_fence_count; +static TCGv_i32 cpu_insn_count; /* FIXME: These should be removed. 
*/ static TCGv_i32 cpu_F0s, cpu_F1s; @@ -106,6 +108,10 @@ void arm_translate_init(void) cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, exclusive_info), "exclusive_info"); #endif + cpu_fence_count = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUARMState, fence_count), "fence_count"); + cpu_insn_count = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUARMState, insn_count), "insn_count"); a64_translate_init(); } @@ -7568,6 +7574,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) case 5: /* dmb */ case 6: /* isb */ ARCH(7); + tcg_gen_add_i32(cpu_fence_count, cpu_fence_count, 1); /* We don't emulate caches so these are a no-op. */ return; default: @@ -9740,6 +9747,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw case 4: /* dsb */ case 5: /* dmb */ case 6: /* isb */ + tcg_gen_add_i32(cpu_fence_count, cpu_fence_count, 1); /* These execute as NOPs. */ break; default: @@ -11022,6 +11030,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, tcg_gen_debug_insn_start(dc->pc); } + tcg_gen_add_i32(cpu_insn_count, cpu_insn_count, 1); if (dc->thumb) { disas_thumb_insn(env, dc); if (dc->condexec_mask) { -- 1.9.1