From: Brian Cain <[email protected]> The PCYCLE register is available in system mode, but only increments when the SYSCFG.PCYCLEEN field is set.
The UPCYCLE register is available in user mode and we model it unconditionally in linux-user emulation, as if the system had enabled PCYCLEEN. For now, the model is very crudely counting the sum of instructions executed among vCPUs, regardless of how the instructions were actually scheduled. This is sufficient for demonstrating a rough level of activity but will be particularly misleading for benchmarks and performance tuning. We may decide to revisit this model in order to give a bit more fidelity, though without a cache model it would still be very far from accurate. Co-authored-by: Sid Manning <[email protected]> Signed-off-by: Brian Cain <[email protected]> --- target/hexagon/cpu.h | 5 +++-- target/hexagon/translate.h | 2 ++ target/hexagon/cpu.c | 14 ++++++++++++++ target/hexagon/cpu_helper.c | 32 ++++++++++++++++++++++++++++---- target/hexagon/translate.c | 21 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 6 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index ee2e5eeece2..da96b52bd2a 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -30,6 +30,7 @@ #include "cpu-qom.h" #include "exec/cpu-common.h" #include "exec/cpu-defs.h" +#include "exec/cpu-common.h" #include "hex_regs.h" #include "mmvec/mmvec.h" #include "hw/core/registerfields.h" @@ -38,8 +39,7 @@ #error "Hexagon does not support system emulation" #endif -#ifndef CONFIG_USER_ONLY -#endif +#include "reg_fields.h" #define NUM_PREGS 4 #define TOTAL_PER_THREAD_REGS 64 @@ -202,6 +202,7 @@ struct ArchCPU { FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) FIELD(TB_FLAGS, MMU_INDEX, 1, 3) +FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1) G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 7e528379db6..e7acbae9ffa 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -84,6 +84,8 @@ typedef struct DisasContext { TCGv new_pred_value[NUM_PREGS]; TCGv 
branch_taken; TCGv dczero_addr; + bool pcycle_enabled; + uint32_t num_cycles; } DisasContext; bool is_gather_store_insn(DisasContext *ctx); diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 20c4b82a970..1c8a188dec4 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -293,9 +293,23 @@ static TCGTBCPUState hexagon_get_tb_cpu_state(CPUState *cs) } #ifndef CONFIG_USER_ONLY + HexagonCPU *cpu = env_archcpu(env); + uint32_t syscfg = cpu->globalregs ? + hexagon_globalreg_read(cpu->globalregs, HEX_SREG_SYSCFG, + env->threadId) : 0; + + bool pcycle_enabled = extract32(syscfg, + reg_field_info[SYSCFG_PCYCLEEN].offset, + reg_field_info[SYSCFG_PCYCLEEN].width); + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, cpu_mmu_index(env_cpu(env), false)); + + if (pcycle_enabled) { + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1); + } #else + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true); hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX); #endif diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 4860e4a6ab0..729ffa47eed 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -33,17 +33,31 @@ uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index) uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + uint32_t ssr = env->t_sreg[HEX_SREG_SSR]; + if (!GET_SSR_FIELD(SSR_CE, ssr)) { + return 0; + } + uint64_t cycles = 0; + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread_env = cpu_env(cs); + cycles += thread_env->t_cycle_count; + } + HexagonCPU *cpu = env_archcpu(env); + uint64_t base = cpu->globalregs ? 
+ hexagon_globalreg_get_pcycle_base(cpu->globalregs) : 0; + return base + cycles; } uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env) { - g_assert_not_reached(); + return hexagon_get_sys_pcycle_count(env) >> 32; } uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env) { - g_assert_not_reached(); + return extract64(hexagon_get_sys_pcycle_count(env), 0, 32); } void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, @@ -60,7 +74,17 @@ void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + HexagonCPU *cpu = env_archcpu(env); + if (cpu->globalregs) { + hexagon_globalreg_set_pcycle_base(cpu->globalregs, cycles); + } + + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread_env = cpu_env(cs); + thread_env->t_cycle_count = 0; + } } void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 4df4226cbcb..e4d4dad8ffd 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -61,6 +61,7 @@ TCGv_i64 hex_store_val64[STORES_MAX]; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; +TCGv_i64 hex_cycle_count; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; @@ -128,6 +129,16 @@ static void gen_exception_raw(int excp) gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); } +#ifndef CONFIG_USER_ONLY +static inline void gen_pcycle_counters(DisasContext *ctx) +{ + if (ctx->pcycle_enabled) { + tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx->num_cycles); + ctx->num_cycles = 0; + } +} +#endif + static void gen_exec_counters(DisasContext *ctx) { tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], @@ -136,6 +147,10 @@ static void gen_exec_counters(DisasContext *ctx) hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); 
tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); + +#ifndef CONFIG_USER_ONLY + gen_pcycle_counters(ctx); +#endif } static bool use_goto_tb(DisasContext *ctx, target_ulong dest) @@ -821,6 +836,7 @@ static void gen_commit_hvx(DisasContext *ctx) } } +static const int PCYCLES_PER_PACKET = 3; static void update_exec_counters(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -840,6 +856,7 @@ static void update_exec_counters(DisasContext *ctx) } ctx->num_packets++; + ctx->num_cycles += PCYCLES_PER_PACKET; ctx->num_insns += num_real_insns; ctx->num_hvx_insns += num_hvx_insns; } @@ -989,11 +1006,13 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX); ctx->num_packets = 0; + ctx->num_cycles = 0; ctx->num_insns = 0; ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); ctx->short_circuit = hex_cpu->short_circuit; + ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED); } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1136,6 +1155,8 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); + hex_cycle_count = tcg_global_mem_new_i64(tcg_env, + offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count"); for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(tcg_env, -- 2.34.1
