Probe and commit vector stores (masked and scatter/gather) Log vector register writes Add the execution counters to the debug log Histogram instructions
Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Signed-off-by: Taylor Simpson <tsimp...@quicinc.com> --- target/hexagon/helper.h | 16 +++ target/hexagon/op_helper.c | 282 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 296 insertions(+), 2 deletions(-) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 89de2a3..c89aa4e 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -23,6 +23,8 @@ DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_2(commit_store, void, env, int) +DEF_HELPER_3(gather_store, void, env, i32, int) +DEF_HELPER_1(commit_hvx_stores, void, env) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) @@ -90,4 +92,18 @@ DEF_HELPER_4(sffms_lib, f32, env, f32, f32, f32) DEF_HELPER_3(dfmpyfix, f64, env, f64, f64) DEF_HELPER_4(dfmpyhh, f64, env, f64, f64, f64) +/* Histogram instructions */ +DEF_HELPER_1(vhist, void, env) +DEF_HELPER_1(vhistq, void, env) +DEF_HELPER_1(vwhist256, void, env) +DEF_HELPER_1(vwhist256q, void, env) +DEF_HELPER_1(vwhist256_sat, void, env) +DEF_HELPER_1(vwhist256q_sat, void, env) +DEF_HELPER_1(vwhist128, void, env) +DEF_HELPER_1(vwhist128q, void, env) +DEF_HELPER_2(vwhist128m, void, env, s32) +DEF_HELPER_2(vwhist128qm, void, env, s32) + DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int) +DEF_HELPER_2(probe_hvx_stores, void, env, int) +DEF_HELPER_3(probe_pkt_scalar_hvx_stores, void, env, int, int) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index af32de4..057baf9 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -27,6 +27,8 @@ #include "arch.h" #include "hex_arch_types.h" #include "fma_emu.h" +#include "mmvec/mmvec.h" +#include "mmvec/macros.h" #define SF_BIAS 127 #define SF_MANTBITS 23 @@ -164,6 +166,57 @@ void HELPER(commit_store)(CPUHexagonState *env, int slot_num) } } +void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot) +{ + mem_gather_store(env, addr, slot); +} + +void HELPER(commit_hvx_stores)(CPUHexagonState *env) +{ + uintptr_t ra = GETPC(); + int i; + + /* Normal (possibly masked) vector store */ + for (i = 0; i < VSTORES_MAX; i++) { + if (env->vstore_pending[i]) { + env->vstore_pending[i] = 0; + target_ulong va = env->vstore[i].va; + int size = env->vstore[i].size; + for (int j = 0; j < size; j++) { + if (test_bit(j, env->vstore[i].mask)) { + cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra); + } + } + } + } + + /* Scatter store */ + if (env->vtcm_pending) { + env->vtcm_pending = false; + if (env->vtcm_log.op) { + /* Need to perform the scatter read/modify/write at commit time */ + if (env->vtcm_log.op_size == 2) { + SCATTER_OP_WRITE_TO_MEM(uint16_t); + } else if (env->vtcm_log.op_size == 4) { + /* Word Scatter += */ + SCATTER_OP_WRITE_TO_MEM(uint32_t); + } else { + g_assert_not_reached(); + } + } else { + for (i = 0; i < sizeof(MMVector); i++) { + if (test_bit(i, env->vtcm_log.mask)) { + cpu_stb_data_ra(env, env->vtcm_log.va[i], + env->vtcm_log.data.ub[i], ra); + clear_bit(i, env->vtcm_log.mask); + env->vtcm_log.data.ub[i] = 0; + } + + } + } + } +} + static void print_store(CPUHexagonState *env, int slot) { if (!(env->slot_cancelled & (1 << slot))) { @@ -242,9 +295,10 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->next_PC); HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx ", insn = " TARGET_FMT_lx - "\n", + ", hvx = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_QEMU_PKT_CNT], - env->gpr[HEX_REG_QEMU_INSN_CNT]); + env->gpr[HEX_REG_QEMU_INSN_CNT], + env->gpr[HEX_REG_QEMU_HVX_CNT]); } @@ -393,6 +447,65 @@ void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx) probe_store(env, 0, mmu_idx); } +void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx) +{ + uintptr_t retaddr = GETPC(); + int i; + + /* Normal (possibly masked) vector store */ + for (i = 0; i < VSTORES_MAX; i++) { + if (env->vstore_pending[i]) { + target_ulong va = env->vstore[i].va; + int size = env->vstore[i].size; + for (int j = 0; j < size; j++) { + if (test_bit(j, env->vstore[i].mask)) { + probe_write(env, va + j, 1, mmu_idx, retaddr); + } + } + } + } + + /* Scatter store */ + if (env->vtcm_pending) { + if (env->vtcm_log.op) { + /* Need to perform the scatter read/modify/write at commit time */ + if (env->vtcm_log.op_size == 2) { + SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr); + } else if (env->vtcm_log.op_size == 4) { + /* Word Scatter += */ + SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr); + } else { + g_assert_not_reached(); + } + } else { + for (int i = 0; i < sizeof(MMVector); i++) { + if (test_bit(i, env->vtcm_log.mask)) { + probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr); + } + + } + } + } +} + +void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask, + int mmu_idx) +{ + bool has_st0 = (mask >> 0) & 1; + bool has_st1 = (mask >> 1) & 1; + bool has_hvx_stores = (mask >> 2) & 1; + + if (has_st0) { + probe_store(env, 0, mmu_idx); + } + if (has_st1) { + probe_store(env, 1, mmu_idx); + } + if (has_hvx_stores) { + HELPER(probe_hvx_stores)(env, mmu_idx); + } +} + /* * mem_noshuf * Section 5.5 of the Hexagon V67 Programmer's Reference Manual @@ -1181,6 +1294,171 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV, return RxxV; } +/* Histogram instructions */ + +void HELPER(vhist)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int lane = 0; lane < 8; lane++) { + for (int i = 0; i < sizeof(MMVector) / 8; ++i) { + unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i]; + unsigned char regno = value >> 3; + unsigned char element = value & 7; + + env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++; + } + } +} + +void HELPER(vhistq)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int lane = 0; lane < 8; lane++) { + for (int i = 0; i < sizeof(MMVector) / 8; ++i) { + unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i]; + unsigned char regno = value >> 3; + unsigned char element = value & 7; + + if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) { + env->VRegs[regno].uh[ + (sizeof(MMVector) / 16) * lane + element]++; + } + } + } +} + +void HELPER(vwhist256)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7); + + env->VRegs[vindex].uh[elindex] = + env->VRegs[vindex].uh[elindex] + weight; + } +} + +void HELPER(vwhist256q)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7); + + if (fGETQBIT(env->qtmp, 2 * i)) { + env->VRegs[vindex].uh[elindex] = + env->VRegs[vindex].uh[elindex] + weight; + } + } +} + +void HELPER(vwhist256_sat)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7); + + env->VRegs[vindex].uh[elindex] = + fVSATUH(env->VRegs[vindex].uh[elindex] + weight); + } +} + +void HELPER(vwhist256q_sat)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7); + + if (fGETQBIT(env->qtmp, 2 * i)) { + env->VRegs[vindex].uh[elindex] = + fVSATUH(env->VRegs[vindex].uh[elindex] + weight); + } + } +} + +void HELPER(vwhist128)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3); + + env->VRegs[vindex].uw[elindex] = + env->VRegs[vindex].uw[elindex] + weight; + } +} + +void HELPER(vwhist128q)(CPUHexagonState *env) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3); + + if (fGETQBIT(env->qtmp, 2 * i)) { + env->VRegs[vindex].uw[elindex] = + env->VRegs[vindex].uw[elindex] + weight; + } + } +} + +void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3); + + if ((bucket & 1) == uiV) { + env->VRegs[vindex].uw[elindex] = + env->VRegs[vindex].uw[elindex] + weight; + } + } +} + +void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) +{ + MMVector *input = &env->tmp_VRegs[0]; + + for (int i = 0; i < (sizeof(MMVector) / 2); i++) { + unsigned int bucket = fGETUBYTE(0, input->h[i]); + unsigned int weight = fGETUBYTE(1, input->h[i]); + unsigned int vindex = (bucket >> 3) & 0x1F; + unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3); + + if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) { + env->VRegs[vindex].uw[elindex] = + env->VRegs[vindex].uw[elindex] + weight; + } + } +} + static void cancel_slot(CPUHexagonState *env, uint32_t slot) { HEX_DEBUG_LOG("Slot %d cancelled\n", slot); -- 2.7.4