Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Signed-off-by: Taylor Simpson <tsimp...@quicinc.com> --- target/hexagon/translate.h | 61 ++++++++++++ target/hexagon/genptr.c | 15 +++ target/hexagon/translate.c | 239 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 311 insertions(+), 4 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 703fd13..fccfb94 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -29,6 +29,7 @@ typedef struct DisasContext { uint32_t mem_idx; uint32_t num_packets; uint32_t num_insns; + uint32_t num_hvx_insns; int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); @@ -37,6 +38,20 @@ typedef struct DisasContext { DECLARE_BITMAP(pregs_written, NUM_PREGS); uint8_t store_width[STORES_MAX]; bool s1_store_processed; + int future_vregs_idx; + int future_vregs_num[VECTOR_TEMPS_MAX]; + int tmp_vregs_idx; + int tmp_vregs_num[VECTOR_TEMPS_MAX]; + int vreg_log[NUM_VREGS]; + bool vreg_is_predicated[NUM_VREGS]; + int vreg_log_idx; + DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS); + DECLARE_BITMAP(vregs_updated, NUM_VREGS); + DECLARE_BITMAP(vregs_select, NUM_VREGS); + int qreg_log[NUM_QREGS]; + bool qreg_is_predicated[NUM_QREGS]; + int qreg_log_idx; + bool pre_commit; } DisasContext; static inline void ctx_log_reg_write(DisasContext *ctx, int rnum) @@ -67,6 +82,46 @@ static inline bool is_preloaded(DisasContext *ctx, int num) return test_bit(num, ctx->regs_written); } +intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, + int num, bool alloc_ok); +intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, + int num, bool alloc_ok); + +static inline void ctx_log_vreg_write(DisasContext *ctx, + int rnum, VRegWriteType type, + bool is_predicated) +{ + if (type != EXT_TMP) { + ctx->vreg_log[ctx->vreg_log_idx] = rnum; + ctx->vreg_is_predicated[ctx->vreg_log_idx] = is_predicated; + ctx->vreg_log_idx++; + + set_bit(rnum, ctx->vregs_updated); + } + if (type == EXT_NEW) { + set_bit(rnum, ctx->vregs_select); + } + if (type == EXT_TMP) { + set_bit(rnum, ctx->vregs_updated_tmp); + } +} + +static inline void ctx_log_vreg_write_pair(DisasContext *ctx, + int rnum, VRegWriteType type, + bool is_predicated) +{ + ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated); + ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated); +} + +static inline void ctx_log_qreg_write(DisasContext *ctx, + int rnum, bool is_predicated) +{ + ctx->qreg_log[ctx->qreg_log_idx] = rnum; + ctx->qreg_is_predicated[ctx->qreg_log_idx] = is_predicated; + ctx->qreg_log_idx++; +} + extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; extern TCGv hex_pred[NUM_PREGS]; extern TCGv hex_next_PC; @@ -85,6 +140,12 @@ extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; +extern TCGv hex_VRegs_updated; +extern TCGv hex_QRegs_updated; +extern TCGv hex_vstore_addr[VSTORES_MAX]; +extern TCGv hex_vstore_size[VSTORES_MAX]; +extern TCGv hex_vstore_pending[VSTORES_MAX]; +bool is_gather_store_insn(Insn *insn, Packet *pkt); void process_store(DisasContext *ctx, Packet *pkt, int slot_num); #endif diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 4a21fa5..d16ff74 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -165,6 +165,9 @@ static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num, } else if (reg_num == HEX_REG_QEMU_INSN_CNT) { tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); + } else if (reg_num == HEX_REG_QEMU_HVX_CNT) { + tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_HVX_CNT], + ctx->num_hvx_insns); } else { tcg_gen_mov_tl(dest, hex_gpr[reg_num]); } @@ -191,6 +194,12 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, tcg_gen_concat_i32_i64(dest, pkt_cnt, insn_cnt); tcg_temp_free(pkt_cnt); tcg_temp_free(insn_cnt); + } else if (reg_num == HEX_REG_QEMU_HVX_CNT) { + TCGv hvx_cnt = tcg_temp_new(); + tcg_gen_addi_tl(hvx_cnt, hex_gpr[HEX_REG_QEMU_HVX_CNT], + ctx->num_hvx_insns); + tcg_gen_concat_i32_i64(dest, hvx_cnt, hex_gpr[reg_num + 1]); + tcg_temp_free(hvx_cnt); } else { tcg_gen_concat_i32_i64(dest, hex_gpr[reg_num], @@ -226,6 +235,9 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_QEMU_INSN_CNT) { ctx->num_insns = 0; } + if (reg_num == HEX_REG_QEMU_HVX_CNT) { + ctx->num_hvx_insns = 0; + } } } @@ -247,6 +259,9 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, ctx->num_packets = 0; ctx->num_insns = 0; } + if (reg_num == HEX_REG_QEMU_HVX_CNT) { + ctx->num_hvx_insns = 0; + } } } diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index e10ef36..b6f541e 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" #include "exec/cpu_ldst.h" #include "exec/log.h" #include "internal.h" @@ -47,11 +48,60 @@ TCGv hex_dczero_addr; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; +TCGv hex_VRegs_updated; +TCGv hex_QRegs_updated; +TCGv hex_vstore_addr[VSTORES_MAX]; +TCGv hex_vstore_size[VSTORES_MAX]; +TCGv hex_vstore_pending[VSTORES_MAX]; static const char * const hexagon_prednames[] = { "p0", "p1", "p2", "p3" }; +intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, + int num, bool alloc_ok) +{ + intptr_t offset; + + /* See if it is already allocated */ + for (int i = 0; i < ctx->future_vregs_idx; i++) { + if (ctx->future_vregs_num[i] == regnum) { + return offsetof(CPUHexagonState, future_VRegs[i]); + } + } + + g_assert(alloc_ok); + offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]); + for (int i = 0; i < num; i++) { + ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++; + } + ctx->future_vregs_idx += num; + g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX); + return offset; +} + +intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, + int num, bool alloc_ok) +{ + intptr_t offset; + + /* See if it is already allocated */ + for (int i = 0; i < ctx->tmp_vregs_idx; i++) { + if (ctx->tmp_vregs_num[i] == regnum) { + return offsetof(CPUHexagonState, tmp_VRegs[i]); + } + } + + g_assert(alloc_ok); + offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]); + for (int i = 0; i < num; i++) { + ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++; + } + ctx->tmp_vregs_idx += num; + g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX); + return offset; +} + static void gen_exception_raw(int excp) { gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp)); @@ -63,6 +113,8 @@ static void gen_exec_counters(DisasContext *ctx) hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); + tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], + hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); } static void gen_end_tb(DisasContext *ctx) @@ -167,11 +219,19 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); + ctx->future_vregs_idx = 0; + ctx->tmp_vregs_idx = 0; + ctx->vreg_log_idx = 0; + bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); + bitmap_zero(ctx->vregs_updated, NUM_VREGS); + bitmap_zero(ctx->vregs_select, NUM_VREGS); + ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); ctx->s1_store_processed = false; + ctx->pre_commit = true; if (HEX_DEBUG) { /* Handy place to set a breakpoint before the packet executes */ @@ -193,6 +253,26 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) if (need_pred_written(pkt)) { tcg_gen_movi_tl(hex_pred_written, 0); } + + if (pkt->pkt_has_hvx) { + tcg_gen_movi_tl(hex_VRegs_updated, 0); + tcg_gen_movi_tl(hex_QRegs_updated, 0); + } +} + +bool is_gather_store_insn(Insn *insn, Packet *pkt) +{ + if (GET_ATTRIB(insn->opcode, A_CVI_NEW) && + insn->new_value_producer_slot == 1) { + /* Look for gather instruction */ + for (int i = 0; i < pkt->num_insns; i++) { + Insn *in = &pkt->insn[i]; + if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) { + return true; + } + } + } + return false; } /* @@ -448,10 +528,98 @@ static void process_dczeroa(DisasContext *ctx, Packet *pkt) } } +static bool pkt_has_hvx_store(Packet *pkt) +{ + int i; + for (i = 0; i < pkt->num_insns; i++) { + int opcode = pkt->insn[i].opcode; + if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) { + return true; + } + } + return false; +} + +static void gen_commit_hvx(DisasContext *ctx, Packet *pkt) +{ + int i; + + /* + * for (i = 0; i < ctx->vreg_log_idx; i++) { + * int rnum = ctx->vreg_log[i]; + * if (ctx->vreg_is_predicated[i]) { + * if (env->VRegs_updated & (1 << rnum)) { + * env->VRegs[rnum] = env->future_VRegs[rnum]; + * } + * } else { + * env->VRegs[rnum] = env->future_VRegs[rnum]; + * } + * } + */ + for (i = 0; i < ctx->vreg_log_idx; i++) { + int rnum = ctx->vreg_log[i]; + bool is_predicated = ctx->vreg_is_predicated[i]; + intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]); + intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false); + size_t size = sizeof(MMVector); + + if (is_predicated) { + TCGv cmp = tcg_temp_new(); + TCGLabel *label_skip = gen_new_label(); + + tcg_gen_andi_tl(cmp, hex_VRegs_updated, 1 << rnum); + tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); + tcg_temp_free(cmp); + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); + gen_set_label(label_skip); + } else { + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); + } + } + + /* + * for (i = 0; i < ctx->qreg_log_idx; i++) { + * int rnum = ctx->qreg_log[i]; + * if (ctx->qreg_is_predicated[i]) { + * if (env->QRegs_updated) & (1 << rnum)) { + * env->QRegs[rnum] = env->future_QRegs[rnum]; + * } + * } else { + * env->QRegs[rnum] = env->future_QRegs[rnum]; + * } + * } + */ + for (i = 0; i < ctx->qreg_log_idx; i++) { + int rnum = ctx->qreg_log[i]; + bool is_predicated = ctx->qreg_is_predicated[i]; + intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]); + intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]); + size_t size = sizeof(MMQReg); + + if (is_predicated) { + TCGv cmp = tcg_temp_new(); + TCGLabel *label_skip = gen_new_label(); + + tcg_gen_andi_tl(cmp, hex_QRegs_updated, 1 << rnum); + tcg_gen_brcondi_tl(TCG_COND_EQ, cmp, 0, label_skip); + tcg_temp_free(cmp); + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); + gen_set_label(label_skip); + } else { + tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size); + } + } + + if (pkt_has_hvx_store(pkt)) { + gen_helper_commit_hvx_stores(cpu_env); + } +} + static void update_exec_counters(DisasContext *ctx, Packet *pkt) { int num_insns = pkt->num_insns; int num_real_insns = 0; + int num_hvx_insns = 0; for (int i = 0; i < num_insns; i++) { if (!pkt->insn[i].is_endloop && @@ -459,13 +627,18 @@ static void update_exec_counters(DisasContext *ctx, Packet *pkt) !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) { num_real_insns++; } + if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) { + num_hvx_insns++; + } } ctx->num_packets++; ctx->num_insns += num_real_insns; + ctx->num_hvx_insns += num_hvx_insns; } -static void gen_commit_packet(DisasContext *ctx, Packet *pkt) +static void gen_commit_packet(CPUHexagonState *env, DisasContext *ctx, + Packet *pkt) { /* * If there is more than one store in a packet, make sure they are all OK @@ -474,6 +647,10 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) * dczeroa has to be the only store operation in the packet, so we go * ahead and process that first. * + * When there is an HVX store, there can also be a scalar store in either + * slot 0 or slot1, so we create a mask for the helper to indicate what + * work to do. + * * When there are two scalar stores, we probe the one in slot 0. * * Note that we don't call the probe helper for packets with only one @@ -482,13 +659,35 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) */ bool has_store_s0 = pkt->pkt_has_store_s0; bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); + bool has_hvx_store = pkt_has_hvx_store(pkt); if (pkt->pkt_has_dczeroa) { /* * The dczeroa will be the store in slot 0, check that we don't have - * a store in slot 1. + * a store in slot 1 or an HVX store. */ - g_assert(has_store_s0 && !has_store_s1); + g_assert(has_store_s0 && !has_store_s1 && !has_hvx_store); process_dczeroa(ctx, pkt); + } else if (has_hvx_store) { + TCGv mem_idx = tcg_constant_tl(ctx->mem_idx); + + if (!has_store_s0 && !has_store_s1) { + gen_helper_probe_hvx_stores(cpu_env, mem_idx); + } else { + int mask = 0; + TCGv mask_tcgv; + + if (has_store_s0) { + mask |= (1 << 0); + } + if (has_store_s1) { + mask |= (1 << 1); + } + if (has_hvx_store) { + mask |= (1 << 2); + } + mask_tcgv = tcg_constant_tl(mask); + gen_helper_probe_pkt_scalar_hvx_stores(cpu_env, mask_tcgv, mem_idx); + } } else if (has_store_s0 && has_store_s1) { /* * process_store_log will execute the slot 1 store first, @@ -502,6 +701,9 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) gen_reg_writes(ctx); gen_pred_writes(ctx, pkt); + if (pkt->pkt_has_hvx) { + gen_commit_hvx(ctx, pkt); + } update_exec_counters(ctx, pkt); if (HEX_DEBUG) { TCGv has_st0 = @@ -513,6 +715,11 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); } + if (pkt->vhist_insn != NULL) { + ctx->pre_commit = false; + pkt->vhist_insn->generate(env, ctx, pkt->vhist_insn, pkt); + } + if (pkt->pkt_has_cof) { gen_end_tb(ctx); } @@ -537,7 +744,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) for (i = 0; i < pkt.num_insns; i++) { gen_insn(env, ctx, &pkt.insn[i], &pkt); } - gen_commit_packet(ctx, &pkt); + gen_commit_packet(env, ctx, &pkt); ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; } else { gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); @@ -552,6 +759,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->mem_idx = MMU_USER_IDX; ctx->num_packets = 0; ctx->num_insns = 0; + ctx->num_hvx_insns = 0; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -656,6 +864,9 @@ static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; static char store_val32_names[STORES_MAX][NAME_LEN]; static char store_val64_names[STORES_MAX][NAME_LEN]; +static char vstore_addr_names[VSTORES_MAX][NAME_LEN]; +static char vstore_size_names[VSTORES_MAX][NAME_LEN]; +static char vstore_pending_names[VSTORES_MAX][NAME_LEN]; void hexagon_translate_init(void) { @@ -718,6 +929,10 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env, offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); + hex_VRegs_updated = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, VRegs_updated), "VRegs_updated"); + hex_QRegs_updated = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, QRegs_updated), "QRegs_updated"); for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(cpu_env, @@ -739,4 +954,20 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, mem_log_stores[i].data64), store_val64_names[i]); } + for (int i = 0; i < VSTORES_MAX; i++) { + snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i); + hex_vstore_addr[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, vstore[i].va), + vstore_addr_names[i]); + + snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i); + hex_vstore_size[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, vstore[i].size), + vstore_size_names[i]); + + snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i); + hex_vstore_pending[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, vstore_pending[i]), + vstore_pending_names[i]); + } } -- 2.7.4