Implement the strex and ldrex instructions, relying on TCG's qemu_ldlink
and qemu_stcond ops. For the time being, only the 32-bit instructions
are supported.
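As an illustration (not part of the patch), the new generators would be
used roughly as follows to translate a guest LDREX/STREX pair; the
register numbers are made up for the example:

    /* ldrex r1, [r0] */
    gen_aa32_ldex32(cpu_R[1], cpu_R[0], get_mem_index(s));

    /* strex r2, r1, [r0]: r2 ends up 0 on success, 1 on failure */
    gen_aa32_stex32(cpu_R[2], cpu_R[1], cpu_R[0], get_mem_index(s));

Under SoftMMU both ops always take the slow path, ending up in the new
helper_le_ldlinkul_mmu/helper_le_stcondl_mmu helpers.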
Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
---
 target-arm/translate.c |  94 ++++++++++++++++++++++++++++++++++++++++++-
 tcg/arm/tcg-target.c   | 105 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 170 insertions(+), 29 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9116529..3b209a5 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -956,6 +956,18 @@ DO_GEN_ST(8, MO_UB)
 DO_GEN_ST(16, MO_TEUW)
 DO_GEN_ST(32, MO_TEUL)
 
+/* Load/Store exclusive generators (always unsigned) */
+static inline void gen_aa32_ldex32(TCGv_i32 val, TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_ldlink_i32(val, addr, index, MO_TEUL);
+}
+
+static inline void gen_aa32_stex32(TCGv_i32 is_excl, TCGv_i32 val,
+                                   TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_stcond_i32(is_excl, val, addr, index, MO_TEUL);
+}
+
 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 {
     tcg_gen_movi_i32(cpu_R[15], val);
@@ -7420,6 +7432,26 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
 }
 
+static void gen_load_exclusive_multi(DisasContext *s, int rt, int rt2,
+                                     TCGv_i32 addr, int size)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    switch (size) {
+    case 0:
+    case 1:
+        abort();
+    case 2:
+        gen_aa32_ldex32(tmp, addr, get_mem_index(s));
+        break;
+    case 3:
+    default:
+        abort();
+    }
+
+    store_reg(s, rt, tmp);
+}
+
 static void gen_clrex(DisasContext *s)
 {
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
@@ -7518,6 +7550,63 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     gen_set_label(done_label);
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
+
+static void gen_store_exclusive_multi(DisasContext *s, int rd, int rt, int rt2,
+                                      TCGv_i32 addr, int size)
+{
+    TCGv_i32 tmp;
+    TCGv_i32 is_dirty;
+    TCGv_i32 success;
+    TCGLabel *done_label;
+    TCGLabel *fail_label;
+
+    fail_label = gen_new_label();
+    done_label = gen_new_label();
+
+    tmp = tcg_temp_new_i32();
+    is_dirty = tcg_temp_new_i32();
+    success = tcg_temp_new_i32();
+    switch (size) {
+    case 0:
+    case 1:
+        abort();
+        break;
+    case 2:
+        gen_aa32_stex32(is_dirty, tmp, addr, get_mem_index(s));
+        break;
+    case 3:
+    default:
+        abort();
+    }
+
+    tcg_temp_free_i32(tmp);
+
+    /* Check if the store conditional has to fail */
+    tcg_gen_movi_i32(success, 1);
+    tcg_gen_brcond_i32(TCG_COND_EQ, is_dirty, success, fail_label);
+    tcg_temp_free_i32(success);
+    tcg_temp_free_i32(is_dirty);
+
+    tmp = load_reg(s, rt);
+    switch (size) {
+    case 0:
+    case 1:
+        abort();
+    case 2:
+        gen_aa32_st32(tmp, addr, get_mem_index(s));
+        break;
+    case 3:
+    default:
+        abort();
+    }
+    tcg_temp_free_i32(tmp);
+
+    tcg_gen_movi_i32(cpu_R[rd], 0); // is_dirty = 0
+    tcg_gen_br(done_label);
+    gen_set_label(fail_label);
+    tcg_gen_movi_i32(cpu_R[rd], 1); // is_dirty = 1
+    gen_set_label(done_label);
+}
 #endif
 
 /* gen_srs:
@@ -8365,7 +8454,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
             } else if (insn & (1 << 20)) {
                 switch (op1) {
                 case 0: /* ldrex */
-                    gen_load_exclusive(s, rd, 15, addr, 2);
+                    gen_load_exclusive_multi(s, rd, 15, addr, 2);
                     break;
                 case 1: /* ldrexd */
                     gen_load_exclusive(s, rd, rd + 1, addr, 3);
@@ -8383,7 +8472,8 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                 rm = insn & 0xf;
                 switch (op1) {
                 case 0: /* strex */
-                    gen_store_exclusive(s, rd, rm, 15, addr, 2);
+                    gen_store_exclusive_multi(s, rd, rm, 15,
+                                              addr, 2);
                     break;
                 case 1: /* strexd */
                     gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 01e6fbf..1a301be 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1069,6 +1069,11 @@ static void * const qemu_ld_helpers[16] = {
     [MO_BESL] = helper_be_ldul_mmu,
 };
 
+/* LoadLink helpers, only unsigned */
+static void * const qemu_ldex_helpers[16] = {
+    [MO_LEUL] = helper_le_ldlinkul_mmu,
+};
+
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
@@ -1082,6 +1087,11 @@ static void * const qemu_st_helpers[16] = {
     [MO_BEQ] = helper_be_stq_mmu,
 };
 
+/* StoreConditional helpers, only unsigned */
+static void * const qemu_stex_helpers[16] = {
+    [MO_LEUL] = helper_le_stcondl_mmu,
+};
+
 /* Helper routines for marshalling helper function arguments into
  * the correct registers and stack.
  * argreg is where we want to put this argument, arg is the argument itself.
@@ -1222,6 +1232,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
    path for a load or store, so that we can later generate the correct
    helper code.  */
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                bool is_excl, TCGReg llsc_success,
                                 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                 TCGReg addrhi, int mem_index,
                                 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
@@ -1229,6 +1240,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
     label->is_ld = is_ld;
+    label->is_llsc = is_excl;
+    label->llsc_success = llsc_success;
     label->opc = opc;
     label->datalo_reg = datalo;
     label->datahi_reg = datahi;
@@ -1259,12 +1272,16 @@
     /* For armv6 we can use the canonical unsigned helpers and minimize
        icache usage.  For pre-armv6, use the signed helpers since we do
        not have a single insn sign-extend.  */
-    if (use_armv6_instructions) {
-        func = qemu_ld_helpers[opc & ~MO_SIGN];
+    if (lb->is_llsc) {
+        func = qemu_ldex_helpers[opc];
     } else {
-        func = qemu_ld_helpers[opc];
-        if (opc & MO_SIGN) {
-            opc = MO_UL;
+        if (use_armv6_instructions) {
+            func = qemu_ld_helpers[opc & ~MO_SIGN];
+        } else {
+            func = qemu_ld_helpers[opc];
+            if (opc & MO_SIGN) {
+                opc = MO_UL;
+            }
         }
     }
     tcg_out_call(s, func);
@@ -1336,7 +1353,14 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
     /* Tail-call to the helper, which will return to the fast path.  */
-    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
+    if (lb->is_llsc) {
+        tcg_out_call(s, qemu_stex_helpers[opc]);
+        /* Save the output of the StoreConditional */
+        tcg_out_mov_reg(s, COND_AL, lb->llsc_success, TCG_REG_R0);
+        tcg_out_goto(s, COND_AL, lb->raddr);
+    } else {
+        tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
+    }
 }
 #endif /* SOFTMMU */
 
@@ -1460,7 +1484,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
     }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isLoadLink)
 {
     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
     TCGMemOp opc;
@@ -1478,17 +1503,23 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
 
     /* This a conditional BL only to load a pointer within this opcode into LR
-       for the slow path.  We will not be using the value for a tail call.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
-
-    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+       for the slow path.  We will not be using the value for a tail call.
+       In the context of a LoadLink instruction, we don't check the TLB but we
+       always follow the slow path.  */
+    if (isLoadLink) {
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_AL);
+    } else {
+        addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_NE);
+        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+    }
 
-    add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi,
-                        mem_index, s->code_ptr, label_ptr);
+    add_qemu_ldst_label(s, true, opc, isLoadLink, 0, datalo, datahi, addrlo,
+                        addrhi, mem_index, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
@@ -1589,9 +1620,11 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
     }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isStoreCond)
 {
     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+    TCGReg llsc_success;
     TCGMemOp opc;
 #ifdef CONFIG_SOFTMMU
     int mem_index;
@@ -1599,6 +1632,8 @@
     tcg_insn_unit *label_ptr;
 #endif
 
+    llsc_success = (isStoreCond ? *args++ : 0);
+
     datalo = *args++;
     datahi = (is64 ? *args++ : 0);
     addrlo = *args++;
@@ -1607,16 +1642,24 @@
 
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
 
-    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+    if (isStoreCond) {
+        /* Always follow the slow-path for an exclusive access */
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_AL);
+    } else {
+        addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
 
-    /* The conditional call must come last, as we're going to return here.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
+        tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
 
-    add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi,
-                        mem_index, s->code_ptr, label_ptr);
+        /* The conditional call must come last, as we're going to return here.
+         */
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_NE);
+    }
+
+    add_qemu_ldst_label(s, false, opc, isStoreCond, llsc_success, datalo,
+                        datahi, addrlo, addrhi, mem_index, s->code_ptr,
+                        label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
@@ -1859,16 +1902,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_ldlink_i32:
+        tcg_out_qemu_ld(s, args, 0, 1); // LoadLink
         break;
     case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args, 1, 0);
        break;
     case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_stcond_i32:
+        tcg_out_qemu_st(s, args, 0, 1); // StoreConditional
         break;
     case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args, 1, 0);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -1952,8 +2001,10 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld_i32, { "r", "l" } },
+    { INDEX_op_qemu_ldlink_i32, { "r", "l" } },
     { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
     { INDEX_op_qemu_st_i32, { "s", "s" } },
+    { INDEX_op_qemu_stcond_i32, { "r", "s", "s" } },
     { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
 #else
     { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
-- 
2.4.0