That is, the old LDST_OPTIMIZATION. Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/s390/tcg-target.c | 210 ++++++++++++++++++++++++++++---------------------- 1 file changed, 118 insertions(+), 92 deletions(-)
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 332822f..6e6c74a 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -24,7 +24,7 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" +#include "tcg-be-ldst.h" /* We only support generating code for 64-bit mode. */ #if TCG_TARGET_REG_BITS != 64 @@ -1386,107 +1386,123 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data, } #if defined(CONFIG_SOFTMMU) -static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg, - TCGReg addr_reg, int mem_index, int opc, - tcg_insn_unit **label2_ptr_p, int is_store) +/* We're expecting to use a 20-bit signed offset on the tlb memory ops. + Using the offset of the second entry in the last tlb table ensures + that we can index all of the elements of the first entry. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x7ffff); + +/* Load and compare a TLB entry, leaving the flags set. Loads the TLB + addend into R2. Returns a register with the santitized guest address. */ +static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, + int mem_index, bool is_ld) { - const TCGReg arg0 = tcg_target_call_iarg_regs[0]; - const TCGReg arg1 = tcg_target_call_iarg_regs[1]; - const TCGReg arg2 = tcg_target_call_iarg_regs[2]; - const TCGReg arg3 = tcg_target_call_iarg_regs[3]; - const TCGReg arg4 = tcg_target_call_iarg_regs[4]; TCGMemOp s_bits = opc & MO_SIZE; - tcg_insn_unit *label1_ptr; - tcg_target_long ofs; + int ofs; + + tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); + tgen_ext32u(s, TCG_REG_R3, addr_reg); } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); } - tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tgen_andi(s, TCG_TYPE_I64, TCG_REG_R3, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); - if (is_store) { - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - } else { + if (is_ld) { ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); + } else { + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); } - assert(ofs < 0x80000); - if (TARGET_LONG_BITS == 32) { - tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } else { - tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs); } + ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs); + if (TARGET_LONG_BITS == 32) { - tgen_ext32u(s, arg1, addr_reg); - } else { - tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg); - } - - label1_ptr = s->code_ptr; - - /* je label1 (offset will be patched in later) */ - tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0); - - /* call load/store helper */ - if (is_store) { - /* Make sure to zero-extend the value to the full register - for the calling convention. */ - switch (s_bits) { - case MO_UB: - tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case MO_UW: - tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg); - break; - case MO_UL: - tgen_ext32u(s, arg2, data_reg); - break; - case MO_Q: - tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg); - break; - default: - tcg_abort(); - } - tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index); - tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0); - tcg_out_movi(s, TCG_TYPE_PTR, arg4, (uintptr_t)s->code_ptr); - tcg_out_call(s, qemu_st_helpers[opc]); - } else { - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); - tcg_out_mov(s, TCG_TYPE_PTR, arg0, TCG_AREG0); - tcg_out_movi(s, TCG_TYPE_PTR, arg3, (uintptr_t)s->code_ptr); - tcg_out_call(s, qemu_ld_helpers[opc]); - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); + tgen_ext32u(s, TCG_REG_R3, addr_reg); + return TCG_REG_R3; } + return addr_reg; +} - /* jump to label2 (end) */ - *label2_ptr_p = s->code_ptr; - - tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0); +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + TCGReg data, TCGReg addr, int mem_index, + tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ + TCGLabelQemuLdst *label = new_ldst_label(s); + + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data; + label->addrlo_reg = addr; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} - /* this is label1, patch branch */ - label1_ptr[1] = s->code_ptr - label1_ptr; +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; - ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend); - assert(ofs < 0x80000); + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); - tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_ld_helpers[opc]); + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); - return arg1; + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } -static void tcg_finish_qemu_ldst(TCGContext* s, tcg_insn_unit *label2_ptr) +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { - /* patch branch */ - label2_ptr[1] = s->code_ptr - label2_ptr; + TCGReg addr_reg = lb->addrlo_reg; + TCGReg data_reg = lb->datalo_reg; + TCGMemOp opc = lb->opc; + + patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg); + } + switch (opc & MO_SIZE) { + case MO_UB: + tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UW: + tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + case MO_UL: + tgen_ext32u(s, TCG_REG_R4, data_reg); + break; + case MO_Q: + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); + break; + default: + tcg_abort(); + } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); + tcg_out_call(s, qemu_st_helpers[opc]); + + tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); } #else static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, @@ -1506,18 +1522,22 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, } #endif /* CONFIG_SOFTMMU */ -/* load data with address translation (if applicable) - and endianness conversion */ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, TCGMemOp opc, int mem_index) { -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label2_ptr; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); + + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); + + tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 0); - tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else TCGReg index_reg; tcg_target_long disp; @@ -1530,13 +1550,19 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, TCGMemOp opc, int mem_index) { -#if defined(CONFIG_SOFTMMU) - tcg_insn_unit *label2_ptr; +#ifdef CONFIG_SOFTMMU + tcg_insn_unit *label_ptr; + TCGReg base_reg; + + base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); + + label_ptr = s->code_ptr + 1; + tcg_out_insn(s, RI, BRC, S390_CC_NE, 0); + + tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); - addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, - opc, &label2_ptr, 1); - tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0); - tcg_finish_qemu_ldst(s, label2_ptr); + add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg, mem_index, + s->code_ptr, label_ptr); #else TCGReg index_reg; tcg_target_long disp; -- 1.9.0