Make use of the reg+reg+disp addressing mode to eliminate redundant additions. Make use of the load-and-operate insns. Avoid an extra register copy when using the 64-bit shift insns. Fix the width of the TLB comparison.
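As an illustration of the addressing-mode change (the calls below are the ones visible in the diff; the trailing comments sketching the emitted s390 assembly are approximate):

    /* Before: three insns to form the TLB entry address, then a
       compare against a zero displacement.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);  /* TCG_TMP0 = ofs */
    tcg_out_insn(s, RRE, AGR, arg1, TCG_TMP0);     /* agr  arg1,tmp0 */
    tcg_out_insn(s, RRE, AGR, arg1, TCG_AREG0);    /* agr  arg1,env  */
    tcg_out_insn(s, RXY, CG, arg0, arg1, 0, 0);    /* cg   arg0,0(arg1) */

    /* After: one compare, with both additions folded into the
       reg+reg+disp addressing mode.  The displacement must fit the
       20-bit signed RXY field, hence the assert(ofs < 0x80000).  */
    tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
                                                   /* cg arg0,ofs(arg1,env) */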
Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/s390/tcg-target.c |   64 ++++++++++++++++++++----------------------
 1 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 88b5592..b73515d 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -146,7 +146,10 @@ typedef enum S390Opcode {
     RS_SRA      = 0x8a,
     RS_SRL      = 0x88,
 
+    RXY_AG      = 0xe308,
+    RXY_AY      = 0xe35a,
     RXY_CG      = 0xe320,
+    RXY_CY      = 0xe359,
     RXY_LB      = 0xe376,
     RXY_LG      = 0xe304,
     RXY_LGB     = 0xe377,
@@ -170,6 +173,8 @@ typedef enum S390Opcode {
     RXY_STRVH   = 0xe33f,
     RXY_STY     = 0xe350,
 
+    RX_A        = 0x5a,
+    RX_C        = 0x59,
     RX_L        = 0x58,
     RX_LH       = 0x48,
     RX_ST       = 0x50,
@@ -1220,24 +1225,16 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
 {
     const TCGReg arg0 = TCG_REG_R2;
     const TCGReg arg1 = TCG_REG_R3;
-    const TCGReg arg2 = TCG_REG_R4;
-    int s_bits;
+    int s_bits = opc & 3;
     uint16_t *label1_ptr;
+    tcg_target_long ofs;
 
-    if (is_store) {
-        s_bits = opc;
+    if (TARGET_LONG_BITS == 32) {
+        tgen_ext32u(s, arg0, addr_reg);
     } else {
-        s_bits = opc & 3;
+        tcg_out_mov(s, arg0, addr_reg);
     }
 
-#if TARGET_LONG_BITS == 32
-    tgen_ext32u(s, arg1, addr_reg);
-    tgen_ext32u(s, arg0, addr_reg);
-#else
-    tcg_out_mov(s, arg1, addr_reg);
-    tcg_out_mov(s, arg0, addr_reg);
-#endif
-
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
@@ -1245,23 +1242,23 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                     offsetof(CPUState, tlb_table[mem_index][0].addr_write));
+        ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                     offsetof(CPUState, tlb_table[mem_index][0].addr_read));
+        ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_read);
     }
-    tcg_out_insn(s, RRE, AGR, arg1, TCG_TMP0);
+    assert(ofs < 0x80000);
 
-    tcg_out_insn(s, RRE, AGR, arg1, TCG_AREG0);
-
-    tcg_out_insn(s, RXY, CG, arg0, arg1, 0, 0);
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs);
+    } else {
+        tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
+    }
 
-#if TARGET_LONG_BITS == 32
-    tgen_ext32u(s, arg0, addr_reg);
-#else
-    tcg_out_mov(s, arg0, addr_reg);
-#endif
+    if (TARGET_LONG_BITS == 32) {
+        tgen_ext32u(s, arg0, addr_reg);
+    } else {
+        tcg_out_mov(s, arg0, addr_reg);
+    }
 
     label1_ptr = (uint16_t*)s->code_ptr;
 
@@ -1271,7 +1268,7 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     /* call load/store helper */
     if (is_store) {
         tcg_out_mov(s, arg1, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
+        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
@@ -1304,17 +1301,10 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     *(label1_ptr + 1) = ((unsigned long)s->code_ptr -
                          (unsigned long)label1_ptr) >> 1;
 
-    if (is_store) {
-        tcg_out_insn(s, RXY, LG, arg1, arg1, 0,
-                     offsetof(CPUTLBEntry, addend)
-                     - offsetof(CPUTLBEntry, addr_write));
-    } else {
-        tcg_out_insn(s, RXY, LG, arg1, arg1, 0,
-                     offsetof(CPUTLBEntry, addend)
-                     - offsetof(CPUTLBEntry, addr_read));
-    }
+    ofs = offsetof(CPUState, tlb_table[mem_index][0].addend);
+    assert(ofs < 0x80000);
 
-    tcg_out_insn(s, RRE, AGR, arg0, arg1);
+    tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
 }
 
 static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
-- 
1.7.0.1
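For reference, a sketch of the TLB fast path these hunks emit for a
64-bit guest.  The offsets ofs_cmp and ofs_add are hypothetical
stand-ins for the addr_read/addr_write and addend offsets computed
above; the comments are approximate:

    tcg_out_mov(s, arg0, addr_reg);      /* arg0 = guest address */
    /* SRLG is a three-address shift, so it reads addr_reg directly;
       no preliminary copy into arg1 is required.  */
    tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    /* Mask the page index down to a byte offset into the TLB table.  */
    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
    /* cg arg0,ofs_cmp(arg1,env): 64-bit tag compare.  A 32-bit guest
       uses C/CY instead, so the comparison width matches the field.  */
    tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs_cmp);
    /* ... conditional branch to the slow-path helper call ... */
    /* ag arg0,ofs_add(arg1,env): a load-and-operate insn that adds
       the TLB addend to the guest address in one instruction.  */
    tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs_add);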