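Load GUEST_BASE into a reserved register (TCG_GUEST_BASE_REG, r55) once in the prologue instead of materializing it with a MOVL bundle before every qemu_ld/qemu_st, and skip the add entirely when GUEST_BASE is zero. Saves one bundle per memory operation. Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/ia64/tcg-target.c | 132 ++++++++++++++++++++++++++++++++---------------- 1 files changed, 88 insertions(+), 44 deletions(-)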
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index acd4ce8..0d275e9 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -40,6 +40,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif +#ifdef CONFIG_USE_GUEST_BASE +#define TCG_GUEST_BASE_REG TCG_REG_R55 +#else +#define TCG_GUEST_BASE_REG TCG_REG_R0 +#endif + /* Branch registers */ enum { TCG_REG_B0 = 0, @@ -1641,9 +1647,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { + static uint64_t const opc_ld_m1[4] = { + OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 + }; + static uint64_t const opc_sxt_i29[4] = { + OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 + }; int addr_reg, data_reg, mem_index, s_bits, bswap; - uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; - uint64_t opc_sxt_i29[8] = { OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 }; data_reg = *args++; addr_reg = *args++; @@ -1656,19 +1666,21 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) bswap = 0; #endif - tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_l2 ((tcg_target_long) GUEST_BASE), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R2, - GUEST_BASE)); - #if TARGET_LONG_BITS == 32 - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R3)); + if (GUEST_BASE != 0) { + tcg_out_bundle(s, mII, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R2, addr_reg), + tcg_opc_i18(TCG_REG_P0, 
OPC_NOP_I18, 0)); + } if (!bswap || s_bits == 0) { if (s_bits == opc) { @@ -1724,12 +1736,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) } } #else - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, addr_reg), - tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], - data_reg, TCG_REG_R2), - tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + if (GUEST_BASE != 0) { + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg), + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, TCG_REG_R2), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + } else { + tcg_out_bundle(s, mmI, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], + data_reg, addr_reg), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + } if (bswap && s_bits == 1) { tcg_out_bundle(s, mII, @@ -1764,8 +1784,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { + static uint64_t const opc_st_m4[4] = { + OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 + }; int addr_reg, data_reg, bswap; - uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; +#if TARGET_LONG_BITS == 64 + uint64_t add_guest_base; +#endif data_reg = *args++; addr_reg = *args++; @@ -1776,19 +1801,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) bswap = 0; #endif - tcg_out_bundle(s, mLX, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_l2 ((tcg_target_long) GUEST_BASE), - tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R2, - GUEST_BASE)); - #if TARGET_LONG_BITS == 32 - tcg_out_bundle(s, mII, - tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), - tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, - TCG_REG_R3, addr_reg), - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_REG_R3)); + if (GUEST_BASE != 0) { + tcg_out_bundle(s, mII, + tcg_opc_m48(TCG_REG_P0, 
OPC_NOP_M48, 0), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, TCG_REG_R3)); + } else { + tcg_out_bundle(s, miI, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, + TCG_REG_R3, addr_reg), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + } + if (bswap) { if (opc == 1) { tcg_out_bundle(s, mII, @@ -1821,18 +1849,24 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); #else + if (GUEST_BASE != 0) { + add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, + TCG_GUEST_BASE_REG, addr_reg); + addr_reg = TCG_REG_R2; + } else { + add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0); + } + if (!bswap || opc == 0) { - tcg_out_bundle(s, MmI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, addr_reg), + tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI), + add_guest_base, tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], - data_reg, TCG_REG_R2), + data_reg, addr_reg), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } else { if (opc == 1) { tcg_out_bundle(s, mII, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, addr_reg), + add_guest_base, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 15, 15), tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, @@ -1840,8 +1874,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) data_reg = TCG_REG_R3; } else if (opc == 2) { tcg_out_bundle(s, mII, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, addr_reg), + add_guest_base, tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, data_reg, 31, 31), tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, @@ -1849,8 +1882,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) data_reg = TCG_REG_R3; } else if (opc == 3) { tcg_out_bundle(s, miI, - tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - 
TCG_REG_R2, addr_reg), + add_guest_base, tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, TCG_REG_R3, data_reg, 0xb)); @@ -1858,7 +1890,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) } tcg_out_bundle(s, miI, tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc], - data_reg, TCG_REG_R2), + data_reg, addr_reg), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0), tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } @@ -2255,6 +2287,18 @@ void tcg_target_qemu_prologue(TCGContext *s) TCG_REG_B6, TCG_REG_R32, 0), tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, TCG_REG_R32, TCG_REG_B0)); + + /* ??? If GUEST_BASE < 0x200000, we could load the register via + an ADDL in the M slot of the next bundle. */ + if (GUEST_BASE != 0) { + tcg_out_bundle(s, mlx, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_l2 (GUEST_BASE), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_GUEST_BASE_REG, GUEST_BASE)); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } + tcg_out_bundle(s, miB, tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, -- 1.7.0.1