This results in significant code size reductions when manipulating pointers into TCG's own data structures. E.g.
-OUT: [size=180] +OUT: [size=132] ... -xxx: li r2,16383 # goto_tb -xxx: rldicr r2,r2,32,31 -xxx: oris r2,r2,39128 -xxx: ori r2,r2,376 -xxx: ldx r30,0,r2 +xxx: addis r30,r30,-544 +xxx: ld r30,-8(r30) ... -xxx: li r3,16383 # exit_tb -xxx: rldicr r3,r3,32,31 -xxx: oris r3,r3,39128 -xxx: ori r3,r3,288 +xxx: addis r3,r30,-544 +xxx: addi r3,r3,-96 Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/ppc64/tcg-target.c | 164 +++++++++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 65 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index e01d8bc..d4e1efc 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -548,6 +548,78 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); } +static void tcg_out_mem_long(TCGContext *s, PowerOpcode opi, PowerOpcode opx, + TCGReg rt, TCGReg base, tcg_target_long offset) +{ + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + TCGReg rs = TCG_REG_R2; + + assert(rt != TCG_REG_R2 && base != TCG_REG_R2); + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + } + break; + case STD: + align = 3; + break; + case STB: case STH: case STW: + break; + } + + /* For unaligned, use the indexed form. */ + if (offset & align) { + do_indexed: + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); + tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); + return; + } + + if (base == TCG_REG_R0) { + /* For absolute addresses, avoid indexed form. First try turning + it into an offset from a known base register, then just fold + the low 16 bits. 
*/ + offset -= (tcg_target_long)s->code_buf; + if (offset == (int32_t)offset) { + orig = offset; + base = TCG_REG_TB; + } else { + offset = (int16_t)orig; + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig - offset); + orig = offset; + base = TCG_REG_R2; + } + } else if (offset != (int32_t)offset) { + /* For very large offsets off a real base register, use indexed. */ + goto do_indexed; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); + } +} + static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) { if (arg == (int16_t) arg) { @@ -563,23 +635,37 @@ static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { + tcg_target_long tmp; + + /* Two attempts at 1 or 2 insn sequence for 32-bit constant. */ if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { tcg_out_movi32(s, ret, arg); - } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { + return; + } + if (arg == (uint32_t)arg && !(arg & 0x8000)) { tcg_out32(s, ADDI | TAI(ret, 0, arg)); tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } else { - int32_t high = arg >> 32; - tcg_out_movi32(s, ret, high); - if (high) { - tcg_out_shli64(s, ret, ret, 32); - } - if (arg & 0xffff0000) { - tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); - } - if (arg & 0xffff) { - tcg_out32(s, ORI | SAI(ret, ret, arg)); - } + return; + } + + /* See if we can turn an address constant into a TB offset. */ + tmp = arg - (uintptr_t)s->code_buf; + if (tmp == (int32_t)tmp) { + tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tmp); + return; + } + + /* Full 64-bit constant load. 
*/ + tmp = arg >> 32; + tcg_out_movi32(s, ret, tmp); + if (tmp) { + tcg_out_shli64(s, ret, ret, 32); + } + if (arg & 0xffff0000) { + tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); + } + if (arg & 0xffff) { + tcg_out32(s, ORI | SAI(ret, ret, arg)); } } @@ -746,58 +832,6 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, #endif } -static void tcg_out_mem_long(TCGContext *s, PowerOpcode opi, PowerOpcode opx, - TCGReg rt, TCGReg base, tcg_target_long offset) -{ - tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; - TCGReg rs = TCG_REG_R2; - - assert(rt != TCG_REG_R2 && base != TCG_REG_R2); - - switch (opi) { - case LD: case LWA: - align = 3; - /* FALLTHRU */ - default: - if (rt != TCG_REG_R0) { - rs = rt; - } - break; - case STD: - align = 3; - break; - case STB: case STH: case STW: - break; - } - - /* For unaligned, or very large offsets, use the indexed form. */ - if (offset & align || offset != (int32_t)offset) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); - tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); - return; - } - - l0 = (int16_t)offset; - offset = (offset - l0) >> 16; - l1 = (int16_t)offset; - - if (l1 < 0 && orig >= 0) { - extra = 0x4000; - l1 = (int16_t)(offset - 0x4000); - } - if (l1) { - tcg_out32(s, ADDIS | TAI(rs, base, l1)); - base = rs; - } - if (extra) { - tcg_out32(s, ADDIS | TAI(rs, base, extra)); - base = rs; - } - if (opi != ADDI || base != rt || l0 != 0) { - tcg_out32(s, opi | TAI(rt, base, l0)); - } -} - #if defined (CONFIG_SOFTMMU) #include "exec/softmmu_defs.h" -- 1.8.3.1