Use signed 25-bit operands: a pair of 12-bit add/sub immediate operations is smaller than a movz+movk+add sequence.
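
For example (register choices illustrative), adding 0x123456 expands to

    add  x0, x1, #0x123, lsl #12
    add  x0, x0, #0x456

i.e. two instructions (8 bytes), where building the constant first would take
three (12 bytes):

    movz x2, #0x3456
    movk x2, #0x12, lsl #16
    add  x0, x1, x2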
Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 107 +++++++++++++++++++++++++++--------------------
 1 file changed, 61 insertions(+), 46 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 41a17f8..ed32f64 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -106,6 +106,8 @@ static inline void patch_reloc(uint8_t *code_ptr, int type,
     }
 }
 
+#define TCG_CT_CONST_S25 0x100
+
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct,
                                    const char **pct_str)
@@ -129,6 +131,9 @@ static int target_parse_constraint(TCGArgConstraint *ct,
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
 #endif
         break;
+    case 'A': /* 25-bit signed, to be added or subtracted. */
+        ct->ct |= TCG_CT_CONST_S25;
+        break;
     default:
         return -1;
     }
@@ -146,6 +151,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
     if (ct & TCG_CT_CONST) {
         return 1;
     }
+    if ((ct & TCG_CT_CONST_S25) && sextract32(val, 0, 25) == val) {
+        return 1;
+    }
 
     return 0;
 }
@@ -200,12 +208,14 @@ enum aarch64_ldst_op_type { /* type of operation */
 };
 
 enum aarch64_arith_opc {
-    ARITH_AND = 0x0a,
-    ARITH_ADD = 0x0b,
-    ARITH_OR = 0x2a,
+    ARITH_AND  = 0x0a,
+    ARITH_ADD  = 0x0b,
+    ARITH_ADDI = 0x11,
+    ARITH_OR   = 0x2a,
     ARITH_ADDS = 0x2b,
-    ARITH_XOR = 0x4a,
-    ARITH_SUB = 0x4b,
+    ARITH_XOR  = 0x4a,
+    ARITH_SUB  = 0x4b,
+    ARITH_SUBI = 0x51,
     ARITH_ANDS = 0x6a,
     ARITH_SUBS = 0x6b,
 };
@@ -453,6 +463,20 @@ static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
     tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
 }
 
+static inline void tcg_out_aimm(TCGContext *s, enum aarch64_arith_opc opc,
+                                int ext, TCGReg rd, TCGReg rn, uint64_t aimm)
+{
+    unsigned int base = (ext ? 0x80 | opc : opc) << 24;
+
+    if (aimm > 0xfff) {
+        assert((aimm & 0xfff) == 0);
+        aimm >>= 12;
+        base |= 1 << 22; /* apply LSL 12 */
+        assert(aimm <= 0xfff);
+    }
+    tcg_out32(s, base | (aimm << 10) | (rn << 5) | rd);
+}
+
 static inline void tcg_out_mul(TCGContext *s, int ext,
                                TCGReg rd, TCGReg rn, TCGReg rm)
 {
@@ -732,44 +756,27 @@ static inline void tcg_out_uxt(TCGContext *s, int s_bits,
     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 }
 
-static inline void tcg_out_addi(TCGContext *s, int ext,
-                                TCGReg rd, TCGReg rn, unsigned int aimm)
+static void tcg_out_addi(TCGContext *s, int ext, TCGReg rd, TCGReg rn,
+                         tcg_target_long aimm)
 {
-    /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
-    /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
-    unsigned int base = ext ? 0x91000000 : 0x11000000;
+    enum aarch64_arith_opc opc = ARITH_ADDI;
+    tcg_target_long lo, hi;
 
-    if (aimm <= 0xfff) {
-        aimm <<= 10;
-    } else {
-        /* we can only shift left by 12, on assert we cannot represent */
-        assert(!(aimm & 0xfff));
-        assert(aimm <= 0xfff000);
-        base |= 1 << 22; /* apply LSL 12 */
-        aimm >>= 2;
+    if (aimm < 0) {
+        aimm = -aimm;
+        opc = ARITH_SUBI;
     }
+    hi = aimm & 0xfff000;
+    lo = aimm & 0xfff;
+    assert(aimm == hi + lo);
 
-    tcg_out32(s, base | aimm | (rn << 5) | rd);
-}
-
-static inline void tcg_out_subi(TCGContext *s, int ext,
-                                TCGReg rd, TCGReg rn, unsigned int aimm)
-{
-    /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
-    /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
-    unsigned int base = ext ? 0xd1000000 : 0x51000000;
-
-    if (aimm <= 0xfff) {
-        aimm <<= 10;
-    } else {
-        /* we can only shift left by 12, on assert we cannot represent */
-        assert(!(aimm & 0xfff));
-        assert(aimm <= 0xfff000);
-        base |= 1 << 22; /* apply LSL 12 */
-        aimm >>= 2;
+    if (hi != 0) {
+        tcg_out_aimm(s, opc, ext, rd, rn, hi);
+        rn = rd;
+    }
+    if (lo != 0 || rd != rn) {
+        tcg_out_aimm(s, opc, ext, rd, rn, lo);
     }
-
-    tcg_out32(s, base | aimm | (rn << 5) | rd);
 }
 
 static inline void tcg_out_nop(TCGContext *s)
@@ -1180,13 +1187,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_add_i64:
         ext = 1; /* fall through */
     case INDEX_op_add_i32:
-        tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
+        if (const_args[2]) {
+            tcg_out_addi(s, ext, args[0], args[1], args[2]);
+        } else {
+            tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
+        }
         break;
 
     case INDEX_op_sub_i64:
         ext = 1; /* fall through */
     case INDEX_op_sub_i32:
-        tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
+        if (const_args[2]) {
+            tcg_out_addi(s, ext, args[0], args[1], -args[2]);
+        } else {
+            tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
+        }
         break;
 
     case INDEX_op_and_i64:
@@ -1391,10 +1406,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_st32_i64, { "r", "r" } },
     { INDEX_op_st_i64, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "r", "r" } },
-    { INDEX_op_add_i64, { "r", "r", "r" } },
-    { INDEX_op_sub_i32, { "r", "r", "r" } },
-    { INDEX_op_sub_i64, { "r", "r", "r" } },
+    { INDEX_op_add_i32, { "r", "r", "rA" } },
+    { INDEX_op_add_i64, { "r", "r", "rA" } },
+    { INDEX_op_sub_i32, { "r", "r", "rA" } },
+    { INDEX_op_sub_i64, { "r", "r", "rA" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mul_i64, { "r", "r", "r" } },
     { INDEX_op_and_i32, { "r", "r", "r" } },
@@ -1518,8 +1533,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     }
 
     /* make stack space for TCG locals */
-    tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
-                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
+    tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
+                 -frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
 
     /* inform TCG about how to find TCG locals with register, offset, size */
     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
-- 
1.8.3.1
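
As a cross-check, here is a minimal standalone sketch of the decomposition the
new tcg_out_addi performs (plain C; emit_aimm/expand_addi are invented names
for illustration, not QEMU APIs): a constant with |aimm| <= 0xffffff splits
into an optional high 12-bit piece emitted with LSL #12 plus an optional low
12-bit piece, each a single ADD or SUB immediate.

/* Sketch of the two-instruction add/sub immediate expansion.
 * Not QEMU code; prints the sequence instead of encoding it. */
#include <assert.h>
#include <stdio.h>

static void emit_aimm(const char *op, long aimm)
{
    if (aimm > 0xfff) {
        /* Only a 12-bit value shifted left by 12 is encodable here. */
        assert((aimm & 0xfff) == 0 && (aimm >> 12) <= 0xfff);
        printf("    %s rd, rn, #0x%lx, lsl #12\n", op, aimm >> 12);
    } else {
        printf("    %s rd, rn, #0x%lx\n", op, aimm);
    }
}

static void expand_addi(long aimm)
{
    const char *op = "add";
    long lo, hi;

    printf("addi %ld:\n", aimm);
    if (aimm < 0) {          /* negative constants become SUBs */
        aimm = -aimm;
        op = "sub";
    }
    hi = aimm & 0xfff000;
    lo = aimm & 0xfff;
    assert(aimm == hi + lo); /* holds for |aimm| <= 0xffffff */

    if (hi != 0) {
        emit_aimm(op, hi);
    }
    /* The patch tests "lo != 0 || rd != rn" here so that a redundant
     * instruction is elided when rd == rn; this sketch just always
     * emits at least one instruction. */
    if (lo != 0 || hi == 0) {
        emit_aimm(op, lo);
    }
}

int main(void)
{
    expand_addi(0x456);      /* one instruction */
    expand_addi(0x123000);   /* one instruction, with LSL #12 */
    expand_addi(0x123456);   /* two instructions */
    expand_addi(-16);        /* one SUB */
    return 0;
}

Compiled with e.g. "cc -o addsplit addsplit.c" (file name arbitrary), it
prints the instruction sequence chosen for each constant.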