On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote: > Add more OPC values, and tgen_arithr. Use the latter throughout. > > Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form > instead of the Ev,Gv opcode form used previously. Both forms > disassemble properly, and so there's no visible change when diffing > log files before and after the change. This change makes the operand > ordering within the output routines more natural, and avoids the need > to define an OPC_ARITH_EvGv since a read-modify-write with memory is > not needed within TCG. > > Signed-off-by: Richard Henderson <r...@twiddle.net>
Acked-by: Aurelien Jarno <aurel...@aurel32.net> > --- > tcg/i386/tcg-target.c | 78 > ++++++++++++++++++++++++++++++------------------- > 1 files changed, 48 insertions(+), 30 deletions(-) > > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index df1bdfc..b4e8e74 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long > val, > > #define OPC_ARITH_EvIz (0x81) > #define OPC_ARITH_EvIb (0x83) > +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ > +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) > +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) > +#define OPC_DEC_r32 (0x48) > #define OPC_BSWAP (0xc8 | P_EXT) > +#define OPC_INC_r32 (0x40) > #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ > #define OPC_JCC_short (0x70) /* ... plus condition code */ > #define OPC_JMP_long (0xe9) > @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long > val, > #define OPC_SHIFT_1 (0xd1) > #define OPC_SHIFT_Ib (0xc1) > #define OPC_SHIFT_cl (0xd3) > +#define OPC_TESTL (0x85) > > /* Group 1 opcode extensions for 0x80-0x83. */ > #define ARITH_ADD 0 > @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, > int opc, int r, int rm, > } > } > > +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. 
*/ > +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) > +{ > + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src); > +} > + > static inline void tcg_out_mov(TCGContext *s, int ret, int arg) > { > if (arg != ret) { > @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType > type, > int ret, int32_t arg) > { > if (arg == 0) { > - /* xor r0,r0 */ > - tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret); > + tgen_arithr(s, ARITH_XOR, ret, ret); > } else { > tcg_out8(s, 0xb8 + ret); > tcg_out32(s, arg); > @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int > reg) > tcg_out_shifti(s, SHIFT_ROL, reg, 8); > } > > -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, > int cf) > +static inline void tgen_arithi(TCGContext *s, int c, int r0, > + int32_t val, int cf) > { > - if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == > -1))) { > - /* inc */ > - tcg_out_opc(s, 0x40 + r0); > - } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && > val == 1))) { > - /* dec */ > - tcg_out_opc(s, 0x48 + r0); > + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce > + partial flags update stalls on Pentium4 and are not recommended > + by current Intel optimization manuals. */ > + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == > -1)) { > + int opc = ((c == ARITH_ADD) ^ (val < 0) ? 
OPC_INC_r32 : OPC_DEC_r32); > + tcg_out_opc(s, opc + r0); > } else if (val == (int8_t)val) { > tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0); > tcg_out8(s, val); > @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, > TCGArg arg2, > if (const_arg2) { > if (arg2 == 0) { > /* test r, r */ > - tcg_out_modrm(s, 0x85, arg1, arg1); > + tcg_out_modrm(s, OPC_TESTL, arg1, arg1); > } else { > tgen_arithi(s, ARITH_CMP, arg1, arg2, 0); > } > } else { > - tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1); > + tgen_arithr(s, ARITH_CMP, arg1, arg2); > } > } > > @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg > *args, > tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read)); > > /* cmp 0(r1), r0 */ > - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); > + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0); > > tcg_out_mov(s, r0, addr_reg); > > @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg > *args, > s->code_ptr++; > > /* cmp 4(r1), addr_reg2 */ > - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); > + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); > > /* je label1 */ > tcg_out8(s, OPC_JCC_short + JCC_JE); > @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg > *args, > *label1_ptr = s->code_ptr - label1_ptr - 1; > > /* add x(r1), r0 */ > - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - > + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, > + offsetof(CPUTLBEntry, addend) - > offsetof(CPUTLBEntry, addr_read)); > #else > r0 = addr_reg; > @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg > *args, > tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write)); > > /* cmp 0(r1), r0 */ > - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0); > + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0); > > tcg_out_mov(s, r0, addr_reg); > > @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg > *args, > s->code_ptr++; 
> > /* cmp 4(r1), addr_reg2 */ > - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4); > + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); > > /* je label1 */ > tcg_out8(s, OPC_JCC_short + JCC_JE); > @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg > *args, > *label1_ptr = s->code_ptr - label1_ptr - 1; > > /* add x(r1), r0 */ > - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - > + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, > + offsetof(CPUTLBEntry, addend) - > offsetof(CPUTLBEntry, addr_write)); > #else > r0 = addr_reg; > @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode > opc, > if (const_args[2]) { > tgen_arithi(s, c, args[0], args[2], 0); > } else { > - tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]); > + tgen_arithr(s, c, args[0], args[2]); > } > break; > case INDEX_op_mul_i32: > @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s, > TCGOpcode opc, > goto gen_shift32; > > case INDEX_op_add2_i32: > - if (const_args[4]) > + if (const_args[4]) { > tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); > - else > - tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]); > - if (const_args[5]) > + } else { > + tgen_arithr(s, ARITH_ADD, args[0], args[4]); > + } > + if (const_args[5]) { > tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); > - else > - tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]); > + } else { > + tgen_arithr(s, ARITH_ADC, args[1], args[5]); > + } > break; > case INDEX_op_sub2_i32: > - if (const_args[4]) > + if (const_args[4]) { > tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); > - else > - tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]); > - if (const_args[5]) > + } else { > + tgen_arithr(s, ARITH_SUB, args[0], args[4]); > + } > + if (const_args[5]) { > tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); > - else > - tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]); > + } else { > + tgen_arithr(s, ARITH_SBB, args[1], 
args[5]); > + } > break; > case INDEX_op_brcond_i32: > tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], > -- > 1.6.6.1 > > > > -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net