The REX2 prefix has two main complications: it does not apply to vector registers, and it disables or mutates some opcodes (thus needing separate decoding functions instead of decode_root and decode_0F). Otherwise, all it does is extend s->rex_r, s->rex_x and s->rex_b to two bits.
Since REX2 provides the ability to access r16...r31, extend cpu_regs[] to CPU_NB_EREGS elements. Signed-off-by: Paolo Bonzini <[email protected]> --- target/i386/tcg/translate.c | 22 +++++- target/i386/tcg/decode-new.c.inc | 114 +++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 10 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 061adcb7221..47eef81ba05 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -78,7 +78,7 @@ static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; static TCGv cpu_eip; static TCGv_i32 cpu_cc_op; -static TCGv cpu_regs[CPU_NB_REGS]; +static TCGv cpu_regs[CPU_NB_EREGS]; static TCGv cpu_seg_base[6]; static TCGv_i64 cpu_bndl[4]; static TCGv_i64 cpu_bndu[4]; @@ -3349,7 +3349,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode) void tcg_x86_init(void) { - static const char reg_names[CPU_NB_REGS][4] = { + static const char reg_names[CPU_NB_EREGS][4] = { #ifdef TARGET_X86_64 [R_EAX] = "rax", [R_EBX] = "rbx", @@ -3367,6 +3367,22 @@ void tcg_x86_init(void) [13] = "r13", [14] = "r14", [15] = "r15", + [16] = "r16", + [17] = "r17", + [18] = "r18", + [19] = "r19", + [20] = "r20", + [21] = "r21", + [22] = "r22", + [23] = "r23", + [24] = "r24", + [25] = "r25", + [26] = "r26", + [27] = "r27", + [28] = "r28", + [29] = "r29", + [30] = "r30", + [31] = "r31", #else [R_EAX] = "eax", [R_EBX] = "ebx", @@ -3411,7 +3427,7 @@ void tcg_x86_init(void) "cc_src2"); cpu_eip = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, eip), eip_name); - for (i = 0; i < CPU_NB_REGS; ++i) { + for (i = 0; i < CPU_NB_EREGS; ++i) { cpu_regs[i] = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, regs[i]), reg_names[i]); diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index b968db2b8ad..9ee69564ab1 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1988,6 +1988,65 @@ static const X86OpEntry opcodes_root[256] = { [0xFF] = 
X86_OP_GROUP1(group4_5, E,v), }; +#ifdef TARGET_X86_64 +static const X86OpEntry opcodes_rex2_map0_A0toAF[16] = { +}; + +static void decode_REX2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b, + const X86OpEntry **map) +{ + *b = x86_ldub_code(env, s); + const X86OpEntry *group = map[*b >> 4]; + *entry = group ? group[*b & 15] : UNKNOWN_OPCODE; +} + +static void decode_REX2_map0(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry *opcode_rex2_map0[16] = { + &opcodes_root[0x00], + &opcodes_root[0x10], + &opcodes_root[0x20], + &opcodes_root[0x30], + NULL, + &opcodes_root[0x50], + &opcodes_root[0x60], + NULL, + &opcodes_root[0x80], + &opcodes_root[0x90], + opcodes_rex2_map0_A0toAF, + &opcodes_root[0xB0], + &opcodes_root[0xC0], + &opcodes_root[0xD0], + NULL, + &opcodes_root[0xF0], + }; + decode_REX2(s, env, entry, b, opcode_rex2_map0); +} + +static void decode_REX2_map1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry *opcode_rex2_map1[16] = { + &opcodes_0F[0x00], + &opcodes_0F[0x10], + &opcodes_0F[0x20], + NULL, + &opcodes_0F[0x40], + &opcodes_0F[0x50], + &opcodes_0F[0x60], + &opcodes_0F[0x70], + NULL, + &opcodes_0F[0x90], + &opcodes_0F[0xA0], + &opcodes_0F[0xB0], + &opcodes_0F[0xC0], + &opcodes_0F[0xD0], + &opcodes_0F[0xE0], + &opcodes_0F[0xF0], + }; + decode_REX2(s, env, entry, b, opcode_rex2_map1); +} +#endif + #undef mmx #undef vex1 #undef vex2 @@ -2007,6 +2066,20 @@ static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui *entry = opcodes_root[*b]; } +static int reg_nb_mask(DisasContext *s, int unit) +{ + switch (unit) { + case X86_OP_MMX: + return 7; + case X86_OP_SSE: + return 15; + break; + default: + return 31; + break; + } +} + /* Decode the MODRM and SIB bytes into a register or memory operand. 
*/ static void decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, X86DecodedOp *op) @@ -2018,10 +2091,7 @@ static void decode_modrm(DisasContext *s, CPUX86State *env, int sib = -1; if (mod == 3) { - op->n = rm; - if (op->unit != X86_OP_MMX) { - op->n |= REX_B(s); - } + op->n = (rm | REX_B(s)) & reg_nb_mask(s, op->unit); return; } @@ -2300,9 +2370,7 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, } get_reg: op->n = ((get_modrm(s, env) >> 3) & 7); - if (op->unit != X86_OP_MMX) { - op->n |= REX_R(s); - } + op->n |= REX_R(s) & reg_nb_mask(s, op->unit); break; case X86_TYPE_E: /* ALU modrm operand */ @@ -2749,6 +2817,24 @@ static void dump_unknown_opcode(CPUX86State *env, DisasContext *s) } } +/* MASK must have two bits set. Bring the highest next to the lowest; + * for example if MASK == 0x11, bit 4 of value is moved to bit 1. Clear + * every other bit in VALUE. + * + * Generally mask will be a constant, so that all of the first three + * lines disappear. Likewise, if the bits in mask are already adjacent + * this becomes just "return value & mask". + */ +static inline uint8_t collapse_two_bits(uint8_t value, uint8_t mask) +{ + uint8_t high = mask & (mask - 1); + uint8_t low = mask & ~high; + uint8_t tweak = (low << 1) - high; + + value &= mask; + return value + (value > low ? tweak : 0); +} + /* * Convert one instruction. s->base.is_jmp is set if the translation must * be stopped. @@ -2833,6 +2919,20 @@ static void disas_insn(DisasContext *s, CPUState *cpu) goto next_byte_rex; } break; + case 0xd5: /* REX2 */ + if (CODE64(s) && (s->flags & HF_APX_EN_MASK)) { + int rex2 = x86_ldub_code(env, s); + if (rex != -1) { + goto illegal_op; + } + s->prefix |= PREFIX_REX2; + s->rex_b = collapse_two_bits(rex2, 0x11) << 3; + s->rex_x = collapse_two_bits(rex2, 0x22) << 2; + s->rex_r = collapse_two_bits(rex2, 0x44) << 1; + s->vex_w = (rex2 >> 3) & 1; + decode_func = rex2 & 0x80 ? 
decode_REX2_map1 : decode_REX2_map0; + } + break; #endif case 0xc5: /* 2-byte VEX */ case 0xc4: /* 3-byte VEX */ -- 2.52.0
