Use the same flag generation code as SHL and SHR, but keep using the existing gen_shiftd_rm_T1 function to compute both the result and CC_SRC.
Decoding-wise, SHLD/SHRD with an immediate shift count are treated as 4-operand instructions, because s->T0 and s->T1 already occupy three op slots and the immediate count therefore goes in the fourth (op3). The infrastructure already used by opcodes in the 0F 3A table works fine for this. Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/translate.c | 84 +------------------------------- target/i386/tcg/decode-new.c.inc | 8 ++- target/i386/tcg/emit.c.inc | 42 ++++++++++++++++ 3 files changed, 50 insertions(+), 84 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 5200b578a0e..33058db4e30 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1434,57 +1434,11 @@ static bool check_cpl0(DisasContext *s) return false; } -static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result, - TCGv shm1, TCGv count, bool is_right) -{ - TCGv_i32 z32, s32, oldop; - TCGv z_tl; - - /* Store the results into the CC variables. If we know that the - variable must be dead, store unconditionally. Otherwise we'll - need to not disrupt the current contents. */ - z_tl = tcg_constant_tl(0); - if (cc_op_live[s->cc_op] & USES_CC_DST) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl, - result, cpu_cc_dst); - } else { - tcg_gen_mov_tl(cpu_cc_dst, result); - } - if (cc_op_live[s->cc_op] & USES_CC_SRC) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl, - shm1, cpu_cc_src); - } else { - tcg_gen_mov_tl(cpu_cc_src, shm1); - } - - /* Get the two potential CC_OP values into temporaries. */ - tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); - if (s->cc_op == CC_OP_DYNAMIC) { - oldop = cpu_cc_op; - } else { - tcg_gen_movi_i32(s->tmp3_i32, s->cc_op); - oldop = s->tmp3_i32; - } - - /* Conditionally store the CC_OP value.
*/ - z32 = tcg_constant_i32(0); - s32 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s32, count); - tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop); - - /* The CC_OP value is no longer predictable. */ - set_cc_op(s, CC_OP_DYNAMIC); -} - /* XXX: add faster immediate case */ -static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot, - bool is_right, TCGv count_in) +static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, + bool is_right, TCGv count) { target_ulong mask = (ot == MO_64 ? 63 : 31); - TCGv count; - - count = tcg_temp_new(); - tcg_gen_andi_tl(count, count_in, mask); switch (ot) { case MO_16: @@ -1546,8 +1500,6 @@ static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot, tcg_gen_or_tl(s->T0, s->T0, s->T1); break; } - - return count; } #define X86_MAX_INSN_LENGTH 15 @@ -3057,7 +3009,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) CPUX86State *env = cpu_env(cpu); int prefixes = s->prefix; MemOp dflag = s->dflag; - TCGv shift; MemOp ot; int modrm, reg, rm, mod, op, val; @@ -3221,37 +3172,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } break; - /**************************/ - /* shifts */ - case 0x1a4: /* shld imm */ - op = 0; - shift = NULL; - goto do_shiftd; - case 0x1a5: /* shld cl */ - op = 0; - shift = cpu_regs[R_ECX]; - goto do_shiftd; - case 0x1ac: /* shrd imm */ - op = 1; - shift = NULL; - goto do_shiftd; - case 0x1ad: /* shrd cl */ - op = 1; - shift = cpu_regs[R_ECX]; - do_shiftd: - ot = dflag; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ld_modrm(env, s, modrm, ot); - if (!shift) { - shift = tcg_constant_tl(x86_ldub_code(env, s)); - } - gen_op_mov_v_reg(s, ot, s->T1, reg); - shift = gen_shiftd_rm_T1(s, ot, op, shift); - gen_st_modrm(env, s, modrm, ot); - gen_shift_flags(s, ot, s->T0, s->tmp0, shift, op); - break; - /************************/ /* bit operations */ case 0x1ba: /* bt/bts/btr/btc Gv, im */ diff --git a/target/i386/tcg/decode-new.c.inc 
b/target/i386/tcg/decode-new.c.inc index 1db9d1e2bc3..2d27b07f296 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1114,6 +1114,8 @@ static const X86OpEntry opcodes_0F[256] = { [0xa0] = X86_OP_ENTRYr(PUSH, FS, w), [0xa1] = X86_OP_ENTRYw(POP, FS, w), [0xa2] = X86_OP_ENTRY0(CPUID), + [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v), + [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v), [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None), [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None), @@ -1240,6 +1242,8 @@ static const X86OpEntry opcodes_0F[256] = { [0xa8] = X86_OP_ENTRYr(PUSH, GS, w), [0xa9] = X86_OP_ENTRYw(POP, GS, w), [0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)), + [0xac] = X86_OP_ENTRY4(SHRD, E,v, 2op,v, G,v), + [0xad] = X86_OP_ENTRY3(SHRD, E,v, 2op,v, G,v), [0xae] = X86_OP_GROUP0(group15), /* * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3 @@ -2532,8 +2536,8 @@ static void disas_insn(DisasContext *s, CPUState *cpu) switch (b) { case 0x00 ... 0x01: /* mostly privileged instructions */ case 0x1a ... 0x1b: /* MPX */ - case 0xa3 ... 0xa5: /* BT, SHLD */ - case 0xab ... 0xad: /* BTS, SHRD */ + case 0xa3: /* bt */ + case 0xab: /* bts */ case 0xb0 ... 0xb1: /* cmpxchg */ case 0xb3: /* btr */ case 0xb8: /* integer ops */ diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 156ea282af4..fc4687a35bd 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -3584,6 +3584,27 @@ static void gen_SHL(DisasContext *s, X86DecodedInsn *decode) } } +static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + int unit = decode->e.op3 == X86_TYPE_I ? 
X86_OP_IMM : X86_OP_INT; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit); + + if (!count) { + return; + } + + decode->cc_dst = s->T0; + decode->cc_src = s->tmp0; + gen_shiftd_rm_T1(s, ot, false, count); + if (can_be_zero) { + gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot); + } else { + decode->cc_op = CC_OP_SHLB + ot; + } +} + static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -3616,6 +3637,27 @@ static void gen_SHR(DisasContext *s, X86DecodedInsn *decode) } } +static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit); + + if (!count) { + return; + } + + decode->cc_dst = s->T0; + decode->cc_src = s->tmp0; + gen_shiftd_rm_T1(s, ot, true, count); + if (can_be_zero) { + gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); + } else { + decode->cc_op = CC_OP_SARB + ot; + } +} + static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; -- 2.45.1