VSIB addressing can use either 32-bit or 64-bit addresses; pass a constant mask to the helper and apply it to each computed address before the load.
Cc: [email protected] Signed-off-by: Paolo Bonzini <[email protected]> --- target/i386/ops_sse.h | 16 ++++++++-------- target/i386/tcg/ops_sse_header.h.inc | 8 ++++---- target/i386/tcg/emit.c.inc | 17 +++++++++-------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index a2e4d480399..853196b2bbd 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -2362,42 +2362,42 @@ void glue(helper_vpmaskmovq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) } void glue(helper_vpgatherdd, SUFFIX)(CPUX86State *env, - Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale, target_ulong amask) { int i; for (i = 0; i < (2 << SHIFT); i++) { if (v->L(i) >> 31) { target_ulong addr = a0 + ((target_ulong)(int32_t)s->L(i) << scale); - d->L(i) = cpu_ldl_data_ra(env, addr, GETPC()); + d->L(i) = cpu_ldl_data_ra(env, addr & amask, GETPC()); } v->L(i) = 0; } } void glue(helper_vpgatherdq, SUFFIX)(CPUX86State *env, - Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale, target_ulong amask) { int i; for (i = 0; i < (1 << SHIFT); i++) { if (v->Q(i) >> 63) { target_ulong addr = a0 + ((target_ulong)(int32_t)s->L(i) << scale); - d->Q(i) = cpu_ldq_data_ra(env, addr, GETPC()); + d->Q(i) = cpu_ldq_data_ra(env, addr & amask, GETPC()); } v->Q(i) = 0; } } void glue(helper_vpgatherqd, SUFFIX)(CPUX86State *env, - Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale, target_ulong amask) { int i; for (i = 0; i < (1 << SHIFT); i++) { if (v->L(i) >> 31) { target_ulong addr = a0 + ((target_ulong)(int64_t)s->Q(i) << scale); - d->L(i) = cpu_ldl_data_ra(env, addr, GETPC()); + d->L(i) = cpu_ldl_data_ra(env, addr & amask, GETPC()); } v->L(i) = 0; } @@ -2408,14 +2408,14 @@ void glue(helper_vpgatherqd, SUFFIX)(CPUX86State *env, } void glue(helper_vpgatherqq, 
SUFFIX)(CPUX86State *env, - Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale, target_ulong amask) { int i; for (i = 0; i < (1 << SHIFT); i++) { if (v->Q(i) >> 63) { target_ulong addr = a0 + ((target_ulong)(int64_t)s->Q(i) << scale); - d->Q(i) = cpu_ldq_data_ra(env, addr, GETPC()); + d->Q(i) = cpu_ldq_data_ra(env, addr & amask, GETPC()); } v->Q(i) = 0; } diff --git a/target/i386/tcg/ops_sse_header.h.inc b/target/i386/tcg/ops_sse_header.h.inc index d92c6faf6d6..bbeb7301c33 100644 --- a/target/i386/tcg/ops_sse_header.h.inc +++ b/target/i386/tcg/ops_sse_header.h.inc @@ -388,10 +388,10 @@ DEF_HELPER_4(glue(vpmaskmovd_st, SUFFIX), void, env, Reg, Reg, tl) DEF_HELPER_4(glue(vpmaskmovq_st, SUFFIX), void, env, Reg, Reg, tl) DEF_HELPER_4(glue(vpmaskmovd, SUFFIX), void, env, Reg, Reg, Reg) DEF_HELPER_4(glue(vpmaskmovq, SUFFIX), void, env, Reg, Reg, Reg) -DEF_HELPER_6(glue(vpgatherdd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) -DEF_HELPER_6(glue(vpgatherdq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) -DEF_HELPER_6(glue(vpgatherqd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) -DEF_HELPER_6(glue(vpgatherqq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_7(glue(vpgatherdd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32, tl) +DEF_HELPER_7(glue(vpgatherdq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32, tl) +DEF_HELPER_7(glue(vpgatherqd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32, tl) +DEF_HELPER_7(glue(vpgatherqq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32, tl) #if SHIFT == 2 DEF_HELPER_3(vpermd_ymm, void, Reg, Reg, Reg) DEF_HELPER_4(vpermdq_ymm, void, Reg, Reg, Reg, i32) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index e55b65176fc..bc3a07f972c 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -43,8 +43,8 @@ typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, TCGv_i32 val); typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, 
TCGv_ptr reg_b, TCGv val); -typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, - TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); +typedef void (*SSEFunc_0_eppptit)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale, TCGv amask); typedef void (*SSEFunc_0_eppppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 flags); typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, @@ -1100,18 +1100,19 @@ VEXW_AVX(VPMASKMOV, vpmaskmov) /* Same as above, but with extra arguments to the helper. */ static inline void gen_vsib_avx(DisasContext *s, X86DecodedInsn *decode, - SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm, - SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm) + SSEFunc_0_eppptit d_xmm, SSEFunc_0_eppptit q_xmm, + SSEFunc_0_eppptit d_ymm, SSEFunc_0_eppptit q_ymm) { - SSEFunc_0_epppti d = s->vex_l ? d_ymm : d_xmm; - SSEFunc_0_epppti q = s->vex_l ? q_ymm : q_xmm; - SSEFunc_0_epppti fn = s->vex_w ? q : d; + SSEFunc_0_eppptit d = s->vex_l ? d_ymm : d_xmm; + SSEFunc_0_eppptit q = s->vex_l ? q_ymm : q_xmm; + SSEFunc_0_eppptit fn = s->vex_w ? q : d; TCGv_i32 scale = tcg_constant_i32(decode->mem.scale); TCGv_ptr index = tcg_temp_new_ptr(); + TCGv mask = tcg_constant_tl(MAKE_64BIT_MASK(0, 8 << s->aflag)); /* Pass third input as (index, base, scale) */ tcg_gen_addi_ptr(index, tcg_env, ZMM_OFFSET(decode->mem.index)); - fn(tcg_env, OP_PTR0, OP_PTR1, index, s->A0, scale); + fn(tcg_env, OP_PTR0, OP_PTR1, index, s->A0, scale, mask); /* * There are two output operands, so zero OP1's high 128 bits -- 2.52.0
