From: Chinmay Rath <ra...@linux.ibm.com> Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use.
Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Suggested-by: Richard Henderson <richard.hender...@linaro.org> Signed-off-by: Chinmay Rath <ra...@linux.ibm.com> Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- target/ppc/translate/vsx-impl.c.inc | 74 +++++++++++++---------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..40a87ddc4a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ + tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ + tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; - TCGv_i64 xth, xtl; + TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); - xth = tcg_temp_new_i64(); - xtl = tcg_temp_new_i64(); + data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); - tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); - tcg_gen_addi_tl(EA, EA, 8); - tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); - set_cpu_vsr(a->rt, xth, true); - set_cpu_vsr(a->rt, xtl, false); + tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); + set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; - TCGv_i64 xsh, xsl; + TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); - xsh = tcg_temp_new_i64(); - xsl = tcg_temp_new_i64(); - get_cpu_vsr(xsh, a->rt, true); - get_cpu_vsr(xsl, a->rt, false); + data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); - tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); - tcg_gen_addi_tl(EA, EA, 8); - tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); + get_vsr_full(data, a->rt); + tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; - TCGv_i64 xt; + TCGv_i128 data; MemOp mop; int rt1, rt2; - xt = tcg_temp_new_i64(); + data = tcg_temp_new_i128(); - mop = DEF_MEMOP(MO_UQ); + mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { - get_cpu_vsr(xt, rt1, !ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt1, ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); + get_vsr_full(data, rt1); + tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt2, !ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); - gen_addr_add(ctx, ea, ea, 8); - get_cpu_vsr(xt, rt2, ctx->le_mode); - tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); + gen_addr_add(ctx, ea, ea, 16); + get_vsr_full(data, rt2); + tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt1, xt, !ctx->le_mode); - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt1, xt, ctx->le_mode); + tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); + set_vsr_full(rt1, data); if (paired) { - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt2, xt, !ctx->le_mode); - gen_addr_add(ctx, ea, ea, 8); - tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); - set_cpu_vsr(rt2, xt, ctx->le_mode); + gen_addr_add(ctx, ea, ea, 16); + tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); + set_vsr_full(rt2, data); } } return true; -- 2.45.2