[PATCH v3 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Change log: v3 : Rectified EA increment from 8 to 16 for paired insns in patch 3/3, as pointed out by Richard. Retained his 'Reviewed-by' for all patches, after the correction. v2 : Applied IFALIGN_PAIR memop changes in patches 2/3 and 3/3, based on review comments by Richard. https://lore.kernel.org/qemu-devel/20240630120157.259233-1-ra...@linux.ibm.com/ v1 : https://lore.kernel.org/qemu-devel/20240621114604.868415-1-ra...@linux.ibm.com/ Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 target/ppc/translate/vmx-impl.c.inc | 52 +++- target/ppc/translate/vsx-impl.c.inc | 74 + 3 files changed, 63 insertions(+), 73 deletions(-) -- 2.39.3
[PATCH v3 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 74 + 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..40a87ddc4a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; 
REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xsh = tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +gen_addr_add(ctx, ea, ea, 16); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { 
-gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt,
[PATCH v3 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Reviewed-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH v3 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 42 ++--- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..70d0ad2e71 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,29 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +60,16 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); return true; } -- 2.39.3
[PATCH v2 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 70 + 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..b622831a73 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, 
ISA300); -xsh = tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); 
-set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt2, data); } } return true; -- 2.39.3
[PATCH v2 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 42 ++--- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..70d0ad2e71 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,29 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +60,16 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); return true; } -- 2.39.3
[PATCH v2 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Change log: v2 : Applied IFALIGN_PAIR memop changes in patches 2/3 and 3/3, based on review comments by Richard in v1. v1 : https://lore.kernel.org/qemu-devel/20240621114604.868415-1-ra...@linux.ibm.com/ Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 - target/ppc/translate/vmx-impl.c.inc | 52 - target/ppc/translate/vsx-impl.c.inc | 70 + 3 files changed, 61 insertions(+), 71 deletions(-) -- 2.39.3
[PATCH v2 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 40 + 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..47f6952d69 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,28 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, DEF_MEMOP(MO_128)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +59,15 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, DEF_MEMOP(MO_128)); return true; } -- 2.39.3
[PATCH 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 68 - 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..a42fbf7c12 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,16 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, MO_BE | MO_128); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +391,16 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xsh = 
tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, MO_BE | MO_128); return true; } @@ -2175,13 +2177,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2197,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); 
-tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt2, data); } } return true; -- 2.39.3
[PATCH 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 - target/ppc/translate/vmx-impl.c.inc | 50 +++-- target/ppc/translate/vsx-impl.c.inc | 68 - 3 files changed, 57 insertions(+), 71 deletions(-) -- 2.39.3
[PATCH v3 0/4] target/ppc: Move VSX storage access and compare
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Change log : v3: - Patch 2/4 : Updated the added function do_ea_calc_ra to return modifiable EA, after discussions with Richard. v2: https://lore.kernel.org/qemu-devel/20240613093318.314913-1-ra...@linux.ibm.com/ - Addressed comments by Richard in v1 : - Patch 2/4 : Handled proper ea calculation in narrow mode. Also created a new function for ea calculation instead of inlining, for later use by (p){lx,stx}vp insns. - Patch 4/4 : Unified helper calls. - Retained Richard's "Reviewed-by" in patches 1, 3 and 4. v1: https://lore.kernel.org/qemu-devel/20240607144921.726730-1-ra...@linux.ibm.com/ Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate.c | 15 + target/ppc/translate/vsx-impl.c.inc | 416 ++-- target/ppc/translate/vsx-ops.c.inc | 49 7 files changed, 287 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH v3 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_ty
[PATCH v3 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h | 16 +- target/ppc/insn32.decode| 12 target/ppc/fpu_helper.c | 16 +- target/ppc/translate/vsx-impl.c.inc | 46 + target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, 
vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP
[PATCH v3 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 46bab49215..e0fb4bad92 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_
[PATCH v3 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also added a new function do_ea_calc_ra to calculate the effective address : EA <- (RA == 0) ? 0 : GPR[RA], which is now used by the above-said insns, and shall be used later by (p){lx, stx}vp insns. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate.c | 15 + target/ppc/translate/vsx-impl.c.inc | 94 - target/ppc/translate/vsx-ops.c.inc | 8 --- 6 files changed, 94 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . 
@X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. @XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..f0647da551 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3096,6 +3096,7 @@ static inline void gen_align_no_le(DisasContext *ctx) (ctx->opcode & 0x03FF) | POWERPC_EXCP_ALIGN_LE); } +/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) { TCGv ea = tcg_temp_new(); @@ -3110,6 +3111,20 @@ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) return ea; } +/* EA <- (ra == 0) ? 
0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ +TCGv EA = tcg_temp_new(); +if (!ra) { +tcg_gen_movi_tl(EA, 0); +} else if (NARROW_MODE(ctx)) { +tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); +} else { +tcg_gen_mov_tl(EA, cpu_gpr[ra]); +} +return EA; +} + /*** Integer load ***/ #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..46bab49215 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,72 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA;
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
On 6/17/24 23:27, Richard Henderson wrote: On 6/17/24 04:51, Chinmay Rath wrote: Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? If you need to modify the resulting EA, then you also need to make a copy for 0. Please ignore my previous response. I think do_ea_calc_ra should allow modification to the resulting EA, hence below change appears more appropriate to me : /* EA <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA = tcg_temp_new(); if (!ra) { tcg_gen_movi_tl(EA, 0); return EA; } if (NARROW_MODE(ctx)) { tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { tcg_gen_mov_tl(EA, cpu_gpr[ra]); } return EA; } If that's what's needed by the callers of do_ea_calc_ra, then yes. You can drop the first return EA and use else if instead. Sure. I shall stick to keeping EA modifiable, (even though it is not modified by the callers in this patch), to allow its proper usage by (p){lx, stx}vp insns in future. Thanks & Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
On 6/17/24 23:15, Richard Henderson wrote: On 6/17/24 03:40, Chinmay Rath wrote: static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA; if (!ra) { return tcg_constant_tl(0); } if (NARROW_MODE(ctx)) { EA = tcg_temp_new(); tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { return cpu_gpr[ra]; } return EA; } If you need to modify the resulting EA, then you also need to make a copy for 0. Hey, didn't properly get what you meant here. Did you mean : Since I'm using a tcg_constant for 0, if the EA is to be modified later, this constant would be an issue, in which case, I should make a copy for it ?? Yes. Considering that, there are no tcg level modifications with this EA. Ok, good. However, the underlying helper method, which considers this EA as a target_ulong type does modify it, which I don't think should be an issue. Correct, that's fine. Awesome ! Thanks for the clarification. Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? If you need to modify the resulting EA, then you also need to make a copy for 0. Please ignore my previous response. I think do_ea_calc_ra should allow modification to the resulting EA, hence below change appears more appropriate to me : /* EA <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA = tcg_temp_new(); if (!ra) { tcg_gen_movi_tl(EA, 0); return EA; } if (NARROW_MODE(ctx)) { tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { tcg_gen_mov_tl(EA, cpu_gpr[ra]); } return EA; } Let me know your thoughts. Thanks & Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? True, this tcg move is redundant. Was carried away to maintain uniformity with the original do_ea_calc function. My bad! This can rather just be : /* ea <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA; if (!ra) { return tcg_constant_tl(0); } if (NARROW_MODE(ctx)) { EA = tcg_temp_new(); tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { return cpu_gpr[ra]; } return EA; } If you need to modify the resulting EA, then you also need to make a copy for 0. Hey, didn't properly get what you meant here. Did you mean : Since I'm using a tcg_constant for 0, if the EA is to be modified later, this constant would be an issue, in which case, I should make a copy for it ?? Considering that, there are no tcg level modifications with this EA. However, the underlying helper method, which considers this EA as a target_ulong type does modify it, which I don't think should be an issue. Please let me know if I missed something. Thanks & Regards, Chinmay r~
[PATCH v2 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h | 16 +- target/ppc/insn32.decode| 12 target/ppc/fpu_helper.c | 16 +- target/ppc/translate/vsx-impl.c.inc | 46 + target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, 
vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP
[PATCH v2 0/4] Move VSX storage access and compare insns to decodetree.
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Change log : v2: - Addressed comments by Richard in v1 - Patch 2/4 : Handled proper ea calculation in narrow mode. Also created a new function for ea calculation instead of inlining, for later use by (p){lx,stx}vp insns. - Patch 4/4 : Unified helper calls. - Retained Richard's "Reviewed-by" in patches 1, 3 and 4. v1: https://lore.kernel.org/qemu-devel/20240607144921.726730-1-ra...@linux.ibm.com/ Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate.c | 18 ++ target/ppc/translate/vsx-impl.c.inc | 416 ++-- target/ppc/translate/vsx-ops.c.inc | 49 7 files changed, 290 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH v2 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_ty
[PATCH v2 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 46bab49215..e0fb4bad92 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_
[PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also added a new function to calculate the effective address : EA <- (RA == 0) ? 0 : GPR[RA], which is now used by the above-said insns, and shall be used later by (p){lx, stx}vp insns. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate.c | 18 ++ target/ppc/translate/vsx-impl.c.inc | 94 - target/ppc/translate/vsx-ops.c.inc | 8 --- 6 files changed, 97 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . 
@X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. @XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..a1f2f4fbda 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3096,6 +3096,7 @@ static inline void gen_align_no_le(DisasContext *ctx) (ctx->opcode & 0x03FF) | POWERPC_EXCP_ALIGN_LE); } +/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) { TCGv ea = tcg_temp_new(); @@ -3110,6 +3111,23 @@ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) return ea; } +/* EA <- (ra == 0) ? 
0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ +TCGv EA; +if (!ra) { +EA = tcg_constant_tl(0); +return EA; +} +EA = tcg_temp_new(); +if (NARROW_MODE(ctx)) { +tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); +} else { +tcg_gen_mov_tl(EA, cpu_gpr[ra]); +} +return EA; +} + /*** Integer load ***/ #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..46bab49215 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,72 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA;
Re: [PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Hi Richard, My apologies for the ill formatted reply in this patch series. Just realized it now. The cliched 'Tab' issue with the mail client XD. On 6/7/24 21:16, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) Because the ops are identical, Reviewed-by: Richard Henderson But you really should update these to use tcg_gen_qemu_ld/st_i128 with the proper atomicity flags. This will fix an existing bug... ^ Surely Richard, I have noted this suggestion from earlier patch and plan to do this, and a few others which I couldn't implement earlier, along with some clean-ups this week. I refrained from doing it with the decodetree movement, to take time to properly understand and test. Should send out those patches soon. Thanks & Regards, Chinmay +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); + set_cpu_vsr(a->rt, t0, true); where the vector register is partially modified ... tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); before a fault from the second load is recognized. Similarly for stores leaving memory partially modified. r~
Re: [PATCH 4/4] target/ppc: Move VSX fp compare insns to decodetree.
On 6/7/24 21:25, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: +static bool do_cmp(DisasContext *ctx, arg_XX3_rc *a, + void (*helper)(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_i32 ignored; + TCGv_ptr xt, xa, xb; + REQUIRE_VSX(ctx); + xt = gen_vsr_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + if (a->rc) { + helper(cpu_crf[6], tcg_env, xt, xa, xb); + } else { + ignored = tcg_temp_new_i32(); + helper(ignored, tcg_env, xt, xa, xb); + } Better to unify the helper call. E.g. dest = a->rc ? cpu_crf[6] : tcg_temp_new_i32(); helper(dest, ...) ^ Sure Richard, will do in v2. Thanks & Regards, Chinmay Anyway, Reviewed-by: Richard Henderson r~
Re: [PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Hi Richard, On 6/7/24 21:16, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) Because the ops are identical, Reviewed-by: Richard Henderson But you really should update these to use tcg_gen_qemu_ld/st_i128 with the proper atomicity flags. This will fix an existing bug... ^ Surely Richard, I have noted this suggestion of yours from an earlier patch, and plan to do this change and implement a few of your other suggestions, which I couldn't implement earlier, along with some clean-ups this week. I refrained from doing it with the decodetree movement, to take proper time to understand and test. Should send out those patches soon. Thanks & Regards, Chinmay +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); + set_cpu_vsr(a->rt, t0, true); where the vector register is partially modified ... tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); before a fault from the second load is recognized. Similarly for stores leaving memory partially modified. r~
Re: [PATCH 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/7/24 21:11, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: +static bool do_ld_st_vl(DisasContext *ctx, arg_X *a, + void (*helper)(TCGv_ptr, TCGv, TCGv_ptr, TCGv)) +{ + TCGv EA; + TCGv_ptr xt; + if (a->rt < 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); + } + xt = gen_vsr_ptr(a->rt); + gen_set_access_type(ctx, ACCESS_INT); + + if (a->ra) { + EA = tcg_temp_new(); + tcg_gen_mov_tl(EA, cpu_gpr[a->ra]); + } else { + EA = tcg_constant_tl(0); + } + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, EA); ra == 0, narrow mode, will crash, due to write into constant 0. Obviously 0 does not need extending, so this could be if (!a->ra) { ea = constant 0; } else if (narrow mode) { ea = tcg_temp_new(); tcg_gen_ext32u_tl(ea, cpu_gpr[a->ra]); } else { ra = cpu_gpr[a->ra]; } ^ Thank you Richard, will take care in v2. Aren't there existing helper functions for computing this address? And if not, better to create one. ^ The calculation of effective address in these instructions is slightly different than the others, for which helper function exist : EA for these insns : EA ← (RA=0) ? 0 : GPR[RA] EA for rest storage access insns : EA ← ((RA=0) ? 0 : GPR[RA]) + GPR[RB] This is why I could not reuse that function. Also, this calculation of EA is limited to these 4 insns above, and only 2 others (prefixed insns), which is why I did not create a new function for this, considering it won't be reused for any other insn. Please let me know if I should create a new function in this case as well. Thanks and Regards, Chinmay r~
[PATCH 0/4] target/ppc: Move VSX storage access and compare
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate/vsx-impl.c.inc | 430 ++-- target/ppc/translate/vsx-ops.c.inc | 49 6 files changed, 286 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 695b75ded9..739b5ad915 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UL)); -
[PATCH 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT);
[PATCH 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 16 - target/ppc/insn32.decode| 12 +++ target/ppc/fpu_helper.c | 16 - target/ppc/translate/vsx-impl.c.inc | 50 ++--- target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 52 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) 
-DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP(XVCMPGTDP, 2, float64, VsrD(i), lt, 1, 1
[PATCH 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate/vsx-impl.c.inc | 104 target/ppc/translate/vsx-ops.c.inc | 8 --- 5 files changed, 89 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..695b75ded9 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,82 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_ptr xt; \ - \ -if (xT(ctx->opcode) < 32) {\ -if (unlikely(!ctx->vsx_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VSXU); \ -return;\ -} \ -} else { \ -if (unlikely(!ctx->altivec_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VPU); \ -return;\ -} \ -} \ -EA = tcg_temp_new(); \ -xt = gen_vsr_ptr(xT(ctx->opcode)); \ -gen_set_access_type(ctx, ACCESS_INT); \ -gen_addr_register(ctx, EA);\ -gen_helper_##name(tcg_env, EA, xt, cpu_gpr[rB(ctx->opcode)]); \ -} - -VSX_VECTOR_LOAD_STORE_LENGTH(lxvl) -VSX_VECTOR_LOAD_STORE_LENGTH(lxvll) -VSX_VECTOR_LO
[PATCH 3/3] target/ppc: Move VSX logical instructions to decodetree.
Moving the following instructions to decodetree specification : xxl{and, andc, or, orc, nor, xor, nand, eqv}: XX3-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 11 target/ppc/translate/vsx-impl.c.inc | 39 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 9ed8e33cc8..30d6f9f750 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -937,6 +937,17 @@ XXMFACC 01 ... -- 0 - 0010110001 - @X_a XXMTACC 01 ... -- 1 - 0010110001 - @X_a XXSETACCZ 01 ... -- 00011 - 0010110001 - @X_a +## VSX Vector Logical instructions + +XXLAND 00 . . . 1010 ... @XX3 +XXLANDC 00 . . . 10001010 ... @XX3 +XXLOR 00 . . . 10010010 ... @XX3 +XXLXOR 00 . . . 10011010 ... @XX3 +XXLNOR 00 . . . 10100010 ... @XX3 +XXLEQV 00 . . . 10111010 ... @XX3 +XXLNAND 00 . . . 10110010 ... @XX3 +XXLORC 00 . . . 10101010 ... @XX3 + ## VSX GER instruction XVI4GER8111011 ... -- . . 
00100011 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0d16e0f02b..a769f199ce 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1573,26 +1573,24 @@ static void gen_xxbrw(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#define VSX_LOGICAL(name, vece, tcg_op) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{\ -if (unlikely(!ctx->vsx_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VSXU); \ -return; \ -}\ -tcg_op(vece, vsr_full_offset(xT(ctx->opcode)), \ - vsr_full_offset(xA(ctx->opcode)), \ - vsr_full_offset(xB(ctx->opcode)), 16, 16);\ -} - -VSX_LOGICAL(xxland, MO_64, tcg_gen_gvec_and) -VSX_LOGICAL(xxlandc, MO_64, tcg_gen_gvec_andc) -VSX_LOGICAL(xxlor, MO_64, tcg_gen_gvec_or) -VSX_LOGICAL(xxlxor, MO_64, tcg_gen_gvec_xor) -VSX_LOGICAL(xxlnor, MO_64, tcg_gen_gvec_nor) -VSX_LOGICAL(xxleqv, MO_64, tcg_gen_gvec_eqv) -VSX_LOGICAL(xxlnand, MO_64, tcg_gen_gvec_nand) -VSX_LOGICAL(xxlorc, MO_64, tcg_gen_gvec_orc) +static bool do_logical_op(DisasContext *ctx, arg_XX3 *a, unsigned vece, +void (*helper)(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)) +{ +REQUIRE_VSX(ctx); +helper(vece, vsr_full_offset(a->xt), +vsr_full_offset(a->xa), +vsr_full_offset(a->xb), 16, 16); +return true; +} + +TRANS_FLAGS2(VSX, XXLAND, do_logical_op, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS2(VSX, XXLANDC, do_logical_op, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS2(VSX, XXLOR, do_logical_op, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS2(VSX, XXLXOR, do_logical_op, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS2(VSX, XXLNOR, do_logical_op, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(VSX207, XXLEQV, do_logical_op, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(VSX207, XXLNAND, do_logical_op, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(VSX207, XXLORC, do_logical_op, MO_64, tcg_gen_gvec_orc); #define VSX_XXMRG(name, high) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2899,4 +2897,3 @@ 
TRANS64(PMXVF64GERNN, do_ger, gen_helper_XVF64GERNN) #undef GEN_XX2IFORM #undef GEN_XX3_RC_FORM #undef GEN_XX3FORM_DM -#undef VSX_LOGICAL diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 18510d757d..3c0a70cb7c 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -263,17 +263,6 @@ GEN_XX2FORM_EO(xvcvhpsp, 0x16, 0x1D, 0x18, PPC2_ISA300), GEN_XX2FORM_EO(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300), GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300), -#define VSX_LOGICAL(name, opc2, opc3, fl2) \ -GEN_XX3FORM(name, opc2, opc3, fl2) - -VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX), -VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX), -VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX), -VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX), -VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX), -VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207), -VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), -VSX_LOGICAL(xxlorc
[PATCH 1/3] target/ppc: Move ISA300 flag check out of do_helper_XX3.
Moving the PPC2_ISA300 flag check out of the do_helper_XX3 method in vsx-impl.c.inc so that the helper can be used with other instructions as well. Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0266f09119..6025119e5b 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2712,8 +2712,6 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { TCGv_ptr xt, xa, xb; - -REQUIRE_INSNS_FLAGS2(ctx, ISA300); REQUIRE_VSX(ctx); xt = gen_vsr_ptr(a->xt); @@ -2724,13 +2722,13 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, return true; } -TRANS(XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) -TRANS(XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) -TRANS(XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) -TRANS(XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) -TRANS(XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) -TRANS(XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) -TRANS(XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) +TRANS_FLAGS2(ISA300, XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) +TRANS_FLAGS2(ISA300, XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) +TRANS_FLAGS2(ISA300, XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) +TRANS_FLAGS2(ISA300, XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) +TRANS_FLAGS2(ISA300, XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) +TRANS_FLAGS2(ISA300, XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) +TRANS_FLAGS2(ISA300, XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) static bool do_helper_X(arg_X *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) -- 2.39.3
[PATCH 0/3] target/ppc: Moving VSX insns to decodetree.
Moving a number of VSX arithmetic, max/min and logical instructions to decodetree specification. Also moving the ISA300 flag check out of the do_helper_XX3 method in the vsx-impl.c.inc file, to make it usable for a larger number of instructions. Chinmay Rath (3): target/ppc: Move ISA300 flag check out of do_helper_XX3. target/ppc: Move VSX arithmetic and max/min insns to decodetree. target/ppc: Move VSX logical instructions to decodetree. target/ppc/helper.h | 44 +-- target/ppc/insn32.decode| 41 ++ target/ppc/fpu_helper.c | 44 +-- target/ppc/translate/vsx-impl.c.inc | 116 target/ppc/translate/vsx-ops.c.inc | 33 5 files changed, 136 insertions(+), 142 deletions(-) -- 2.39.3
[PATCH 2/3] target/ppc: Move VSX arithmetic and max/min insns to decodetree.
Moving the following instructions to decodetree specification: x{s, v}{add, sub, mul, div}{s, d}p : XX3-form xs{max, min}dp, xv{max, min}{s, d}p : XX3-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 44 ++-- target/ppc/insn32.decode| 30 ++ target/ppc/fpu_helper.c | 44 ++-- target/ppc/translate/vsx-impl.c.inc | 63 + target/ppc/translate/vsx-ops.c.inc | 22 -- 5 files changed, 101 insertions(+), 102 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..3b4a0c4674 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -361,12 +361,12 @@ DEF_HELPER_FLAGS_4(bcdsr, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdtrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdutrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) -DEF_HELPER_4(xsadddp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsaddqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xssubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmuldp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsmulqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsdivqp, void, env, i32, vsr, vsr, vsr) DEF_HELPER_3(xsredp, void, env, vsr, vsr) DEF_HELPER_3(xssqrtdp, void, env, vsr, vsr) @@ -389,8 +389,8 @@ DEF_HELPER_4(xscmpodp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpudp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpoqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpuqp, void, env, i32, vsr, vsr) -DEF_HELPER_4(xsmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINDP, void, env, vsr, vsr, 
vsr) DEF_HELPER_4(XSMAXCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMINCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMAXJDP, void, env, vsr, vsr, vsr) @@ -436,10 +436,10 @@ DEF_HELPER_4(xsrqpxp, void, env, i32, vsr, vsr) DEF_HELPER_4(xssqrtqp, void, env, i32, vsr, vsr) DEF_HELPER_5(xssubqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xssubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xsresp, void, env, vsr, vsr) DEF_HELPER_2(xsrsp, i64, env, i64) DEF_HELPER_3(xssqrtsp, void, env, vsr, vsr) @@ -458,10 +458,10 @@ DEF_HELPER_5(XSNMADDQPO, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(XSNMSUBQP, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(XSNMSUBQPO, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvadddp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmuldp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvredp, void, env, vsr, vsr) DEF_HELPER_3(xvsqrtdp, void, env, vsr, vsr) DEF_HELPER_3(xvrsqrtedp, void, env, vsr, vsr) @@ -471,8 +471,8 @@ DEF_HELPER_5(xvmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) 
DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) @@ -492,10 +492,10 @@ DEF_HELPER_3(xvrdpim, void, env, vsr, vsr) DEF_HELPER_3(xvrdpip, void, env, vsr, vsr) DEF_HELPER_3(xvrdpiz, void, env, vsr, vsr) -DEF_HELPER_4(xvaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvresp, void, env, vsr, vsr) DEF_HELPER_3
[PATCH v2 2/2] target/ppc: Improve VMX integer add/sub saturate instructions.
No need for a full comparison; xor produces non-zero bits for QC just fine. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index fdb283c1d4..152bcde0e3 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2876,15 +2876,15 @@ TRANS(VADDCUW, do_vx_vaddsubcuw, 1) /* Integer Add/Sub Saturate Instructions */ static inline void do_vadd_vsub_sat ( -unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, +unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b, void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { TCGv_vec x = tcg_temp_new_vec_matching(t); norm_op(vece, x, a, b); sat_op(vece, t, a, b); -tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); -tcg_gen_or_vec(vece, sat, sat, x); +tcg_gen_xor_vec(vece, x, x, t); +tcg_gen_or_vec(vece, qc, qc, x); } static void gen_vadd_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat, @@ -2916,16 +2916,16 @@ static void gen_vsub_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat, * GVecGen4 struct variants. */ static const TCGOpcode vecop_list_sub_u[] = { -INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 +INDEX_op_sub_vec, INDEX_op_ussub_vec, 0 }; static const TCGOpcode vecop_list_sub_s[] = { -INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 +INDEX_op_sub_vec, INDEX_op_sssub_vec, 0 }; static const TCGOpcode vecop_list_add_u[] = { -INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 +INDEX_op_add_vec, INDEX_op_usadd_vec, 0 }; static const TCGOpcode vecop_list_add_s[] = { -INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 +INDEX_op_add_vec, INDEX_op_ssadd_vec, 0 }; static const GVecGen4 op_vsububs = { -- 2.39.3
[PATCH v2 0/2] target/ppc: Move VMX int add/sub saturate insns
Change Log : 1. Changes addressing all review comments by Richard in v1 : Having separate ops table instead of using a 3D array, using TRANS_FLAGS instead of sinking flag check in the helper, proper flag checks for the insns left behind from GEN_VXFORM_DUAL declarations. 2. Added a second patch to improve the moved insns as per suggestion by Richard in v1. v1: https://lore.kernel.org/qemu-devel/20240512093847.18099-1-ra...@linux.ibm.com/ Chinmay Rath (2): target/ppc: Move VMX integer add/sub saturate insns to decodetree. target/ppc: Improve VMX integer add/sub saturate instructions. target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 238 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 220 insertions(+), 99 deletions(-) -- 2.39.3
[PATCH v2 1/2] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Moving the following instructions to decodetree specification : v{add,sub}{u,s}{b,h,w}s : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 238 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 220 insertions(+), 99 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f397ef459a..2963e48fdc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -200,18 +200,18 @@ DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 847a2f4356..d7d77eaa99 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -697,6 +697,14 @@ VADDCUW 000100 . . . 0011000@VX VADDCUQ 000100 . . . 0010100@VX VADDUQM 000100 . . . 001@VX +VADDSBS 000100 . . . 011@VX +VADDSHS 000100 . . . 0110100@VX +VADDSWS 000100 . . . 0111000@VX + +VADDUBS 000100 . . . 010@VX +VADDUHS 000100 . . . 0100100@VX +VADDUWS 000100 . . . 0101000@VX + VADDEUQM000100 . . . . 00 @VA VADDECUQ000100 . . . . 01 @VA @@ -704,6 +712,14 @@ VSUBCUW 000100 . . . 1011000@VX VSUBCUQ 000100 . . . 1010100@VX VSUBUQM 000100 . . . 101@VX +VSUBSBS 000100 . . . 111@VX +VSUBSHS 000100 . . . 1110100@VX +VSUBSWS 000100 . . . 000@VX + +VSUBUBS 000100 . . . 110@VX +VSUBUHS 000100 . . . 1100100@VX +VSUBUWS 000100 . . . 1101000@VX + VSUBECUQ000100 . . . . 11 @VA VSUBEUQM000100 . . . . 
10 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..aec2d3d4ec 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -541,7 +541,7 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } #define VARITHSAT_DO(name, op, optype, cvt, element
Re: [PATCH 1/1] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Hi Richard, On 5/12/24 17:08, Richard Henderson wrote: On 5/12/24 11:38, Chinmay Rath wrote: @@ -2934,6 +2870,184 @@ static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add) return true; } +static inline void do_vadd_vsub_sat +( + unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, + void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), + void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + norm_op(vece, x, a, b); + sat_op(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} As a separate change, before or after, the cmp_vec may be simplified to xor_vec. Which means that INDEX_op_cmp_vec need not be probed in the vecop_lists. See https://lore.kernel.org/qemu-devel/20240506010403.6204-31-richard.hender...@linaro.org/ which is performing the same operation on AArch64. Noted ! Will do. +static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a, + int sign, int vece, int add) +{ + static const TCGOpcode vecop_list_sub_u[] = { + INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_sub_s[] = { + INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_add_u[] = { + INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_add_s[] = { + INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 + }; + + static const GVecGen4 op[2][3][2] = { + { + { + { + .fniv = gen_vsub_sat_u, + .fno = gen_helper_VSUBUBS, + .opt_opc = vecop_list_sub_u, + .write_aofs = true, + .vece = MO_8 + }, . . . + { + .fniv = gen_vadd_sat_s, + .fno = gen_helper_VADDSWS, + .opt_opc = vecop_list_add_s, + .write_aofs = true, + .vece = MO_32 + }, + }, + }, + }; While this table is not wrong, I think it is clearer to have separate tables, one per operation, which are then passed in to a common expander. 
+ + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat), + avr_full_offset(a->vra), avr_full_offset(a->vrb), 16, 16, + [sign][vece][add]); + + return true; +} + +TRANS(VSUBUBS, do_vx_vadd_vsub_sat, 0, MO_8, 0) I think it is clearer to use TRANS_FLAGS than to sink the ISA check into the helper. In general I seem to find the helper later gets reused for something else with a different ISA check. Thus static const TCGOpcode vecop_list_vsub_sat_u[] = { INDEX_op_sub_vec, INDEX_op_ussub_vec, 0 }; static const GVecGen4 op_vsububs = { .fno = gen_helper_VSUBUBS, .fniv = gen_vsub_sat_u, .opt_opc = vecop_list_vsub_sat_u, .write_aofs = true, .vece = MO_8 }; TRANS_FLAGS(VSUBUBS, do_vx_vadd_vsub_sat, _vsububs) static const GVecGen4 op_vsubuhs = { .fno = gen_helper_VSUBUHS, .fniv = gen_vsub_sat_u, .opt_opc = vecop_list_vsub_sat_u, .write_aofs = true, .vece = MO_16 }; TRANS_FLAGS(VSUBUHS, do_vx_vadd_vsub_sat, _vsubuhs) etc. Will add those changes in v2. -GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE), You are correct in your cover letter that this is not right. We should have been testing ISA300 for vmul10uq here. Thank you very much for the clarification ! +GEN_VXFORM(vmul10euq, 0, 9), And thus need GEN_VXFORM_300 here. +GEN_VXFORM(vmul10euq, 0, 9), +GEN_VXFORM(bcdcpsgn, 0, 13), +GEN_VXFORM(bcdadd, 0, 24), +GEN_VXFORM(bcdsub, 0, 25), ... +GEN_VXFORM(xpnd04_2, 0, 30), None of these are in the base ISA, so all need a flag check. r~ Thanks & Regards, Chinmay
Re: target/ppc: Move VMX int add/sub saturate insns to decodetree.
Hi Richard, On 5/12/24 15:59, Richard Henderson wrote: On 5/12/24 11:38, Chinmay Rath wrote: 1. vsubsbs and bcdtrunc : In this pair, bcdtrunc has the insn flag check PPC2_ISA300 in the vmx-impl file, within the GEN_VXFORM_DUAL macro, which does this flag check. However it also has this flag check in the vmx-ops file. Hence I have retained the same in the new entry in the vmx-ops file. This is consistent with the behaviour in done in the following commit : https://github.com/qemu/qemu/commit/b132be53a4ba6a0a40d5643d791822f958a36e53 So even though the flag check is removed from the vmx-impl file, it is retained in the vmx-ops file. All good here. 2. vadduhs and vmul10euq : In this pair, vmul10euq has the insn flag check PPC2_ISA300 in the vmx-impl file, check done within the GEN_VXFORM_DUAL macro. However the same flag was NOT originally present in the vmx-ops file, so I have NOT included in its new entry in the vmx-ops file. I have done this, following the behaviour done in the following commit : https://github.com/qemu/qemu/commit/c85929b2ddf6bbad737635c9b85213007ec043af So this flag check for vmul10euq is excluded now. Is this not a problem ? I feel that this leads to the flag check being skipped now, however this behaviour was followed in the above mentioned commit. This second link is for VAVG* and VABSD*. Yes you are correct that this second case was done incorrectly. Thankfully the mistake was fixed in the very next commit, when VABSD* was converted to decodetree as well. Thank you very much for the clarification ! r~ Regards, Chinmay
target/ppc: Move VMX int add/sub saturate insns to decodetree.
Moving the following instructions to decodetree : v{add,sub}{u,s}{b,h,w}s : VX-form However, the following instructions were paired using the GEN_VXFORM_DUAL macros in the vmx-impl and vmx-ops files : vaddubs and vmul10uq vadduhs and vmul10euq vaddshs and bcdcpsgn vsububs and bcdadd vsubuhs and bcdsub vsubsbs and bcdtrunc vsubsws and xpnd04_2 Out of those 7 above mentioned pairs, I have moved the first one of each pair and added respective entry of the 2nd one in the vmx-ops file. However, I lack some clarity on those flag checks added for those insns in the ops file. It would be great if someone sheds some light at this. The issue; let's take the following example : 1. vsubsbs and bcdtrunc : In this pair, bcdtrunc has the insn flag check PPC2_ISA300 in the vmx-impl file, within the GEN_VXFORM_DUAL macro, which does this flag check. However it also has this flag check in the vmx-ops file. Hence I have retained the same in the new entry in the vmx-ops file. This is consistent with the behaviour in done in the following commit : https://github.com/qemu/qemu/commit/b132be53a4ba6a0a40d5643d791822f958a36e53 So even though the flag check is removed from the vmx-impl file, it is retained in the vmx-ops file. All good here. 2. vadduhs and vmul10euq : In this pair, vmul10euq has the insn flag check PPC2_ISA300 in the vmx-impl file, check done within the GEN_VXFORM_DUAL macro. However the same flag was NOT originally present in the vmx-ops file, so I have NOT included in its new entry in the vmx-ops file. I have done this, following the behaviour done in the following commit : https://github.com/qemu/qemu/commit/c85929b2ddf6bbad737635c9b85213007ec043af So this flag check for vmul10euq is excluded now. Is this not a problem ? I feel that this leads to the flag check being skipped now, however this behaviour was followed in the above mentioned commit. 
Requesting anyone to please let me know why this behaviour was followed and how the flag checks are retained here, or if they are really skipped, why is it okay to skip them here ? Regards, Chinmay Chinmay Rath (1): target/ppc: Move VMX integer add/sub saturate insns to decodetree. target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 242 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 224 insertions(+), 99 deletions(-) -- 2.39.3
[PATCH 1/1] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Moving the following instructions to decodetree specification : v{add,sub}{u,s}{b,h,w}s : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 242 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 224 insertions(+), 99 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f397ef459a..2963e48fdc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -200,18 +200,18 @@ DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 847a2f4356..d7d77eaa99 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -697,6 +697,14 @@ VADDCUW 000100 . . . 0011000@VX VADDCUQ 000100 . . . 0010100@VX VADDUQM 000100 . . . 001@VX +VADDSBS 000100 . . . 011@VX +VADDSHS 000100 . . . 0110100@VX +VADDSWS 000100 . . . 0111000@VX + +VADDUBS 000100 . . . 010@VX +VADDUHS 000100 . . . 0100100@VX +VADDUWS 000100 . . . 0101000@VX + VADDEUQM000100 . . . . 00 @VA VADDECUQ000100 . . . . 01 @VA @@ -704,6 +712,14 @@ VSUBCUW 000100 . . . 1011000@VX VSUBCUQ 000100 . . . 1010100@VX VSUBUQM 000100 . . . 101@VX +VSUBSBS 000100 . . . 111@VX +VSUBSHS 000100 . . . 1110100@VX +VSUBSWS 000100 . . . 000@VX + +VSUBUBS 000100 . . . 110@VX +VSUBUHS 000100 . . . 1100100@VX +VSUBUWS 000100 . . . 1101000@VX + VSUBECUQ000100 . . . . 11 @VA VSUBEUQM000100 . . . . 
10 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..aec2d3d4ec 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -541,7 +541,7 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } #define VARITHSAT_DO(name, op, optype, cvt, element
Re: [PATCH 2/3] target/ppc: Fix embedded memory barriers
On 5/1/24 18:34, Nicholas Piggin wrote: Memory barriers are supposed to do something on BookE systems, these were probably just missed during MTTCG enablement, maybe no targets support SMP. Either way, add proper BookE implementations. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/translate/misc-impl.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/misc-impl.c.inc b/target/ppc/translate/misc-impl.c.inc index f58bf8b848..9226467f81 100644 --- a/target/ppc/translate/misc-impl.c.inc +++ b/target/ppc/translate/misc-impl.c.inc @@ -34,8 +34,7 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) */ if (!(ctx->insns_flags & PPC_MEM_SYNC)) { if (ctx->insns_flags & PPC_BOOKE) { -/* msync replaces sync on 440, interpreted as nop */ -/* XXX: this also catches e200 */ +tcg_gen_mb(bar | TCG_BAR_SC); return true; } @@ -80,6 +79,7 @@ static bool trans_EIEIO(DisasContext *ctx, arg_EIEIO *a) if (!(ctx->insns_flags & PPC_MEM_EIEIO)) { if ((ctx->insns_flags & PPC_BOOKE) || (ctx->insns_flags2 & PPC2_BOOKE206)) { +tcg_gen_mb(bar | TCG_BAR_SC); return true; } return false;
Re: [PATCH 3/3] target/ppc: Add ISA v3.1 variants of sync instruction
On 5/1/24 18:34, Nicholas Piggin wrote: POWER10 adds a new field to sync for store-store syncs, and some new variants of the existing syncs that include persistent memory. Implement the store-store syncs and plwsync/phwsync. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/insn32.decode | 6 ++-- target/ppc/translate/misc-impl.c.inc | 41 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 6b89804b15..a180380750 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1001,7 +1001,7 @@ MSGSYNC 01 - - - 1101110110 - # Memory Barrier Instructions -_sync l -@X_sync .. ... l:2 . . .. . _sync -SYNC01 --- .. - - 1001010110 - @X_sync +_sync l sc +@X_sync .. .. l:3 ... sc:2 . .. . _sync +SYNC01 -- ... --- .. - 1001010110 - @X_sync EIEIO 01 - - - 1101010110 - diff --git a/target/ppc/translate/misc-impl.c.inc b/target/ppc/translate/misc-impl.c.inc index 9226467f81..3467b49d0d 100644 --- a/target/ppc/translate/misc-impl.c.inc +++ b/target/ppc/translate/misc-impl.c.inc @@ -25,6 +25,7 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) { TCGBar bar = TCG_MO_ALL; uint32_t l = a->l; +uint32_t sc = a->sc; /* * BookE uses the msync mnemonic. This means hwsync, except in the @@ -46,20 +47,36 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); } -if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { -bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; -} - /* - * We may need to check for a pending TLB flush. - * - * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. - * - * Additionally, this can only happen in kernel mode however so - * check MSR_PR as well. + * In ISA v3.1, the L field grew one bit. Mask that out to ignore it in + * older processors. It also added the SC field, zero this to ignore + * it too. 
*/ -if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { -gen_check_tlb_flush(ctx, true); +if (!(ctx->insns_flags2 & PPC2_ISA310)) { +l &= 0x3; +sc = 0; +} + +if (sc) { +/* Store syncs [stsync, stcisync, stncisync]. These ignore L. */ +bar = TCG_MO_ST_ST; +} else { +if (((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) || (l == 5)) { +/* lwsync, or plwsync on POWER10 and later */ +bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; +} + +/* + * We may need to check for a pending TLB flush. + * + * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. + * + * Additionally, this can only happen in kernel mode however so + * check MSR_PR as well. + */ +if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { +gen_check_tlb_flush(ctx, true); +} } tcg_gen_mb(bar | TCG_BAR_SC);
Re: [PATCH 1/3] target/ppc: Move sync instructions to decodetree
On 5/1/24 18:34, Nicholas Piggin wrote: This tries to faithfully reproduce the odd BookE logic. It does change the handling of non-zero reserved bits outside the defined fields from being illegal to being ignored, which the architecture specifies ot help with backward compatibility of new fields. The existing behaviour causes illegal instruction exceptions when using new POWER10 sync variants that add new fields, after this the instructions are accepted and are implemented as supersets of the new behaviour, as intended. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/insn32.decode | 7 ++ target/ppc/translate.c | 102 +--- target/ppc/translate/misc-impl.c.inc | 135 +++ 3 files changed, 144 insertions(+), 100 deletions(-) create mode 100644 target/ppc/translate/misc-impl.c.inc diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..6b89804b15 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -998,3 +998,10 @@ MSGSND 01 - - . 0011001110 - @X_rb MSGCLRP 01 - - . 0010101110 - @X_rb MSGSNDP 01 - - . 0010001110 - @X_rb MSGSYNC 01 - - - 1101110110 - + +# Memory Barrier Instructions + +_sync l +@X_sync .. ... l:2 . . .. . _sync +SYNC01 --- .. - - 1001010110 - @X_sync +EIEIO 01 - - - 1101010110 - diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..bb2cabae10 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3423,59 +3423,6 @@ static void gen_stswx(DisasContext *ctx) gen_helper_stsw(tcg_env, t0, t1, t2); } -/***Memory synchronisation ***/ -/* eieio */ -static void gen_eieio(DisasContext *ctx) -{ -TCGBar bar = TCG_MO_ALL; - -/* - * eieio has complex semanitcs. It provides memory ordering between - * operations in the set: - * - loads from CI memory. - * - stores to CI memory. - * - stores to WT memory. - * - * It separately also orders memory for operations in the set: - * - stores to cacheble memory. 
- * - * It also serializes instructions: - * - dcbt and dcbst. - * - * It separately serializes: - * - tlbie and tlbsync. - * - * And separately serializes: - * - slbieg, slbiag, and slbsync. - * - * The end result is that CI memory ordering requires TCG_MO_ALL - * and it is not possible to special-case more relaxed ordering for - * cacheable accesses. TCG_BAR_SC is required to provide this - * serialization. - */ - -/* - * POWER9 has a eieio instruction variant using bit 6 as a hint to - * tell the CPU it is a store-forwarding barrier. - */ -if (ctx->opcode & 0x200) { -/* - * ISA says that "Reserved fields in instructions are ignored - * by the processor". So ignore the bit 6 on non-POWER9 CPU but - * as this is not an instruction software should be using, - * complain to the user. - */ -if (!(ctx->insns_flags2 & PPC2_ISA300)) { -qemu_log_mask(LOG_GUEST_ERROR, "invalid eieio using bit 6 at @" - TARGET_FMT_lx "\n", ctx->cia); -} else { -bar = TCG_MO_ST_LD; -} -} - -tcg_gen_mb(bar | TCG_BAR_SC); -} - #if !defined(CONFIG_USER_ONLY) static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { @@ -3877,31 +3824,6 @@ static void gen_stqcx_(DisasContext *ctx) } #endif /* defined(TARGET_PPC64) */ -/* sync */ -static void gen_sync(DisasContext *ctx) -{ -TCGBar bar = TCG_MO_ALL; -uint32_t l = (ctx->opcode >> 21) & 3; - -if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { -bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; -} - -/* - * We may need to check for a pending TLB flush. - * - * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. - * - * Additionally, this can only happen in kernel mode however so - * check MSR_PR as well. 
- */ -if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { -gen_check_tlb_flush(ctx, true); -} - -tcg_gen_mb(bar | TCG_BAR_SC); -} - /* wait */ static void gen_wait(DisasContext *ctx) { @@ -6010,23 +5932,6 @@ static void gen_dlmzb(DisasContext *ctx) cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); } -/* mbar replaces eieio on 440 */ -static void gen_mbar(DisasContext *ctx) -{ -/* interpreted as no-op */ -} - -/* msync replaces sync on 440 */ -static void gen_msync_4xx(DisasContext *ctx) -{ -
[PATCH 2/3] target/ppc: Move VMX integer logical instructions to decodetree.
Moving the following instructions to decodetree specification: v{and, andc, nand, or, orc, nor, xor, eqv} : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 11 +++ target/ppc/translate/vmx-impl.c.inc | 22 ++ target/ppc/translate/vmx-ops.c.inc | 15 --- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 27655f0d9e..e00bc05381 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -574,6 +574,17 @@ VCMPNEZW000100 . . . . 011111 @VC VCMPSQ 000100 ... -- . . 0010101 @VX_bf VCMPUQ 000100 ... -- . . 0010001 @VX_bf +## Vector Integer Logical Instructions + +VAND000100 . . . 1000100@VX +VANDC 000100 . . . 10001000100@VX +VNAND 000100 . . . 1011100@VX +VOR 000100 . . . 1001100@VX +VORC000100 . . . 10101000100@VX +VNOR000100 . . . 1010100@VX +VXOR000100 . . . 10011000100@VX +VEQV000100 . . . 1101100@VX + ## Vector Integer Average Instructions VAVGSB 000100 . . . 
1010010@VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 4d5e743cfe..cefe04127c 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -205,16 +205,6 @@ static void glue(gen_, name)(DisasContext *ctx) \ 16, 16); \ } -/* Logical operations */ -GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16); -GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17); -GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18); -GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19); -GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20); -GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26); -GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22); -GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21); - #define GEN_VXFORM(name, opc2, opc3)\ static void glue(gen_, name)(DisasContext *ctx) \ { \ @@ -727,6 +717,16 @@ TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv) TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv) TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv) +/* Logical operations */ +TRANS_FLAGS(ALTIVEC, VAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS(ALTIVEC, VANDC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS(ALTIVEC, VOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS(ALTIVEC, VXOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS(ALTIVEC, VNOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(ALTIVEC_207, VEQV, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(ALTIVEC_207, VNAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(ALTIVEC_207, VORC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_orc); + static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb) { TCGv_vec t0 = tcg_temp_new_vec_matching(vrb), @@ -3331,8 +3331,6 @@ TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ) #undef DIVS64 #undef DIVU64 -#undef GEN_VX_LOGICAL -#undef 
GEN_VX_LOGICAL_207 #undef GEN_VXFORM #undef GEN_VXFORM_207 #undef GEN_VXFORM_DUAL diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 672fba3796..80c5217749 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -1,18 +1,3 @@ -#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3)\ -GEN_HANDLER(name, 0x04, opc2, opc3, 0x, PPC_ALTIVEC) - -#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \ -GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x, PPC_NONE, PPC2_ALTIVEC_207) - -GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16), -GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17), -GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18), -GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19), -GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20), -GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26), -GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22), -GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21), - #define GEN_VXFORM(name, opc2, opc3)\ GEN_HANDLER(name, 0x04, opc2, opc3, 0x, PPC_ALTIVEC) -- 2.39.3
[PATCH 1/3] target/ppc: Move VMX storage access instructions to decodetree
Moving the following instructions to decodetree specification : {l,st}ve{b,h,w}x, {l,st}v{x,xl}, lvs{l,r}: X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 12 +- target/ppc/insn32.decode| 17 +++ target/ppc/mem_helper.c | 12 +- target/ppc/translate.c | 2 - target/ppc/translate/vmx-impl.c.inc | 221 target/ppc/translate/vmx-ops.c.inc | 19 --- 6 files changed, 120 insertions(+), 163 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f397ef459a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -267,12 +267,12 @@ DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) -DEF_HELPER_3(lvebx, void, env, avr, tl) -DEF_HELPER_3(lvehx, void, env, avr, tl) -DEF_HELPER_3(lvewx, void, env, avr, tl) -DEF_HELPER_3(stvebx, void, env, avr, tl) -DEF_HELPER_3(stvehx, void, env, avr, tl) -DEF_HELPER_3(stvewx, void, env, avr, tl) +DEF_HELPER_3(LVEBX, void, env, avr, tl) +DEF_HELPER_3(LVEHX, void, env, avr, tl) +DEF_HELPER_3(LVEWX, void, env, avr, tl) +DEF_HELPER_3(STVEBX, void, env, avr, tl) +DEF_HELPER_3(STVEHX, void, env, avr, tl) +DEF_HELPER_3(STVEWX, void, env, avr, tl) #if defined(TARGET_PPC64) DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..27655f0d9e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -526,6 +526,23 @@ DSCRIQ 11 . . .. 001100010 . @Z22_tap_sh_rc VPMSUMD 000100 . . . 10011001000@VX +## Vector Load/Store Instructions + +LVEBX 01 . . . 000111 - @X +LVEHX 01 . . . 100111 - @X +LVEWX 01 . . . 0001000111 - @X +LVX 01 . . . 
0001100111 - @X +LVXL01 . . . 0101100111 - @X + +STVEBX 01 . . . 001111 - @X +STVEHX 01 . . . 0010100111 - @X +STVEWX 01 . . . 0011000111 - @X +STVX01 . . . 0011100111 - @X +STVXL 01 . . . 000111 - @X + +LVSL01 . . . 000110 - @X +LVSR01 . . . 100110 - @X + ## Vector Integer Instructions VCMPEQUB000100 . . . . 000110 @VC diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..f88155ad45 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -404,9 +404,9 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, } \ } #define I(x) (x) -LVE(lvebx, cpu_ldub_data_ra, I, u8) -LVE(lvehx, cpu_lduw_data_ra, bswap16, u16) -LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) +LVE(LVEBX, cpu_ldub_data_ra, I, u8) +LVE(LVEHX, cpu_lduw_data_ra, bswap16, u16) +LVE(LVEWX, cpu_ldl_data_ra, bswap32, u32) #undef I #undef LVE @@ -432,9 +432,9 @@ LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) } \ } #define I(x) (x) -STVE(stvebx, cpu_stb_data_ra, I, u8) -STVE(stvehx, cpu_stw_data_ra, bswap16, u16) -STVE(stvewx, cpu_stl_data_ra, bswap32, u32) +STVE(STVEBX, cpu_stb_data_ra, I, u8) +STVE(STVEHX, cpu_stw_data_ra, bswap16, u16) +STVE(STVEWX, cpu_stl_data_ra, bswap32, u32) #undef I #undef LVE diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..cde3b88b98 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6640,8 +6640,6 @@ GEN_HANDLER2_E(icbt_440, "icbt", 0x1F, 0x16, 0x00, 0x03E1, PPC_BOOKE, PPC2_BOOKE206), GEN_HANDLER2(icbt_440, "icbt", 0x1F, 0x06, 0x08, 0x03E1, PPC_440_SPEC), -GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x0001, PPC_ALTIVEC), -GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x0001, PPC_ALTIVEC), GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC), GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff, PPC_ALTIVEC), #if defined(TARGET_PPC64) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index b56e615c24..4d5e743cfe 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ 
b/target/ppc/translate/vmx-impl.c.inc @@ -14,125 +14,88 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } -#define GEN_VR_
[PATCH 0/3] target/ppc: Moving VMX insns to decodetree
Moving VMX instructions of the following types to decodetree specification : storage access, integer logical & integer max/min. Chinmay Rath (3): target/ppc: Move VMX storage access instructions to decodetree target/ppc: Move VMX integer logical instructions to decodetree target/ppc: Move VMX integer max/min instructions to decodetree. target/ppc/helper.h | 12 +- target/ppc/insn32.decode| 50 + target/ppc/mem_helper.c | 12 +- target/ppc/translate.c | 2 - target/ppc/translate/vmx-impl.c.inc | 280 target/ppc/translate/vmx-ops.c.inc | 50 - 6 files changed, 184 insertions(+), 222 deletions(-) -- 2.39.3
[PATCH 3/3] target/ppc: Move VMX integer max/min instructions to decodetree.
Moving the following instructions to decodetree specification : v{max, min}{u, s}{b, h, w, d} : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 22 + target/ppc/translate/vmx-impl.c.inc | 37 - target/ppc/translate/vmx-ops.c.inc | 16 - 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index e00bc05381..847a2f4356 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -717,6 +717,28 @@ VEXTSD2Q000100 . 11011 . 1100010 @VX_tb VNEGD 000100 . 00111 . 1100010@VX_tb VNEGW 000100 . 00110 . 1100010@VX_tb +## Vector Integer Maximum/Minimum Instructions + +VMAXUB 000100 . . . 010@VX +VMAXUH 000100 . . . 110@VX +VMAXUW 000100 . . . 0001010@VX +VMAXUD 000100 . . . 0001110@VX + +VMAXSB 000100 . . . 0010010@VX +VMAXSH 000100 . . . 0010110@VX +VMAXSW 000100 . . . 0011010@VX +VMAXSD 000100 . . . 0011110@VX + +VMINUB 000100 . . . 0100010@VX +VMINUH 000100 . . . 0100110@VX +VMINUW 000100 . . . 0101010@VX +VMINUD 000100 . . . 0101110@VX + +VMINSB 000100 . . . 0110010@VX +VMINSH 000100 . . . 0110110@VX +VMINSW 000100 . . . 0111010@VX +VMINSD 000100 . . . 010@VX + ## Vector Mask Manipulation Instructions MTVSRBM 000100 . 1 . 
1100110@VX_tb diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index cefe04127c..8084af75cc 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -342,22 +342,6 @@ GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16); GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17); GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18); GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19); -GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0); -GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1); -GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2); -GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3); -GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4); -GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5); -GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6); -GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7); -GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8); -GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9); -GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10); -GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11); -GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12); -GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13); -GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14); -GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15); GEN_VXFORM(vmrghb, 6, 0); GEN_VXFORM(vmrghh, 6, 1); GEN_VXFORM(vmrghw, 6, 2); @@ -727,6 +711,27 @@ TRANS_FLAGS2(ALTIVEC_207, VEQV, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_eqv); TRANS_FLAGS2(ALTIVEC_207, VNAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nand); TRANS_FLAGS2(ALTIVEC_207, VORC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_orc); +/* Integer Max/Min operations */ +TRANS_FLAGS(ALTIVEC, VMAXUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umax); +TRANS_FLAGS2(ALTIVEC_207, VMAXUD, do_vector_gvec3_VX, 
MO_64, tcg_gen_gvec_umax); + +TRANS_FLAGS(ALTIVEC, VMAXSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smax); +TRANS_FLAGS2(ALTIVEC_207, VMAXSD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_smax); + +TRANS_FLAGS(ALTIVEC, VMINUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umin); +TRANS_FLAGS2(ALTIVEC_207, VMINUD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_umin); + +TRANS_FLAGS(ALTIVEC, VMINSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smin); +TRANS_FLAGS2(ALTIVEC_207, VMINSD
[PATCH v2 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 1fc8b7c5fd..09d0b0074b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -71,8 +71,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) -DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index bfccebd9a7..654f55471b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,9 @@ _ara @X_a.. ra:3 .. . . .. . _a +_tl rt l +@X_tl .. rt:5 ... l:2 . .. . _tl + rt ra rb oe:bool rc:bool @XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 @@ -367,6 +370,11 @@ DIVWU 01 . . . . 111001011 . @XO DIVWE 01 . . . . 110101011 . @XO DIVWEU 01 . . . . 110001011 . @XO +MODSW 01 . . . 111011 - @X +MODUW 01 . . . 011011 - @X +DARN01 . --- .. - 100011 - @X_tl +NEG 01 . . - . 001101000 . @XO_ta + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dc1f72ff38..bc25d5b062 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) /* * Return a random number. */ -uint64_t helper_darn32(void) +uint64_t helper_DARN32(void) { Error *err = NULL; uint32_t ret; @@ -186,7 +186,7 @@ uint64_t helper_darn32(void) return ret; } -uint64_t helper_darn64(void) +uint64_t helper_DARN64(void) { Error *err = NULL; uint64_t ret; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0a1d1d63b3..436fcfc645 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1878,17 +1878,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, } } -#define GEN_INT_ARITH_MODW(name, opc3, sign)\ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],\ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign);\ -} - -GEN_INT_ARITH_MODW(moduw, 0x08, 0); -GEN_INT_ARITH_MODW(modsw, 0x18, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign) @@ -2055,27 +2044,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, } } -/* neg neg. nego nego. 
*/ -static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) -{ -TCGv zero = tcg_constant_tl(0); -gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - zero, 0, 0, compute_ov, Rc(ctx->opcode)); -} - -static void gen_neg(DisasContext *ctx) -{ -tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode))) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -static void gen_nego(DisasContext *ctx) -{ -gen_op_arith_neg(ctx, 1); -} - /***Integer logical***/ #define GEN_LOGICAL2(name, tcg_op, opc, type) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2401,24 +2369,6 @@ static void gen_cnttzd(DisasContext *ctx) gen_set_R
[PATCH v2 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c| 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..1fc8b7c5fd 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -55,8 +55,8 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) DEF_HELPER_4(divde, i64, env, i64, i64, i32) #endif -DEF_HELPER_4(divweu, tl, env, tl, tl, i32) -DEF_HELPER_4(divwe, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 0184680db8..bfccebd9a7 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -362,6 +362,11 @@ MULLWO 01 . . . 1 011101011 . @XO_tab_rc MULHW 01 . . . - 001001011 . @XO_tab_rc MULHWU 01 . . . - 01011 . @XO_tab_rc +DIVW01 . . . . 01011 . @XO +DIVWU 01 . . . . 111001011 . @XO +DIVWE 01 . . . . 110101011 . @XO +DIVWEU 01 . . . . 110001011 . @XO + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..dc1f72ff38 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) } } -target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { uint64_t rt = 0; @@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } -target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { int64_t rt = 0; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index be7d807e3c..0a1d1d63b3 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,6 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* Div functions */ -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov, Rc(ctx->opcode)); \ -} -/* divwu divwu. divwuo divwuo. */ -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0); -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1); - /* div[wd]eu[o][.] 
*/ #define GEN_DIVE(name, hlpr, compute_ov) \ static void gen_##name(DisasContext *ctx) \ @@ -1805,11 +1790,6 @@ static void gen_##name(DisasContext *ctx) \ } \ } -GEN_DIVE(divweu, divweu, 0); -GEN_DIVE(divweuo, divweu, 1); -GEN_DIVE(divwe, divwe, 0); -GEN_DIVE(divweo, divwe, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) @@ -6562,17 +6542,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE, GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x, PPC_NONE, PPC2_ISA300), #endif -#undef GEN_INT_ARITH_DIVW -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x, PPC_INTEGER) -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVW(divwuo
[PATCH v2 0/8] target/ppc: Move fixed-point insns to
Moving all fixed-point instructions of the following type to decodetree specification : arithmetic, compare, trap, select and logical. Change log : v2 : Implemented code clean-ups as per comments by Richard in patches 1/8, 5/8 and 7/8 of v1. v1 : https://lore.kernel.org/qemu-devel/20240416063927.99428-1-ra...@linux.ibm.com/ Chinmay Rath (8): target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree. target/ppc: Make divw[u] handler method decodetree compatible. target/ppc: Move divw[u, e, eu] instructions to decodetree. target/ppc: Move neg, darn, mod{sw, uw} to decodetree. target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree. target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree. target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree. target/ppc: Move logical fixed-point instructions to decodetree. target/ppc/helper.h| 26 +- target/ppc/insn32.decode | 93 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 24 +- target/ppc/translate.c | 841 + target/ppc/translate/fixedpoint-impl.c.inc | 631 6 files changed, 762 insertions(+), 857 deletions(-) -- 2.39.3
[PATCH v2 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s]: D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c| 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 05f7ab5f6e..b53abd853a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -58,8 +58,8 @@ DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -68,8 +68,8 @@ DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) 
DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 80a7bb1872..3175810190 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -29,6 +29,9 @@ rt ra si:int64_t @D .. rt:5 ra:5 si:s16 +_ui rt ra ui:uint64_t +@D_ui .. rt:5 ra:5 ui:16 _ui + _bf bf l:bool ra imm @D_bfs .. bf:3 . l:1 ra:5 imm:s16 _bf @D_bfu .. bf:3 . l:1 ra:5 imm:16 _bf @@ -96,6 +99,9 @@ _sa rs ra @X_sa .. rs:5 ra:5 . .. . _sa +_sa_rcrs ra rc +@X_sa_rc.. rs:5 ra:5 . .. rc:1 _sa_rc + %x_frtp 22:4 !function=times_2 %x_frap 17:4 !function=times_2 %x_frbp 12:4 !function=times_2 @@ -410,6 +416,38 @@ MODUD 01 . . . 011001 - @X ## Fixed-Point Logical Instructions +ANDI_ 011100 . . @D_ui +ANDIS_ 011101 . . @D_ui +ORI 011000 . . @D_ui +ORIS011001 . . @D_ui +XORI011010 . . @D_ui +XORIS 011011 . . @D_ui + +AND 01 . . . 011100 . @X_rc +ANDC01 . . . 00 . @X_rc +NAND01 . . . 0111011100 . @X_rc +OR 01 . . . 011000 . @X_rc +ORC 01 . . . 0110011100 . @X_rc +NOR 01 . . . 000100 . @X_rc +XOR 01 . . . 010000 . @X_rc +EQV 01 . . . 0100011100 . @X_rc +CMPB01 . . . 011100 . @X_rc + +EXTSB 01 . . - 1110111010 . @X_sa_rc +EXTSH 01 . . - 1110011010 . @X_sa_rc +EXTSW 01 . . - 011010 . @X_sa_rc +CNTLZW 01 . . - 011010 . @X_sa_rc +CNTTZW 01 . . - 111010 . @X_sa_rc +CNTLZD 01 . . - 111010 . @X_sa_rc +CNTTZD 01 . . - 1000111010 . @X_sa_rc +POPCNTB 01 . . - 000010 . @X_sa_rc + +POPCNTW 01 . . - 010010 - @X_sa +POPCNTD 01 . . - 011010 - @X_sa +PRTYW 01 . . - 0010011010 - @X_sa +PRTYD 01 . . - 0010111010 - @X_sa + +BPERMD 01
[PATCH v2 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
Moving the below instructions to decodetree specification : divd[u, e, eu][o][.]: XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 09d0b0074b..e862bdceaf 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -52,8 +52,8 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) -DEF_HELPER_4(divde, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) #endif DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 61c59bbde0..509961023b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -384,6 +384,14 @@ MADDLD 000100 . . . . 110011 @VA MADDHD 000100 . . . . 11 @VA MADDHDU 000100 . . . . 110001 @VA +DIVD01 . . . . 01001 . @XO +DIVDU 01 . . . . 111001001 . @XO +DIVDE 01 . . . . 110101001 . @XO +DIVDEU 01 . . . . 110001001 . @XO + +MODSD 01 . . . 111001 - @X +MODUD 01 . . . 011001 - @X + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index bc25d5b062..585c2b65d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -101,7 +101,7 @@ target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, #if defined(TARGET_PPC64) -uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) { uint64_t rt = 0; int overflow = 0; @@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } -uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) { uint64_t rt = 0; int64_t ra = (int64_t)rau; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8fa125d0ae..8900da85e5 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,11 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* div[wd]eu[o][.] 
*/ -#define GEN_DIVE(name, hlpr, compute_ov) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv_i32 t0 = tcg_constant_i32(compute_ov); \ -gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env, \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); \ -} \ -} #if defined(TARGET_PPC64) -static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1824,29 +1814,10 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { ge
[PATCH v2 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also for CMPRB, following review comments : Replaced repetition of arithmetic right shifting (tcg_gen_shri_i32) followed by extraction of last 8 bits (tcg_gen_ext8u_i32) with extraction of the required bits using offsets (tcg_gen_extract_i32). Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 6 +- target/ppc/insn32.decode | 16 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 2 +- target/ppc/translate.c | 133 + target/ppc/translate/fixedpoint-impl.c.inc | 120 +++ 6 files changed, 145 insertions(+), 136 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e862bdceaf..05f7ab5f6e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) 
DEF_HELPER_3(srad, tl, env, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 509961023b..80a7bb1872 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -23,6 +23,9 @@ _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb +_tab_bc rt ra rb bc +@A_tab_bc .. rt:5 ra:5 rb:5 bc:5 . . _tab_bc + rt ra si:int64_t @D .. rt:5 ra:5 si:s16 @@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - @X_bfl CMPL01 ... - . . . 10 - @X_bfl CMPI001011 ... - . . @D_bfs CMPLI 001010 ... - . . @D_bfu +CMPRB 01 ... - . . . 001100 - @X_bfl +CMPEQB 01 ... -- . . 001110 - @X_bf + +### Fixed-Point Trap Instructions + +TW 01 . . . 000100 - @X +TD 01 . . . 0001000100 - @X +TWI 11 . . @D +TDI 10 . . @D + +### Fixed-Point Select Instruction + +ISEL01 . . . . 0 - @A_tab_bc ### Fixed-Point Arithmetic Instructions diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..79dd9b82cf 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 585c2b65d3..d12dcc28e1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ 
#define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong
[PATCH v2 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.]: XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Also cleaned up code for mullw[o][.] as per review comments while keeping the logic of the tcg ops generated semantically same. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 66 3 files changed, 75 insertions(+), 89 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..0184680db8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -193,6 +193,9 @@ _ta rt ra oe:bool rc:bool @XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta +_tab_rc rt ra rb rc:bool +@XO_tab_rc .. rt:5 ra:5 rb:5 . . rc:1 _tab_rc + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -353,6 +356,12 @@ SUBFE 01 . . . . 010001000 . @XO SUBFME 01 . . - . 011101000 . @XO_ta SUBFZE 01 . . - . 011001000 . @XO_ta +MULLI 000111 . . @D +MULLW 01 . . . 0 011101011 . @XO_tab_rc +MULLWO 01 . . . 1 011101011 . @XO_tab_rc +MULHW 01 . . . - 001001011 . @XO_tab_rc +MULHWU 01 . . . - 01011 . @XO_tab_rc + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..c45547a770 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -/* mulhw mulhw. 
*/ -static void gen_mulhw(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhwu mulhwu. */ -static void gen_mulhwu(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mulu2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullw mullw. */ -static void gen_mullw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) -TCGv_i64 t0, t1; -t0 = tcg_temp_new_i64(); -t1 = tcg_temp_new_i64(); -tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -cpu_gpr[rB(ctx->opcode)]); -#endif -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullwo mullwo. 
*/ -static void gen_mullwo(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -#if defined(TARGET_PPC64) -tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0); -#endif - -tcg_gen_sari_i32(t0, t0, 31); -tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); -tcg_gen_extu_i32_tl(cpu_ov, t0); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulli */ -static void gen_mulli(DisasContext *ctx) -{ -tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -SIMM(ctx->opcode)); -} - #if defined(TARGET_PPC64) /* mulhd mulhd. */ static void gen_mulhd(DisasContext *ctx) @@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x0001, PPC_NONE, PPC2_I
[PATCH v2 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Moving the following instructions to decodetree : mul{ld, ldo, hd, hdu}[.]: XO-form madd{hd, hdu, ld} : VA-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode | 9 ++ target/ppc/translate.c | 101 - target/ppc/translate/fixedpoint-impl.c.inc | 85 + 3 files changed, 94 insertions(+), 101 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 654f55471b..61c59bbde0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -375,6 +375,15 @@ MODUW 01 . . . 011011 - @X DARN01 . --- .. - 100011 - @X_tl NEG 01 . . - . 001101000 . @XO_ta +MULLD 01 . . . 0 011101001 . @XO_tab_rc +MULLDO 01 . . . 1 011101001 . @XO_tab_rc +MULHD 01 . . . - 001001001 . @XO_tab_rc +MULHDU 01 . . . - 01001 . @XO_tab_rc + +MADDLD 000100 . . . . 110011 @VA +MADDHD 000100 . . . . 11 @VA +MADDHDU 000100 . . . . 110001 @VA + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 436fcfc645..8fa125d0ae 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1918,62 +1918,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -#if defined(TARGET_PPC64) -/* mulhd mulhd. */ -static void gen_mulhd(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhdu mulhdu. */ -static void gen_mulhdu(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulld mulld. 
*/ -static void gen_mulld(DisasContext *ctx) -{ -tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulldo mulldo. */ -static void gen_mulldo(DisasContext *ctx) -{ -TCGv_i64 t0 = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0); - -tcg_gen_sari_i64(t0, t0, 63); -tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} -#endif - /* Common subf function */ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -5884,36 +5828,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -#if defined(TARGET_PPC64) -static void gen_maddld(DisasContext *ctx) -{ -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); -} - -/* maddhd maddhdu */ -static void gen_maddhd_maddhdu(DisasContext *ctx) -{ -TCGv_i64 lo = tcg_temp_new_i64(); -TCGv_i64 hi = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -if (Rc(ctx->opcode)) { -tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_movi_i64(t1, 0); -} else { -tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); -} -tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, - cpu_gpr[rC(ctx->opcode)], t1); -} -#endif /* defined(TARGET_PPC64) */ - static void gen_tbegin(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6277,9 +6191,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 
0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HAN
[PATCH v2 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index c45547a770..be7d807e3c 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1738,8 +1738,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } -static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1773,7 +1774,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret); } } @@ -1783,7 +1784,7 @@ static void glue(gen_, name)(DisasContext *ctx) \ { \ gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ + sign, compute_ov, Rc(ctx->opcode)); \ } /* divwu divwu. divwuo divwuo. */ GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -- 2.39.3
Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Hi Richard, On 4/20/24 21:21, Richard Henderson wrote: On 4/19/24 02:25, Chinmay Rath wrote: Hi Richard, On 4/17/24 00:06, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a) ... + tcg_gen_movi_i64(t1, 0); Drop the movi. + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1); Use tcg_constant_i64(0). Looks like tcg_gen_add2_i64 internally modifies the passed arguments, hence constant is not expected. However, I tried using tcg_constant_i64(0) as suggested but this leads to an assert failure : qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion `!temp_readonly(ts)' failed. You misunderstood my suggestion. TCGv_i64 t1 = tcg_temp_new_i64(); tcg_gen_add2_i64(t1, cpu_gpr[vrt], lo, hi, cpu_gpr[a->rc], tcg_constant_i64(0)); Thank you for the clarification. Will add this to v2. Regards, Chinmay r~
Re: [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
On 4/17/24 00:08, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : divd[u, e, eu][o][.] : XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c | 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
On 4/17/24 01:05, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s] : D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c | 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Hi Richard, On 4/17/24 00:50, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i : D-form isel : A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath A faithful reorg of the existing code, so, Reviewed-by: Richard Henderson Thank you. Notes for improvement: +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a) +{ + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv_i32 src2 = tcg_temp_new_i32(); + TCGv_i32 src2lo = tcg_temp_new_i32(); + TCGv_i32 src2hi = tcg_temp_new_i32(); + TCGv_i32 crf = cpu_crf[a->bf]; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]); + + tcg_gen_andi_i32(src1, src1, 0xFF); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 8, 8); + + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(crf, src2lo, src2hi); + + if (a->l) { + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_extract_i32(src2lo, src2, 16, 8); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 24, 8); Will update the above in v2. Will implement the below improvements for trap insns as a separate patch later. 
+/* + * Fixed-Point Trap Instructions + */ + +static bool trans_TW(DisasContext *ctx, arg_TW *a) +{ + TCGv_i32 t0; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TWI(DisasContext *ctx, arg_TWI *a) +{ + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} + +static bool trans_TD(DisasContext *ctx, arg_TD *a) +{ + TCGv_i32 t0; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TDI(DisasContext *ctx, arg_TDI *a) +{ + TCGv t0; + TCGv_i32 t1; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} See target/sparc/translate.c, delay_exception, for a method of implementing compare-and-trap inline with no inline branch penalty. static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b) { static const TCGCond ucond[8] = { TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE, TCG_COND_EQ, TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS, }; static const TCGCond scond[8] = { TCG_COND_NEVER, TCG_COND_EQ, TCG_COND_GT, TCG_COND_GE, TCG_COND_LT, TCG_COND_LE, TCG_COND_NE, TCG_COND_ALWAYS, }; TCGCond uc = ucond[to & 7]; TCGCond sc = scond[to >> 2]; /* There is overlap with EQ; we may not need both comparisons. 
*/ if (!(to & 0x18)) { sc = TCG_COND_NEVER; } else if (!(to & 0x03)) { uc = TCG_COND_NEVER; } if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) { unconditional trap; return true; } if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) { return true; } e = delay_exception(ctx, POWERPC_EXCP_TRAP); if (uc != TCG_COND_NEVER) { tcg_gen_brcond_tl(uc, a, b, e->lab); } if (sc != TCG_COND_NEVER) { tcg_gen_brcond_tl(sc, a, b, e->lab); } return true; } bool trans_TW(...) { TCGv a = tcg_temp_new(); TCGv b = tcg_temp_new(); /* Note that consistent sign extensions work for unsigned comparisons. */ tcg_gen_exts_i32_tl(a, ra); tcg_gen_exts_i32_tl(b, rb); return do_conditional_trap(ctx, to, a, b); } etc. Thanks, Chinmay r~
Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Hi Richard, On 4/17/24 00:06, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a) ... + tcg_gen_movi_i64(t1, 0); Drop the movi. + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1); Use tcg_constant_i64(0). Looks like tcg_gen_add2_i64 internally modifies the passed arguments, hence constant is not expected. However, I tried using tcg_constant_i64(0) as suggested but this leads to an assert failure : qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion `!temp_readonly(ts)' failed. So I hope it is fine to keep the code change as is for now. Let me know if you have any suggestions. Thanks, Chinmay With that, Reviewed-by: Richard Henderson r~
Re: [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
On 4/16/24 23:55, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c | 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
On 4/16/24 23:49, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c | 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
Hi Richard, On 4/16/24 23:27, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) Reviewed-by: Richard Henderson Thank you. +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) Could drop the inline at the same time. Let the compiler decide. I kept inline as is, as there are multiple gen_op_* routines with inline and if necessary we could consider removing inline for all of them together in a separate patch : grep inline target/ppc/translate.c | grep gen_op static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf) static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf) static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf) static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf) static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, static inline void gen_op_arith_compute_ca32(DisasContext *ctx, static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_mfspr(DisasContext *ctx) Thanks, Chinmay r~
Re: [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Hi Richard, On 4/16/24 23:26, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 71 + 3 files changed, 80 insertions(+), 89 deletions(-) This is an accurate reorg of the current code, so Reviewed-by: Richard Henderson Thank you. However, as follow-up, the code generation could be cleaned up: +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a) +{ +#if defined(TARGET_PPC64) + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]); + tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]); + tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1); +#else + tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]); +#endif + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} Without ifdefs: TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); tcg_gen_ext32s_tl(t0, ra); tcg_gen_ext32s_tl(t1, rb); tcg_gen_mul_tl(rt, t0, t1); For ppc32, ext32s_tl will turn into a mov, which will be optimized away. So ideal code generation for both modes. 
+static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]); + tcg_gen_muls2_i32(t0, t1, t0, t1); +#if defined(TARGET_PPC64) + tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1); +#else + tcg_gen_mov_i32(cpu_gpr[a->rt], t0); +#endif + + tcg_gen_sari_i32(t0, t0, 31); + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); + tcg_gen_extu_i32_tl(cpu_ov, t0); Usually hosts need to create the full 64-bit product and then break it apart for tcg_gen_muls2_i32, so split followed immediately by concatenate isn't great. TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); #ifdef TARGET_PPC64 tcg_gen_ext32s_i64(t0, ra); tcg_gen_ext32s_i64(t1, rb); tcg_gen_mul_i64(rt, t0, t1); tcg_gen_sextract_i64(t0, rt, 31, 1); tcg_gen_sari_i64(t1, rt, 32); #else tcg_gen_muls2_i32(rt, t1, ra, rb); tcg_gen_sari_i32(t0, rt, 31); #endif tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1); Sure, will update in v2. Thanks, Chinmay + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); + + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} r~
[PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 6 +- target/ppc/insn32.decode | 16 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 2 +- target/ppc/translate.c | 133 + target/ppc/translate/fixedpoint-impl.c.inc | 123 +++ 6 files changed, 148 insertions(+), 136 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e862bdceaf..05f7ab5f6e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 509961023b..80a7bb1872 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -23,6 +23,9 @@ _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . 
rc:1 _tb +_tab_bc rt ra rb bc +@A_tab_bc .. rt:5 ra:5 rb:5 bc:5 . . _tab_bc + rt ra si:int64_t @D .. rt:5 ra:5 si:s16 @@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - @X_bfl CMPL01 ... - . . . 10 - @X_bfl CMPI001011 ... - . . @D_bfs CMPLI 001010 ... - . . @D_bfu +CMPRB 01 ... - . . . 001100 - @X_bfl +CMPEQB 01 ... -- . . 001110 - @X_bf + +### Fixed-Point Trap Instructions + +TW 01 . . . 000100 - @X +TD 01 . . . 0001000100 - @X +TWI 11 . . @D +TDI 10 . . @D + +### Fixed-Point Select Instruction + +ISEL01 . . . . 0 - @A_tab_bc ### Fixed-Point Arithmetic Instructions diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..79dd9b82cf 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 585c2b65d3..d12dcc28e1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ #define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb) { return hasvalue(rb, ra) ? 
CRF_GT : 0; } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8900da85e5..98e642b19a 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1564,66 +1564,6 @@ static inline void gen_set_Rc
[PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Moving the following instructions to decodetree : mul{ld, ldo, hd, hdu}[.]: XO-form madd{hd, hdu, ld} : VA-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 ++ target/ppc/translate.c | 101 - target/ppc/translate/fixedpoint-impl.c.inc | 85 + 3 files changed, 94 insertions(+), 101 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 654f55471b..61c59bbde0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -375,6 +375,15 @@ MODUW 01 . . . 011011 - @X DARN01 . --- .. - 100011 - @X_tl NEG 01 . . - . 001101000 . @XO_ta +MULLD 01 . . . 0 011101001 . @XO_tab_rc +MULLDO 01 . . . 1 011101001 . @XO_tab_rc +MULHD 01 . . . - 001001001 . @XO_tab_rc +MULHDU 01 . . . - 01001 . @XO_tab_rc + +MADDLD 000100 . . . . 110011 @VA +MADDHD 000100 . . . . 11 @VA +MADDHDU 000100 . . . . 110001 @VA + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 436fcfc645..8fa125d0ae 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1918,62 +1918,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -#if defined(TARGET_PPC64) -/* mulhd mulhd. */ -static void gen_mulhd(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhdu mulhdu. */ -static void gen_mulhdu(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulld mulld. 
*/ -static void gen_mulld(DisasContext *ctx) -{ -tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulldo mulldo. */ -static void gen_mulldo(DisasContext *ctx) -{ -TCGv_i64 t0 = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0); - -tcg_gen_sari_i64(t0, t0, 63); -tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} -#endif - /* Common subf function */ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -5884,36 +5828,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -#if defined(TARGET_PPC64) -static void gen_maddld(DisasContext *ctx) -{ -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); -} - -/* maddhd maddhdu */ -static void gen_maddhd_maddhdu(DisasContext *ctx) -{ -TCGv_i64 lo = tcg_temp_new_i64(); -TCGv_i64 hi = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -if (Rc(ctx->opcode)) { -tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_movi_i64(t1, 0); -} else { -tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); -} -tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, - cpu_gpr[rC(ctx->opcode)], t1); -} -#endif /* defined(TARGET_PPC64) */ - static void gen_tbegin(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6277,9 +6191,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 
0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0
[PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.]: XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 71 + 3 files changed, 80 insertions(+), 89 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..0184680db8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -193,6 +193,9 @@ _ta rt ra oe:bool rc:bool @XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta +_tab_rc rt ra rb rc:bool +@XO_tab_rc .. rt:5 ra:5 rb:5 . . rc:1 _tab_rc + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -353,6 +356,12 @@ SUBFE 01 . . . . 010001000 . @XO SUBFME 01 . . - . 011101000 . @XO_ta SUBFZE 01 . . - . 011001000 . @XO_ta +MULLI 000111 . . @D +MULLW 01 . . . 0 011101011 . @XO_tab_rc +MULLWO 01 . . . 1 011101011 . @XO_tab_rc +MULHW 01 . . . - 001001011 . @XO_tab_rc +MULHWU 01 . . . - 01011 . @XO_tab_rc + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..c45547a770 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -/* mulhw mulhw. */ -static void gen_mulhw(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhwu mulhwu. 
*/ -static void gen_mulhwu(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mulu2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullw mullw. */ -static void gen_mullw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) -TCGv_i64 t0, t1; -t0 = tcg_temp_new_i64(); -t1 = tcg_temp_new_i64(); -tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -cpu_gpr[rB(ctx->opcode)]); -#endif -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullwo mullwo. */ -static void gen_mullwo(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -#if defined(TARGET_PPC64) -tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0); -#endif - -tcg_gen_sari_i32(t0, t0, 31); -tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); -tcg_gen_extu_i32_tl(cpu_ov, t0); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulli */ -static void gen_mulli(DisasContext *ctx) -{ -tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -SIMM(ctx->opcode)); -} - #if defined(TARGET_PPC64) /* mulhd mulhd. 
*/ static void gen_mulhd(DisasContext *ctx) @@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x0001, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x0041, PPC_NONE, PPC2_ISA300), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x0001, PPC_ISEL), -GEN_HANDLER(mulhw,
[PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c| 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..1fc8b7c5fd 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -55,8 +55,8 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) DEF_HELPER_4(divde, i64, env, i64, i64, i32) #endif -DEF_HELPER_4(divweu, tl, env, tl, tl, i32) -DEF_HELPER_4(divwe, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 0184680db8..bfccebd9a7 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -362,6 +362,11 @@ MULLWO 01 . . . 1 011101011 . @XO_tab_rc MULHW 01 . . . - 001001011 . @XO_tab_rc MULHWU 01 . . . - 01011 . @XO_tab_rc +DIVW01 . . . . 01011 . @XO +DIVWU 01 . . . . 111001011 . @XO +DIVWE 01 . . . . 110101011 . @XO +DIVWEU 01 . . . . 110001011 . @XO + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..dc1f72ff38 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) } } -target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { uint64_t rt = 0; @@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } -target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { int64_t rt = 0; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index be7d807e3c..0a1d1d63b3 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,6 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* Div functions */ -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov, Rc(ctx->opcode)); \ -} -/* divwu divwu. divwuo divwuo. */ -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0); -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1); - /* div[wd]eu[o][.] 
*/ #define GEN_DIVE(name, hlpr, compute_ov) \ static void gen_##name(DisasContext *ctx) \ @@ -1805,11 +1790,6 @@ static void gen_##name(DisasContext *ctx) \ } \ } -GEN_DIVE(divweu, divweu, 0); -GEN_DIVE(divweuo, divweu, 1); -GEN_DIVE(divwe, divwe, 0); -GEN_DIVE(divweo, divwe, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) @@ -6562,17 +6542,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE, GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x, PPC_NONE, PPC2_ISA300), #endif -#undef GEN_INT_ARITH_DIVW -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x, PPC_INTEGER) -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1), -GEN_INT_ARITH_DIV
[PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 1fc8b7c5fd..09d0b0074b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -71,8 +71,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) -DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index bfccebd9a7..654f55471b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,9 @@ _ara @X_a.. ra:3 .. . . .. . _a +_tl rt l +@X_tl .. rt:5 ... l:2 . .. . _tl + rt ra rb oe:bool rc:bool @XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 @@ -367,6 +370,11 @@ DIVWU 01 . . . . 111001011 . @XO DIVWE 01 . . . . 110101011 . @XO DIVWEU 01 . . . . 110001011 . @XO +MODSW 01 . . . 111011 - @X +MODUW 01 . . . 011011 - @X +DARN01 . --- .. - 100011 - @X_tl +NEG 01 . . - . 001101000 . @XO_ta + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dc1f72ff38..bc25d5b062 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) /* * Return a random number. */ -uint64_t helper_darn32(void) +uint64_t helper_DARN32(void) { Error *err = NULL; uint32_t ret; @@ -186,7 +186,7 @@ uint64_t helper_darn32(void) return ret; } -uint64_t helper_darn64(void) +uint64_t helper_DARN64(void) { Error *err = NULL; uint64_t ret; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0a1d1d63b3..436fcfc645 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1878,17 +1878,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, } } -#define GEN_INT_ARITH_MODW(name, opc3, sign)\ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],\ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign);\ -} - -GEN_INT_ARITH_MODW(moduw, 0x08, 0); -GEN_INT_ARITH_MODW(modsw, 0x18, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign) @@ -2055,27 +2044,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, } } -/* neg neg. nego nego. 
*/ -static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) -{ -TCGv zero = tcg_constant_tl(0); -gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - zero, 0, 0, compute_ov, Rc(ctx->opcode)); -} - -static void gen_neg(DisasContext *ctx) -{ -tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode))) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -static void gen_nego(DisasContext *ctx) -{ -gen_op_arith_neg(ctx, 1); -} - /***Integer logical***/ #define GEN_LOGICAL2(name, tcg_op, opc, type) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2401,24 +2369,6 @@ static void gen_cnttzd(DisasContext *ctx) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
[PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
Moving the below instructions to decodetree specification : divd[u, e, eu][o][.]: XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 09d0b0074b..e862bdceaf 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -52,8 +52,8 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) -DEF_HELPER_4(divde, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) #endif DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 61c59bbde0..509961023b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -384,6 +384,14 @@ MADDLD 000100 . . . . 110011 @VA MADDHD 000100 . . . . 11 @VA MADDHDU 000100 . . . . 110001 @VA +DIVD01 . . . . 01001 . @XO +DIVDU 01 . . . . 111001001 . @XO +DIVDE 01 . . . . 110101001 . @XO +DIVDEU 01 . . . . 110001001 . @XO + +MODSD 01 . . . 111001 - @X +MODUD 01 . . . 011001 - @X + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index bc25d5b062..585c2b65d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -101,7 +101,7 @@ target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, #if defined(TARGET_PPC64) -uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) { uint64_t rt = 0; int overflow = 0; @@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } -uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) { uint64_t rt = 0; int64_t ra = (int64_t)rau; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8fa125d0ae..8900da85e5 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,11 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* div[wd]eu[o][.] 
*/ -#define GEN_DIVE(name, hlpr, compute_ov) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv_i32 t0 = tcg_constant_i32(compute_ov); \ -gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env, \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); \ -} \ -} #if defined(TARGET_PPC64) -static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1824,29 +1814,10 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret);
[PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s]: D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c| 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 05f7ab5f6e..b53abd853a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -58,8 +58,8 @@ DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -68,8 +68,8 @@ DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) 
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 80a7bb1872..3175810190 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -29,6 +29,9 @@ rt ra si:int64_t @D .. rt:5 ra:5 si:s16 +_ui rt ra ui:uint64_t +@D_ui .. rt:5 ra:5 ui:16 _ui + _bf bf l:bool ra imm @D_bfs .. bf:3 . l:1 ra:5 imm:s16 _bf @D_bfu .. bf:3 . l:1 ra:5 imm:16 _bf @@ -96,6 +99,9 @@ _sa rs ra @X_sa .. rs:5 ra:5 . .. . _sa +_sa_rcrs ra rc +@X_sa_rc.. rs:5 ra:5 . .. rc:1 _sa_rc + %x_frtp 22:4 !function=times_2 %x_frap 17:4 !function=times_2 %x_frbp 12:4 !function=times_2 @@ -410,6 +416,38 @@ MODUD 01 . . . 011001 - @X ## Fixed-Point Logical Instructions +ANDI_ 011100 . . @D_ui +ANDIS_ 011101 . . @D_ui +ORI 011000 . . @D_ui +ORIS011001 . . @D_ui +XORI011010 . . @D_ui +XORIS 011011 . . @D_ui + +AND 01 . . . 011100 . @X_rc +ANDC01 . . . 00 . @X_rc +NAND01 . . . 0111011100 . @X_rc +OR 01 . . . 011000 . @X_rc +ORC 01 . . . 0110011100 . @X_rc +NOR 01 . . . 000100 . @X_rc +XOR 01 . . . 010000 . @X_rc +EQV 01 . . . 0100011100 . @X_rc +CMPB01 . . . 011100 . @X_rc + +EXTSB 01 . . - 1110111010 . @X_sa_rc +EXTSH 01 . . - 1110011010 . @X_sa_rc +EXTSW 01 . . - 011010 . @X_sa_rc +CNTLZW 01 . . - 011010 . @X_sa_rc +CNTTZW 01 . . - 111010 . @X_sa_rc +CNTLZD 01 . . - 111010 . @X_sa_rc +CNTTZD 01 . . - 1000111010 . @X_sa_rc +POPCNTB 01 . . - 000010 . @X_sa_rc + +POPCNTW 01 . . - 010010 - @X_sa +POPCNTD 01 . . - 011010 - @X_sa +PRTYW 01 . . - 0010011010 - @X_sa +PRTYD 01 . . - 0010111010 - @X_sa + +BPERMD 01 . . . 001100
[PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index c45547a770..be7d807e3c 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1738,8 +1738,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } -static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1773,7 +1774,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret); } } @@ -1783,7 +1784,7 @@ static void glue(gen_, name)(DisasContext *ctx) \ { \ gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ + sign, compute_ov, Rc(ctx->opcode)); \ } /* divwu divwu. divwuo divwuo. */ GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -- 2.39.3
[PATCH 0/8] target/ppc: Move fixed-point insns to decodetree.
Moving all fixed-point instructions of the following type to decodetree specification : arithmetic, compare, trap, select and logical. Chinmay Rath (8): target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree. target/ppc: Make divw[u] handler method decodetree compatible. target/ppc: Move divw[u, e, eu] instructions to decodetree. target/ppc: Move neg, darn, mod{sw, uw} to decodetree. target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree. target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree. target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree. target/ppc: Move logical fixed-point instructions to decodetree. target/ppc/helper.h| 26 +- target/ppc/insn32.decode | 93 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 24 +- target/ppc/translate.c | 841 + target/ppc/translate/fixedpoint-impl.c.inc | 639 6 files changed, 770 insertions(+), 857 deletions(-) -- 2.39.3
[PATCH v2 2/2] target/ppc: Move floating-point arithmetic instructions to decodetree.
This patch moves the below instructions to decodetree specification : f{add, sub, mul, div, re, rsqrte, madd, msub, nmadd, nmsub}[s][.] : A-form ft{div, sqrt} : X-form With this patch, all the floating-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Nicholas Piggin --- target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 38 ++-- target/ppc/translate/fp-impl.c.inc | 285 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 192 insertions(+), 248 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f177d5b906 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -110,32 +110,32 @@ DEF_HELPER_2(friz, i64, env, i64) DEF_HELPER_2(frip, i64, env, i64) DEF_HELPER_2(frim, i64, env, i64) -DEF_HELPER_3(fadd, f64, env, f64, f64) -DEF_HELPER_3(fadds, f64, env, f64, f64) -DEF_HELPER_3(fsub, f64, env, f64, f64) -DEF_HELPER_3(fsubs, f64, env, f64, f64) -DEF_HELPER_3(fmul, f64, env, f64, f64) -DEF_HELPER_3(fmuls, f64, env, f64, f64) -DEF_HELPER_3(fdiv, f64, env, f64, f64) -DEF_HELPER_3(fdivs, f64, env, f64, f64) -DEF_HELPER_4(fmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64) +DEF_HELPER_3(FADD, f64, env, f64, f64) +DEF_HELPER_3(FADDS, f64, env, f64, f64) +DEF_HELPER_3(FSUB, f64, env, f64, f64) +DEF_HELPER_3(FSUBS, f64, env, f64, f64) +DEF_HELPER_3(FMUL, f64, env, f64, f64) +DEF_HELPER_3(FMULS, f64, env, f64, f64) +DEF_HELPER_3(FDIV, f64, env, f64, f64) +DEF_HELPER_3(FDIVS, f64, env, f64, 
f64) +DEF_HELPER_4(FMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUBS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUBS, i64, env, i64, i64, i64) DEF_HELPER_2(FSQRT, f64, env, f64) DEF_HELPER_2(FSQRTS, f64, env, f64) -DEF_HELPER_2(fre, i64, env, i64) -DEF_HELPER_2(fres, i64, env, i64) -DEF_HELPER_2(frsqrte, i64, env, i64) -DEF_HELPER_2(frsqrtes, i64, env, i64) +DEF_HELPER_2(FRE, i64, env, i64) +DEF_HELPER_2(FRES, i64, env, i64) +DEF_HELPER_2(FRSQRTE, i64, env, i64) +DEF_HELPER_2(FRSQRTES, i64, env, i64) DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) -DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(FTDIV, TCG_CALL_NO_RWG_SE, i32, i64, i64) +DEF_HELPER_FLAGS_1(FTSQRT, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..a314172a2e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -20,6 +20,12 @@ frt fra frb frc rc:bool @A .. frt:5 fra:5 frb:5 frc:5 . rc:1 +_tab frt fra frb rc:bool +@A_tab .. frt:5 fra:5 frb:5 . . rc:1 _tab + +_tac frt fra frc rc:bool +@A_tac .. frt:5 fra:5 . frc:5 . rc:1 _tac + _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb @@ -124,6 +130,9 @@ _bf bf ra rb @X_bf .. bf:3 .. ra:5 rb:5 .. . _bf +_bf_b bf rb +@X_bf_b .. bf:3 .. . rb:5 .. . _bf_b + @X_bf_ap_bp .. bf:3 .. 0 0 .. . _bf ra=%x_frap rb=%x_frbp @X_bf_a_bp .. bf:3 .. ra:5 0 .. . _bf rb=%x_frbp @@ -374,9 +383,42 @@ STFDUX 01 . .. 100111 - @X ### Floating-Point Arithmetic Instructions +FADD11 . . . - 10101 . @A_tab +FADDS 111011 . . . - 10101 . @A_tab + +FSUB11 . . . - 10100 . @A_tab +FSUBS 111011 . . . - 10100 . 
@A_tab + +FMUL11 . . - . 11001 . @A_tac +FMULS 111011 . . - . 11001 . @A_tac + +FDIV11 . . . - 10010 . @A_tab +FDIVS 111011 . . . - 10010 . @A_tab + FSQRT 11 . - . - 10110 . @A_tb FSQRTS 111011
[PATCH v2 1/2] target/ppc: Merge various fpu helpers
This patch merges the definitions of the following set of fpu helper methods, which are similar, using macros : 1. f{add, sub, mul, div}(s) 2. fre(s) 3. frsqrte(s) Signed-off-by: Chinmay Rath --- target/ppc/fpu_helper.c | 221 +++- 1 file changed, 62 insertions(+), 159 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 4b3dcad5d1..8d0cbe27e7 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -490,54 +490,12 @@ static void float_invalid_op_addsub(CPUPPCState *env, int flags, } } -/* fadd - fadd. */ -float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void addsub_flags_handler(CPUPPCState *env, int flags, +uintptr_t ra) { -float64 ret = float64_add(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} - -return ret; -} - -/* fadds - fadds. */ -float64 helper_fadds(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64r32_add(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} -return ret; -} - -/* fsub - fsub. */ -float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_sub(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); +float_invalid_op_addsub(env, flags, 1, ra); } - -return ret; -} - -/* fsubs - fsubs. 
*/ -float64 helper_fsubs(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64r32_sub(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} -return ret; } static void float_invalid_op_mul(CPUPPCState *env, int flags, @@ -550,29 +508,11 @@ static void float_invalid_op_mul(CPUPPCState *env, int flags, } } -/* fmul - fmul. */ -float64 helper_fmul(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_mul(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_mul(env, flags, 1, GETPC()); -} - -return ret; -} - -/* fmuls - fmuls. */ -float64 helper_fmuls(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void mul_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { -float64 ret = float64r32_mul(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_mul(env, flags, 1, GETPC()); +float_invalid_op_mul(env, flags, 1, ra); } -return ret; } static void float_invalid_op_div(CPUPPCState *env, int flags, @@ -587,36 +527,14 @@ static void float_invalid_op_div(CPUPPCState *env, int flags, } } -/* fdiv - fdiv. */ -float64 helper_fdiv(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_div(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_div(env, flags, 1, GETPC()); -} -if (unlikely(flags & float_flag_divbyzero)) { -float_zero_divide_excp(env, GETPC()); -} - -return ret; -} - -/* fdivs - fdivs. 
*/ -float64 helper_fdivs(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void div_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { -float64 ret = float64r32_div(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_div(env, flags, 1, GETPC()); +float_invalid_op_div(env, flags, 1, ra); } if (unlikely(flags & float_flag_divbyzero)) { -float_zero_divide_excp(env, GETPC()); +float_zero_divide_excp(env, ra); } - -return ret; } static uint64_t float_invalid_cvt(CPUPPCState *env, int flags, @@ -812,81 +730,66 @@ float64 helper_##name(CPUPPCState *env, float64 arg) \ FPU_FSQRT(FSQRT, float64_sqrt) FPU_FSQRT(FSQRTS, float64r32_sqrt) -/* fre - fre. */ -float64 helper_fre(CPUPPCState *env, float64 arg) -{ -/* "Estimate" the reciprocal with actual division. */ -float64 ret = float64_div(float64_one, arg, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid_sn
[PATCH v2 0/2] Moving fp arithmetic insns to decodetree.
This patch series moves floating-point arithmetic instructions from legacy to decodetree format. The first patch consolidates the common behaviour of floating-point helper functions using macros, reducing code duplication. The second patch moves all the floating arithmetic instructions to decodetree. Change log : v2 : Addressed review comments on v1 v1 : https://lore.kernel.org/qemu-devel/20240307110318.170319-1-ra...@linux.ibm.com/ Chinmay Rath (2): target/ppc: Merge various fpu helpers target/ppc: Move floating-point arithmetic instructions to decodetree. target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 235 +++- target/ppc/translate/fp-impl.c.inc | 285 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 242 insertions(+), 395 deletions(-) -- 2.39.3
Re: [PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
On 3/12/24 15:31, Nicholas Piggin wrote: On Thu Mar 7, 2024 at 9:03 PM AEST, Chinmay Rath wrote: diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 189cd8c979..03b84ba79b 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -30,96 +30,73 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op1, op2, set_fprf, type)\ -static void gen_f##name(DisasContext *ctx)\ -{ \ -TCGv_i64 t0; \ -TCGv_i64 t1; \ -TCGv_i64 t2; \ -TCGv_i64 t3; \ -if (unlikely(!ctx->fpu_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_FPU); \ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -t1 = tcg_temp_new_i64(); \ -t2 = tcg_temp_new_i64(); \ -t3 = tcg_temp_new_i64(); \ -gen_reset_fpstatus(); \ -get_fpr(t0, rA(ctx->opcode)); \ -get_fpr(t1, rC(ctx->opcode)); \ -get_fpr(t2, rB(ctx->opcode)); \ -gen_helper_f##name(t3, tcg_env, t0, t1, t2); \ -set_fpr(rD(ctx->opcode), t3); \ -if (set_fprf) { \ -gen_compute_fprf_float64(t3); \ -} \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_cr1_from_fpscr(ctx); \ -} \ +static bool do_helper_acb(DisasContext *ctx, arg_A *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ +REQUIRE_INSNS_FLAGS(ctx, FLOAT); +REQUIRE_FPU(ctx); +TCGv_i64 t0, t1, t2, t3; Existing style prefers the variable declarations first I think. +t0 = tcg_temp_new_i64(); +t1 = tcg_temp_new_i64(); +t2 = tcg_temp_new_i64(); +t3 = tcg_temp_new_i64(); +gen_reset_fpstatus(); +get_fpr(t0, a->fra); +get_fpr(t1, a->frc); +get_fpr(t2, a->frb); +helper(t3, tcg_env, t0, t1, t2); +set_fpr(a->frt, t3); +gen_compute_fprf_float64(t3); +if (unlikely(a->rc != false)) { This reads better without the double negative. I.e., just if (unlikely(a->rc)) { Otherwise the decodetree parts look good, with those updated and split out from the helper generation: Reviewed-by: Nicholas Piggin Thanks Nick, I shall post v2 with suggested updates. 
Regards, Chinmay Thanks, Nick +gen_set_cr1_from_fpscr(ctx); +} +return true; } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, 0x3F, op2, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, 0x3B, op2, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op1, op2, inval, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx)\ -{ \ -TCGv_i64 t0; \ -TCGv_i64 t1; \ -TCGv_i64 t2; \ -if (unlikely(!ctx->fpu_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_FPU); \ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -t1 = tcg_temp_new_i64();
Re: [PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
On 3/12/24 19:59, Peter Maydell wrote: On Tue, 12 Mar 2024 at 14:25, Nicholas Piggin wrote: On Wed Mar 13, 2024 at 12:01 AM AEST, Richard Henderson wrote: On 3/11/24 23:36, Nicholas Piggin wrote: [snip] #define FPU_HELPER(name, op, flags_handler) \ float64 helper_##name(CPUPPCState *env, float64 arg1, float64 arg2) \ { \ float64 ret = op(arg1, arg2, &env->fp_status);\ int flags = get_float_exception_flags(&env->fp_status); \ flags_handler(env, flags) \ return ret; \ } static inline void addsub_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_addsub(env, flags, 1, GETPC()); } } static inline void mul_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_mul(env, flags, 1, GETPC()); } } static inline void div_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_div(env, flags, 1, GETPC()); } if (unlikely(flags & float_flag_divbyzero)) { float_zero_divide_excp(env, GETPC()); } } Beware -- GETPC() may only be called from the outermost helper. Ah, because it's using __builtin_return_address. Good to know. Using always_inline and a comment should do the trick then. The standard way to fix this is that you call GETPC() at the outermost helper and then pass that value around as an extra uintptr_t ra argument to called functions that need it. Thanks Peter, Nick and Richard. I shall post v2 with suggested updates. Regards, Chinmay thanks -- PMM
[PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
This patch moves the below instructions to decodetree specification : f{add, sub, mul, div, re, rsqrte, madd, msub, nmadd, nmsub}[s][.] : A-form ft{div, sqrt} : X-form With this patch, all the floating-point arithmetic instructions have been moved to decodetree. The patch also merges the definitions of different sets of helper methods of the above instructions, which are similar, using macros. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 265 +- target/ppc/translate/fp-impl.c.inc | 288 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 262 insertions(+), 408 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f177d5b906 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -110,32 +110,32 @@ DEF_HELPER_2(friz, i64, env, i64) DEF_HELPER_2(frip, i64, env, i64) DEF_HELPER_2(frim, i64, env, i64) -DEF_HELPER_3(fadd, f64, env, f64, f64) -DEF_HELPER_3(fadds, f64, env, f64, f64) -DEF_HELPER_3(fsub, f64, env, f64, f64) -DEF_HELPER_3(fsubs, f64, env, f64, f64) -DEF_HELPER_3(fmul, f64, env, f64, f64) -DEF_HELPER_3(fmuls, f64, env, f64, f64) -DEF_HELPER_3(fdiv, f64, env, f64, f64) -DEF_HELPER_3(fdivs, f64, env, f64, f64) -DEF_HELPER_4(fmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64) +DEF_HELPER_3(FADD, f64, env, f64, f64) +DEF_HELPER_3(FADDS, f64, env, f64, f64) +DEF_HELPER_3(FSUB, f64, env, f64, f64) +DEF_HELPER_3(FSUBS, f64, env, f64, f64) +DEF_HELPER_3(FMUL, f64, env, f64, f64) 
+DEF_HELPER_3(FMULS, f64, env, f64, f64) +DEF_HELPER_3(FDIV, f64, env, f64, f64) +DEF_HELPER_3(FDIVS, f64, env, f64, f64) +DEF_HELPER_4(FMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUBS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUBS, i64, env, i64, i64, i64) DEF_HELPER_2(FSQRT, f64, env, f64) DEF_HELPER_2(FSQRTS, f64, env, f64) -DEF_HELPER_2(fre, i64, env, i64) -DEF_HELPER_2(fres, i64, env, i64) -DEF_HELPER_2(frsqrte, i64, env, i64) -DEF_HELPER_2(frsqrtes, i64, env, i64) +DEF_HELPER_2(FRE, i64, env, i64) +DEF_HELPER_2(FRES, i64, env, i64) +DEF_HELPER_2(FRSQRTE, i64, env, i64) +DEF_HELPER_2(FRSQRTES, i64, env, i64) DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) -DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(FTDIV, TCG_CALL_NO_RWG_SE, i32, i64, i64) +DEF_HELPER_FLAGS_1(FTSQRT, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..a314172a2e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -20,6 +20,12 @@ frt fra frb frc rc:bool @A .. frt:5 fra:5 frb:5 frc:5 . rc:1 +_tab frt fra frb rc:bool +@A_tab .. frt:5 fra:5 frb:5 . . rc:1 _tab + +_tac frt fra frc rc:bool +@A_tac .. frt:5 fra:5 . frc:5 . rc:1 _tac + _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb @@ -124,6 +130,9 @@ _bf bf ra rb @X_bf .. bf:3 .. ra:5 rb:5 .. . _bf +_bf_b bf rb +@X_bf_b .. bf:3 .. . rb:5 .. . _bf_b + @X_bf_ap_bp .. bf:3 .. 0 0 .. . _bf ra=%x_frap rb=%x_frbp @X_bf_a_bp .. bf:3 .. ra:5 0 .. . _bf rb=%x_frbp @@ -374,9 +383,42 @@ STFDUX 01 . .. 100111 - @X ### Floating-Point Arithmetic Instructions +FADD11 . . . 
- 10101 . @A_tab +FADDS 111011 . . . - 10101 . @A_tab + +FSUB11 . . . - 10100 . @A_tab +FSUBS 111011 . . . - 10100 . @A_tab + +FMUL11 . . - . 11001 . @A_tac +FMULS 111011 . . - . 11001 . @A_tac + +FDIV11 . . . - 10010 . @A_tab +FDIVS 111011
[PATCH v2] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
This patch moves the below instructions to decodetree specification: {add, subf}[c,e,me,ze][o][.] : XO-form addic[.], subfic : D-form addex : Z23-form This patch introduces XO form instructions into decode tree specification, for which all the four variations([o][.]) have been handled with a single pattern. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- Changes v1 -> v2 : Reused X format for ADDEX instead of creating a new Z23_tab_cy format. (Richard) Added necessary instruction flag checks for ADDEX. (self-review) --- target/ppc/insn32.decode | 26 target/ppc/translate.c | 136 - target/ppc/translate/fixedpoint-impl.c.inc | 70 +++ 3 files changed, 96 insertions(+), 136 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..eada59f59f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,12 @@ _ara @X_a.. ra:3 .. . . .. . _a + rt ra rb oe:bool rc:bool +@XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 + +_ta rt ra oe:bool rc:bool +@XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -322,10 +328,30 @@ CMPLI 001010 ... - . . @D_bfu ### Fixed-Point Arithmetic Instructions +ADD 01 . . . . 11010 . @XO +ADDC01 . . . . 01010 . @XO +ADDE01 . . . . 010001010 . @XO + +# ADDEX is Z23-form, with CY=0; all other values for CY are reserved. +# This works out the same as X-form. +ADDEX 01 . . . 00 10101010 - @X + ADDI001110 . . @D ADDIS 00 . . @D +ADDIC 001100 . . @D +ADDIC_ 001101 . . @D ADDPCIS 010011 . . .. 00010 . @DX +ADDME 01 . . - . 011101010 . @XO_ta +ADDZE 01 . . - . 011001010 . @XO_ta + +SUBF01 . . . . 000101000 . @XO +SUBFIC 001000 . . @D +SUBFC 01 . . . . 01000 . @XO +SUBFE 01 . . . . 010001000 . @XO + +SUBFME 01 . . - . 011101000 . @XO_ta +SUBFZE 01 . . - . 011001000 . 
@XO_ta ## Fixed-Point Logical Instructions diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 049f636927..51dc1e79cc 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1697,61 +1697,6 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ret, t0); } } -/* Add functions with two operands */ -#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} -/* Add functions with one operand and one immediate */ -#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca,\ -add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -TCGv t0 = tcg_constant_tl(const_val); \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], t0,\ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} - -/* add add. addo addo. */ -GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0) -GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1) -/* addc addc. addco addco. */ -GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0) -GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1) -/* adde adde. addeo addeo. */ -GEN_INT_ARITH_ADD(adde, 0x04, cpu_ca, 1, 1, 0) -GEN_INT_ARITH_ADD(addeo, 0x14, cpu_ca, 1, 1, 1) -
Re: [RFC PATCH] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
Hi Richard, On 2/13/24 03:51, Richard Henderson wrote: On 2/9/24 01:35, Chinmay Rath wrote: +&Z23_tab_cy rt ra rb cy +@Z23_tab_cy .. rt:5 ra:5 rb:5 cy:2 . &Z23_tab_cy ... +ADDEX 01 . . . .. 10101010 - @Z23_tab_cy ... +static bool trans_ADDEX(DisasContext *ctx, arg_Z23_tab_cy *a) +{ + gen_op_arith_add(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + cpu_ov, cpu_ov32, true, true, false, false); + return true; +} CY != 0 is reserved. While you could diagnose this in trans_ADDEX, it seems cleaner to simply match 00 in the CY field until a future ISA defines something else. All that is required is a comment in the decodetree entry. # Z23-form, with CY=0; all other values for CY are reserved. # This works out the same as X-form. ADDEX 01 . . . 00 10101010 - @X Thanks for your review comments. I shall update as suggested in v2. Regards, Chinmay r~
[RFC PATCH] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
This patch moves the below instructions to decodetree specification: {add, subf}[c,e,me,ze][o][.] : XO-form addic[.], subfic : D-form addex : Z23-form This patch introduces XO form instructions into decode tree specification, for which all the four variations([o][.]) have been handled with a single pattern. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 26 target/ppc/translate.c | 136 - target/ppc/translate/fixedpoint-impl.c.inc | 69 +++ 3 files changed, 95 insertions(+), 136 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..ddaa47210a 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,12 @@ _ara @X_a.. ra:3 .. . . .. . _a + rt ra rb oe:bool rc:bool +@XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 + +_ta rt ra oe:bool rc:bool +@XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -239,6 +245,9 @@ _tabfrt fra frb rmc rc:bool @Z23_tab.. frt:5 fra:5 frb:5 rmc:2 rc:1_tab +_tab_cy rt ra rb cy +@Z23_tab_cy .. rt:5 ra:5 rb:5 cy:2 . _tab_cy + %z23_frtp 22:4 !function=times_2 %z23_frap 17:4 !function=times_2 %z23_frbp 12:4 !function=times_2 @@ -322,10 +331,27 @@ CMPLI 001010 ... - . . @D_bfu ### Fixed-Point Arithmetic Instructions +ADD 01 . . . . 11010 . @XO +ADDC01 . . . . 01010 . @XO +ADDE01 . . . . 010001010 . @XO +ADDEX 01 . . . .. 10101010 - @Z23_tab_cy + ADDI001110 . . @D ADDIS 00 . . @D +ADDIC 001100 . . @D +ADDIC_ 001101 . . @D ADDPCIS 010011 . . .. 00010 . @DX +ADDME 01 . . - . 011101010 . @XO_ta +ADDZE 01 . . - . 011001010 . @XO_ta + +SUBF01 . . . . 000101000 . @XO +SUBFIC 001000 . . @D +SUBFC 01 . . . . 01000 . @XO +SUBFE 01 . . . . 010001000 . @XO + +SUBFME 01 . . - . 011101000 . @XO_ta +SUBFZE 01 . . - . 011001000 . 
@XO_ta ## Fixed-Point Logical Instructions diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 049f636927..51dc1e79cc 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1697,61 +1697,6 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ret, t0); } } -/* Add functions with two operands */ -#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} -/* Add functions with one operand and one immediate */ -#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca,\ -add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -TCGv t0 = tcg_constant_tl(const_val); \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], t0,\ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} - -/* add add. addo addo. */ -GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0) -GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1) -/* addc addc. addco addco. */ -GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0) -GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1) -/* adde adde. addeo addeo. */ -GEN_INT_ARITH_ADD(add