[PATCH v3 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Change log: v3 : Rectified EA increment from 8 to 16 for paired insns in patch 3/3, as pointed out by Richard. Retained his 'Reviewed-by' for all patches, after the correction. v2 : Applied IFALIGN_PAIR memop changes in patches 2/3 and 3/3, based on review comments by Richard. https://lore.kernel.org/qemu-devel/20240630120157.259233-1-ra...@linux.ibm.com/ v1 : https://lore.kernel.org/qemu-devel/20240621114604.868415-1-ra...@linux.ibm.com/ Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 target/ppc/translate/vmx-impl.c.inc | 52 +++- target/ppc/translate/vsx-impl.c.inc | 74 + 3 files changed, 63 insertions(+), 73 deletions(-) -- 2.39.3
[PATCH v3 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 74 + 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..40a87ddc4a 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; 
REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xsh = tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +gen_addr_add(ctx, ea, ea, 16); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { 
-gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt,
[PATCH v3 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Reviewed-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH v3 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Reviewed-by: Richard Henderson Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 42 ++--- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..70d0ad2e71 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,29 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +60,16 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); return true; } -- 2.39.3
[PATCH v2 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 70 + 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..b622831a73 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,17 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +392,17 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, 
ISA300); -xsh = tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, + MO_BE | MO_128 | MO_ATOM_IFALIGN_PAIR); return true; } @@ -2175,13 +2179,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2199,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); 
-set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt2, data); } } return true; -- 2.39.3
[PATCH v2 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 42 ++--- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..70d0ad2e71 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,29 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +60,16 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ATOM_IFALIGN_PAIR)); return true; } -- 2.39.3
[PATCH v2 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Change log: v2 : Applied IFALIGN_PAIR memop changes in patches 2/3 and 3/3, based on review comments by Richard in v1. v1 : https://lore.kernel.org/qemu-devel/20240621114604.868415-1-ra...@linux.ibm.com/ Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 - target/ppc/translate/vmx-impl.c.inc | 52 - target/ppc/translate/vsx-impl.c.inc | 70 + 3 files changed, 61 insertions(+), 71 deletions(-) -- 2.39.3
[PATCH v2 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH 1/3] target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc.
Those functions are used to ld/st data to and from Altivec registers, in 64 bits chunks, and are only used in vmx-impl.c.inc file, hence the clean-up movement. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 10 -- target/ppc/translate/vmx-impl.c.inc | 10 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ad512e1922..f7f2c2db9e 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6200,16 +6200,6 @@ static inline void set_fpr(int regno, TCGv_i64 src) tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, vsr64_offset(regno, false)); } -static inline void get_avr64(TCGv_i64 dst, int regno, bool high) -{ -tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); -} - -static inline void set_avr64(int regno, TCGv_i64 src, bool high) -{ -tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); -} - /* * Helpers for decodetree used by !function for decoding arguments. */ diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 152bcde0e3..a182d2cf81 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,6 +14,16 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } +static inline void get_avr64(TCGv_i64 dst, int regno, bool high) +{ +tcg_gen_ld_i64(dst, tcg_env, avr64_offset(regno, high)); +} + +static inline void set_avr64(int regno, TCGv_i64 src, bool high) +{ +tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -- 2.39.3
[PATCH 2/3] target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128.
Updated instructions {l, st}vx to use tcg_gen_qemu_ld/st_i128, instead of using 64 bits loads/stores in succession. Introduced functions {get, set}_avr_full in vmx-impl.c.inc to facilitate the above, and potential future usage. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 40 + 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index a182d2cf81..47f6952d69 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -24,25 +24,28 @@ static inline void set_avr64(int regno, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, avr64_offset(regno, high)); } +static inline void get_avr_full(TCGv_i128 dst, int regno) +{ +tcg_gen_ld_i128(dst, tcg_env, avr_full_offset(regno)); +} + +static inline void set_avr_full(int regno, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, avr_full_offset(regno)); +} + static bool trans_LVX(DisasContext *ctx, arg_X *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_ld64_i64 - * does necessary 64-bit byteswap already. 
- */ -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, !ctx->le_mode); -tcg_gen_addi_tl(EA, EA, 8); -gen_qemu_ld64_i64(ctx, avr, EA); -set_avr64(a->rt, avr, ctx->le_mode); +tcg_gen_qemu_ld_i128(avr, EA, ctx->mem_idx, DEF_MEMOP(MO_128)); +set_avr_full(a->rt, avr); return true; } @@ -56,22 +59,15 @@ static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) static bool trans_STVX(DisasContext *ctx, arg_STVX *a) { TCGv EA; -TCGv_i64 avr; +TCGv_i128 avr; REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); REQUIRE_VECTOR(ctx); gen_set_access_type(ctx, ACCESS_INT); -avr = tcg_temp_new_i64(); +avr = tcg_temp_new_i128(); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_andi_tl(EA, EA, ~0xf); -/* - * We only need to swap high and low halves. gen_qemu_st64_i64 - * does necessary 64-bit byteswap already. - */ -get_avr64(avr, a->rt, !ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); -tcg_gen_addi_tl(EA, EA, 8); -get_avr64(avr, a->rt, ctx->le_mode); -gen_qemu_st64_i64(ctx, avr, EA); +get_avr_full(avr, a->rt); +tcg_gen_qemu_st_i128(avr, EA, ctx->mem_idx, DEF_MEMOP(MO_128)); return true; } -- 2.39.3
[PATCH 3/3] target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128.
Updated many VSX instructions to use tcg_gen_qemu_ld/st_i128, instead of using tcg_gen_qemu_ld/st_i64 consecutively. Introduced functions {get,set}_vsr_full to facilitate the above & for future use. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 68 - 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 26ebf3fedf..a42fbf7c12 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -10,6 +10,16 @@ static inline void set_cpu_vsr(int n, TCGv_i64 src, bool high) tcg_gen_st_i64(src, tcg_env, vsr64_offset(n, high)); } +static inline void get_vsr_full(TCGv_i128 dst, int reg) +{ +tcg_gen_ld_i128(dst, tcg_env, vsr_full_offset(reg)); +} + +static inline void set_vsr_full(int reg, TCGv_i128 src) +{ +tcg_gen_st_i128(src, tcg_env, vsr_full_offset(reg)); +} + static inline TCGv_ptr gen_vsr_ptr(int reg) { TCGv_ptr r = tcg_temp_new_ptr(); @@ -196,20 +206,16 @@ static bool trans_LXVH8X(DisasContext *ctx, arg_LXVH8X *a) static bool trans_LXVB16X(DisasContext *ctx, arg_LXVB16X *a) { TCGv EA; -TCGv_i64 xth, xtl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xth = tcg_temp_new_i64(); -xtl = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); -set_cpu_vsr(a->rt, xth, true); -set_cpu_vsr(a->rt, xtl, false); +tcg_gen_qemu_ld_i128(data, EA, ctx->mem_idx, MO_BE | MO_128); +set_vsr_full(a->rt, data); return true; } @@ -385,20 +391,16 @@ static bool trans_STXVH8X(DisasContext *ctx, arg_STXVH8X *a) static bool trans_STXVB16X(DisasContext *ctx, arg_STXVB16X *a) { TCGv EA; -TCGv_i64 xsh, xsl; +TCGv_i128 data; REQUIRE_VSX(ctx); REQUIRE_INSNS_FLAGS2(ctx, ISA300); -xsh = 
tcg_temp_new_i64(); -xsl = tcg_temp_new_i64(); -get_cpu_vsr(xsh, a->rt, true); -get_cpu_vsr(xsl, a->rt, false); +data = tcg_temp_new_i128(); gen_set_access_type(ctx, ACCESS_INT); EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); -tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEUQ); -tcg_gen_addi_tl(EA, EA, 8); -tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEUQ); +get_vsr_full(data, a->rt); +tcg_gen_qemu_st_i128(data, EA, ctx->mem_idx, MO_BE | MO_128); return true; } @@ -2175,13 +2177,13 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, int rt, bool store, bool paired) { TCGv ea; -TCGv_i64 xt; +TCGv_i128 data; MemOp mop; int rt1, rt2; -xt = tcg_temp_new_i64(); +data = tcg_temp_new_i128(); -mop = DEF_MEMOP(MO_UQ); +mop = DEF_MEMOP(MO_128); gen_set_access_type(ctx, ACCESS_INT); ea = do_ea_calc(ctx, ra, displ); @@ -2195,32 +2197,20 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, } if (store) { -get_cpu_vsr(xt, rt1, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt1, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt1); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); if (paired) { gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, !ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); -gen_addr_add(ctx, ea, ea, 8); -get_cpu_vsr(xt, rt2, ctx->le_mode); -tcg_gen_qemu_st_i64(xt, ea, ctx->mem_idx, mop); +get_vsr_full(data, rt2); +tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, mop); } } else { -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt1, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt1, data); if (paired) { gen_addr_add(ctx, ea, ea, 8); -tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, !ctx->le_mode); -gen_addr_add(ctx, ea, ea, 8); 
-tcg_gen_qemu_ld_i64(xt, ea, ctx->mem_idx, mop); -set_cpu_vsr(rt2, xt, ctx->le_mode); +tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, mop); +set_vsr_full(rt2, data); } } return true; -- 2.39.3
[PATCH 0/3] target/ppc: Update vector insns to use 128 bit
Updating a bunch of VMX and VSX storage access instructions to use tcg_gen_qemu_ld/st_i128 instead of using tcg_gen_qemu_ld/st_i64 in succession; as suggested by Richard, in my decodetree patches. Plus some minor clean-ups to facilitate the above in case of VMX insns. Chinmay Rath (3): target/ppc: Move get/set_avr64 functions to vmx-impl.c.inc. target/ppc: Update VMX storage access insns to use tcg_gen_qemu_ld/st_i128. target/ppc : Update VSX storage access insns to use tcg_gen_qemu _ld/st_i128. target/ppc/translate.c | 10 - target/ppc/translate/vmx-impl.c.inc | 50 +++-- target/ppc/translate/vsx-impl.c.inc | 68 - 3 files changed, 57 insertions(+), 71 deletions(-) -- 2.39.3
[PATCH v3 0/4] target/ppc: Move VSX storage access and compare
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Change log : v3: - Patch 2/4 : Updated the added function do_ea_calc_ra to return modifiable EA, after discussions with Richard. v2: https://lore.kernel.org/qemu-devel/20240613093318.314913-1-ra...@linux.ibm.com/ - Addressed comments by Richard in v1 : - Patch 2/4 : Handled proper ea calculation in narrow mode. Also created a new function for ea calculation instead of inlining, for later use by (p){lx,stx}vp insns. - Patch 4/4 : Unified helper calls. - Retained Richard's "Reviewed-by" in patches 1, 3 and 4. v1: https://lore.kernel.org/qemu-devel/20240607144921.726730-1-ra...@linux.ibm.com/ Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate.c | 15 + target/ppc/translate/vsx-impl.c.inc | 416 ++-- target/ppc/translate/vsx-ops.c.inc | 49 7 files changed, 287 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH v3 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_ty
[PATCH v3 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h | 16 +- target/ppc/insn32.decode| 12 target/ppc/fpu_helper.c | 16 +- target/ppc/translate/vsx-impl.c.inc | 46 + target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, 
vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP
[PATCH v3 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 46bab49215..e0fb4bad92 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_
[PATCH v3 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also added a new function do_ea_calc_ra to calculate the effective address : EA <- (RA == 0) ? 0 : GPR[RA], which is now used by the above-said insns, and shall be used later by (p){lx, stx}vp insns. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate.c | 15 + target/ppc/translate/vsx-impl.c.inc | 94 - target/ppc/translate/vsx-ops.c.inc | 8 --- 6 files changed, 94 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . 
@X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. @XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..f0647da551 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3096,6 +3096,7 @@ static inline void gen_align_no_le(DisasContext *ctx) (ctx->opcode & 0x03FF) | POWERPC_EXCP_ALIGN_LE); } +/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) { TCGv ea = tcg_temp_new(); @@ -3110,6 +3111,20 @@ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) return ea; } +/* EA <- (ra == 0) ? 
0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ +TCGv EA = tcg_temp_new(); +if (!ra) { +tcg_gen_movi_tl(EA, 0); +} else if (NARROW_MODE(ctx)) { +tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); +} else { +tcg_gen_mov_tl(EA, cpu_gpr[ra]); +} +return EA; +} + /*** Integer load ***/ #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..46bab49215 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,72 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA;
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
On 6/17/24 23:27, Richard Henderson wrote: On 6/17/24 04:51, Chinmay Rath wrote: Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? If you need to modify the resulting EA, then you also need to make a copy for 0. Please ignore my previous response. I think do_ea_calc_ra should allow modification to the resulting EA, hence below change appears more appropriate to me : /* EA <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA = tcg_temp_new(); if (!ra) { tcg_gen_movi_tl(EA, 0); return EA; } if (NARROW_MODE(ctx)) { tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { tcg_gen_mov_tl(EA, cpu_gpr[ra]); } return EA; } If that's what's needed by the callers of do_ea_calc_ra, then yes. You can drop the first return EA and use else if instead. Sure. I shall stick to keeping EA modifiable, (even though it is not modified by the callers in this patch), to allow its proper usage by (p){lx, stx}vp insns in future. Thanks & Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
On 6/17/24 23:15, Richard Henderson wrote: On 6/17/24 03:40, Chinmay Rath wrote: static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA; if (!ra) { return tcg_constant_tl(0); } if (NARROW_MODE(ctx)) { EA = tcg_temp_new(); tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { return cpu_gpr[ra]; } return EA; } If you need to modify the resulting EA, then you also need to make a copy for 0. Hey, didn't properly get what you meant here. Did you mean : Since I'm using a tcg_constant for 0, if the EA is to be modified later, this constant would be an issue, in which case, I should make a copy for it ?? Yes. Considering that, there are no tcg level modifications with this EA. Ok, good. However, the underlying helper method, which considers this EA as a target_ulong type does modify it, which I don't think should be an issue. Correct, that's fine. Awesome ! Thanks for the clarification. Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? If you need to modify the resulting EA, then you also need to make a copy for 0. Please ignore my previous response. I think do_ea_calc_ra should allow modification to the resulting EA, hence below change appears more appropriate to me : /* EA <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA = tcg_temp_new(); if (!ra) { tcg_gen_movi_tl(EA, 0); return EA; } if (NARROW_MODE(ctx)) { tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { tcg_gen_mov_tl(EA, cpu_gpr[ra]); } return EA; } Let me know your thoughts. Thanks & Regards, Chinmay r~
Re: [PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/17/24 00:43, Richard Henderson wrote: On 6/13/24 02:33, Chinmay Rath wrote: +/* EA <- (ra == 0) ? 0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ + TCGv EA; + if (!ra) { + EA = tcg_constant_tl(0); + return EA; + } + EA = tcg_temp_new(); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[ra]); Why are you making a copy, rather than just returning cpu_gpr[ra]? True, this tcg move is redundant. Was carried away to maintain uniformity with the original do_ea_calc function. My bad! This can rather just be : /* ea <- (ra == 0) ? 0 : GPR[ra] */ static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) { TCGv EA; if (!ra) { return tcg_constant_tl(0); } if (NARROW_MODE(ctx)) { EA = tcg_temp_new(); tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); } else { return cpu_gpr[ra]; } return EA; } If you need to modify the resulting EA, then you also need to make a copy for 0. Hey, didn't properly get what you meant here. Did you mean : Since I'm using a tcg_constant for 0, if the EA is to be modified later, this constant would be an issue, in which case, I should make a copy for it ?? Considering that, there are no tcg level modifications with this EA. However, the underlying helper method, which considers this EA as a target_ulong type does modify it, which I don't think should be an issue. Please let me know if I missed something. Thanks & Regards, Chinmay r~
[PATCH v2 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h | 16 +- target/ppc/insn32.decode| 12 target/ppc/fpu_helper.c | 16 +- target/ppc/translate/vsx-impl.c.inc | 46 + target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 48 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, 
vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP
[PATCH v2 0/4] Move VSX storage access and compare insns to decodetree.
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Change log : v2: - Addressed comments by Richard in v1 - Patch 2/4 : Handled proper ea calculation in narrow mode. Also created a new function for ea calculation instead of inlining, for later use by (p){lx,stx}vp insns. - Patch 4/4 : Unified helper calls. - Retained Richard's "Reviewed-by" in patches 1, 3 and 4. v1: https://lore.kernel.org/qemu-devel/20240607144921.726730-1-ra...@linux.ibm.com/ Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate.c | 18 ++ target/ppc/translate/vsx-impl.c.inc | 416 ++-- target/ppc/translate/vsx-ops.c.inc | 49 7 files changed, 290 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH v2 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_ty
[PATCH v2 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 46bab49215..e0fb4bad92 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_
[PATCH v2 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also added a new function to calculate the effective address : EA <- (RA == 0) ? 0 : GPR[RA], which is now used by the above-said insns, and shall be used later by (p){lx, stx}vp insns. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate.c | 18 ++ target/ppc/translate/vsx-impl.c.inc | 94 - target/ppc/translate/vsx-ops.c.inc | 8 --- 6 files changed, 97 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . 
@X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. @XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..a1f2f4fbda 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3096,6 +3096,7 @@ static inline void gen_align_no_le(DisasContext *ctx) (ctx->opcode & 0x03FF) | POWERPC_EXCP_ALIGN_LE); } +/* EA <- {(ra == 0) ? 0 : GPR[ra]} + displ */ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) { TCGv ea = tcg_temp_new(); @@ -3110,6 +3111,23 @@ static TCGv do_ea_calc(DisasContext *ctx, int ra, TCGv displ) return ea; } +/* EA <- (ra == 0) ? 
0 : GPR[ra] */ +static TCGv do_ea_calc_ra(DisasContext *ctx, int ra) +{ +TCGv EA; +if (!ra) { +EA = tcg_constant_tl(0); +return EA; +} +EA = tcg_temp_new(); +if (NARROW_MODE(ctx)) { +tcg_gen_ext32u_tl(EA, cpu_gpr[ra]); +} else { +tcg_gen_mov_tl(EA, cpu_gpr[ra]); +} +return EA; +} + /*** Integer load ***/ #define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) #define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..46bab49215 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,72 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA;
Re: [PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Hi Richard, My apologies for the ill formatted reply in this patch series. Just realized it now. The cliched 'Tab' issue with the mail client XD. On 6/7/24 21:16, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) Because the ops are identical, Reviewed-by: Richard Henderson But you really should update these to use tcg_gen_qemu_ld/st_i128 with the proper atomicity flags. This will fix an existing bug... ^ Surely Richard, I have noted this suggestion from earlier patch and plan to do this, and a few others which I couldn't implement earlier, along with some clean-ups this week. I refrained from doing it with the decodetree movement, to take time to properly understand and test. Should send out those patches soon. Thanks & Regards, Chinmay +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); + set_cpu_vsr(a->rt, t0, true); where the vector register is partially modified ... tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); before a fault from the second load is recognized. Similarly for stores leaving memory partially modified. r~
Re: [PATCH 4/4] target/ppc: Move VSX fp compare insns to decodetree.
On 6/7/24 21:25, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: +static bool do_cmp(DisasContext *ctx, arg_XX3_rc *a, + void (*helper)(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_i32 ignored; + TCGv_ptr xt, xa, xb; + REQUIRE_VSX(ctx); + xt = gen_vsr_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + if (a->rc) { + helper(cpu_crf[6], tcg_env, xt, xa, xb); + } else { + ignored = tcg_temp_new_i32(); + helper(ignored, tcg_env, xt, xa, xb); + } Better to unify the helper call. E.g. dest = a->rc ? cpu_crf[6] : tcg_temp_new_i32(); helper(dest, ...) ^ Sure Richard, will do in v2. Thanks & Regards, Chinmay Anyway, Reviewed-by: Richard Henderson r~
Re: [PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Hi Richard, On 6/7/24 21:16, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) Because the ops are identical, Reviewed-by: Richard Henderson But you really should update these to use tcg_gen_qemu_ld/st_i128 with the proper atomicity flags. This will fix an existing bug... ^ Surely Richard, I have noted this suggestion of yours from an earlier patch, and plan to do this change and implement a few of your other suggestions, which I couldn't implement earlier, along with some clean-ups this week. I refrained from doing it with the decodetree movement, to take proper time to understand and test. Should send out those patches soon. Thanks & Regards, Chinmay +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; + + REQUIRE_VSX(ctx); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); + set_cpu_vsr(a->rt, t0, true); where the vector register is partially modified ... tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); before a fault from the second load is recognized. Similarly for stores leaving memory partially modified. r~
Re: [PATCH 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Hi Richard, On 6/7/24 21:11, Richard Henderson wrote: On 6/7/24 07:49, Chinmay Rath wrote: +static bool do_ld_st_vl(DisasContext *ctx, arg_X *a, + void (*helper)(TCGv_ptr, TCGv, TCGv_ptr, TCGv)) +{ + TCGv EA; + TCGv_ptr xt; + if (a->rt < 32) { + REQUIRE_VSX(ctx); + } else { + REQUIRE_VECTOR(ctx); + } + xt = gen_vsr_ptr(a->rt); + gen_set_access_type(ctx, ACCESS_INT); + + if (a->ra) { + EA = tcg_temp_new(); + tcg_gen_mov_tl(EA, cpu_gpr[a->ra]); + } else { + EA = tcg_constant_tl(0); + } + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, EA); ra == 0, narrow mode, will crash, due to write into constant 0. Obviously 0 does not need extending, so this could be if (!a->ra) { ea = constant 0; } else if (narrow mode) { ea = tcg_temp_new(); tcg_gen_ext32u_tl(ea, cpu_gpr[a->ra]); } else { ra = cpu_gpr[a->ra]; } ^ Thank you Richard, will take care in v2. Aren't there existing helper functions for computing this address? And if not, better to create one. ^ The calculation of effective address in these instructions is slightly different than the others, for which helper function exist : EA for these insns : EA ← (RA=0) ? 0 : GPR[RA] EA for rest storage access insns : EA ← ((RA=0) ? 0 : GPR[RA]) + GPR[RB] This is why I could not reuse that function. Also, this calculation of EA is limited to these 4 insns above, and only 2 others (prefixed insns), which is why I did not create a new function for this, considering it won't be reused for any other insn. Please let me know if I should create a new function in this case as well. Thanks and Regards, Chinmay r~
[PATCH 0/4] target/ppc: Move VSX storage access and compare
Moving all remaining VSX storage access instructions and all VSX compare instructions of XX3 form with RC field, to decodetree specification. Chinmay Rath (4): target/ppc: Moving VSX scalar storage access insns to decodetree. target/ppc: Move VSX vector with length storage access insns to decodetree. target/ppc: Move VSX vector storage access insns to decodetree. target/ppc: Move VSX fp compare insns to decodetree. target/ppc/helper.h | 24 +- target/ppc/insn32.decode| 41 +++ target/ppc/fpu_helper.c | 16 +- target/ppc/mem_helper.c | 8 +- target/ppc/translate/vsx-impl.c.inc | 430 ++-- target/ppc/translate/vsx-ops.c.inc | 49 6 files changed, 286 insertions(+), 282 deletions(-) -- 2.39.3
[PATCH 3/4] target/ppc: Move VSX vector storage access insns to decodetree.
Moving the following instructions to decodetree specification: lxv{b16, d2, h8, w4, ds, ws}x : X-form stxv{b16, d2, h8, w4}x : X-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 10 ++ target/ppc/translate/vsx-impl.c.inc | 199 target/ppc/translate/vsx-ops.c.inc | 12 -- 3 files changed, 97 insertions(+), 124 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 445fdb341f..3d31ef52f8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,9 +805,19 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVB16X 01 . . . 1101101100 . @X_TSX +LXVD2X 01 . . . 1101001100 . @X_TSX +LXVH8X 01 . . . 1100101100 . @X_TSX +LXVW4X 01 . . . 111100 . @X_TSX +LXVDSX 01 . . . 0101001100 . @X_TSX +LXVWSX 01 . . . 0101101100 . @X_TSX LXVL01 . . . 011101 . @X_TSX LXVLL 01 . . . 0100101101 . @X_TSX +STXVB16X01 . . . 101100 . @X_TSX +STXVD2X 01 . . . 001100 . @X_TSX +STXVH8X 01 . . . 1110101100 . @X_TSX +STXVW4X 01 . . . 1110001100 . @X_TSX STXVL 01 . . . 0110001101 . @X_TSX STXVLL 01 . . . 0110101101 . 
@X_TSX diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 695b75ded9..739b5ad915 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -46,41 +46,37 @@ TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); -static void gen_lxvd2x(DisasContext *ctx) +static bool trans_LXVD2X(DisasContext *ctx, arg_LXVD2X *a) { TCGv EA; TCGv_i64 t0; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + t0 = tcg_temp_new_i64(); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, true); +set_cpu_vsr(a->rt, t0, true); tcg_gen_addi_tl(EA, EA, 8); gen_qemu_ld64_i64(ctx, t0, EA); -set_cpu_vsr(xT(ctx->opcode), t0, false); +set_cpu_vsr(a->rt, t0, false); +return true; } -static void gen_lxvw4x(DisasContext *ctx) +static bool trans_LXVW4X(DisasContext *ctx, arg_LXVW4X *a) { TCGv EA; -TCGv_i64 xth; -TCGv_i64 xtl; -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +TCGv_i64 xth, xtl; + +REQUIRE_VSX(ctx); +REQUIRE_INSNS_FLAGS2(ctx, VSX); + xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); - gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); if (ctx->le_mode) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -97,55 +93,45 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEUQ); } -set_cpu_vsr(xT(ctx->opcode), xth, true); -set_cpu_vsr(xT(ctx->opcode), xtl, false); +set_cpu_vsr(a->rt, xth, true); +set_cpu_vsr(a->rt, xtl, false); +return true; } -static 
void gen_lxvwsx(DisasContext *ctx) +static bool trans_LXVWSX(DisasContext *ctx, arg_LXVWSX *a) { TCGv EA; TCGv_i32 data; -if (xT(ctx->opcode) < 32) { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); -return; -} +if (a->rt < 32) { +REQUIRE_VSX(ctx); } else { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); -return; -} +REQUIRE_VECTOR(ctx); } +REQUIRE_INSNS_FLAGS2(ctx, ISA300); gen_set_access_type(ctx, ACCESS_INT); -EA = tcg_temp_new(); - -gen_addr_reg_index(ctx, EA); - +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); data = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(data, EA, ctx->mem_idx, DEF_MEMOP(MO_UL)); -
[PATCH 1/4] target/ppc: Moving VSX scalar storage access insns to decodetree.
Moving the following instructions to decodetree specification : lxs{d, iwa, ibz, ihz, iwz, sp}x : X-form stxs{d, ib, ih, iw, sp}x: X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 13 + target/ppc/translate/vsx-impl.c.inc | 79 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 49 insertions(+), 54 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 30d6f9f750..88753c75e1 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -792,6 +792,19 @@ STXVRHX 01 . . . 0010101101 . @X_TSX STXVRWX 01 . . . 0011001101 . @X_TSX STXVRDX 01 . . . 0011101101 . @X_TSX +LXSDX 01 . . . 1001001100 . @X_TSX +LXSIWAX 01 . . . 0001001100 . @X_TSX +LXSIBZX 01 . . . 111101 . @X_TSX +LXSIHZX 01 . . . 1100101101 . @X_TSX +LXSIWZX 01 . . . 001100 . @X_TSX +LXSSPX 01 . . . 101100 . @X_TSX + +STXSDX 01 . . . 1011001100 . @X_TSX +STXSIBX 01 . . . 1110001101 . @X_TSX +STXSIHX 01 . . . 1110101101 . @X_TSX +STXSIWX 01 . . . 0010001100 . @X_TSX +STXSSPX 01 . . . 1010001100 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index a769f199ce..de2a26a213 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -24,30 +24,27 @@ static inline TCGv_ptr gen_acc_ptr(int reg) return r; } -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT); \ -EA = tcg_temp_new(); \ -gen_addr_reg_index(ctx, EA); \ -gen_qemu_##operation(ctx, t0, EA);\ -set_cpu_vsr(xT(ctx->opcode), t0, true); \ -/* NOTE: cpu_vsrl is undefined */ \ +static bool do_lxs(DisasContext *ctx, arg_X *a, + void (*op)(DisasContext *, TCGv_i64, TCGv)) +{ +TCGv EA; +TCGv_i64 t0; +REQUIRE_VSX(ctx); +t0 = tcg_temp_new_i64(); +gen_set_access_type(ctx, ACCESS_INT); +EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); +op(ctx, t0, EA); +set_cpu_vsr(a->rt, t0, true); +/* NOTE: cpu_vsrl is undefined */ +return true; } -VSX_LOAD_SCALAR(lxsdx, ld64_i64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) -VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) -VSX_LOAD_SCALAR(lxsspx, ld32fs) +TRANS_FLAGS2(VSX, LXSDX, do_lxs, gen_qemu_ld64_i64); +TRANS_FLAGS2(VSX207, LXSIWAX, do_lxs, gen_qemu_ld32s_i64); +TRANS_FLAGS2(ISA300, LXSIBZX, do_lxs, gen_qemu_ld8u_i64); +TRANS_FLAGS2(ISA300, LXSIHZX, do_lxs, gen_qemu_ld16u_i64); +TRANS_FLAGS2(VSX207, LXSIWZX, do_lxs, gen_qemu_ld32u_i64); +TRANS_FLAGS2(VSX207, LXSSPX, do_lxs, gen_qemu_ld32fs); static void gen_lxvd2x(DisasContext *ctx) { @@ -266,29 +263,25 @@ VSX_VECTOR_LOAD_STORE_LENGTH(stxvl) VSX_VECTOR_LOAD_STORE_LENGTH(stxvll) #endif -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_i64 t0; \ -if (unlikely(!ctx->vsx_enabled)) {\ -gen_exception(ctx, 
POWERPC_EXCP_VSXU);\ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -gen_set_access_type(ctx, ACCESS_INT);
[PATCH 4/4] target/ppc: Move VSX fp compare insns to decodetree.
Moving the following instructions to decodetree specification: xvcmp{eq, gt, ge, ne}{s, d}p: XX3-form The changes were verified by validating that the tcg-ops generated for those instructions remain the same which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 16 - target/ppc/insn32.decode| 12 +++ target/ppc/fpu_helper.c | 16 - target/ppc/translate/vsx-impl.c.inc | 50 ++--- target/ppc/translate/vsx-ops.c.inc | 18 --- 5 files changed, 52 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 510ce76524..3fd849628a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -473,10 +473,10 @@ DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNEDP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvdpsp, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvdpsxws, void, env, vsr, vsr) @@ -507,10 +507,10 @@ DEF_HELPER_5(xvnmaddsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubsp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(XVMAXSP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XVMINSP, void, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpeqsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpgesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) 
-DEF_HELPER_FLAGS_4(xvcmpgtsp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) -DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPEQSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPGTSP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) +DEF_HELPER_FLAGS_4(XVCMPNESP, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 3d31ef52f8..bcaf03f24c 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -217,6 +217,9 @@ xt xa xb @XX3.. . . . ...xt=%xx_xt xa=%xx_xa xb=%xx_xb +_rc xt xa xb rc:bool +@XX3_rc .. . . . rc:1 ... ... _rc xt=%xx_xt xa=%xx_xa xb=%xx_xb + # 32 bit GER instructions have all mask bits considered 1 _XX3 xa xb xt pmsk xmsk ymsk %xx_at 23:3 @@ -923,6 +926,15 @@ XSCMPEQQP 11 . . . 0001000100 - @X XSCMPGEQP 11 . . . 0011000100 - @X XSCMPGTQP 11 . . . 0011100100 - @X +XVCMPEQSP 00 . . . . 111 ... @XX3_rc +XVCMPGTSP 00 . . . . 1001011 ... @XX3_rc +XVCMPGESP 00 . . . . 1010011 ... @XX3_rc +XVCMPNESP 00 . . . . 1011011 ... @XX3_rc +XVCMPEQDP 00 . . . . 1100011 ... @XX3_rc +XVCMPGTDP 00 . . . . 1101011 ... @XX3_rc +XVCMPGEDP 00 . . . . 1110011 ... @XX3_rc +XVCMPNEDP 00 . . . . 011 ... @XX3_rc + XSMAXDP 00 . . . 1010 ... @XX3 XSMINDP 00 . . . 10101000 ... 
@XX3 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index a013160644..5a300a3c86 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2624,14 +2624,14 @@ uint32_t helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ return crf6; \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) -VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) -VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) +VSX_CMP(XVCMPEQDP, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(XVCMPGEDP, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP(XVCMPGTDP, 2, float64, VsrD(i), lt, 1, 1
[PATCH 2/4] target/ppc: Move VSX vector with length storage access insns to decodetree.
Moving the following instructions to decodetree specification : {l, st}xvl(l) : X-form The changes were verified by validating that the tcg-ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +-- target/ppc/insn32.decode| 6 ++ target/ppc/mem_helper.c | 8 +-- target/ppc/translate/vsx-impl.c.inc | 104 target/ppc/translate/vsx-ops.c.inc | 8 --- 5 files changed, 89 insertions(+), 45 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 3b4a0c4674..510ce76524 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -274,10 +274,10 @@ DEF_HELPER_3(stvebx, void, env, avr, tl) DEF_HELPER_3(stvehx, void, env, avr, tl) DEF_HELPER_3(stvewx, void, env, avr, tl) #if defined(TARGET_PPC64) -DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvl, void, env, tl, vsr, tl) -DEF_HELPER_4(stxvll, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(LXVLL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVL, void, env, tl, vsr, tl) +DEF_HELPER_4(STXVLL, void, env, tl, vsr, tl) #endif DEF_HELPER_4(vsumsws, void, env, avr, avr, avr) DEF_HELPER_4(vsum2sws, void, env, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 88753c75e1..445fdb341f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -805,6 +805,12 @@ STXSIHX 01 . . . 1110101101 . @X_TSX STXSIWX 01 . . . 0010001100 . @X_TSX STXSSPX 01 . . . 1010001100 . @X_TSX +LXVL01 . . . 011101 . @X_TSX +LXVLL 01 . . . 0100101101 . @X_TSX + +STXVL 01 . . . 0110001101 . @X_TSX +STXVLL 01 . . . 0110101101 . @X_TSX + ## VSX Vector Binary Floating-Point Sign Manipulation Instructions XVABSDP 00 . 0 . 111011001 .. 
@XX2 diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..dec1b25eb8 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -467,8 +467,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ *xt = t;\ } -VSX_LXVL(lxvl, 0) -VSX_LXVL(lxvll, 1) +VSX_LXVL(LXVL, 0) +VSX_LXVL(LXVLL, 1) #undef VSX_LXVL #define VSX_STXVL(name, lj) \ @@ -496,8 +496,8 @@ void helper_##name(CPUPPCState *env, target_ulong addr, \ } \ } -VSX_STXVL(stxvl, 0) -VSX_STXVL(stxvll, 1) +VSX_STXVL(STXVL, 0) +VSX_STXVL(STXVLL, 1) #undef VSX_STXVL #undef GET_NB #endif /* TARGET_PPC64 */ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index de2a26a213..695b75ded9 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -232,36 +232,82 @@ static void gen_lxvb16x(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#ifdef TARGET_PPC64 -#define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv EA; \ -TCGv_ptr xt; \ - \ -if (xT(ctx->opcode) < 32) {\ -if (unlikely(!ctx->vsx_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VSXU); \ -return;\ -} \ -} else { \ -if (unlikely(!ctx->altivec_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VPU); \ -return;\ -} \ -} \ -EA = tcg_temp_new(); \ -xt = gen_vsr_ptr(xT(ctx->opcode)); \ -gen_set_access_type(ctx, ACCESS_INT); \ -gen_addr_register(ctx, EA);\ -gen_helper_##name(tcg_env, EA, xt, cpu_gpr[rB(ctx->opcode)]); \ -} - -VSX_VECTOR_LOAD_STORE_LENGTH(lxvl) -VSX_VECTOR_LOAD_STORE_LENGTH(lxvll) -VSX_VECTOR_LO
[PATCH 3/3] target/ppc: Move VSX logical instructions to decodetree.
Moving the following instructions to decodetree specification : xxl{and, andc, or, orc, nor, xor, nand, eqv}: XX3-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 11 target/ppc/translate/vsx-impl.c.inc | 39 + target/ppc/translate/vsx-ops.c.inc | 11 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 9ed8e33cc8..30d6f9f750 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -937,6 +937,17 @@ XXMFACC 01 ... -- 0 - 0010110001 - @X_a XXMTACC 01 ... -- 1 - 0010110001 - @X_a XXSETACCZ 01 ... -- 00011 - 0010110001 - @X_a +## VSX Vector Logical instructions + +XXLAND 00 . . . 1010 ... @XX3 +XXLANDC 00 . . . 10001010 ... @XX3 +XXLOR 00 . . . 10010010 ... @XX3 +XXLXOR 00 . . . 10011010 ... @XX3 +XXLNOR 00 . . . 10100010 ... @XX3 +XXLEQV 00 . . . 10111010 ... @XX3 +XXLNAND 00 . . . 10110010 ... @XX3 +XXLORC 00 . . . 10101010 ... @XX3 + ## VSX GER instruction XVI4GER8111011 ... -- . . 
00100011 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0d16e0f02b..a769f199ce 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1573,26 +1573,24 @@ static void gen_xxbrw(DisasContext *ctx) set_cpu_vsr(xT(ctx->opcode), xtl, false); } -#define VSX_LOGICAL(name, vece, tcg_op) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{\ -if (unlikely(!ctx->vsx_enabled)) { \ -gen_exception(ctx, POWERPC_EXCP_VSXU); \ -return; \ -}\ -tcg_op(vece, vsr_full_offset(xT(ctx->opcode)), \ - vsr_full_offset(xA(ctx->opcode)), \ - vsr_full_offset(xB(ctx->opcode)), 16, 16);\ -} - -VSX_LOGICAL(xxland, MO_64, tcg_gen_gvec_and) -VSX_LOGICAL(xxlandc, MO_64, tcg_gen_gvec_andc) -VSX_LOGICAL(xxlor, MO_64, tcg_gen_gvec_or) -VSX_LOGICAL(xxlxor, MO_64, tcg_gen_gvec_xor) -VSX_LOGICAL(xxlnor, MO_64, tcg_gen_gvec_nor) -VSX_LOGICAL(xxleqv, MO_64, tcg_gen_gvec_eqv) -VSX_LOGICAL(xxlnand, MO_64, tcg_gen_gvec_nand) -VSX_LOGICAL(xxlorc, MO_64, tcg_gen_gvec_orc) +static bool do_logical_op(DisasContext *ctx, arg_XX3 *a, unsigned vece, +void (*helper)(unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)) +{ +REQUIRE_VSX(ctx); +helper(vece, vsr_full_offset(a->xt), +vsr_full_offset(a->xa), +vsr_full_offset(a->xb), 16, 16); +return true; +} + +TRANS_FLAGS2(VSX, XXLAND, do_logical_op, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS2(VSX, XXLANDC, do_logical_op, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS2(VSX, XXLOR, do_logical_op, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS2(VSX, XXLXOR, do_logical_op, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS2(VSX, XXLNOR, do_logical_op, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(VSX207, XXLEQV, do_logical_op, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(VSX207, XXLNAND, do_logical_op, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(VSX207, XXLORC, do_logical_op, MO_64, tcg_gen_gvec_orc); #define VSX_XXMRG(name, high) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2899,4 +2897,3 @@ 
TRANS64(PMXVF64GERNN, do_ger, gen_helper_XVF64GERNN) #undef GEN_XX2IFORM #undef GEN_XX3_RC_FORM #undef GEN_XX3FORM_DM -#undef VSX_LOGICAL diff --git a/target/ppc/translate/vsx-ops.c.inc b/target/ppc/translate/vsx-ops.c.inc index 18510d757d..3c0a70cb7c 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -263,17 +263,6 @@ GEN_XX2FORM_EO(xvcvhpsp, 0x16, 0x1D, 0x18, PPC2_ISA300), GEN_XX2FORM_EO(xvcvsphp, 0x16, 0x1D, 0x19, PPC2_ISA300), GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300), -#define VSX_LOGICAL(name, opc2, opc3, fl2) \ -GEN_XX3FORM(name, opc2, opc3, fl2) - -VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX), -VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX), -VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX), -VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX), -VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX), -VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207), -VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), -VSX_LOGICAL(xxlorc
[PATCH 1/3] target/ppc: Move ISA300 flag check out of do_helper_XX3.
Moving the PPC2_ISA300 flag check out of the do_helper_XX3 method in vsx-impl.c.inc so that the helper can be used with other instructions as well. Signed-off-by: Chinmay Rath --- target/ppc/translate/vsx-impl.c.inc | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 0266f09119..6025119e5b 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2712,8 +2712,6 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { TCGv_ptr xt, xa, xb; - -REQUIRE_INSNS_FLAGS2(ctx, ISA300); REQUIRE_VSX(ctx); xt = gen_vsr_ptr(a->xt); @@ -2724,13 +2722,13 @@ static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, return true; } -TRANS(XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) -TRANS(XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) -TRANS(XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) -TRANS(XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) -TRANS(XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) -TRANS(XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) -TRANS(XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) +TRANS_FLAGS2(ISA300, XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) +TRANS_FLAGS2(ISA300, XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) +TRANS_FLAGS2(ISA300, XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) +TRANS_FLAGS2(ISA300, XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) +TRANS_FLAGS2(ISA300, XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) +TRANS_FLAGS2(ISA300, XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) +TRANS_FLAGS2(ISA300, XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) static bool do_helper_X(arg_X *a, void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) -- 2.39.3
[PATCH 0/3] target/ppc: Moving VSX insns to decodetree.
Moving a number of VSX arithmetic, max/min and logical instructions to decodetree specification. Also moving the ISA300 flag check out of the do_helper_XX3 method in the vsx-impl.c.inc file, to make it usable for a larger number of instructions. Chinmay Rath (3): target/ppc: Move ISA300 flag check out of do_helper_XX3. target/ppc: Move VSX arithmetic and max/min insns to decodetree. target/ppc: Move VSX logical instructions to decodetree. target/ppc/helper.h | 44 +-- target/ppc/insn32.decode| 41 ++ target/ppc/fpu_helper.c | 44 +-- target/ppc/translate/vsx-impl.c.inc | 116 target/ppc/translate/vsx-ops.c.inc | 33 5 files changed, 136 insertions(+), 142 deletions(-) -- 2.39.3
[PATCH 2/3] target/ppc: Move VSX arithmetic and max/min insns to decodetree.
Moving the following instructions to decodetree specification: x{s, v}{add, sub, mul, div}{s, d}p : XX3-form xs{max, min}dp, xv{max, min}{s, d}p : XX3-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 44 ++-- target/ppc/insn32.decode| 30 ++ target/ppc/fpu_helper.c | 44 ++-- target/ppc/translate/vsx-impl.c.inc | 63 + target/ppc/translate/vsx-ops.c.inc | 22 -- 5 files changed, 101 insertions(+), 102 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..3b4a0c4674 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -361,12 +361,12 @@ DEF_HELPER_FLAGS_4(bcdsr, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdtrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) DEF_HELPER_FLAGS_4(bcdutrunc, TCG_CALL_NO_RWG, i32, avr, avr, avr, i32) -DEF_HELPER_4(xsadddp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsaddqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xssubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmuldp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsmulqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_5(xsdivqp, void, env, i32, vsr, vsr, vsr) DEF_HELPER_3(xsredp, void, env, vsr, vsr) DEF_HELPER_3(xssqrtdp, void, env, vsr, vsr) @@ -389,8 +389,8 @@ DEF_HELPER_4(xscmpodp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpudp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpoqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpuqp, void, env, i32, vsr, vsr) -DEF_HELPER_4(xsmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINDP, void, env, vsr, vsr, 
vsr) DEF_HELPER_4(XSMAXCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMINCDP, void, env, vsr, vsr, vsr) DEF_HELPER_4(XSMAXJDP, void, env, vsr, vsr, vsr) @@ -436,10 +436,10 @@ DEF_HELPER_4(xsrqpxp, void, env, i32, vsr, vsr) DEF_HELPER_4(xssqrtqp, void, env, i32, vsr, vsr) DEF_HELPER_5(xssubqp, void, env, i32, vsr, vsr, vsr) -DEF_HELPER_4(xsaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xssubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xsresp, void, env, vsr, vsr) DEF_HELPER_2(xsrsp, i64, env, i64) DEF_HELPER_3(xssqrtsp, void, env, vsr, vsr) @@ -458,10 +458,10 @@ DEF_HELPER_5(XSNMADDQPO, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(XSNMSUBQP, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(XSNMSUBQPO, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvadddp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmuldp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVDP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvredp, void, env, vsr, vsr) DEF_HELPER_3(xvsqrtdp, void, env, vsr, vsr) DEF_HELPER_3(xvrsqrtedp, void, env, vsr, vsr) @@ -471,8 +471,8 @@ DEF_HELPER_5(xvmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvmsubdp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmadddp, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_5(xvnmsubdp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xvmaxdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmindp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMAXDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMINDP, void, env, vsr, vsr, vsr) 
DEF_HELPER_FLAGS_4(xvcmpeqdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgedp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_FLAGS_4(xvcmpgtdp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) @@ -492,10 +492,10 @@ DEF_HELPER_3(xvrdpim, void, env, vsr, vsr) DEF_HELPER_3(xvrdpip, void, env, vsr, vsr) DEF_HELPER_3(xvrdpiz, void, env, vsr, vsr) -DEF_HELPER_4(xvaddsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvsubsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvmulsp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xvdivsp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVADDSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVSUBSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVMULSP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XVDIVSP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xvresp, void, env, vsr, vsr) DEF_HELPER_3
[PATCH v2 2/2] target/ppc: Improve VMX integer add/sub saturate instructions.
No need for a full comparison; xor produces non-zero bits for QC just fine. Suggested-by: Richard Henderson Signed-off-by: Chinmay Rath --- target/ppc/translate/vmx-impl.c.inc | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index fdb283c1d4..152bcde0e3 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -2876,15 +2876,15 @@ TRANS(VADDCUW, do_vx_vaddsubcuw, 1) /* Integer Add/Sub Saturate Instructions */ static inline void do_vadd_vsub_sat ( -unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, +unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b, void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { TCGv_vec x = tcg_temp_new_vec_matching(t); norm_op(vece, x, a, b); sat_op(vece, t, a, b); -tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); -tcg_gen_or_vec(vece, sat, sat, x); +tcg_gen_xor_vec(vece, x, x, t); +tcg_gen_or_vec(vece, qc, qc, x); } static void gen_vadd_sat_u(unsigned vece, TCGv_vec t, TCGv_vec sat, @@ -2916,16 +2916,16 @@ static void gen_vsub_sat_s(unsigned vece, TCGv_vec t, TCGv_vec sat, * GVecGen4 struct variants. */ static const TCGOpcode vecop_list_sub_u[] = { -INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 +INDEX_op_sub_vec, INDEX_op_ussub_vec, 0 }; static const TCGOpcode vecop_list_sub_s[] = { -INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 +INDEX_op_sub_vec, INDEX_op_sssub_vec, 0 }; static const TCGOpcode vecop_list_add_u[] = { -INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 +INDEX_op_add_vec, INDEX_op_usadd_vec, 0 }; static const TCGOpcode vecop_list_add_s[] = { -INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 +INDEX_op_add_vec, INDEX_op_ssadd_vec, 0 }; static const GVecGen4 op_vsububs = { -- 2.39.3
[PATCH v2 0/2] target/ppc: Move VMX int add/sub saturate insns
Change Log : 1. Changes addressing all review comments by Richard in v1 : Having separate ops table instead of using a 3D array, using TRANS_FLAGS instead of sinking flag check in the helper, proper flag checks for the insns left behind from GEN_VXFORM_DUAL declarations. 2. Added a second patch to improve the moved insns as per suggestion by Richard in v1. v1: https://lore.kernel.org/qemu-devel/20240512093847.18099-1-ra...@linux.ibm.com/ Chinmay Rath (2): target/ppc: Move VMX integer add/sub saturate insns to decodetree. target/ppc: Improve VMX integer add/sub saturate instructions. target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 238 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 220 insertions(+), 99 deletions(-) -- 2.39.3
[PATCH v2 1/2] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Moving the following instructions to decodetree specification : v{add,sub}{u,s}{b,h,w}s : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 238 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 220 insertions(+), 99 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f397ef459a..2963e48fdc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -200,18 +200,18 @@ DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 847a2f4356..d7d77eaa99 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -697,6 +697,14 @@ VADDCUW 000100 . . . 0011000@VX VADDCUQ 000100 . . . 0010100@VX VADDUQM 000100 . . . 001@VX +VADDSBS 000100 . . . 011@VX +VADDSHS 000100 . . . 0110100@VX +VADDSWS 000100 . . . 0111000@VX + +VADDUBS 000100 . . . 010@VX +VADDUHS 000100 . . . 0100100@VX +VADDUWS 000100 . . . 0101000@VX + VADDEUQM000100 . . . . 00 @VA VADDECUQ000100 . . . . 01 @VA @@ -704,6 +712,14 @@ VSUBCUW 000100 . . . 1011000@VX VSUBCUQ 000100 . . . 1010100@VX VSUBUQM 000100 . . . 101@VX +VSUBSBS 000100 . . . 111@VX +VSUBSHS 000100 . . . 1110100@VX +VSUBSWS 000100 . . . 000@VX + +VSUBUBS 000100 . . . 110@VX +VSUBUHS 000100 . . . 1100100@VX +VSUBUWS 000100 . . . 1101000@VX + VSUBECUQ000100 . . . . 11 @VA VSUBEUQM000100 . . . . 
10 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..aec2d3d4ec 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -541,7 +541,7 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } #define VARITHSAT_DO(name, op, optype, cvt, element
Re: [PATCH 1/1] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Hi Richard, On 5/12/24 17:08, Richard Henderson wrote: On 5/12/24 11:38, Chinmay Rath wrote: @@ -2934,6 +2870,184 @@ static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add) return true; } +static inline void do_vadd_vsub_sat +( + unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b, + void (*norm_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec), + void (*sat_op)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + norm_op(vece, x, a, b); + sat_op(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); +} As a separate change, before or after, the cmp_vec may be simplified to xor_vec. Which means that INDEX_op_cmp_vec need not be probed in the vecop_lists. See https://lore.kernel.org/qemu-devel/20240506010403.6204-31-richard.hender...@linaro.org/ which is performing the same operation on AArch64. Noted ! Will do. +static bool do_vx_vadd_vsub_sat(DisasContext *ctx, arg_VX *a, + int sign, int vece, int add) +{ + static const TCGOpcode vecop_list_sub_u[] = { + INDEX_op_sub_vec, INDEX_op_ussub_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_sub_s[] = { + INDEX_op_sub_vec, INDEX_op_sssub_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_add_u[] = { + INDEX_op_add_vec, INDEX_op_usadd_vec, INDEX_op_cmp_vec, 0 + }; + static const TCGOpcode vecop_list_add_s[] = { + INDEX_op_add_vec, INDEX_op_ssadd_vec, INDEX_op_cmp_vec, 0 + }; + + static const GVecGen4 op[2][3][2] = { + { + { + { + .fniv = gen_vsub_sat_u, + .fno = gen_helper_VSUBUBS, + .opt_opc = vecop_list_sub_u, + .write_aofs = true, + .vece = MO_8 + }, . . . + { + .fniv = gen_vadd_sat_s, + .fno = gen_helper_VADDSWS, + .opt_opc = vecop_list_add_s, + .write_aofs = true, + .vece = MO_32 + }, + }, + }, + }; While this table is not wrong, I think it is clearer to have separate tables, one per operation, which are then passed in to a common expander. 
+ + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_4(avr_full_offset(a->vrt), offsetof(CPUPPCState, vscr_sat), + avr_full_offset(a->vra), avr_full_offset(a->vrb), 16, 16, + [sign][vece][add]); + + return true; +} + +TRANS(VSUBUBS, do_vx_vadd_vsub_sat, 0, MO_8, 0) I think it is clearer to use TRANS_FLAGS than to sink the ISA check into the helper. In general I seem to find the helper later gets reused for something else with a different ISA check. Thus static const TCGOpcode vecop_list_vsub_sat_u[] = { INDEX_op_sub_vec, INDEX_op_ussub_vec, 0 }; static const GVecGen4 op_vsububs = { .fno = gen_helper_VSUBUBS, .fniv = gen_vsub_sat_u, .opt_opc = vecop_list_vsub_sat_u, .write_aofs = true, .vece = MO_8 }; TRANS_FLAGS(VSUBUBS, do_vx_vadd_vsub_sat, _vsububs) static const GVecGen4 op_vsubuhs = { .fno = gen_helper_VSUBUHS, .fniv = gen_vsub_sat_u, .opt_opc = vecop_list_vsub_sat_u, .write_aofs = true, .vece = MO_16 }; TRANS_FLAGS(VSUBUHS, do_vx_vadd_vsub_sat, _vsubuhs) etc. Will add those changes in v2. -GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE), You are correct in your cover letter that this is not right. We should have been testing ISA300 for vmul10uq here. Thank you very much for the clarification ! +GEN_VXFORM(vmul10euq, 0, 9), And thus need GEN_VXFORM_300 here. +GEN_VXFORM(vmul10euq, 0, 9), +GEN_VXFORM(bcdcpsgn, 0, 13), +GEN_VXFORM(bcdadd, 0, 24), +GEN_VXFORM(bcdsub, 0, 25), ... +GEN_VXFORM(xpnd04_2, 0, 30), None of these are in the base ISA, so all need a flag check. r~ Thanks & Regards, Chinmay
Re: target/ppc: Move VMX int add/sub saturate insns to decodetree.
Hi Richard, On 5/12/24 15:59, Richard Henderson wrote: On 5/12/24 11:38, Chinmay Rath wrote: 1. vsubsbs and bcdtrunc : In this pair, bcdtrunc has the insn flag check PPC2_ISA300 in the vmx-impl file, within the GEN_VXFORM_DUAL macro, which does this flag check. However it also has this flag check in the vmx-ops file. Hence I have retained the same in the new entry in the vmx-ops file. This is consistent with the behaviour in done in the following commit : https://github.com/qemu/qemu/commit/b132be53a4ba6a0a40d5643d791822f958a36e53 So even though the flag check is removed from the vmx-impl file, it is retained in the vmx-ops file. All good here. 2. vadduhs and vmul10euq : In this pair, vmul10euq has the insn flag check PPC2_ISA300 in the vmx-impl file, check done within the GEN_VXFORM_DUAL macro. However the same flag was NOT originally present in the vmx-ops file, so I have NOT included in its new entry in the vmx-ops file. I have done this, following the behaviour done in the following commit : https://github.com/qemu/qemu/commit/c85929b2ddf6bbad737635c9b85213007ec043af So this flag check for vmul10euq is excluded now. Is this not a problem ? I feel that this leads to the flag check being skipped now, however this behaviour was followed in the above mentioned commit. This second link is for VAVG* and VABSD*. Yes you are correct that this second case was done incorrectly. Thankfully the mistake was fixed in the very next commit, when VABSD* was converted to decodetree as well. Thank you very much for the clarification ! r~ Regards, Chinmay
target/ppc: Move VMX int add/sub saturate insns to decodetree.
Moving the following instructions to decodetree : v{add,sub}{u,s}{b,h,w}s : VX-form However, the following instructions were paired using the GEN_VXFORM_DUAL macros in the vmx-impl and vmx-ops files : vaddubs and vmul10uq vadduhs and vmul10euq vaddshs and bcdcpsgn vsububs and bcdadd vsubuhs and bcdsub vsubsbs and bcdtrunc vsubsws and xpnd04_2 Out of those 7 above mentioned pairs, I have moved the first one of each pair and added respective entry of the 2nd one in the vmx-ops file. However, I lack some clarity on those flag checks added for those insns in the ops file. It would be great if someone sheds some light at this. The issue; let's take the following example : 1. vsubsbs and bcdtrunc : In this pair, bcdtrunc has the insn flag check PPC2_ISA300 in the vmx-impl file, within the GEN_VXFORM_DUAL macro, which does this flag check. However it also has this flag check in the vmx-ops file. Hence I have retained the same in the new entry in the vmx-ops file. This is consistent with the behaviour in done in the following commit : https://github.com/qemu/qemu/commit/b132be53a4ba6a0a40d5643d791822f958a36e53 So even though the flag check is removed from the vmx-impl file, it is retained in the vmx-ops file. All good here. 2. vadduhs and vmul10euq : In this pair, vmul10euq has the insn flag check PPC2_ISA300 in the vmx-impl file, check done within the GEN_VXFORM_DUAL macro. However the same flag was NOT originally present in the vmx-ops file, so I have NOT included in its new entry in the vmx-ops file. I have done this, following the behaviour done in the following commit : https://github.com/qemu/qemu/commit/c85929b2ddf6bbad737635c9b85213007ec043af So this flag check for vmul10euq is excluded now. Is this not a problem ? I feel that this leads to the flag check being skipped now, however this behaviour was followed in the above mentioned commit. 
Requesting anyone to please let me know why this behaviour was followed and how the flag checks are retained here, or if they are really skipped, why is it okay to skip them here ? Regards, Chinmay Chinmay Rath (1): target/ppc: Move VMX integer add/sub saturate insns to decodetree. target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 242 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 224 insertions(+), 99 deletions(-) -- 2.39.3
[PATCH 1/1] target/ppc: Move VMX integer add/sub saturate insns to decodetree.
Moving the following instructions to decodetree specification : v{add,sub}{u,s}{b,h,w}s : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 24 +-- target/ppc/insn32.decode| 16 ++ target/ppc/int_helper.c | 22 +-- target/ppc/translate/vmx-impl.c.inc | 242 target/ppc/translate/vmx-ops.c.inc | 19 +-- 5 files changed, 224 insertions(+), 99 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index f397ef459a..2963e48fdc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -200,18 +200,18 @@ DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(vslv, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_3(VPRTYBQ, TCG_CALL_NO_RWG, void, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsbs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubshs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubsws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vaddubs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vadduws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsububs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuhs, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) -DEF_HELPER_FLAGS_5(vsubuws, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSHS, TCG_CALL_NO_RWG, void, avr, 
avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBSWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VADDUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUBS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUHS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) +DEF_HELPER_FLAGS_5(VSUBUWS, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_3(VADDUQM, TCG_CALL_NO_RWG, void, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDECUQ, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) DEF_HELPER_FLAGS_4(VADDEUQM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 847a2f4356..d7d77eaa99 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -697,6 +697,14 @@ VADDCUW 000100 . . . 0011000@VX VADDCUQ 000100 . . . 0010100@VX VADDUQM 000100 . . . 001@VX +VADDSBS 000100 . . . 011@VX +VADDSHS 000100 . . . 0110100@VX +VADDSWS 000100 . . . 0111000@VX + +VADDUBS 000100 . . . 010@VX +VADDUHS 000100 . . . 0100100@VX +VADDUWS 000100 . . . 0101000@VX + VADDEUQM000100 . . . . 00 @VA VADDECUQ000100 . . . . 01 @VA @@ -704,6 +712,14 @@ VSUBCUW 000100 . . . 1011000@VX VSUBCUQ 000100 . . . 1010100@VX VSUBUQM 000100 . . . 101@VX +VSUBSBS 000100 . . . 111@VX +VSUBSHS 000100 . . . 1110100@VX +VSUBSWS 000100 . . . 000@VX + +VSUBUBS 000100 . . . 110@VX +VSUBUHS 000100 . . . 1100100@VX +VSUBUWS 000100 . . . 1101000@VX + VSUBECUQ000100 . . . . 11 @VA VSUBEUQM000100 . . . . 
10 @VA diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..aec2d3d4ec 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -541,7 +541,7 @@ VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); } #define VARITHSAT_DO(name, op, optype, cvt, element
Re: [PATCH 2/3] target/ppc: Fix embedded memory barriers
On 5/1/24 18:34, Nicholas Piggin wrote: Memory barriers are supposed to do something on BookE systems, these were probably just missed during MTTCG enablement, maybe no targets support SMP. Either way, add proper BookE implementations. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/translate/misc-impl.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/misc-impl.c.inc b/target/ppc/translate/misc-impl.c.inc index f58bf8b848..9226467f81 100644 --- a/target/ppc/translate/misc-impl.c.inc +++ b/target/ppc/translate/misc-impl.c.inc @@ -34,8 +34,7 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) */ if (!(ctx->insns_flags & PPC_MEM_SYNC)) { if (ctx->insns_flags & PPC_BOOKE) { -/* msync replaces sync on 440, interpreted as nop */ -/* XXX: this also catches e200 */ +tcg_gen_mb(bar | TCG_BAR_SC); return true; } @@ -80,6 +79,7 @@ static bool trans_EIEIO(DisasContext *ctx, arg_EIEIO *a) if (!(ctx->insns_flags & PPC_MEM_EIEIO)) { if ((ctx->insns_flags & PPC_BOOKE) || (ctx->insns_flags2 & PPC2_BOOKE206)) { +tcg_gen_mb(bar | TCG_BAR_SC); return true; } return false;
Re: [PATCH 3/3] target/ppc: Add ISA v3.1 variants of sync instruction
On 5/1/24 18:34, Nicholas Piggin wrote: POWER10 adds a new field to sync for store-store syncs, and some new variants of the existing syncs that include persistent memory. Implement the store-store syncs and plwsync/phwsync. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/insn32.decode | 6 ++-- target/ppc/translate/misc-impl.c.inc | 41 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 6b89804b15..a180380750 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -1001,7 +1001,7 @@ MSGSYNC 01 - - - 1101110110 - # Memory Barrier Instructions -_sync l -@X_sync .. ... l:2 . . .. . _sync -SYNC01 --- .. - - 1001010110 - @X_sync +_sync l sc +@X_sync .. .. l:3 ... sc:2 . .. . _sync +SYNC01 -- ... --- .. - 1001010110 - @X_sync EIEIO 01 - - - 1101010110 - diff --git a/target/ppc/translate/misc-impl.c.inc b/target/ppc/translate/misc-impl.c.inc index 9226467f81..3467b49d0d 100644 --- a/target/ppc/translate/misc-impl.c.inc +++ b/target/ppc/translate/misc-impl.c.inc @@ -25,6 +25,7 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) { TCGBar bar = TCG_MO_ALL; uint32_t l = a->l; +uint32_t sc = a->sc; /* * BookE uses the msync mnemonic. This means hwsync, except in the @@ -46,20 +47,36 @@ static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); } -if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { -bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; -} - /* - * We may need to check for a pending TLB flush. - * - * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. - * - * Additionally, this can only happen in kernel mode however so - * check MSR_PR as well. + * In ISA v3.1, the L field grew one bit. Mask that out to ignore it in + * older processors. It also added the SC field, zero this to ignore + * it too. 
*/ -if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { -gen_check_tlb_flush(ctx, true); +if (!(ctx->insns_flags2 & PPC2_ISA310)) { +l &= 0x3; +sc = 0; +} + +if (sc) { +/* Store syncs [stsync, stcisync, stncisync]. These ignore L. */ +bar = TCG_MO_ST_ST; +} else { +if (((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) || (l == 5)) { +/* lwsync, or plwsync on POWER10 and later */ +bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; +} + +/* + * We may need to check for a pending TLB flush. + * + * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. + * + * Additionally, this can only happen in kernel mode however so + * check MSR_PR as well. + */ +if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { +gen_check_tlb_flush(ctx, true); +} } tcg_gen_mb(bar | TCG_BAR_SC);
Re: [PATCH 1/3] target/ppc: Move sync instructions to decodetree
On 5/1/24 18:34, Nicholas Piggin wrote: This tries to faithfully reproduce the odd BookE logic. It does change the handling of non-zero reserved bits outside the defined fields from being illegal to being ignored, which the architecture specifies ot help with backward compatibility of new fields. The existing behaviour causes illegal instruction exceptions when using new POWER10 sync variants that add new fields, after this the instructions are accepted and are implemented as supersets of the new behaviour, as intended. Signed-off-by: Nicholas Piggin Reviewed-by: Chinmay Rath --- target/ppc/insn32.decode | 7 ++ target/ppc/translate.c | 102 +--- target/ppc/translate/misc-impl.c.inc | 135 +++ 3 files changed, 144 insertions(+), 100 deletions(-) create mode 100644 target/ppc/translate/misc-impl.c.inc diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..6b89804b15 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -998,3 +998,10 @@ MSGSND 01 - - . 0011001110 - @X_rb MSGCLRP 01 - - . 0010101110 - @X_rb MSGSNDP 01 - - . 0010001110 - @X_rb MSGSYNC 01 - - - 1101110110 - + +# Memory Barrier Instructions + +_sync l +@X_sync .. ... l:2 . . .. . _sync +SYNC01 --- .. - - 1001010110 - @X_sync +EIEIO 01 - - - 1101010110 - diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..bb2cabae10 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -3423,59 +3423,6 @@ static void gen_stswx(DisasContext *ctx) gen_helper_stsw(tcg_env, t0, t1, t2); } -/***Memory synchronisation ***/ -/* eieio */ -static void gen_eieio(DisasContext *ctx) -{ -TCGBar bar = TCG_MO_ALL; - -/* - * eieio has complex semanitcs. It provides memory ordering between - * operations in the set: - * - loads from CI memory. - * - stores to CI memory. - * - stores to WT memory. - * - * It separately also orders memory for operations in the set: - * - stores to cacheble memory. 
- * - * It also serializes instructions: - * - dcbt and dcbst. - * - * It separately serializes: - * - tlbie and tlbsync. - * - * And separately serializes: - * - slbieg, slbiag, and slbsync. - * - * The end result is that CI memory ordering requires TCG_MO_ALL - * and it is not possible to special-case more relaxed ordering for - * cacheable accesses. TCG_BAR_SC is required to provide this - * serialization. - */ - -/* - * POWER9 has a eieio instruction variant using bit 6 as a hint to - * tell the CPU it is a store-forwarding barrier. - */ -if (ctx->opcode & 0x200) { -/* - * ISA says that "Reserved fields in instructions are ignored - * by the processor". So ignore the bit 6 on non-POWER9 CPU but - * as this is not an instruction software should be using, - * complain to the user. - */ -if (!(ctx->insns_flags2 & PPC2_ISA300)) { -qemu_log_mask(LOG_GUEST_ERROR, "invalid eieio using bit 6 at @" - TARGET_FMT_lx "\n", ctx->cia); -} else { -bar = TCG_MO_ST_LD; -} -} - -tcg_gen_mb(bar | TCG_BAR_SC); -} - #if !defined(CONFIG_USER_ONLY) static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { @@ -3877,31 +3824,6 @@ static void gen_stqcx_(DisasContext *ctx) } #endif /* defined(TARGET_PPC64) */ -/* sync */ -static void gen_sync(DisasContext *ctx) -{ -TCGBar bar = TCG_MO_ALL; -uint32_t l = (ctx->opcode >> 21) & 3; - -if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { -bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; -} - -/* - * We may need to check for a pending TLB flush. - * - * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. - * - * Additionally, this can only happen in kernel mode however so - * check MSR_PR as well. 
- */ -if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { -gen_check_tlb_flush(ctx, true); -} - -tcg_gen_mb(bar | TCG_BAR_SC); -} - /* wait */ static void gen_wait(DisasContext *ctx) { @@ -6010,23 +5932,6 @@ static void gen_dlmzb(DisasContext *ctx) cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); } -/* mbar replaces eieio on 440 */ -static void gen_mbar(DisasContext *ctx) -{ -/* interpreted as no-op */ -} - -/* msync replaces sync on 440 */ -static void gen_msync_4xx(DisasContext *ctx) -{ -
[PATCH 2/3] target/ppc: Move VMX integer logical instructions to decodetree.
Moving the following instructions to decodetree specification: v{and, andc, nand, or, orc, nor, xor, eqv} : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 11 +++ target/ppc/translate/vmx-impl.c.inc | 22 ++ target/ppc/translate/vmx-ops.c.inc | 15 --- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 27655f0d9e..e00bc05381 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -574,6 +574,17 @@ VCMPNEZW000100 . . . . 011111 @VC VCMPSQ 000100 ... -- . . 0010101 @VX_bf VCMPUQ 000100 ... -- . . 0010001 @VX_bf +## Vector Integer Logical Instructions + +VAND000100 . . . 1000100@VX +VANDC 000100 . . . 10001000100@VX +VNAND 000100 . . . 1011100@VX +VOR 000100 . . . 1001100@VX +VORC000100 . . . 10101000100@VX +VNOR000100 . . . 1010100@VX +VXOR000100 . . . 10011000100@VX +VEQV000100 . . . 1101100@VX + ## Vector Integer Average Instructions VAVGSB 000100 . . . 
1010010@VX diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index 4d5e743cfe..cefe04127c 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -205,16 +205,6 @@ static void glue(gen_, name)(DisasContext *ctx) \ 16, 16); \ } -/* Logical operations */ -GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16); -GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17); -GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18); -GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19); -GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20); -GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26); -GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22); -GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21); - #define GEN_VXFORM(name, opc2, opc3)\ static void glue(gen_, name)(DisasContext *ctx) \ { \ @@ -727,6 +717,16 @@ TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv) TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv) TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv) +/* Logical operations */ +TRANS_FLAGS(ALTIVEC, VAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS(ALTIVEC, VANDC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS(ALTIVEC, VOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS(ALTIVEC, VXOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS(ALTIVEC, VNOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(ALTIVEC_207, VEQV, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(ALTIVEC_207, VNAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(ALTIVEC_207, VORC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_orc); + static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb) { TCGv_vec t0 = tcg_temp_new_vec_matching(vrb), @@ -3331,8 +3331,6 @@ TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ) #undef DIVS64 #undef DIVU64 -#undef GEN_VX_LOGICAL -#undef 
GEN_VX_LOGICAL_207 #undef GEN_VXFORM #undef GEN_VXFORM_207 #undef GEN_VXFORM_DUAL diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 672fba3796..80c5217749 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -1,18 +1,3 @@ -#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3)\ -GEN_HANDLER(name, 0x04, opc2, opc3, 0x, PPC_ALTIVEC) - -#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \ -GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x, PPC_NONE, PPC2_ALTIVEC_207) - -GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16), -GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17), -GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18), -GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19), -GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20), -GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26), -GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22), -GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21), - #define GEN_VXFORM(name, opc2, opc3)\ GEN_HANDLER(name, 0x04, opc2, opc3, 0x, PPC_ALTIVEC) -- 2.39.3
[PATCH 1/3] target/ppc: Move VMX storage access instructions to decodetree
Moving the following instructions to decodetree specification : {l,st}ve{b,h,w}x, {l,st}v{x,xl}, lvs{l,r}: X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 12 +- target/ppc/insn32.decode| 17 +++ target/ppc/mem_helper.c | 12 +- target/ppc/translate.c | 2 - target/ppc/translate/vmx-impl.c.inc | 221 target/ppc/translate/vmx-ops.c.inc | 19 --- 6 files changed, 120 insertions(+), 163 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f397ef459a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -267,12 +267,12 @@ DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) -DEF_HELPER_3(lvebx, void, env, avr, tl) -DEF_HELPER_3(lvehx, void, env, avr, tl) -DEF_HELPER_3(lvewx, void, env, avr, tl) -DEF_HELPER_3(stvebx, void, env, avr, tl) -DEF_HELPER_3(stvehx, void, env, avr, tl) -DEF_HELPER_3(stvewx, void, env, avr, tl) +DEF_HELPER_3(LVEBX, void, env, avr, tl) +DEF_HELPER_3(LVEHX, void, env, avr, tl) +DEF_HELPER_3(LVEWX, void, env, avr, tl) +DEF_HELPER_3(STVEBX, void, env, avr, tl) +DEF_HELPER_3(STVEHX, void, env, avr, tl) +DEF_HELPER_3(STVEWX, void, env, avr, tl) #if defined(TARGET_PPC64) DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..27655f0d9e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -526,6 +526,23 @@ DSCRIQ 11 . . .. 001100010 . @Z22_tap_sh_rc VPMSUMD 000100 . . . 10011001000@VX +## Vector Load/Store Instructions + +LVEBX 01 . . . 000111 - @X +LVEHX 01 . . . 100111 - @X +LVEWX 01 . . . 0001000111 - @X +LVX 01 . . . 
0001100111 - @X +LVXL01 . . . 0101100111 - @X + +STVEBX 01 . . . 001111 - @X +STVEHX 01 . . . 0010100111 - @X +STVEWX 01 . . . 0011000111 - @X +STVX01 . . . 0011100111 - @X +STVXL 01 . . . 000111 - @X + +LVSL01 . . . 000110 - @X +LVSR01 . . . 100110 - @X + ## Vector Integer Instructions VCMPEQUB000100 . . . . 000110 @VC diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..f88155ad45 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -404,9 +404,9 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, } \ } #define I(x) (x) -LVE(lvebx, cpu_ldub_data_ra, I, u8) -LVE(lvehx, cpu_lduw_data_ra, bswap16, u16) -LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) +LVE(LVEBX, cpu_ldub_data_ra, I, u8) +LVE(LVEHX, cpu_lduw_data_ra, bswap16, u16) +LVE(LVEWX, cpu_ldl_data_ra, bswap32, u32) #undef I #undef LVE @@ -432,9 +432,9 @@ LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) } \ } #define I(x) (x) -STVE(stvebx, cpu_stb_data_ra, I, u8) -STVE(stvehx, cpu_stw_data_ra, bswap16, u16) -STVE(stvewx, cpu_stl_data_ra, bswap32, u32) +STVE(STVEBX, cpu_stb_data_ra, I, u8) +STVE(STVEHX, cpu_stw_data_ra, bswap16, u16) +STVE(STVEWX, cpu_stl_data_ra, bswap32, u32) #undef I #undef LVE diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..cde3b88b98 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6640,8 +6640,6 @@ GEN_HANDLER2_E(icbt_440, "icbt", 0x1F, 0x16, 0x00, 0x03E1, PPC_BOOKE, PPC2_BOOKE206), GEN_HANDLER2(icbt_440, "icbt", 0x1F, 0x06, 0x08, 0x03E1, PPC_440_SPEC), -GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x0001, PPC_ALTIVEC), -GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x0001, PPC_ALTIVEC), GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC), GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff, PPC_ALTIVEC), #if defined(TARGET_PPC64) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index b56e615c24..4d5e743cfe 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ 
b/target/ppc/translate/vmx-impl.c.inc @@ -14,125 +14,88 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } -#define GEN_VR_
[PATCH 0/3] target/ppc: Moving VMX insns to decodetree
Moving VMX instructions of the following types to decodetree specification : storage access, integer logical & integer max/min. Chinmay Rath (3): target/ppc: Move VMX storage access instructions to decodetree target/ppc: Move VMX integer logical instructions to decodetree target/ppc: Move VMX integer max/min instructions to decodetree. target/ppc/helper.h | 12 +- target/ppc/insn32.decode| 50 + target/ppc/mem_helper.c | 12 +- target/ppc/translate.c | 2 - target/ppc/translate/vmx-impl.c.inc | 280 target/ppc/translate/vmx-ops.c.inc | 50 - 6 files changed, 184 insertions(+), 222 deletions(-) -- 2.39.3
[PATCH 3/3] target/ppc: Move VMX integer max/min instructions to decodetree.
Moving the following instructions to decodetree specification : v{max, min}{u, s}{b, h, w, d} : VX-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode| 22 + target/ppc/translate/vmx-impl.c.inc | 37 - target/ppc/translate/vmx-ops.c.inc | 16 - 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index e00bc05381..847a2f4356 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -717,6 +717,28 @@ VEXTSD2Q000100 . 11011 . 1100010 @VX_tb VNEGD 000100 . 00111 . 1100010@VX_tb VNEGW 000100 . 00110 . 1100010@VX_tb +## Vector Integer Maximum/Minimum Instructions + +VMAXUB 000100 . . . 010@VX +VMAXUH 000100 . . . 110@VX +VMAXUW 000100 . . . 0001010@VX +VMAXUD 000100 . . . 0001110@VX + +VMAXSB 000100 . . . 0010010@VX +VMAXSH 000100 . . . 0010110@VX +VMAXSW 000100 . . . 0011010@VX +VMAXSD 000100 . . . 0011110@VX + +VMINUB 000100 . . . 0100010@VX +VMINUH 000100 . . . 0100110@VX +VMINUW 000100 . . . 0101010@VX +VMINUD 000100 . . . 0101110@VX + +VMINSB 000100 . . . 0110010@VX +VMINSH 000100 . . . 0110110@VX +VMINSW 000100 . . . 0111010@VX +VMINSD 000100 . . . 010@VX + ## Vector Mask Manipulation Instructions MTVSRBM 000100 . 1 . 
1100110@VX_tb diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index cefe04127c..8084af75cc 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -342,22 +342,6 @@ GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16); GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17); GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18); GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19); -GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0); -GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1); -GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2); -GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3); -GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4); -GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5); -GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6); -GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7); -GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8); -GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9); -GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10); -GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11); -GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12); -GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13); -GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14); -GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15); GEN_VXFORM(vmrghb, 6, 0); GEN_VXFORM(vmrghh, 6, 1); GEN_VXFORM(vmrghw, 6, 2); @@ -727,6 +711,27 @@ TRANS_FLAGS2(ALTIVEC_207, VEQV, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_eqv); TRANS_FLAGS2(ALTIVEC_207, VNAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nand); TRANS_FLAGS2(ALTIVEC_207, VORC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_orc); +/* Integer Max/Min operations */ +TRANS_FLAGS(ALTIVEC, VMAXUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umax); +TRANS_FLAGS2(ALTIVEC_207, VMAXUD, do_vector_gvec3_VX, 
MO_64, tcg_gen_gvec_umax); + +TRANS_FLAGS(ALTIVEC, VMAXSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smax); +TRANS_FLAGS2(ALTIVEC_207, VMAXSD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_smax); + +TRANS_FLAGS(ALTIVEC, VMINUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umin); +TRANS_FLAGS2(ALTIVEC_207, VMINUD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_umin); + +TRANS_FLAGS(ALTIVEC, VMINSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smin); +TRANS_FLAGS2(ALTIVEC_207, VMINSD
[PATCH v2 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 1fc8b7c5fd..09d0b0074b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -71,8 +71,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) -DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index bfccebd9a7..654f55471b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,9 @@ _ara @X_a.. ra:3 .. . . .. . _a +_tl rt l +@X_tl .. rt:5 ... l:2 . .. . _tl + rt ra rb oe:bool rc:bool @XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 @@ -367,6 +370,11 @@ DIVWU 01 . . . . 111001011 . @XO DIVWE 01 . . . . 110101011 . @XO DIVWEU 01 . . . . 110001011 . @XO +MODSW 01 . . . 111011 - @X +MODUW 01 . . . 011011 - @X +DARN01 . --- .. - 100011 - @X_tl +NEG 01 . . - . 001101000 . @XO_ta + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dc1f72ff38..bc25d5b062 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) /* * Return a random number. */ -uint64_t helper_darn32(void) +uint64_t helper_DARN32(void) { Error *err = NULL; uint32_t ret; @@ -186,7 +186,7 @@ uint64_t helper_darn32(void) return ret; } -uint64_t helper_darn64(void) +uint64_t helper_DARN64(void) { Error *err = NULL; uint64_t ret; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0a1d1d63b3..436fcfc645 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1878,17 +1878,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, } } -#define GEN_INT_ARITH_MODW(name, opc3, sign)\ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],\ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign);\ -} - -GEN_INT_ARITH_MODW(moduw, 0x08, 0); -GEN_INT_ARITH_MODW(modsw, 0x18, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign) @@ -2055,27 +2044,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, } } -/* neg neg. nego nego. 
*/ -static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) -{ -TCGv zero = tcg_constant_tl(0); -gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - zero, 0, 0, compute_ov, Rc(ctx->opcode)); -} - -static void gen_neg(DisasContext *ctx) -{ -tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode))) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -static void gen_nego(DisasContext *ctx) -{ -gen_op_arith_neg(ctx, 1); -} - /***Integer logical***/ #define GEN_LOGICAL2(name, tcg_op, opc, type) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2401,24 +2369,6 @@ static void gen_cnttzd(DisasContext *ctx) gen_set_R
[PATCH v2 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c| 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..1fc8b7c5fd 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -55,8 +55,8 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) DEF_HELPER_4(divde, i64, env, i64, i64, i32) #endif -DEF_HELPER_4(divweu, tl, env, tl, tl, i32) -DEF_HELPER_4(divwe, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 0184680db8..bfccebd9a7 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -362,6 +362,11 @@ MULLWO 01 . . . 1 011101011 . @XO_tab_rc MULHW 01 . . . - 001001011 . @XO_tab_rc MULHWU 01 . . . - 01011 . @XO_tab_rc +DIVW01 . . . . 01011 . @XO +DIVWU 01 . . . . 111001011 . @XO +DIVWE 01 . . . . 110101011 . @XO +DIVWEU 01 . . . . 110001011 . @XO + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..dc1f72ff38 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) } } -target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { uint64_t rt = 0; @@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } -target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { int64_t rt = 0; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index be7d807e3c..0a1d1d63b3 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,6 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* Div functions */ -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov, Rc(ctx->opcode)); \ -} -/* divwu divwu. divwuo divwuo. */ -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0); -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1); - /* div[wd]eu[o][.] 
*/ #define GEN_DIVE(name, hlpr, compute_ov) \ static void gen_##name(DisasContext *ctx) \ @@ -1805,11 +1790,6 @@ static void gen_##name(DisasContext *ctx) \ } \ } -GEN_DIVE(divweu, divweu, 0); -GEN_DIVE(divweuo, divweu, 1); -GEN_DIVE(divwe, divwe, 0); -GEN_DIVE(divweo, divwe, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) @@ -6562,17 +6542,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE, GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x, PPC_NONE, PPC2_ISA300), #endif -#undef GEN_INT_ARITH_DIVW -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x, PPC_INTEGER) -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVW(divwuo
[PATCH v2 0/8] target/ppc: Move fixed-point insns to
Moving all fixed-point instructions of the following type to decodetree specification : arithmetic, compare, trap, select and logical. Change log : v2 : Implemented code clean-ups as per comments by Richard in patches 1/8, 5/8 and 7/8 of v1. v1 : https://lore.kernel.org/qemu-devel/20240416063927.99428-1-ra...@linux.ibm.com/ Chinmay Rath (8): target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree. target/ppc: Make divw[u] handler method decodetree compatible. target/ppc: Move divw[u, e, eu] instructions to decodetree. target/ppc: Move neg, darn, mod{sw, uw} to decodetree. target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree. target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree. target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree. target/ppc: Move logical fixed-point instructions to decodetree. target/ppc/helper.h| 26 +- target/ppc/insn32.decode | 93 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 24 +- target/ppc/translate.c | 841 + target/ppc/translate/fixedpoint-impl.c.inc | 631 6 files changed, 762 insertions(+), 857 deletions(-) -- 2.39.3
[PATCH v2 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s]: D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c| 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 05f7ab5f6e..b53abd853a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -58,8 +58,8 @@ DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -68,8 +68,8 @@ DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) 
DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 80a7bb1872..3175810190 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -29,6 +29,9 @@ rt ra si:int64_t @D .. rt:5 ra:5 si:s16 +_ui rt ra ui:uint64_t +@D_ui .. rt:5 ra:5 ui:16 _ui + _bf bf l:bool ra imm @D_bfs .. bf:3 . l:1 ra:5 imm:s16 _bf @D_bfu .. bf:3 . l:1 ra:5 imm:16 _bf @@ -96,6 +99,9 @@ _sa rs ra @X_sa .. rs:5 ra:5 . .. . _sa +_sa_rcrs ra rc +@X_sa_rc.. rs:5 ra:5 . .. rc:1 _sa_rc + %x_frtp 22:4 !function=times_2 %x_frap 17:4 !function=times_2 %x_frbp 12:4 !function=times_2 @@ -410,6 +416,38 @@ MODUD 01 . . . 011001 - @X ## Fixed-Point Logical Instructions +ANDI_ 011100 . . @D_ui +ANDIS_ 011101 . . @D_ui +ORI 011000 . . @D_ui +ORIS011001 . . @D_ui +XORI011010 . . @D_ui +XORIS 011011 . . @D_ui + +AND 01 . . . 011100 . @X_rc +ANDC01 . . . 00 . @X_rc +NAND01 . . . 0111011100 . @X_rc +OR 01 . . . 011000 . @X_rc +ORC 01 . . . 0110011100 . @X_rc +NOR 01 . . . 000100 . @X_rc +XOR 01 . . . 010000 . @X_rc +EQV 01 . . . 0100011100 . @X_rc +CMPB01 . . . 011100 . @X_rc + +EXTSB 01 . . - 1110111010 . @X_sa_rc +EXTSH 01 . . - 1110011010 . @X_sa_rc +EXTSW 01 . . - 011010 . @X_sa_rc +CNTLZW 01 . . - 011010 . @X_sa_rc +CNTTZW 01 . . - 111010 . @X_sa_rc +CNTLZD 01 . . - 111010 . @X_sa_rc +CNTTZD 01 . . - 1000111010 . @X_sa_rc +POPCNTB 01 . . - 000010 . @X_sa_rc + +POPCNTW 01 . . - 010010 - @X_sa +POPCNTD 01 . . - 011010 - @X_sa +PRTYW 01 . . - 0010011010 - @X_sa +PRTYD 01 . . - 0010111010 - @X_sa + +BPERMD 01
[PATCH v2 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
Moving the below instructions to decodetree specification : divd[u, e, eu][o][.]: XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 09d0b0074b..e862bdceaf 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -52,8 +52,8 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) -DEF_HELPER_4(divde, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) #endif DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 61c59bbde0..509961023b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -384,6 +384,14 @@ MADDLD 000100 . . . . 110011 @VA MADDHD 000100 . . . . 11 @VA MADDHDU 000100 . . . . 110001 @VA +DIVD01 . . . . 01001 . @XO +DIVDU 01 . . . . 111001001 . @XO +DIVDE 01 . . . . 110101001 . @XO +DIVDEU 01 . . . . 110001001 . @XO + +MODSD 01 . . . 111001 - @X +MODUD 01 . . . 011001 - @X + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index bc25d5b062..585c2b65d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -101,7 +101,7 @@ target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, #if defined(TARGET_PPC64) -uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) { uint64_t rt = 0; int overflow = 0; @@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } -uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) { uint64_t rt = 0; int64_t ra = (int64_t)rau; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8fa125d0ae..8900da85e5 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,11 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* div[wd]eu[o][.] 
*/ -#define GEN_DIVE(name, hlpr, compute_ov) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv_i32 t0 = tcg_constant_i32(compute_ov); \ -gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env, \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); \ -} \ -} #if defined(TARGET_PPC64) -static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1824,29 +1814,10 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { ge
[PATCH v2 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also for CMPRB, following review comments : Replaced repetition of arithmetic right shifting (tcg_gen_shri_i32) followed by extraction of last 8 bits (tcg_gen_ext8u_i32) with extraction of the required bits using offsets (tcg_gen_extract_i32). Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/helper.h| 6 +- target/ppc/insn32.decode | 16 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 2 +- target/ppc/translate.c | 133 + target/ppc/translate/fixedpoint-impl.c.inc | 120 +++ 6 files changed, 145 insertions(+), 136 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e862bdceaf..05f7ab5f6e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) 
DEF_HELPER_3(srad, tl, env, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 509961023b..80a7bb1872 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -23,6 +23,9 @@ _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb +_tab_bc rt ra rb bc +@A_tab_bc .. rt:5 ra:5 rb:5 bc:5 . . _tab_bc + rt ra si:int64_t @D .. rt:5 ra:5 si:s16 @@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - @X_bfl CMPL01 ... - . . . 10 - @X_bfl CMPI001011 ... - . . @D_bfs CMPLI 001010 ... - . . @D_bfu +CMPRB 01 ... - . . . 001100 - @X_bfl +CMPEQB 01 ... -- . . 001110 - @X_bf + +### Fixed-Point Trap Instructions + +TW 01 . . . 000100 - @X +TD 01 . . . 0001000100 - @X +TWI 11 . . @D +TDI 10 . . @D + +### Fixed-Point Select Instruction + +ISEL01 . . . . 0 - @A_tab_bc ### Fixed-Point Arithmetic Instructions diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..79dd9b82cf 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 585c2b65d3..d12dcc28e1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ 
#define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong
[PATCH v2 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.]: XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Also cleaned up code for mullw[o][.] as per review comments while keeping the logic of the tcg ops generated semantically same. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 66 3 files changed, 75 insertions(+), 89 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..0184680db8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -193,6 +193,9 @@ _ta rt ra oe:bool rc:bool @XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta +_tab_rc rt ra rb rc:bool +@XO_tab_rc .. rt:5 ra:5 rb:5 . . rc:1 _tab_rc + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -353,6 +356,12 @@ SUBFE 01 . . . . 010001000 . @XO SUBFME 01 . . - . 011101000 . @XO_ta SUBFZE 01 . . - . 011001000 . @XO_ta +MULLI 000111 . . @D +MULLW 01 . . . 0 011101011 . @XO_tab_rc +MULLWO 01 . . . 1 011101011 . @XO_tab_rc +MULHW 01 . . . - 001001011 . @XO_tab_rc +MULHWU 01 . . . - 01011 . @XO_tab_rc + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..c45547a770 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -/* mulhw mulhw. 
*/ -static void gen_mulhw(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhwu mulhwu. */ -static void gen_mulhwu(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mulu2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullw mullw. */ -static void gen_mullw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) -TCGv_i64 t0, t1; -t0 = tcg_temp_new_i64(); -t1 = tcg_temp_new_i64(); -tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -cpu_gpr[rB(ctx->opcode)]); -#endif -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullwo mullwo. 
*/ -static void gen_mullwo(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -#if defined(TARGET_PPC64) -tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0); -#endif - -tcg_gen_sari_i32(t0, t0, 31); -tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); -tcg_gen_extu_i32_tl(cpu_ov, t0); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulli */ -static void gen_mulli(DisasContext *ctx) -{ -tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -SIMM(ctx->opcode)); -} - #if defined(TARGET_PPC64) /* mulhd mulhd. */ static void gen_mulhd(DisasContext *ctx) @@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x0001, PPC_NONE, PPC2_I
[PATCH v2 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Moving the following instructions to decodetree : mul{ld, ldo, hd, hdu}[.]: XO-form madd{hd, hdu, ld} : VA-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/insn32.decode | 9 ++ target/ppc/translate.c | 101 - target/ppc/translate/fixedpoint-impl.c.inc | 85 + 3 files changed, 94 insertions(+), 101 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 654f55471b..61c59bbde0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -375,6 +375,15 @@ MODUW 01 . . . 011011 - @X DARN01 . --- .. - 100011 - @X_tl NEG 01 . . - . 001101000 . @XO_ta +MULLD 01 . . . 0 011101001 . @XO_tab_rc +MULLDO 01 . . . 1 011101001 . @XO_tab_rc +MULHD 01 . . . - 001001001 . @XO_tab_rc +MULHDU 01 . . . - 01001 . @XO_tab_rc + +MADDLD 000100 . . . . 110011 @VA +MADDHD 000100 . . . . 11 @VA +MADDHDU 000100 . . . . 110001 @VA + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 436fcfc645..8fa125d0ae 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1918,62 +1918,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -#if defined(TARGET_PPC64) -/* mulhd mulhd. */ -static void gen_mulhd(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhdu mulhdu. */ -static void gen_mulhdu(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulld mulld. 
*/ -static void gen_mulld(DisasContext *ctx) -{ -tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulldo mulldo. */ -static void gen_mulldo(DisasContext *ctx) -{ -TCGv_i64 t0 = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0); - -tcg_gen_sari_i64(t0, t0, 63); -tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} -#endif - /* Common subf function */ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -5884,36 +5828,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -#if defined(TARGET_PPC64) -static void gen_maddld(DisasContext *ctx) -{ -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); -} - -/* maddhd maddhdu */ -static void gen_maddhd_maddhdu(DisasContext *ctx) -{ -TCGv_i64 lo = tcg_temp_new_i64(); -TCGv_i64 hi = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -if (Rc(ctx->opcode)) { -tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_movi_i64(t1, 0); -} else { -tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); -} -tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, - cpu_gpr[rC(ctx->opcode)], t1); -} -#endif /* defined(TARGET_PPC64) */ - static void gen_tbegin(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6277,9 +6191,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 
0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HAN
[PATCH v2 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath Reviewed-by: Richard Henderson --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index c45547a770..be7d807e3c 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1738,8 +1738,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } -static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1773,7 +1774,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret); } } @@ -1783,7 +1784,7 @@ static void glue(gen_, name)(DisasContext *ctx) \ { \ gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ + sign, compute_ov, Rc(ctx->opcode)); \ } /* divwu divwu. divwuo divwuo. */ GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -- 2.39.3
Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Hi Richard, On 4/20/24 21:21, Richard Henderson wrote: On 4/19/24 02:25, Chinmay Rath wrote: Hi Richard, On 4/17/24 00:06, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a) ... + tcg_gen_movi_i64(t1, 0); Drop the movi. + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1); Use tcg_constant_i64(0). Looks like tcg_gen_add2_i64 internally modifies the passed arguments, hence constant is not expected. However, I tried using tcg_constant_i64(0) as suggested but this leads to an assert failure : qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion `!temp_readonly(ts)' failed. You misunderstood my suggestion. TCGv_i64 t1 = tcg_temp_new_i64(); tcg_gen_add2_i64(t1, cpu_gpr[vrt], lo, hi, cpu_gpr[a->rc], tcg_constant_i64(0)); Thank you for the clarification. Will add this to v2. Regards, Chinmay r~
Re: [PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
On 4/17/24 00:08, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : divd[u, e, eu][o][.] : XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c | 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
On 4/17/24 01:05, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s] : D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c | 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Hi Richard, On 4/17/24 00:50, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i : D-form isel : A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath A faithful reorg of the existing code, so, Reviewed-by: Richard Henderson Thank you. Notes for improvement: +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a) +{ + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv_i32 src2 = tcg_temp_new_i32(); + TCGv_i32 src2lo = tcg_temp_new_i32(); + TCGv_i32 src2hi = tcg_temp_new_i32(); + TCGv_i32 crf = cpu_crf[a->bf]; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]); + + tcg_gen_andi_i32(src1, src1, 0xFF); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 8, 8); + + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(crf, src2lo, src2hi); + + if (a->l) { + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2lo, src2); tcg_gen_extract_i32(src2lo, src2, 16, 8); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); tcg_gen_extract_i32(src2hi, src2, 24, 8); Will update the above in v2. Will implement the below improvements for trap insns as a separate patch later. 
+/* + * Fixed-Point Trap Instructions + */ + +static bool trans_TW(DisasContext *ctx, arg_TW *a) +{ + TCGv_i32 t0; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TWI(DisasContext *ctx, arg_TWI *a) +{ + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} + +static bool trans_TD(DisasContext *ctx, arg_TD *a) +{ + TCGv_i32 t0; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TDI(DisasContext *ctx, arg_TDI *a) +{ + TCGv t0; + TCGv_i32 t1; + + REQUIRE_64BIT(ctx); + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} See target/sparc/translate.c, delay_exception, for a method of implementing compare-and-trap inline with no inline branch penalty. static void do_conditional_trap(DisasContext *ctx, unsigned to, TCGv a, TCGv b) { static const TCGCond ucond[8] = { TCG_COND_NEVER, TCG_COND_GTU, TCG_COND_LTU, TCG_COND_NE, TCG_COND_EQ, TCG_COND_GEU, TCG_COND_LEU, TCG_COND_ALWAYS, }; static const TCGCond scond[8] = { TCG_COND_NEVER, TCG_COND_EQ, TCG_COND_GT, TCG_COND_GE, TCG_COND_LT, TCG_COND_LE, TCG_COND_NE, TCG_COND_ALWAYS, }; TCGCond uc = ucond[to & 7]; TCGCond sc = scond[to >> 2]; /* There is overlap with EQ; we may not need both comparisons. 
*/ if (!(to & 0x18)) { sc = TCG_COND_NEVER; } else if (!(to & 0x03)) { uc = TCG_COND_NEVER; } if (uc == TCG_COND_ALWAYS || sc == TCG_COND_ALWAYS) { unconditional trap; return true; } if (uc == TCG_COND_NEVER && sc == TCG_COND_NEVER) { return true; } e = delay_exception(ctx, POWERPC_EXCP_TRAP); if (uc != TCG_COND_NEVER) { tcg_gen_brcond_tl(uc, a, b, e->lab); } if (sc != TCG_COND_NEVER) { tcg_gen_brcond_tl(sc, a, b, e->lab); } return true; } bool trans_TW(...) { TCGv a = tcg_temp_new(); TCGv b = tcg_temp_new(); /* Note that consistent sign extensions work for unsigned comparisons. */ tcg_gen_exts_i32_tl(a, ra); tcg_gen_exts_i32_tl(b, rb); return do_conditional_trap(ctx, to, a, b); } etc. Thanks, Chinmay r~
Re: [PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Hi Richard, On 4/17/24 00:06, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a) ... + tcg_gen_movi_i64(t1, 0); Drop the movi. + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1); Use tcg_constant_i64(0). Looks like tcg_gen_add2_i64 internally modifies the passed arguments, hence constant is not expected. However, I tried using tcg_constant_i64(0) as suggested but this leads to an assert failure : qemu-system-ppc64: ../tcg/tcg.c:5071: tcg_reg_alloc_op: Assertion `!temp_readonly(ts)' failed. So I hope it is fine to keep the code change as is for now. Let me know if you have any suggestions. Thanks, Chinmay With that, Reviewed-by: Richard Henderson r~
Re: [PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
On 4/16/24 23:55, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c | 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
On 4/16/24 23:49, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h | 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c | 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) Reviewed-by: Richard Henderson Thanks, Chinmay r~
Re: [PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
Hi Richard, On 4/16/24 23:27, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) Reviewed-by: Richard Henderson Thank you. +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) Could drop the inline at the same time. Let the compiler decide. I kept inline as is, as there are multiple gen_op_* routines with inline and if necessary we could consider removing inline for all of them together in a separate patch : grep inline target/ppc/translate.c | grep gen_op static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf) static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf) static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf) static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int crf) static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, static inline void gen_op_arith_compute_ca32(DisasContext *ctx, static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, static inline void gen_op_mfspr(DisasContext *ctx) Thanks, Chinmay r~
Re: [PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Hi Richard, On 4/16/24 23:26, Richard Henderson wrote: On 4/15/24 23:39, Chinmay Rath wrote: Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 71 + 3 files changed, 80 insertions(+), 89 deletions(-) This is an accurate reorg of the current code, so Reviewed-by: Richard Henderson Thank you. However, as follow-up, the code generation could be cleaned up: +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a) +{ +#if defined(TARGET_PPC64) + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]); + tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]); + tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1); +#else + tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]); +#endif + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} Without ifdefs: TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); tcg_gen_ext32s_tl(t0, ra); tcg_gen_ext32s_tl(t1, rb); tcg_gen_mul_tl(rt, t0, t1); For ppc32, ext32s_tl will turn into a mov, which will be optimized away. So ideal code generation for both modes. 
+static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]); + tcg_gen_muls2_i32(t0, t1, t0, t1); +#if defined(TARGET_PPC64) + tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1); +#else + tcg_gen_mov_i32(cpu_gpr[a->rt], t0); +#endif + + tcg_gen_sari_i32(t0, t0, 31); + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); + tcg_gen_extu_i32_tl(cpu_ov, t0); Usually hosts need to create the full 64-bit product and then break it apart for tcg_gen_muls2_i32, so split followed immediately by concatenate isn't great. TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); #ifdef TARGET_PPC64 tcg_gen_ext32s_i64(t0, ra); tcg_gen_ext32s_i64(t1, rb); tcg_gen_mul_i64(rt, t0, t1); tcg_gen_sextract_i64(t0, rt, 31, 1); tcg_gen_sari_i64(t1, rt, 32); #else tcg_gen_muls2_i32(rt, t1, ra, rb); tcg_gen_sari_i32(t0, rt, 31); #endif tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1); Sure, will update in v2. Thanks, Chinmay + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); + + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} r~
[PATCH 7/8] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree.
Moving the following instructions to decodetree specification : cmp{rb, eqb}, t{w, d} : X-form t{w, d}i: D-form isel: A-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 6 +- target/ppc/insn32.decode | 16 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 2 +- target/ppc/translate.c | 133 + target/ppc/translate/fixedpoint-impl.c.inc | 123 +++ 6 files changed, 148 insertions(+), 136 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index e862bdceaf..05f7ab5f6e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -67,7 +67,7 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 509961023b..80a7bb1872 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -23,6 +23,9 @@ _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . 
rc:1 _tb +_tab_bc rt ra rb bc +@A_tab_bc .. rt:5 ra:5 rb:5 bc:5 . . _tab_bc + rt ra si:int64_t @D .. rt:5 ra:5 si:s16 @@ -331,6 +334,19 @@ CMP 01 ... - . . . 00 - @X_bfl CMPL01 ... - . . . 10 - @X_bfl CMPI001011 ... - . . @D_bfs CMPLI 001010 ... - . . @D_bfu +CMPRB 01 ... - . . . 001100 - @X_bfl +CMPEQB 01 ... -- . . 001110 - @X_bf + +### Fixed-Point Trap Instructions + +TW 01 . . . 000100 - @X +TD 01 . . . 0001000100 - @X +TWI 11 . . @D +TDI 10 . . @D + +### Fixed-Point Select Instruction + +ISEL01 . . . . 0 - @A_tab_bc ### Fixed-Point Arithmetic Instructions diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..79dd9b82cf 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2750,7 +2750,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2764,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 585c2b65d3..d12dcc28e1 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -159,7 +159,7 @@ uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ #define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb) { return hasvalue(rb, ra) ? 
CRF_GT : 0; } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8900da85e5..98e642b19a 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1564,66 +1564,6 @@ static inline void gen_set_Rc
[PATCH 5/8] target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree.
Moving the following instructions to decodetree : mul{ld, ldo, hd, hdu}[.]: XO-form madd{hd, hdu, ld} : VA-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 ++ target/ppc/translate.c | 101 - target/ppc/translate/fixedpoint-impl.c.inc | 85 + 3 files changed, 94 insertions(+), 101 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 654f55471b..61c59bbde0 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -375,6 +375,15 @@ MODUW 01 . . . 011011 - @X DARN01 . --- .. - 100011 - @X_tl NEG 01 . . - . 001101000 . @XO_ta +MULLD 01 . . . 0 011101001 . @XO_tab_rc +MULLDO 01 . . . 1 011101001 . @XO_tab_rc +MULHD 01 . . . - 001001001 . @XO_tab_rc +MULHDU 01 . . . - 01001 . @XO_tab_rc + +MADDLD 000100 . . . . 110011 @VA +MADDHD 000100 . . . . 11 @VA +MADDHDU 000100 . . . . 110001 @VA + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 436fcfc645..8fa125d0ae 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1918,62 +1918,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -#if defined(TARGET_PPC64) -/* mulhd mulhd. */ -static void gen_mulhd(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhdu mulhdu. */ -static void gen_mulhdu(DisasContext *ctx) -{ -TCGv lo = tcg_temp_new(); -tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulld mulld. 
*/ -static void gen_mulld(DisasContext *ctx) -{ -tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulldo mulldo. */ -static void gen_mulldo(DisasContext *ctx) -{ -TCGv_i64 t0 = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0); - -tcg_gen_sari_i64(t0, t0, 63); -tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} -#endif - /* Common subf function */ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -5884,36 +5828,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -#if defined(TARGET_PPC64) -static void gen_maddld(DisasContext *ctx) -{ -TCGv_i64 t1 = tcg_temp_new_i64(); - -tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); -} - -/* maddhd maddhdu */ -static void gen_maddhd_maddhdu(DisasContext *ctx) -{ -TCGv_i64 lo = tcg_temp_new_i64(); -TCGv_i64 hi = tcg_temp_new_i64(); -TCGv_i64 t1 = tcg_temp_new_i64(); - -if (Rc(ctx->opcode)) { -tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_movi_i64(t1, 0); -} else { -tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); -} -tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, - cpu_gpr[rC(ctx->opcode)], t1); -} -#endif /* defined(TARGET_PPC64) */ - static void gen_tbegin(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6277,9 +6191,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 
0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0
[PATCH 1/8] target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree.
Moving the following instructions to decodetree specification : mulli : D-form mul{lw, lwo, hw, hwu}[.]: XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 9 +++ target/ppc/translate.c | 89 -- target/ppc/translate/fixedpoint-impl.c.inc | 71 + 3 files changed, 80 insertions(+), 89 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..0184680db8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -193,6 +193,9 @@ _ta rt ra oe:bool rc:bool @XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta +_tab_rc rt ra rb rc:bool +@XO_tab_rc .. rt:5 ra:5 rb:5 . . rc:1 _tab_rc + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -353,6 +356,12 @@ SUBFE 01 . . . . 010001000 . @XO SUBFME 01 . . - . 011101000 . @XO_ta SUBFZE 01 . . - . 011001000 . @XO_ta +MULLI 000111 . . @D +MULLW 01 . . . 0 011101011 . @XO_tab_rc +MULLWO 01 . . . 1 011101011 . @XO_tab_rc +MULHW 01 . . . - 001001011 . @XO_tab_rc +MULHWU 01 . . . - 01011 . @XO_tab_rc + ## Fixed-Point Logical Instructions CFUGED 01 . . . 0011011100 - @X diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..c45547a770 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0); GEN_INT_ARITH_MODD(modsd, 0x18, 1); #endif -/* mulhw mulhw. */ -static void gen_mulhw(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulhwu mulhwu. 
*/ -static void gen_mulhwu(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mulu2_i32(t0, t1, t0, t1); -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullw mullw. */ -static void gen_mullw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) -TCGv_i64 t0, t1; -t0 = tcg_temp_new_i64(); -t1 = tcg_temp_new_i64(); -tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -cpu_gpr[rB(ctx->opcode)]); -#endif -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mullwo mullwo. */ -static void gen_mullwo(DisasContext *ctx) -{ -TCGv_i32 t0 = tcg_temp_new_i32(); -TCGv_i32 t1 = tcg_temp_new_i32(); - -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); -tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); -tcg_gen_muls2_i32(t0, t1, t0, t1); -#if defined(TARGET_PPC64) -tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else -tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0); -#endif - -tcg_gen_sari_i32(t0, t0, 31); -tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); -tcg_gen_extu_i32_tl(cpu_ov, t0); -if (is_isa300(ctx)) { -tcg_gen_mov_tl(cpu_ov32, cpu_ov); -} -tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - -if (unlikely(Rc(ctx->opcode) != 0)) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -/* mulli */ -static void gen_mulli(DisasContext *ctx) -{ -tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], -SIMM(ctx->opcode)); -} - #if defined(TARGET_PPC64) /* mulhd mulhd. 
*/ static void gen_mulhd(DisasContext *ctx) @@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x0060, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x0001, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x0041, PPC_NONE, PPC2_ISA300), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x0001, PPC_ISEL), -GEN_HANDLER(mulhw,
[PATCH 3/8] target/ppc: Move divw[u, e, eu] instructions to decodetree.
Moving the following instructions to decodetree specification : divw[u, e, eu][o][.] : XO-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +-- target/ppc/insn32.decode | 5 target/ppc/int_helper.c| 4 +-- target/ppc/translate.c | 31 -- target/ppc/translate/fixedpoint-impl.c.inc | 24 + 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..1fc8b7c5fd 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -55,8 +55,8 @@ DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) DEF_HELPER_4(divde, i64, env, i64, i64, i32) #endif -DEF_HELPER_4(divweu, tl, env, tl, tl, i32) -DEF_HELPER_4(divwe, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 0184680db8..bfccebd9a7 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -362,6 +362,11 @@ MULLWO 01 . . . 1 011101011 . @XO_tab_rc MULHW 01 . . . - 001001011 . @XO_tab_rc MULHWU 01 . . . - 01011 . @XO_tab_rc +DIVW01 . . . . 01011 . @XO +DIVWU 01 . . . . 111001011 . @XO +DIVWE 01 . . . . 110101011 . @XO +DIVWEU 01 . . . . 110001011 . @XO + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..dc1f72ff38 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) } } -target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { uint64_t rt = 0; @@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } -target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { int64_t rt = 0; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index be7d807e3c..0a1d1d63b3 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,6 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* Div functions */ -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov, Rc(ctx->opcode)); \ -} -/* divwu divwu. divwuo divwuo. */ -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0); -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1); - /* div[wd]eu[o][.] 
*/ #define GEN_DIVE(name, hlpr, compute_ov) \ static void gen_##name(DisasContext *ctx) \ @@ -1805,11 +1790,6 @@ static void gen_##name(DisasContext *ctx) \ } \ } -GEN_DIVE(divweu, divweu, 0); -GEN_DIVE(divweuo, divweu, 1); -GEN_DIVE(divwe, divwe, 0); -GEN_DIVE(divweo, divwe, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) @@ -6562,17 +6542,6 @@ GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE, GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x, PPC_NONE, PPC2_ISA300), #endif -#undef GEN_INT_ARITH_DIVW -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x, PPC_INTEGER) -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1), -GEN_INT_ARITH_DIV
[PATCH 4/8] target/ppc: Move neg, darn, mod{sw, uw} to decodetree.
Moving the below instructions to decodetree specification : neg[o][.] : XO-form mod{sw, uw}, darn : X-form The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 56 -- target/ppc/translate/fixedpoint-impl.c.inc | 44 + 5 files changed, 56 insertions(+), 60 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 1fc8b7c5fd..09d0b0074b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -71,8 +71,8 @@ DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) -DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index bfccebd9a7..654f55471b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,9 @@ _ara @X_a.. ra:3 .. . . .. . _a +_tl rt l +@X_tl .. rt:5 ... l:2 . .. . _tl + rt ra rb oe:bool rc:bool @XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 @@ -367,6 +370,11 @@ DIVWU 01 . . . . 111001011 . @XO DIVWE 01 . . . . 110101011 . @XO DIVWEU 01 . . . . 110001011 . @XO +MODSW 01 . . . 111011 - @X +MODUW 01 . . . 011011 - @X +DARN01 . --- .. - 100011 - @X_tl +NEG 01 . . - . 001101000 . @XO_ta + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dc1f72ff38..bc25d5b062 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) /* * Return a random number. */ -uint64_t helper_darn32(void) +uint64_t helper_DARN32(void) { Error *err = NULL; uint32_t ret; @@ -186,7 +186,7 @@ uint64_t helper_darn32(void) return ret; } -uint64_t helper_darn64(void) +uint64_t helper_DARN64(void) { Error *err = NULL; uint64_t ret; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 0a1d1d63b3..436fcfc645 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1878,17 +1878,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, } } -#define GEN_INT_ARITH_MODW(name, opc3, sign)\ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)],\ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign);\ -} - -GEN_INT_ARITH_MODW(moduw, 0x08, 0); -GEN_INT_ARITH_MODW(modsw, 0x18, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign) @@ -2055,27 +2044,6 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, } } -/* neg neg. nego nego. 
*/ -static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) -{ -TCGv zero = tcg_constant_tl(0); -gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - zero, 0, 0, compute_ov, Rc(ctx->opcode)); -} - -static void gen_neg(DisasContext *ctx) -{ -tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); -if (unlikely(Rc(ctx->opcode))) { -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); -} -} - -static void gen_nego(DisasContext *ctx) -{ -gen_op_arith_neg(ctx, 1); -} - /***Integer logical***/ #define GEN_LOGICAL2(name, tcg_op, opc, type) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2401,24 +2369,6 @@ static void gen_cnttzd(DisasContext *ctx) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
[PATCH 6/8] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree.
Moving the below instructions to decodetree specification : divd[u, e, eu][o][.]: XO-form mod{sd, ud} : X-form With this patch, all the fixed-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured using the '-d in_asm,op' flag. Also, renamed do_divwe method in fixedpoint-impl.c.inc to do_dive because it is now used to divide doubleword operands as well, and not just words. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 4 +- target/ppc/insn32.decode | 8 +++ target/ppc/int_helper.c| 4 +- target/ppc/translate.c | 65 ++ target/ppc/translate/fixedpoint-impl.c.inc | 29 +- 5 files changed, 42 insertions(+), 68 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 09d0b0074b..e862bdceaf 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -52,8 +52,8 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) -DEF_HELPER_4(divde, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) #endif DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 61c59bbde0..509961023b 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -384,6 +384,14 @@ MADDLD 000100 . . . . 110011 @VA MADDHD 000100 . . . . 11 @VA MADDHDU 000100 . . . . 110001 @VA +DIVD01 . . . . 01001 . @XO +DIVDU 01 . . . . 111001001 . @XO +DIVDE 01 . . . . 110101001 . @XO +DIVDEU 01 . . . . 110001001 . @XO + +MODSD 01 . . . 111001 - @X +MODUD 01 . . . 011001 - @X + ## Fixed-Point Logical Instructions CFUGED 01 . . . 
0011011100 - @X diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index bc25d5b062..585c2b65d3 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -101,7 +101,7 @@ target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, #if defined(TARGET_PPC64) -uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) { uint64_t rt = 0; int overflow = 0; @@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } -uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) { uint64_t rt = 0; int64_t ra = (int64_t)rau; diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 8fa125d0ae..8900da85e5 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1778,21 +1778,11 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, gen_set_Rc0(ctx, ret); } } -/* div[wd]eu[o][.] 
*/ -#define GEN_DIVE(name, hlpr, compute_ov) \ -static void gen_##name(DisasContext *ctx) \ -{ \ -TCGv_i32 t0 = tcg_constant_i32(compute_ov); \ -gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env, \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); \ -} \ -} #if defined(TARGET_PPC64) -static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1824,29 +1814,10 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret);
[PATCH 8/8] target/ppc: Move logical fixed-point instructions to decodetree.
Moving the below instructions to decodetree specification : andi[s]., {ori, xori}[s]: D-form {and, andc, nand, or, orc, nor, xor, eqv}[.], exts{b, h, w}[.], cnt{l, t}z{w, d}[.], popcnt{b, w, d}, prty{w, d}, cmp, bpermd : X-form With this patch, all the fixed-point logical instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 8 +- target/ppc/insn32.decode | 38 +++ target/ppc/int_helper.c| 10 +- target/ppc/translate.c | 359 - target/ppc/translate/fixedpoint-impl.c.inc | 269 +++ 5 files changed, 316 insertions(+), 368 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 05f7ab5f6e..b53abd853a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -58,8 +58,8 @@ DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -68,8 +68,8 @@ DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) 
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 80a7bb1872..3175810190 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -29,6 +29,9 @@ rt ra si:int64_t @D .. rt:5 ra:5 si:s16 +_ui rt ra ui:uint64_t +@D_ui .. rt:5 ra:5 ui:16 _ui + _bf bf l:bool ra imm @D_bfs .. bf:3 . l:1 ra:5 imm:s16 _bf @D_bfu .. bf:3 . l:1 ra:5 imm:16 _bf @@ -96,6 +99,9 @@ _sa rs ra @X_sa .. rs:5 ra:5 . .. . _sa +_sa_rcrs ra rc +@X_sa_rc.. rs:5 ra:5 . .. rc:1 _sa_rc + %x_frtp 22:4 !function=times_2 %x_frap 17:4 !function=times_2 %x_frbp 12:4 !function=times_2 @@ -410,6 +416,38 @@ MODUD 01 . . . 011001 - @X ## Fixed-Point Logical Instructions +ANDI_ 011100 . . @D_ui +ANDIS_ 011101 . . @D_ui +ORI 011000 . . @D_ui +ORIS011001 . . @D_ui +XORI011010 . . @D_ui +XORIS 011011 . . @D_ui + +AND 01 . . . 011100 . @X_rc +ANDC01 . . . 00 . @X_rc +NAND01 . . . 0111011100 . @X_rc +OR 01 . . . 011000 . @X_rc +ORC 01 . . . 0110011100 . @X_rc +NOR 01 . . . 000100 . @X_rc +XOR 01 . . . 010000 . @X_rc +EQV 01 . . . 0100011100 . @X_rc +CMPB01 . . . 011100 . @X_rc + +EXTSB 01 . . - 1110111010 . @X_sa_rc +EXTSH 01 . . - 1110011010 . @X_sa_rc +EXTSW 01 . . - 011010 . @X_sa_rc +CNTLZW 01 . . - 011010 . @X_sa_rc +CNTTZW 01 . . - 111010 . @X_sa_rc +CNTLZD 01 . . - 111010 . @X_sa_rc +CNTTZD 01 . . - 1000111010 . @X_sa_rc +POPCNTB 01 . . - 000010 . @X_sa_rc + +POPCNTW 01 . . - 010010 - @X_sa +POPCNTD 01 . . - 011010 - @X_sa +PRTYW 01 . . - 0010011010 - @X_sa +PRTYD 01 . . - 0010111010 - @X_sa + +BPERMD 01 . . . 001100
[PATCH 2/8] target/ppc: Make divw[u] handler method decodetree compatible.
The handler methods for divw[u] instructions internally use Rc(ctx->opcode), for extraction of Rc field of instructions, which poses a problem if we move the above said instructions to decodetree, as the ctx->opcode field is not populated in decodetree. Hence, making it decodetree compatible, so that the mentioned insns can be safely moved to decodetree specs. Signed-off-by: Chinmay Rath --- target/ppc/translate.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index c45547a770..be7d807e3c 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1738,8 +1738,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } -static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1773,7 +1774,7 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } -if (unlikely(Rc(ctx->opcode) != 0)) { +if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret); } } @@ -1783,7 +1784,7 @@ static void glue(gen_, name)(DisasContext *ctx) \ { \ gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ + sign, compute_ov, Rc(ctx->opcode)); \ } /* divwu divwu. divwuo divwuo. */ GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -- 2.39.3
[PATCH 0/8] target/ppc: Move fixed-point insns to decodetree.
Moving all fixed-point instructions of the following type to decodetree specification : arithmetic, compare, trap, select and logical. Chinmay Rath (8): target/ppc: Move mul{li, lw, lwo, hw, hwu} instructions to decodetree. target/ppc: Make divw[u] handler method decodetree compatible. target/ppc: Move divw[u, e, eu] instructions to decodetree. target/ppc: Move neg, darn, mod{sw, uw} to decodetree. target/ppc: Move multiply fixed-point insns (64-bit operands) to decodetree. target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree. target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree. target/ppc: Move logical fixed-point instructions to decodetree. target/ppc/helper.h| 26 +- target/ppc/insn32.decode | 93 +++ target/ppc/excp_helper.c | 4 +- target/ppc/int_helper.c| 24 +- target/ppc/translate.c | 841 + target/ppc/translate/fixedpoint-impl.c.inc | 639 6 files changed, 770 insertions(+), 857 deletions(-) -- 2.39.3
[PATCH v2 2/2] target/ppc: Move floating-point arithmetic instructions to decodetree.
This patch moves the below instructions to decodetree specification : f{add, sub, mul, div, re, rsqrte, madd, msub, nmadd, nmsub}[s][.] : A-form ft{div, sqrt} : X-form With this patch, all the floating-point arithmetic instructions have been moved to decodetree. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath Reviewed-by: Nicholas Piggin --- target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 38 ++-- target/ppc/translate/fp-impl.c.inc | 285 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 192 insertions(+), 248 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f177d5b906 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -110,32 +110,32 @@ DEF_HELPER_2(friz, i64, env, i64) DEF_HELPER_2(frip, i64, env, i64) DEF_HELPER_2(frim, i64, env, i64) -DEF_HELPER_3(fadd, f64, env, f64, f64) -DEF_HELPER_3(fadds, f64, env, f64, f64) -DEF_HELPER_3(fsub, f64, env, f64, f64) -DEF_HELPER_3(fsubs, f64, env, f64, f64) -DEF_HELPER_3(fmul, f64, env, f64, f64) -DEF_HELPER_3(fmuls, f64, env, f64, f64) -DEF_HELPER_3(fdiv, f64, env, f64, f64) -DEF_HELPER_3(fdivs, f64, env, f64, f64) -DEF_HELPER_4(fmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64) +DEF_HELPER_3(FADD, f64, env, f64, f64) +DEF_HELPER_3(FADDS, f64, env, f64, f64) +DEF_HELPER_3(FSUB, f64, env, f64, f64) +DEF_HELPER_3(FSUBS, f64, env, f64, f64) +DEF_HELPER_3(FMUL, f64, env, f64, f64) +DEF_HELPER_3(FMULS, f64, env, f64, f64) +DEF_HELPER_3(FDIV, f64, env, f64, f64) +DEF_HELPER_3(FDIVS, f64, env, f64, 
f64) +DEF_HELPER_4(FMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUBS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUBS, i64, env, i64, i64, i64) DEF_HELPER_2(FSQRT, f64, env, f64) DEF_HELPER_2(FSQRTS, f64, env, f64) -DEF_HELPER_2(fre, i64, env, i64) -DEF_HELPER_2(fres, i64, env, i64) -DEF_HELPER_2(frsqrte, i64, env, i64) -DEF_HELPER_2(frsqrtes, i64, env, i64) +DEF_HELPER_2(FRE, i64, env, i64) +DEF_HELPER_2(FRES, i64, env, i64) +DEF_HELPER_2(FRSQRTE, i64, env, i64) +DEF_HELPER_2(FRSQRTES, i64, env, i64) DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) -DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(FTDIV, TCG_CALL_NO_RWG_SE, i32, i64, i64) +DEF_HELPER_FLAGS_1(FTSQRT, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..a314172a2e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -20,6 +20,12 @@ frt fra frb frc rc:bool @A .. frt:5 fra:5 frb:5 frc:5 . rc:1 +_tab frt fra frb rc:bool +@A_tab .. frt:5 fra:5 frb:5 . . rc:1 _tab + +_tac frt fra frc rc:bool +@A_tac .. frt:5 fra:5 . frc:5 . rc:1 _tac + _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb @@ -124,6 +130,9 @@ _bf bf ra rb @X_bf .. bf:3 .. ra:5 rb:5 .. . _bf +_bf_b bf rb +@X_bf_b .. bf:3 .. . rb:5 .. . _bf_b + @X_bf_ap_bp .. bf:3 .. 0 0 .. . _bf ra=%x_frap rb=%x_frbp @X_bf_a_bp .. bf:3 .. ra:5 0 .. . _bf rb=%x_frbp @@ -374,9 +383,42 @@ STFDUX 01 . .. 100111 - @X ### Floating-Point Arithmetic Instructions +FADD11 . . . - 10101 . @A_tab +FADDS 111011 . . . - 10101 . @A_tab + +FSUB11 . . . - 10100 . @A_tab +FSUBS 111011 . . . - 10100 . 
@A_tab + +FMUL11 . . - . 11001 . @A_tac +FMULS 111011 . . - . 11001 . @A_tac + +FDIV11 . . . - 10010 . @A_tab +FDIVS 111011 . . . - 10010 . @A_tab + FSQRT 11 . - . - 10110 . @A_tb FSQRTS 111011
[PATCH v2 1/2] target/ppc: Merge various fpu helpers
This patch merges the definitions of the following set of fpu helper methods, which are similar, using macros : 1. f{add, sub, mul, div}(s) 2. fre(s) 3. frsqrte(s) Signed-off-by: Chinmay Rath --- target/ppc/fpu_helper.c | 221 +++- 1 file changed, 62 insertions(+), 159 deletions(-) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 4b3dcad5d1..8d0cbe27e7 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -490,54 +490,12 @@ static void float_invalid_op_addsub(CPUPPCState *env, int flags, } } -/* fadd - fadd. */ -float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void addsub_flags_handler(CPUPPCState *env, int flags, +uintptr_t ra) { -float64 ret = float64_add(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} - -return ret; -} - -/* fadds - fadds. */ -float64 helper_fadds(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64r32_add(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} -return ret; -} - -/* fsub - fsub. */ -float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_sub(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); +float_invalid_op_addsub(env, flags, 1, ra); } - -return ret; -} - -/* fsubs - fsubs. 
*/ -float64 helper_fsubs(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64r32_sub(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_addsub(env, flags, 1, GETPC()); -} -return ret; } static void float_invalid_op_mul(CPUPPCState *env, int flags, @@ -550,29 +508,11 @@ static void float_invalid_op_mul(CPUPPCState *env, int flags, } } -/* fmul - fmul. */ -float64 helper_fmul(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_mul(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_mul(env, flags, 1, GETPC()); -} - -return ret; -} - -/* fmuls - fmuls. */ -float64 helper_fmuls(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void mul_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { -float64 ret = float64r32_mul(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_mul(env, flags, 1, GETPC()); +float_invalid_op_mul(env, flags, 1, ra); } -return ret; } static void float_invalid_op_div(CPUPPCState *env, int flags, @@ -587,36 +527,14 @@ static void float_invalid_op_div(CPUPPCState *env, int flags, } } -/* fdiv - fdiv. */ -float64 helper_fdiv(CPUPPCState *env, float64 arg1, float64 arg2) -{ -float64 ret = float64_div(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_div(env, flags, 1, GETPC()); -} -if (unlikely(flags & float_flag_divbyzero)) { -float_zero_divide_excp(env, GETPC()); -} - -return ret; -} - -/* fdivs - fdivs. 
*/ -float64 helper_fdivs(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void div_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { -float64 ret = float64r32_div(arg1, arg2, >fp_status); -int flags = get_float_exception_flags(>fp_status); - if (unlikely(flags & float_flag_invalid)) { -float_invalid_op_div(env, flags, 1, GETPC()); +float_invalid_op_div(env, flags, 1, ra); } if (unlikely(flags & float_flag_divbyzero)) { -float_zero_divide_excp(env, GETPC()); +float_zero_divide_excp(env, ra); } - -return ret; } static uint64_t float_invalid_cvt(CPUPPCState *env, int flags, @@ -812,81 +730,66 @@ float64 helper_##name(CPUPPCState *env, float64 arg) \ FPU_FSQRT(FSQRT, float64_sqrt) FPU_FSQRT(FSQRTS, float64r32_sqrt) -/* fre - fre. */ -float64 helper_fre(CPUPPCState *env, float64 arg) -{ -/* "Estimate" the reciprocal with actual division. */ -float64 ret = float64_div(float64_one, arg, >fp_status); -int flags = get_float_exception_flags(>fp_status); - -if (unlikely(flags & float_flag_invalid_sn
[PATCH v2 0/2] Moving fp arithmetic insns to decodetree.
This patch series moves floating-point arithmetic instructions from legacy to decodetree format. The first patch consolidates the common behaviour of floating-point helper functions using macros, reducing code duplication. The second patch moves all the floating arithmetic instructions to decodetree. Change log : v2 : Addressed review comments on v1 v1 : https://lore.kernel.org/qemu-devel/20240307110318.170319-1-ra...@linux.ibm.com/ Chinmay Rath (2): target/ppc: Merge various fpu helpers target/ppc: Move floating-point arithmetic instructions to decodetree. target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 235 +++- target/ppc/translate/fp-impl.c.inc | 285 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 242 insertions(+), 395 deletions(-) -- 2.39.3
Re: [PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
On 3/12/24 15:31, Nicholas Piggin wrote: On Thu Mar 7, 2024 at 9:03 PM AEST, Chinmay Rath wrote: diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 189cd8c979..03b84ba79b 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -30,96 +30,73 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op1, op2, set_fprf, type)\ -static void gen_f##name(DisasContext *ctx)\ -{ \ -TCGv_i64 t0; \ -TCGv_i64 t1; \ -TCGv_i64 t2; \ -TCGv_i64 t3; \ -if (unlikely(!ctx->fpu_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_FPU); \ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -t1 = tcg_temp_new_i64(); \ -t2 = tcg_temp_new_i64(); \ -t3 = tcg_temp_new_i64(); \ -gen_reset_fpstatus(); \ -get_fpr(t0, rA(ctx->opcode)); \ -get_fpr(t1, rC(ctx->opcode)); \ -get_fpr(t2, rB(ctx->opcode)); \ -gen_helper_f##name(t3, tcg_env, t0, t1, t2); \ -set_fpr(rD(ctx->opcode), t3); \ -if (set_fprf) { \ -gen_compute_fprf_float64(t3); \ -} \ -if (unlikely(Rc(ctx->opcode) != 0)) { \ -gen_set_cr1_from_fpscr(ctx); \ -} \ +static bool do_helper_acb(DisasContext *ctx, arg_A *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ +REQUIRE_INSNS_FLAGS(ctx, FLOAT); +REQUIRE_FPU(ctx); +TCGv_i64 t0, t1, t2, t3; Existing style prefers the variable declarations first I think. +t0 = tcg_temp_new_i64(); +t1 = tcg_temp_new_i64(); +t2 = tcg_temp_new_i64(); +t3 = tcg_temp_new_i64(); +gen_reset_fpstatus(); +get_fpr(t0, a->fra); +get_fpr(t1, a->frc); +get_fpr(t2, a->frb); +helper(t3, tcg_env, t0, t1, t2); +set_fpr(a->frt, t3); +gen_compute_fprf_float64(t3); +if (unlikely(a->rc != false)) { This reads better without the double negative. I.e., just if (unlikely(a->rc)) { Otherwise the decodetree parts look good, with those updated and split out from the helper generation: Reviewed-by: Nicholas Piggin Thanks Nick, I shall post v2 with suggested updates. 
Regards, Chinmay Thanks, Nick +gen_set_cr1_from_fpscr(ctx); +} +return true; } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, 0x3F, op2, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, 0x3B, op2, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op1, op2, inval, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx)\ -{ \ -TCGv_i64 t0; \ -TCGv_i64 t1; \ -TCGv_i64 t2; \ -if (unlikely(!ctx->fpu_enabled)) {\ -gen_exception(ctx, POWERPC_EXCP_FPU); \ -return; \ -} \ -t0 = tcg_temp_new_i64(); \ -t1 = tcg_temp_new_i64();
Re: [PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
On 3/12/24 19:59, Peter Maydell wrote: On Tue, 12 Mar 2024 at 14:25, Nicholas Piggin wrote: On Wed Mar 13, 2024 at 12:01 AM AEST, Richard Henderson wrote: On 3/11/24 23:36, Nicholas Piggin wrote: [snip] #define FPU_HELPER(name, op, flags_handler) \ float64 helper_##name(CPUPPCState *env, float64 arg1, float64 arg2) \ { \ float64 ret = op(arg1, arg2, &env->fp_status);\ int flags = get_float_exception_flags(&env->fp_status); \ flags_handler(env, flags) \ return ret; \ } static inline void addsub_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_addsub(env, flags, 1, GETPC()); } } static inline void mul_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_mul(env, flags, 1, GETPC()); } } static inline void div_flags_handler(CPUPPCState *env, int flags) { if (unlikely(flags & float_flag_invalid)) { float_invalid_op_div(env, flags, 1, GETPC()); } if (unlikely(flags & float_flag_divbyzero)) { float_zero_divide_excp(env, GETPC()); } } Beware -- GETPC() may only be called from the outermost helper. Ah, because it's using __builtin_return_address. Good to know. Using always_inline and a comment should do the trick then. The standard way to fix this is that you call GETPC() at the outermost helper and then pass that value around as an extra uintptr_t ra argument to called functions that need it. Thanks Peter, Nick and Richard. I shall post v2 with suggested updates. Regards, Chinmay thanks -- PMM
[PATCH] target/ppc: Move floating-point arithmetic instructions to decodetree.
This patch moves the below instructions to decodetree specification : f{add, sub, mul, div, re, rsqrte, madd, msub, nmadd, nmsub}[s][.] : A-form ft{div, sqrt} : X-form With this patch, all the floating-point arithmetic instructions have been moved to decodetree. The patch also merges the definitions of different sets of helper methods of the above instructions, which are similar, using macros. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/helper.h| 44 ++--- target/ppc/insn32.decode | 42 + target/ppc/fpu_helper.c| 265 +- target/ppc/translate/fp-impl.c.inc | 288 +++-- target/ppc/translate/fp-ops.c.inc | 31 5 files changed, 262 insertions(+), 408 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..f177d5b906 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -110,32 +110,32 @@ DEF_HELPER_2(friz, i64, env, i64) DEF_HELPER_2(frip, i64, env, i64) DEF_HELPER_2(frim, i64, env, i64) -DEF_HELPER_3(fadd, f64, env, f64, f64) -DEF_HELPER_3(fadds, f64, env, f64, f64) -DEF_HELPER_3(fsub, f64, env, f64, f64) -DEF_HELPER_3(fsubs, f64, env, f64, f64) -DEF_HELPER_3(fmul, f64, env, f64, f64) -DEF_HELPER_3(fmuls, f64, env, f64, f64) -DEF_HELPER_3(fdiv, f64, env, f64, f64) -DEF_HELPER_3(fdivs, f64, env, f64, f64) -DEF_HELPER_4(fmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64) +DEF_HELPER_3(FADD, f64, env, f64, f64) +DEF_HELPER_3(FADDS, f64, env, f64, f64) +DEF_HELPER_3(FSUB, f64, env, f64, f64) +DEF_HELPER_3(FSUBS, f64, env, f64, f64) +DEF_HELPER_3(FMUL, f64, env, f64, f64) 
+DEF_HELPER_3(FMULS, f64, env, f64, f64) +DEF_HELPER_3(FDIV, f64, env, f64, f64) +DEF_HELPER_3(FDIVS, f64, env, f64, f64) +DEF_HELPER_4(FMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUBS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUBS, i64, env, i64, i64, i64) DEF_HELPER_2(FSQRT, f64, env, f64) DEF_HELPER_2(FSQRTS, f64, env, f64) -DEF_HELPER_2(fre, i64, env, i64) -DEF_HELPER_2(fres, i64, env, i64) -DEF_HELPER_2(frsqrte, i64, env, i64) -DEF_HELPER_2(frsqrtes, i64, env, i64) +DEF_HELPER_2(FRE, i64, env, i64) +DEF_HELPER_2(FRES, i64, env, i64) +DEF_HELPER_2(FRSQRTE, i64, env, i64) +DEF_HELPER_2(FRSQRTES, i64, env, i64) DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) -DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(FTDIV, TCG_CALL_NO_RWG_SE, i32, i64, i64) +DEF_HELPER_FLAGS_1(FTSQRT, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..a314172a2e 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -20,6 +20,12 @@ frt fra frb frc rc:bool @A .. frt:5 fra:5 frb:5 frc:5 . rc:1 +_tab frt fra frb rc:bool +@A_tab .. frt:5 fra:5 frb:5 . . rc:1 _tab + +_tac frt fra frc rc:bool +@A_tac .. frt:5 fra:5 . frc:5 . rc:1 _tac + _tb frt frb rc:bool @A_tb .. frt:5 . frb:5 . . rc:1 _tb @@ -124,6 +130,9 @@ _bf bf ra rb @X_bf .. bf:3 .. ra:5 rb:5 .. . _bf +_bf_b bf rb +@X_bf_b .. bf:3 .. . rb:5 .. . _bf_b + @X_bf_ap_bp .. bf:3 .. 0 0 .. . _bf ra=%x_frap rb=%x_frbp @X_bf_a_bp .. bf:3 .. ra:5 0 .. . _bf rb=%x_frbp @@ -374,9 +383,42 @@ STFDUX 01 . .. 100111 - @X ### Floating-Point Arithmetic Instructions +FADD11 . . . 
- 10101 . @A_tab +FADDS 111011 . . . - 10101 . @A_tab + +FSUB11 . . . - 10100 . @A_tab +FSUBS 111011 . . . - 10100 . @A_tab + +FMUL11 . . - . 11001 . @A_tac +FMULS 111011 . . - . 11001 . @A_tac + +FDIV11 . . . - 10010 . @A_tab +FDIVS 111011
[PATCH v2] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
This patch moves the below instructions to decodetree specification: {add, subf}[c,e,me,ze][o][.] : XO-form addic[.], subfic : D-form addex : Z23-form This patch introduces XO form instructions into decode tree specification, for which all the four variations([o][.]) have been handled with a single pattern. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- Changes v1 -> v2 : Reused X format for ADDEX instead of creating a new Z23_tab_cy format. (Richard) Added necessary instruction flag checks for ADDEX. (self-review) --- target/ppc/insn32.decode | 26 target/ppc/translate.c | 136 - target/ppc/translate/fixedpoint-impl.c.inc | 70 +++ 3 files changed, 96 insertions(+), 136 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..eada59f59f 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,12 @@ _ara @X_a.. ra:3 .. . . .. . _a + rt ra rb oe:bool rc:bool +@XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 + +_ta rt ra oe:bool rc:bool +@XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -322,10 +328,30 @@ CMPLI 001010 ... - . . @D_bfu ### Fixed-Point Arithmetic Instructions +ADD 01 . . . . 11010 . @XO +ADDC01 . . . . 01010 . @XO +ADDE01 . . . . 010001010 . @XO + +# ADDEX is Z23-form, with CY=0; all other values for CY are reserved. +# This works out the same as X-form. +ADDEX 01 . . . 00 10101010 - @X + ADDI001110 . . @D ADDIS 00 . . @D +ADDIC 001100 . . @D +ADDIC_ 001101 . . @D ADDPCIS 010011 . . .. 00010 . @DX +ADDME 01 . . - . 011101010 . @XO_ta +ADDZE 01 . . - . 011001010 . @XO_ta + +SUBF01 . . . . 000101000 . @XO +SUBFIC 001000 . . @D +SUBFC 01 . . . . 01000 . @XO +SUBFE 01 . . . . 010001000 . @XO + +SUBFME 01 . . - . 011101000 . @XO_ta +SUBFZE 01 . . - . 011001000 . 
@XO_ta ## Fixed-Point Logical Instructions diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 049f636927..51dc1e79cc 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1697,61 +1697,6 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ret, t0); } } -/* Add functions with two operands */ -#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} -/* Add functions with one operand and one immediate */ -#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca,\ -add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -TCGv t0 = tcg_constant_tl(const_val); \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], t0,\ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} - -/* add add. addo addo. */ -GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0) -GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1) -/* addc addc. addco addco. */ -GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0) -GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1) -/* adde adde. addeo addeo. */ -GEN_INT_ARITH_ADD(adde, 0x04, cpu_ca, 1, 1, 0) -GEN_INT_ARITH_ADD(addeo, 0x14, cpu_ca, 1, 1, 1) -
Re: [RFC PATCH] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
Hi Richard, On 2/13/24 03:51, Richard Henderson wrote: On 2/9/24 01:35, Chinmay Rath wrote: +&Z23_tab_cy rt ra rb cy +@Z23_tab_cy .. rt:5 ra:5 rb:5 cy:2 . &Z23_tab_cy ... +ADDEX 01 . . . .. 10101010 - @Z23_tab_cy ... +static bool trans_ADDEX(DisasContext *ctx, arg_Z23_tab_cy *a) +{ + gen_op_arith_add(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + cpu_ov, cpu_ov32, true, true, false, false); + return true; +} CY != 0 is reserved. While you could diagnose this in trans_ADDEX, it seems cleaner to simply match 00 in the CY field until a future ISA defines something else. All that is required is a comment in the decodetree entry. # Z23-form, with CY=0; all other values for CY are reserved. # This works out the same as X-form. ADDEX 01 . . . 00 10101010 - @X Thanks for your review comments. I shall update as suggested in v2. Regards, Chinmay r~
[RFC PATCH] target/ppc: Move add and subf type fixed-point arithmetic instructions to decodetree
This patch moves the below instructions to decodetree specification: {add, subf}[c,e,me,ze][o][.] : XO-form addic[.], subfic : D-form addex : Z23-form This patch introduces XO form instructions into decode tree specification, for which all the four variations([o][.]) have been handled with a single pattern. The changes were verified by validating that the tcg ops generated by those instructions remain the same, which were captured with the '-d in_asm,op' flag. Signed-off-by: Chinmay Rath --- target/ppc/insn32.decode | 26 target/ppc/translate.c | 136 - target/ppc/translate/fixedpoint-impl.c.inc | 69 +++ 3 files changed, 95 insertions(+), 136 deletions(-) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 4fcf3af8d0..ddaa47210a 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -187,6 +187,12 @@ _ara @X_a.. ra:3 .. . . .. . _a + rt ra rb oe:bool rc:bool +@XO .. rt:5 ra:5 rb:5 oe:1 . rc:1 + +_ta rt ra oe:bool rc:bool +@XO_ta .. rt:5 ra:5 . oe:1 . rc:1 _ta + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -239,6 +245,9 @@ _tabfrt fra frb rmc rc:bool @Z23_tab.. frt:5 fra:5 frb:5 rmc:2 rc:1_tab +_tab_cy rt ra rb cy +@Z23_tab_cy .. rt:5 ra:5 rb:5 cy:2 . _tab_cy + %z23_frtp 22:4 !function=times_2 %z23_frap 17:4 !function=times_2 %z23_frbp 12:4 !function=times_2 @@ -322,10 +331,27 @@ CMPLI 001010 ... - . . @D_bfu ### Fixed-Point Arithmetic Instructions +ADD 01 . . . . 11010 . @XO +ADDC01 . . . . 01010 . @XO +ADDE01 . . . . 010001010 . @XO +ADDEX 01 . . . .. 10101010 - @Z23_tab_cy + ADDI001110 . . @D ADDIS 00 . . @D +ADDIC 001100 . . @D +ADDIC_ 001101 . . @D ADDPCIS 010011 . . .. 00010 . @DX +ADDME 01 . . - . 011101010 . @XO_ta +ADDZE 01 . . - . 011001010 . @XO_ta + +SUBF01 . . . . 000101000 . @XO +SUBFIC 001000 . . @D +SUBFC 01 . . . . 01000 . @XO +SUBFE 01 . . . . 010001000 . @XO + +SUBFME 01 . . - . 011101000 . @XO_ta +SUBFZE 01 . . - . 011001000 . 
@XO_ta ## Fixed-Point Logical Instructions diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 049f636927..51dc1e79cc 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1697,61 +1697,6 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ret, t0); } } -/* Add functions with two operands */ -#define GEN_INT_ARITH_ADD(name, opc3, ca, add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} -/* Add functions with one operand and one immediate */ -#define GEN_INT_ARITH_ADD_CONST(name, opc3, const_val, ca,\ -add_ca, compute_ca, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ -TCGv t0 = tcg_constant_tl(const_val); \ -gen_op_arith_add(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], t0,\ - ca, glue(ca, 32),\ - add_ca, compute_ca, compute_ov, Rc(ctx->opcode));\ -} - -/* add add. addo addo. */ -GEN_INT_ARITH_ADD(add, 0x08, cpu_ca, 0, 0, 0) -GEN_INT_ARITH_ADD(addo, 0x18, cpu_ca, 0, 0, 1) -/* addc addc. addco addco. */ -GEN_INT_ARITH_ADD(addc, 0x00, cpu_ca, 0, 1, 0) -GEN_INT_ARITH_ADD(addco, 0x10, cpu_ca, 0, 1, 1) -/* adde adde. addeo addeo. */ -GEN_INT_ARITH_ADD(add