Re: [PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree

2024-05-30 Thread Peter Maydell
On Tue, 28 May 2024 at 21:32, Richard Henderson wrote:
>
> These are the only instructions in the 3 source scalar class.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree

2024-05-28 Thread Richard Henderson
These are the only instructions in the 3 source scalar class.

Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  10 ++
 target/arm/tcg/translate-a64.c | 231 -
 2 files changed, 93 insertions(+), 148 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index f7f897f9fc..6f6cd805b7 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -32,6 +32,7 @@
 &rr_e           rd rn esz
 &rrr_e          rd rn rm esz
 &rrx_e          rd rn rm idx esz
+&rrrr_e         rd rn rm ra esz
 &qrr_e          q rd rn esz
 &qrrr_e         q rd rn rm esz
 &qrrx_e         q rd rn rm idx esz
@@ -998,3 +999,12 @@ SQDMULH_vi      0.00 1111 10 . ..... 1100 . 0 ..... .....   @qrrx_s
 
 SQRDMULH_vi     0.00 1111 01 .. .... 1101 . 0 ..... .....   @qrrx_h
 SQRDMULH_vi     0.00 1111 10 . ..... 1101 . 0 ..... .....   @qrrx_s
+
+# Floating-point data-processing (3 source)
+
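+@rrrr_hsd       .... .... .. . rm:5  . ra:5  rn:5  rd:5     &rrrr_e esz=%esz_hsd
+
+FMADD           0001 1111 .. 0 ..... 0 ..... ..... .....    @rrrr_hsd
+FMSUB           0001 1111 .. 0 ..... 1 ..... ..... .....    @rrrr_hsd
+FNMADD          0001 1111 .. 1 ..... 0 ..... ..... .....    @rrrr_hsd
+FNMSUB          0001 1111 .. 1 ..... 1 ..... ..... .....    @rrrr_hsd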
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 14226c56cf..78a2e6d692 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5866,6 +5866,88 @@ static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
 return true;
 }
 
+/*
+ * Floating-point data-processing (3 source)
+ */
+
+static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
+{
+    TCGv_ptr fpst;
+
+    /*
+     * These are fused multiply-add.  Note that doing the negations here
+     * as separate steps is correct: an input NaN should come out with
+     * its sign bit flipped if it is a negated-input.
+     */
+    switch (a->esz) {
+    case MO_64:
+        if (fp_access_check(s)) {
+            TCGv_i64 tn = read_fp_dreg(s, a->rn);
+            TCGv_i64 tm = read_fp_dreg(s, a->rm);
+            TCGv_i64 ta = read_fp_dreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negd(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negd(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR);
+            gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
+            write_fp_dreg(s, a->rd, ta);
+        }
+        break;
+
+    case MO_32:
+        if (fp_access_check(s)) {
+            TCGv_i32 tn = read_fp_sreg(s, a->rn);
+            TCGv_i32 tm = read_fp_sreg(s, a->rm);
+            TCGv_i32 ta = read_fp_sreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negs(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negs(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR);
+            gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
+            write_fp_sreg(s, a->rd, ta);
+        }
+        break;
+
+    case MO_16:
+        if (!dc_isar_feature(aa64_fp16, s)) {
+            return false;
+        }
+        if (fp_access_check(s)) {
+            TCGv_i32 tn = read_fp_hreg(s, a->rn);
+            TCGv_i32 tm = read_fp_hreg(s, a->rm);
+            TCGv_i32 ta = read_fp_hreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negh(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negh(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR_F16);
+            gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
+            write_fp_sreg(s, a->rd, ta);
+        }
+        break;
+
+    default:
+        return false;
+    }
+    return true;
+}
+
+TRANS(FMADD, do_fmadd, a, false, false)
+TRANS(FNMADD, do_fmadd, a, true, true)
+TRANS(FMSUB, do_fmadd, a, false, true)
+TRANS(FNMSUB, do_fmadd, a, true, false)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -7665,152 +7747,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
 }
 }
 
-/* Floating-point data-processing (3 source) - single precision */
-static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
-  int rd, int rn, int rm, int ra)
-{
-TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
-TCGv_i32 tcg_res = tcg_temp_new_i32();
-TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
-tcg_op1 = read_fp_sreg(s, rn);
-tcg_op2 = read_fp_sreg(s, rm);
-tcg_op3 = read_fp_sreg(s, ra);
-
-/* These are fused multiply-add, and must be done as one
- * floating point operation with no rounding between the
- * multiplication and addition steps.
- * NB that doing the negations here as separate steps is
- * correct : an input NaN should come out with its sign bit
- * flipped if it is a