This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a (possibly negated) minus-mult
RTL instruction.

Before this patch, we have six instructions, e.g.:
  vsetivli       zero,4,e32,m1,ta,ma
  fcvt.s.h       fa5,fa5
  vfmv.v.f       v4,fa5
  vfwcvt.f.f.v   v1,v3
  vsetvli        zero,zero,e32,m1,ta,ma
  vfnmadd.vv     v1,v4,v2

After, we get only one:
  vfwnmacc.vf     v1,fa5,v2

        PR target/119100

gcc/ChangeLog:

        * config/riscv/autovec-opt.md (*vfwnmacc_vf_<mode>): New pattern.
        (*vfwnmsac_vf_<mode>): New pattern.
        * config/riscv/riscv.cc (get_vector_binary_rtx_cost): Add support for a
        vec_duplicate in a neg.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwnmacc and
        vfwnmsac.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c: New test.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c: New test.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c: New test.
        * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c: New test.
---
 gcc/config/riscv/autovec-opt.md               | 51 +++++++++++++++++++
 gcc/config/riscv/riscv.cc                     |  5 +-
 .../riscv/rvv/autovec/vx_vf/vf-1-f16.c        |  4 ++
 .../riscv/rvv/autovec/vx_vf/vf-1-f32.c        |  4 ++
 .../riscv/rvv/autovec/vx_vf/vf-2-f16.c        |  6 ++-
 .../riscv/rvv/autovec/vx_vf/vf-2-f32.c        |  6 ++-
 .../riscv/rvv/autovec/vx_vf/vf-3-f16.c        |  4 ++
 .../riscv/rvv/autovec/vx_vf/vf-3-f32.c        |  4 ++
 .../riscv/rvv/autovec/vx_vf/vf-4-f16.c        |  2 +
 .../riscv/rvv/autovec/vx_vf/vf-4-f32.c        |  2 +
 .../rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c | 17 +++++++
 .../rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c | 17 +++++++
 .../rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c | 17 +++++++
 .../rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c | 17 +++++++
 14 files changed, 151 insertions(+), 5 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c

diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index f372f0e6a69..12217c03304 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -1844,3 +1844,54 @@ (define_insn_and_split "*extend_vf_<mode>"
   }
   [(set_attr "type" "vfwmuladd")]
 )
+
+;; vfwnmacc.vf
+(define_insn_and_split "*vfwnmacc_vf_<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+    (minus:VWEXTF
+      (mult:VWEXTF
+       (neg:VWEXTF
+         (vec_duplicate:VWEXTF
+           (float_extend:<VEL>
+             (match_operand:<VSUBEL> 2 "register_operand"))))
+       (float_extend:VWEXTF
+         (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
+      (match_operand:VWEXTF 1 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+    riscv_vector::emit_vlmax_insn(
+       code_for_pred_widen_mul_neg_scalar(MINUS, <MODE>mode),
+       riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+    DONE;
+  }
+  [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfwnmsac.vf
+(define_insn_and_split "*vfwnmsac_vf_<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+    (minus:VWEXTF
+      (match_operand:VWEXTF 1 "register_operand")
+      (mult:VWEXTF
+       (float_extend:VWEXTF
+         (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+       (vec_duplicate:VWEXTF
+         (float_extend:<VEL>
+           (match_operand:<VSUBEL> 2 "register_operand"))))))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+    riscv_vector::emit_vlmax_insn(
+       code_for_pred_widen_mul_neg_scalar (PLUS, <MODE>mode),
+       riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+    DONE;
+  }
+  [(set_attr "type" "vfwmuladd")]
+)
diff --git gcc/config/riscv/riscv.cc gcc/config/riscv/riscv.cc
index a4428f0e96d..bde93ae8e15 100644
--- gcc/config/riscv/riscv.cc
+++ gcc/config/riscv/riscv.cc
@@ -3965,11 +3965,14 @@ get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
 
   rtx op_0 = XEXP (x, 0);
   rtx op_1 = XEXP (x, 1);
+  rtx neg;
 
   if (GET_CODE (op_0) == VEC_DUPLICATE
       || GET_CODE (op_1) == VEC_DUPLICATE)
     return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
-  else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE)
+  else if (GET_CODE (neg = op_0) == NEG
+          && (GET_CODE (op_1) == VEC_DUPLICATE
+              || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
     return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
   else
     return COSTS_N_INSNS (1);
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
index b17fd8ec1f1..811f26c156a 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
@@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, -, nacc)
 DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, -, nsac)
 DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, +, +, acc)
 DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac)
+DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, +, -, nacc)
+DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, -, nsac)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac)
 /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwnmacc.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwnmsac.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
index efd887dc8bf..ca82ead9d28 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
@@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_0 (float, +, -, nacc)
 DEF_VF_MULOP_ACC_CASE_0 (float, -, -, nsac)
 DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, +, acc)
 DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac)
+DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, -, nacc)
+DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, -, nsac)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac)
 /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwnmacc.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwnmsac.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
index 84987a9c0f4..3a39303f942 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
@@ -13,5 +13,7 @@
 /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 2 } } */
-/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */
+/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */
+/* { dg-final { scan-assembler-times {vfmv.v.f} 12 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
index dbd3d022d5e..b4618bae70e 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
@@ -13,5 +13,7 @@
 /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 2 } } */
-/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */
+/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */
+/* { dg-final { scan-assembler-times {vfmv.v.f} 12 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
index 5f0d7585e65..58afaa4aef9 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
@@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, -, nacc, 
VF_MULOP_ACC_BODY_X128)
 DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, -, nsac, VF_MULOP_ACC_BODY_X128)
 DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, +, acc)
 DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac)
+DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, -, nacc)
+DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, -, nsac)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac)
 /* { dg-final { scan-assembler {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler {vfwnmsac.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
index 951b0ef2a67..0e95774a489 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
@@ -13,6 +13,8 @@ DEF_VF_MULOP_ACC_CASE_1 (float, +, -, nacc, 
VF_MULOP_ACC_BODY_X128)
 DEF_VF_MULOP_ACC_CASE_1 (float, -, -, nsac, VF_MULOP_ACC_BODY_X128)
 DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, +, acc)
 DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac)
+DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, -, nacc)
+DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, -, nsac)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -24,3 +26,5 @@ DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac)
 /* { dg-final { scan-assembler {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler {vfwnmsac.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
index a4edd92f1ef..559df6c7976 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
@@ -13,4 +13,6 @@
 /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */
 /* { dg-final { scan-assembler {fcvt.s.h} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
index 4eb28e585a0..03f9c5a3d86 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
@@ -13,4 +13,6 @@
 /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmacc.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmsac.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmacc.vf} } } */
+/* { dg-final { scan-assembler-not {vfwnmsac.vf} } } */
 /* { dg-final { scan-assembler {fcvt.d.s} } } */
diff --git 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c
new file mode 100644
index 00000000000..6be7d720603
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f16.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1    _Float16
+#define T2    float
+#define NAME nacc
+#define OP +
+#define NEG -
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) 
RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_mulop_widen_run.h"
diff --git 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c
new file mode 100644
index 00000000000..851c335d64d
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmacc-run-1-f32.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1    float
+#define T2    double
+#define NAME nacc
+#define OP +
+#define NEG -
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) 
RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_mulop_widen_run.h"
diff --git 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c
new file mode 100644
index 00000000000..dd28234b6e0
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f16.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1    _Float16
+#define T2    float
+#define NAME nsac
+#define OP -
+#define NEG -
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) 
RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_mulop_widen_run.h"
diff --git 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c
new file mode 100644
index 00000000000..9eacacea44b
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwnmsac-run-1-f32.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_mulop.h"
+
+#define T1    float
+#define T2    double
+#define NAME nsac
+#define OP -
+#define NEG -
+
+DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) 
RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_mulop_widen_run.h"
-- 
2.39.5

Reply via email to