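The current fnma<mode>4 patterns (the standard "fnmam4" pattern name)
for LSX and LASX require the third source operand to be assigned the
same hard register as the destination operand.  This restriction is
required neither by the instruction manual nor by the standard pattern
definition, and it introduces additional data dependencies and extra
instructions (for example, the [x]vori.b instructions that the new test
checks are absent).

Replace the two patterns with a single one in simd.md that uses a plain
"f" constraint for operand 3, passes operand 3 as the last operand of
[x]vfnmsub, and is only enabled when !HONOR_SIGNED_ZEROS holds for the
mode.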

gcc/ChangeLog:

        * config/loongarch/lasx.md (fnma<mode>4): Remove.
        * config/loongarch/lsx.md (fnma<mode>4): Remove.
        * config/loongarch/simd.md (fnma<mode>4): Simplify and correct.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/fnmam4-vec.c: New test.
---
 gcc/config/loongarch/lasx.md                    | 10 ----------
 gcc/config/loongarch/lsx.md                     | 10 ----------
 gcc/config/loongarch/simd.md                    | 11 +++++++++++
 gcc/testsuite/gcc.target/loongarch/fnmam4-vec.c | 14 ++++++++++++++
 4 files changed, 25 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/fnmam4-vec.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3d71f30a54b..9df572beb9e 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -993,16 +993,6 @@ (define_insn "fma<mode>4"
   [(set_attr "type" "simd_fmadd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fnma<mode>4"
-  [(set (match_operand:FLASX 0 "register_operand" "=f")
-       (fma:FLASX (neg:FLASX (match_operand:FLASX 1 "register_operand" "f"))
-                  (match_operand:FLASX 2 "register_operand" "f")
-                  (match_operand:FLASX 3 "register_operand" "0")))]
-  "ISA_HAS_LASX"
-  "xvfnmsub.<flasxfmt>\t%u0,%u1,%u2,%u0"
-  [(set_attr "type" "simd_fmadd")
-   (set_attr "mode" "<MODE>")])
-
 (define_expand "sqrt<mode>2"
   [(set (match_operand:FLASX 0 "register_operand")
     (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))]
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index fb0236ba0f1..cf48a16b69e 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -852,16 +852,6 @@ (define_insn "fma<mode>4"
   [(set_attr "type" "simd_fmadd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fnma<mode>4"
-  [(set (match_operand:FLSX 0 "register_operand" "=f")
-       (fma:FLSX (neg:FLSX (match_operand:FLSX 1 "register_operand" "f"))
-                 (match_operand:FLSX 2 "register_operand" "f")
-                 (match_operand:FLSX 3 "register_operand" "0")))]
-  "ISA_HAS_LSX"
-  "vfnmsub.<flsxfmt>\t%w0,%w1,%w2,%w0"
-  [(set_attr "type" "simd_fmadd")
-   (set_attr "mode" "<MODE>")])
-
 (define_expand "sqrt<mode>2"
   [(set (match_operand:FLSX 0 "register_operand")
     (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))]
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 4156b269f9a..88ab138a8c6 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -431,6 +431,17 @@ (define_insn "<optab><mode>3"
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 
+;; <x>vfnmsub.{s/d}
+(define_insn "fnma<mode>4"
+  [(set (match_operand:FVEC 0 "register_operand" "=f")
+       (fma:FVEC (neg:FVEC (match_operand:FVEC 1 "register_operand" "f"))
+                 (match_operand:FVEC 2 "register_operand" "f")
+                 (match_operand:FVEC 3 "register_operand" "f")))]
+  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "<x>vfnmsub.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"
+  [(set_attr "type" "simd_fmadd")
+   (set_attr "mode" "<MODE>")])
+
 ;; <x>vfcmp.*.{s/d} with defined RTX code
 ;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have
 ;; to reverse the operands ourselves :(.
diff --git a/gcc/testsuite/gcc.target/loongarch/fnmam4-vec.c b/gcc/testsuite/gcc.target/loongarch/fnmam4-vec.c
new file mode 100644
index 00000000000..09693039deb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fnmam4-vec.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mlasx -ftree-vectorize" } */
+/* { dg-require-effective-target loongarch_asx } */
+
+void
+foo (float *u, float x, float *y, float z)
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    *(u++) = (x - y[i] * z);
+}
+
+/* { dg-final { scan-assembler-not "\tvori.b"} } */
+/* { dg-final { scan-assembler-not "\txvori.b"} } */
-- 
2.50.0

Reply via email to