I've committed this patch to fix a couple of bugs introduced in the recent CMul patch.

First, the fmsubadd insn was accidentally all adds and no substracts.

Second, there were input dependencies on the undefined output register which caused the compiler to reserve unnecessary slots in the stack-frame.

Both issues are now fixed.

This patch is already committed to OG12. I'll backport it to GCC 13 shortly.

Andrew
amdgcn: Fix addsub bug

The vec_fmsubadd instuction actually had add twice, by mistake.

Also improve code-gen for all the complex patterns by using properly
undefined values.  Mostly this just prevents the compiler reserving space
in the stack frame.

gcc/ChangeLog:

        * config/gcn/gcn-valu.md (cmul<conj_op><mode>3): Use gcn_gen_undef.
        (cml<addsub_as><mode>4): Likewise.
        (vec_addsub<mode>3): Likewise.
        (cadd<rot><mode>3): Likewise.
        (vec_fmaddsub<mode>4): Likewise.
        (vec_fmsubadd<mode>4): Likewise, and use sub for the odd lanes.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 44c48468dd6..7290cdc2fd0 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -2323,8 +2323,9 @@ (define_expand "cmul<conj_op><mode>3"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm, dest, even));
-                                                             // a*c-b*d 0
+    emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm,
+                                              gcn_gen_undef (<MODE>mode),
+                                              even));            // a*c-b*d 0
 
     rtx t2_perm = gen_reg_rtx (<MODE>mode);
     emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2));          // b*c a*d
@@ -2368,7 +2369,8 @@ (define_expand "cml<addsub_as><mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm, dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm,
+                                    gcn_gen_undef (<MODE>mode), even));
 
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
@@ -2392,7 +2394,8 @@ (define_expand "vec_addsub<mode>3"
     rtx dest = operands[0];
     rtx x = operands[1];
     rtx y = operands[2];
-    emit_insn (gen_sub<mode>3_exec (dest, x, y, dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode),
+                                    even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd));
@@ -2419,7 +2422,9 @@ (define_expand "cadd<rot><mode>3"
 
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
-    emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y, dest, even));
+    emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y,
+                                              gcn_gen_undef (<MODE>mode),
+                                              even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd));
@@ -2439,7 +2444,8 @@ (define_expand "vec_fmaddsub<mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3],
+                                    gcn_gen_undef (<MODE>mode), even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
@@ -2459,10 +2465,11 @@ (define_expand "vec_fmsubadd<mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, even));
+    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3],
+                                    gcn_gen_undef (<MODE>mode), even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
-    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd));
 
     DONE;
   })

Reply via email to