https://gcc.gnu.org/g:55e1cb227f9f1f2ccf1906d69c1301351fd69fa4

commit 55e1cb227f9f1f2ccf1906d69c1301351fd69fa4
Author: Michael Meissner <[email protected]>
Date:   Tue Oct 7 13:56:35 2025 -0400

    Add define_peephole2 for back to back __bfloat16 operations.
    
    2025-10-07  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/float16.md (bfloat16_binary_op_internal3): Don't
            set the type attribute.
            (bfloat16_binary_op_internal4): Likewise.
            (bfloat16_binary_op_internal5): Likewise.
            (bfloat16_binary_op_internal6): Likewise.
            (peephole2): Eliminate xscvspdp and xscvdpspn between multiple
            __bfloat16 operations.

Diff:
---
 gcc/config/rs6000/float16.md | 61 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 8 deletions(-)

diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md
index 2bc552d344b3..43ee9e701d88 100644
--- a/gcc/config/rs6000/float16.md
+++ b/gcc/config/rs6000/float16.md
@@ -498,8 +498,7 @@
                              operands[5],
                              operands[6]);
   DONE;
-}
-  [(set_attr "type" "vecperm")])
+})
 
 (define_insn_and_split "*bfloat16_binary_op_internal4"
   [(set (match_operand:BF 0 "vsx_register_operand" "=wa,&wa,&wa")
@@ -524,8 +523,7 @@
                              operands[5],
                              operands[6]);
   DONE;
-}
-  [(set_attr "type" "vecperm")])
+})
 
 (define_insn_and_split "*bfloat16_binary_op_internal5"
   [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
@@ -549,8 +547,7 @@
                              operands[5],
                              operands[6]);
   DONE;
-}
-  [(set_attr "type" "vecperm")])
+})
 
 (define_insn_and_split "*bfloat16_binary_op_internal6"
   [(set (match_operand:BF 0 "vsx_register_operand" "=wa")
@@ -575,9 +572,57 @@
                              operands[5],
                              operands[6]);
   DONE;
-}
-  [(set_attr "type" "vecperm")])
+})
+
+;; If we do multiple __bfloat16 operations, between the first and
+;; second operation, GCC will want to convert the first operation from
+;; V4SFmode to SFmode and then reconvert it back to V4SFmode.  On the
+;; PowerPC, this is complicated because internally in the vector
+;; register, SFmode values are stored as DFmode values.
+;;
+;; For example, if we have:
+;;
+;;     __bfloat16 a, b, c, d;
+;;     a = b + c + d;
+;;
+;; We would generate:
+;;
+;;      lxsihzx 0,4,2           // load b as BFmode
+;;      lxsihzx 11,5,2          // load c as BFmode
+;;      lxsihzx 12,6,2          // load d as BFmode
+;;      xxspltw 0,0,1           // shift b into bits 16..31
+;;      xxspltw 11,11,1         // shift c into bits 16..31
+;;      xxspltw 12,12,1         // shift d into bits 16..31
+;;      xvcvbf16spn 0,0         // convert b into V4SFmode
+;;      xvcvbf16spn 11,11       // convert c into V4SFmode
+;;      xvcvbf16spn 12,12       // convert d into V4SFmode
+;;      xvaddsp 0,0,11          // calculate b+c as V4SFmode
+;;      xscvspdp 0,0            // convert b+c into DFmode memory format
+;;      xscvdpspn 0,0           // convert b+c into SFmode memory format
+;;      xxspltw 0,0,0           // convert b+c into V4SFmode
+;;      xvaddsp 12,12,0         // calculate b+c+d as V4SFmode
+;;      xvcvspbf16 12,12        // convert b+c+d into BFmode memory format
+;;      stxsihx 12,3,2          // store b+c+d
+;;
+;; With this peephole2, we can eliminate the xscvspdp and xscvdpspn
+;; instructions.
+;;
+;; We keep the xxspltw between the two xvaddsp's in case the user
+;; explicitly did an SFmode extract of element 0 followed by a splat
+;; operation.
+
+(define_peephole2
+  [(set (match_operand:SF 0 "vsx_register_operand")
+       (unspec:SF
+        [(match_operand:V4SF 1 "vsx_register_operand")]
+        UNSPEC_VSX_CVSPDP))
+   (set (match_operand:V4SF 2 "vsx_register_operand")
+       (unspec:V4SF [(match_dup 0)] UNSPEC_VSX_CVDPSPN))]
+  "REGNO (operands[1]) == REGNO (operands[2])
+   || peep2_reg_dead_p (1, operands[1])"
+  [(set (match_dup 2) (match_dup 1))])
 
+
 ;; Duplicate a HF/BF value so it can be used for xvcvhpspn/xvcvbf16spn.
 ;; Because xvcvhpspn/xvcvbf16spn only uses the even elements, we can
 ;; use xxspltw instead of vspltw.  This has the advantage that the

Reply via email to