[gcc(refs/users/meissner/heads/work222-float)] Upgrade float16 vector optimizations.

Michael Meissner via Gcc-cvs Tue, 14 Oct 2025 18:30:46 -0700

https://gcc.gnu.org/g:5db69fda20936cdcc84bb9df41a41bb88e385670


commit 5db69fda20936cdcc84bb9df41a41bb88e385670
Author: Michael Meissner <[email protected]>
Date:   Tue Oct 14 21:30:06 2025 -0400

    Upgrade float16 vector optimizations.
    
    2025-10-14  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/float16.cc (bfloat16_operation_as_v4sf): Upgrade
            float16 vector optimizations to use new names.
            (float16_vectorization): New function for _Float16 vectorization
            support.
            * config/rs6000/float16.md (FLOAT16_UNARY_OP): New code iterator.
            (FLOAT16_BINARY_OP): Likewise.
            (float16_names): New code attribute.
            (vecdup<mode>): New insns.
            (bfloat16_binary_op_internal1): Upgrade float16 optimizations and
            change bfloat16 optimization names.
            (bfloat16_binary_op_internal2): Likewise.
            (bfloat16_fma_op_internal1): Likewise.
            (bfloat16_fma_op_internal2): Likewise.
            (bfloat16_fms_op_internal1): Likewise.
            (bfloat16_fms_op_internal2): Likewise.
            (bfloat16_nfma_op_internal1): Likewise.
            (bfloat16_nfma_op_internal2): Likewise.
            (bfloat16_nfma_op_internal3): Likewise.
            (bfloat16_nfms_op_internal1): Likewise.
            (bfloat16_nfms_op_internal2): Likewise.
            (bfloat16_nfms_op_internal3): Likewise.
            (<float16_names>v8hf, FLOAT16_UNARY_OP iterator): Likewise.
            (<float16_names>v8hf, FLOAT16_BINARY_OP iterator): Likewise.
            (neg_<float16_names>v8hf3): Likewise.
            (abs_<float16_names>v8hf3): Likewise.
            (fmav8hf3): Likewise.
            (fmsv8hf3): Likewise.
            (nfmav8hf3): Likewise.
            (nfmsv8hf3): Likewise.
            * config/rs6000/predicates.md (fp16_binary_operator): Rename from
            bfloat16_binary_operator.
            * config/rs6000/rs6000-protos.h (enum fp16_operation): Rename from
            enum bfloat16_operation.  Rename all enums.
            (float16_vectorization): New declaration.
            * config/rs6000/rs6000.cc (rs6000_expand_vector_init): Add support
            for creating a 16-bit floating point vector where all elements are
            the same.
            * config/rs6000/rs6000.h (FP16_VECTOR_MODE_P): New macro.

Diff:
---
 gcc/config/rs6000/float16.cc      | 214 ++++++++++++++++++++++++++++++++++----
 gcc/config/rs6000/float16.md      | 194 +++++++++++++++++++++++++++++++---
 gcc/config/rs6000/predicates.md   |   2 +-
 gcc/config/rs6000/rs6000-protos.h |  21 ++--
 gcc/config/rs6000/rs6000.cc       |   9 ++
 gcc/config/rs6000/rs6000.h        |   5 +
 6 files changed, 399 insertions(+), 46 deletions(-)

diff --git a/gcc/config/rs6000/float16.cc b/gcc/config/rs6000/float16.cc
index 3dc7273719c1..b887d400312a 100644
--- a/gcc/config/rs6000/float16.cc
+++ b/gcc/config/rs6000/float16.cc
@@ -42,7 +42,7 @@
 #include "common/common-target.h"
 #include "rs6000-internal.h"
 
-/* Expand a bfloat16 floating point operation:
+/* Expand a bfloat16 scalar floating point operation:
 
    ICODE:   Operation to perform.
    RESULT:  Result of the operation.
@@ -64,7 +64,7 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
                            rtx op1,
                            rtx op2,
                            rtx op3,
-                           enum bfloat16_operation subtype)
+                           enum fp16_operation subtype)
 {
   gcc_assert (can_create_pseudo_p ());
 
@@ -75,19 +75,22 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
 
   switch (subtype)
     {
-    case BF16_BINARY:
+    case FP16_BINARY:
       n_opts = 2;
       gcc_assert (op3 == NULL_RTX);
       break;
 
-    case BF16_FMA:
-    case BF16_FMS:
-    case BF16_NFMA:
-    case BF16_NFMS:
+    case FP16_FMA:
+    case FP16_FMS:
+    case FP16_NFMA:
+    case FP16_NFMS:
       gcc_assert (icode == FMA);
       n_opts = 3;
       break;
 
+    case FP16_UNARY:
+    case FP16_ABS_BINARY:
+    case FP16_NEG_BINARY:
     default:
       gcc_unreachable ();
     }
@@ -144,27 +147,41 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
     }
 
   /* Do the operation in V4SFmode.  */
-  if (subtype == BF16_BINARY)
-    emit_insn (gen_rtx_SET (result_v4sf,
-                           gen_rtx_fmt_ee (icode, V4SFmode,
-                                           ops_v4sf[0],
-                                           ops_v4sf[1])));
-
-  else         /* FMA/FMS/NFMA/NFMS operation.  */
+  switch (subtype)
     {
-      rtx op1 = ops_v4sf[0];
-      rtx op2 = ops_v4sf[1];
-      rtx op3 = ops_v4sf[2];
+    case FP16_BINARY:
+      emit_insn (gen_rtx_SET (result_v4sf,
+                             gen_rtx_fmt_ee (icode, V4SFmode,
+                                             ops_v4sf[0],
+                                             ops_v4sf[1])));
+      break;
 
-      if (subtype == BF16_FMS || subtype == BF16_NFMS)
-       op3 = gen_rtx_NEG (V4SFmode, op3);
+    case FP16_FMA:
+    case FP16_FMS:
+    case FP16_NFMA:
+    case FP16_NFMS:
+      {
+       rtx op1 = ops_v4sf[0];
+       rtx op2 = ops_v4sf[1];
+       rtx op3 = ops_v4sf[2];
 
-      rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3);
+       if (subtype == FP16_FMS || subtype == FP16_NFMS)
+         op3 = gen_rtx_NEG (V4SFmode, op3);
 
-      if (subtype == BF16_NFMA || subtype == BF16_NFMS)
-       op_fma = gen_rtx_NEG (V4SFmode, op_fma);
+       rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3);
 
-      emit_insn (gen_rtx_SET (result_v4sf, op_fma));
+       if (subtype == FP16_NFMA || subtype == FP16_NFMS)
+         op_fma = gen_rtx_NEG (V4SFmode, op_fma);
+
+       emit_insn (gen_rtx_SET (result_v4sf, op_fma));
+      }
+      break;
+
+    case FP16_UNARY:
+    case FP16_ABS_BINARY:
+    case FP16_NEG_BINARY:
+    default:
+      gcc_unreachable ();
     }
 
   /* Convert V4SF result back to scalar mode.  */
@@ -180,3 +197,154 @@ bfloat16_operation_as_v4sf (enum rtx_code icode,
   else
     gcc_unreachable ();
 }
+
+
+/* Expand a _Float16 vector (V8HFmode) operation by doing it in V4SFmode:
+
+   ICODE:   Operation to perform (not used for the FMA subtypes).
+   RESULT:  Result of the operation.
+   OP1:     Input operand1.
+   OP2:     Input operand2 (must be non-NULL if SUBTYPE has 2+ inputs).
+   OP3:     Input operand3 (must be non-NULL for the FMA subtypes).
+   SUBTYPE: Describe the operation.  */
+
+void
+float16_vectorization (enum rtx_code icode,
+                      rtx result,
+                      rtx op1,
+                      rtx op2,
+                      rtx op3,
+                      enum fp16_operation subtype)
+{
+  gcc_assert (can_create_pseudo_p ());
+
+  enum rtx_code unary_op = UNKNOWN;
+  rtx op_orig[3] = { op1, op2, op3 };
+  rtx op_hi[3];
+  rtx op_lo[3];
+  rtx result_hi;
+  rtx result_lo;
+  size_t n_opts;
+
+  switch (subtype)
+    {
+    case FP16_UNARY:
+      n_opts = 1;
+      break;
+
+    case FP16_BINARY:
+      n_opts = 2;
+      break;
+
+    case FP16_ABS_BINARY:
+      unary_op = ABS;
+      n_opts = 2;
+      break;
+
+    case FP16_NEG_BINARY:
+      unary_op = NEG;
+      n_opts = 2;
+      break;
+
+    case FP16_FMA:
+    case FP16_FMS:
+    case FP16_NFMA:
+    case FP16_NFMS:
+      n_opts = 3;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Allocate hi/lo V4SFmode temporaries for the result and each input.  */
+  result_hi = gen_reg_rtx (V4SFmode);
+  result_lo = gen_reg_rtx (V4SFmode);
+
+  for (size_t i = 0; i < n_opts; i++)
+    {
+      gcc_assert (op_orig[i] != NULL_RTX);
+      op_hi[i] = gen_reg_rtx (V4SFmode);       /* high register.  */
+      op_lo[i] = gen_reg_rtx (V4SFmode);       /* low register.  */
+
+      emit_insn (gen_vec_unpacks_hi_v8hf (op_hi[i], op_orig[i]));
+      emit_insn (gen_vec_unpacks_lo_v8hf (op_lo[i], op_orig[i]));
+    }
+
+  /* Do the V4SFmode operation twice: on the hi and on the lo temporaries.  */
+  switch (subtype)
+    {
+    case FP16_UNARY:
+      emit_insn (gen_rtx_SET (result_hi,
+                             gen_rtx_fmt_e (icode, V4SFmode, op_hi[0])));
+
+      emit_insn (gen_rtx_SET (result_lo,
+                             gen_rtx_fmt_e (icode, V4SFmode, op_lo[0])));
+      break;
+
+    case FP16_BINARY:
+    case FP16_ABS_BINARY:
+    case FP16_NEG_BINARY:
+      emit_insn (gen_rtx_SET (result_hi,
+                             gen_rtx_fmt_ee (icode, V4SFmode,
+                                             op_hi[0],
+                                             op_hi[1])));
+
+      emit_insn (gen_rtx_SET (result_lo,
+                             gen_rtx_fmt_ee (icode, V4SFmode,
+                                             op_lo[0],
+                                             op_lo[1])));
+      break;
+
+    case FP16_FMA:
+    case FP16_FMS:
+    case FP16_NFMA:
+    case FP16_NFMS:
+      {
+       rtx op1_hi = op_hi[0];
+       rtx op2_hi = op_hi[1];
+       rtx op3_hi = op_hi[2];
+
+       rtx op1_lo = op_lo[0];
+       rtx op2_lo = op_lo[1];
+       rtx op3_lo = op_lo[2];
+
+       if (subtype == FP16_FMS || subtype == FP16_NFMS)
+         {
+           op3_hi = gen_rtx_NEG (V4SFmode, op3_hi);
+           op3_lo = gen_rtx_NEG (V4SFmode, op3_lo);
+         }
+
+       rtx op_fma_hi = gen_rtx_FMA (V4SFmode, op1_hi, op2_hi, op3_hi);
+       rtx op_fma_lo = gen_rtx_FMA (V4SFmode, op1_lo, op2_lo, op3_lo);
+
+       if (subtype == FP16_NFMA || subtype == FP16_NFMS)
+         {
+           op_fma_hi = gen_rtx_NEG (V4SFmode, op_fma_hi);
+           op_fma_lo = gen_rtx_NEG (V4SFmode, op_fma_lo);
+         }
+
+       emit_insn (gen_rtx_SET (result_hi, op_fma_hi));
+       emit_insn (gen_rtx_SET (result_lo, op_fma_lo));
+      }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Apply ABS/NEG to both results for FP16_ABS_BINARY/FP16_NEG_BINARY.  */
+  if (unary_op != UNKNOWN)
+    {
+      emit_insn (gen_rtx_SET (result_hi,
+                             gen_rtx_fmt_e (unary_op, V4SFmode, result_hi)));
+
+      emit_insn (gen_rtx_SET (result_lo,
+                             gen_rtx_fmt_e (unary_op, V4SFmode, result_lo)));
+    }
+
+  /* Combine the 2 V4SFmode operations into one V8HFmode vector.  */
+  emit_insn (gen_vec_pack_trunc_v4sf_v8hf (result, result_hi, result_lo));
+  return;
+}
+
diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md
index 28586654f96e..eaba1784362b 100644
--- a/gcc/config/rs6000/float16.md
+++ b/gcc/config/rs6000/float16.md
@@ -62,6 +62,22 @@
                                (V8BF "V4BF")
                                (V8HF "V4HF")])
 
+;; Unary operators for float16 vectorization.
+(define_code_iterator FLOAT16_UNARY_OP [abs neg])
+
+;; Binary operators for float16 vectorization.
+(define_code_iterator FLOAT16_BINARY_OP [plus minus mult smax smin])
+
+;; Standard names for the unary/binary/ternary operators
+(define_code_attr float16_names [(abs   "abs")
+                                (fma   "fma")
+                                (plus  "add")
+                                (minus "sub")
+                                (mult  "mul")
+                                (neg   "neg")
+                                (smax  "smax")
+                                (smin  "smin")])
+
 ;; UNSPEC constants
 (define_c_enum "unspec"
   [UNSPEC_FP16_SHIFT_LEFT_32BIT
@@ -133,6 +149,15 @@
    (set_attr "isa"  "*,         p9v,       p9v,       *,         *,
                      *,         p8v,       p8v,       p9v,       *")])
 
+;; Vector duplicate
+(define_insn "*vecdup<mode>"
+  [(set (match_operand:<FP16_VECTOR8> 0 "altivec_register_operand" "=v")
+       (vec_duplicate:<FP16_VECTOR8>
+        (match_operand:FP16 1 "altivec_register_operand" "v")))]
+  ""
+  "vsplth %0,%1,3"
+  [(set_attr "type" "vecperm")])
+
 
 ;; Convert IEEE 16-bit floating point to/from other floating point modes.
 
@@ -446,7 +471,7 @@
 
 (define_insn_and_split "*bfloat16_binary_op_internal1"
   [(set (match_operand:SF 0 "vsx_register_operand")
-       (match_operator:SF 1 "bfloat16_binary_operator"
+       (match_operator:SF 1 "fp16_binary_operator"
                           [(match_operand:SF 2 "bfloat16_v4sf_operand")
                            (match_operand:SF 3 "bfloat16_v4sf_operand")]))]
   "TARGET_BFLOAT16_HW && can_create_pseudo_p ()
@@ -457,14 +482,14 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2],
-                             operands[3], NULL_RTX, BF16_BINARY);
+                             operands[3], NULL_RTX, FP16_BINARY);
   DONE;
 })
 
 (define_insn_and_split "*bfloat16_binary_op_internal2"
   [(set (match_operand:BF 0 "vsx_register_operand")
        (float_truncate:BF
-        (match_operator:SF 1 "bfloat16_binary_operator"
+        (match_operator:SF 1 "fp16_binary_operator"
                            [(match_operand:SF 2 "bfloat16_v4sf_operand")
                             (match_operand:SF 3 "bfloat16_v4sf_operand")])))]
   "TARGET_BFLOAT16_HW && can_create_pseudo_p ()
@@ -475,7 +500,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2],
-                             operands[3], NULL_RTX, BF16_BINARY);
+                             operands[3], NULL_RTX, FP16_BINARY);
   DONE;
 })
 
@@ -494,7 +519,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_FMA);
+                             operands[3], FP16_FMA);
   DONE;
 })
 
@@ -514,7 +539,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_FMA);
+                             operands[3], FP16_FMA);
   DONE;
 })
 
@@ -534,7 +559,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_FMS);
+                             operands[3], FP16_FMS);
   DONE;
 })
 
@@ -555,7 +580,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_FMS);
+                             operands[3], FP16_FMS);
   DONE;
 })
 
@@ -575,7 +600,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMA);
+                             operands[3], FP16_NFMA);
   DONE;
 })
 
@@ -596,7 +621,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMA);
+                             operands[3], FP16_NFMA);
   DONE;
 })
 
@@ -617,7 +642,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMA);
+                             operands[3], FP16_NFMA);
   DONE;
 })
 
@@ -638,7 +663,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMS);
+                             operands[3], FP16_NFMS);
   DONE;
 })
 
@@ -660,7 +685,7 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMS);
+                             operands[3], FP16_NFMS);
   DONE;
 })
 
@@ -682,10 +707,151 @@
   [(pc)]
 {
   bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2],
-                             operands[3], BF16_NFMS);
+                             operands[3], FP16_NFMS);
+  DONE;
+})
+
+
+;; Add vectorization support for _Float16.  Unfortunately, since there
+;; can only be one vec_pack_trunc_v4sf, we choose to support automatic
+;; vectorization for BFmode.  The following insns define vectorization
+;; for HFmode.
+
+;; Unary operators being vectorized (standard names absv8hf2 and negv8hf2).
+(define_insn_and_split "<float16_names>v8hf2"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (FLOAT16_UNARY_OP:V8HF
+        (match_operand:V8HF 1 "vsx_register_operand")))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (<CODE>, operands[0], operands[1], NULL_RTX, NULL_RTX,
+                        FP16_UNARY);
+  DONE;
+})
+
+;; Binary operators being vectorized.
+(define_insn_and_split "<float16_names>v8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (FLOAT16_BINARY_OP:V8HF
+        (match_operand:V8HF 1 "vsx_register_operand")
+        (match_operand:V8HF 2 "vsx_register_operand")))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
+                        NULL_RTX, FP16_BINARY);
+  DONE;
+})
+
+;; Negative of binary operators being vectorized.
+(define_insn_and_split "*neg_<float16_names>v8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (neg:V8HF
+        (FLOAT16_BINARY_OP:V8HF
+         (match_operand:V8HF 1 "vsx_register_operand")
+         (match_operand:V8HF 2 "vsx_register_operand"))))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
+                        NULL_RTX, FP16_NEG_BINARY);
+  DONE;
+})
+
+;; Absolute value of binary operators being vectorized.
+(define_insn_and_split "*abs_<float16_names>v8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (abs:V8HF
+        (FLOAT16_BINARY_OP:V8HF
+         (match_operand:V8HF 1 "vsx_register_operand")
+         (match_operand:V8HF 2 "vsx_register_operand"))))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (<CODE>, operands[0], operands[1], operands[2],
+                        NULL_RTX, FP16_ABS_BINARY);
+  DONE;
+})
+
+;; FMA operations being vectorized (the standard name is fma<mode>4).
+(define_insn_and_split "fmav8hf4"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (fma:V8HF
+        (match_operand:V8HF 1 "vsx_register_operand")
+        (match_operand:V8HF 2 "vsx_register_operand")
+        (match_operand:V8HF 3 "vsx_register_operand")))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (FMA, operands[0], operands[1], operands[2],
+                        operands[3], FP16_FMA);
+  DONE;
+})
+
+(define_insn_and_split "*fmsv8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (fma:V8HF
+        (match_operand:V8HF 1 "vsx_register_operand")
+        (match_operand:V8HF 2 "vsx_register_operand")
+        (neg:V8HF
+         (match_operand:V8HF 3 "vsx_register_operand"))))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (FMA, operands[0], operands[1], operands[2],
+                        operands[3], FP16_FMS);
   DONE;
 })
 
+(define_insn_and_split "*nfmav8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (neg:V8HF
+        (fma:V8HF
+         (match_operand:V8HF 1 "vsx_register_operand")
+         (match_operand:V8HF 2 "vsx_register_operand")
+         (match_operand:V8HF 3 "vsx_register_operand"))))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (FMA, operands[0], operands[1], operands[2],
+                        operands[3], FP16_NFMA);
+  DONE;
+})
+
+(define_insn_and_split "*nfmsv8hf3"
+  [(set (match_operand:V8HF 0 "vsx_register_operand")
+       (neg:V8HF
+        (fma:V8HF
+         (match_operand:V8HF 1 "vsx_register_operand")
+         (match_operand:V8HF 2 "vsx_register_operand")
+         (neg:V8HF
+          (match_operand:V8HF 3 "vsx_register_operand")))))]
+  "TARGET_FLOAT16_HW && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  float16_vectorization (FMA, operands[0], operands[1], operands[2],
+                        operands[3], FP16_NFMS);
+  DONE;
+})
+
+
 ;; If we do multiple __bfloat16 operations, between the first and
 ;; second operation, GCC will want to convert the first operation from
 ;; V4SFmode to SFmode and then reconvert it back to V4SFmode.  On the
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d47d09cf73db..4394b4a6daa1 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2208,7 +2208,7 @@
 ;; the operation in vector mode rather than convverting the BFmode to a
 ;; V8BFmode vector, converting that V8BFmode vector to V4SFmode, and
 ;; then converting the V4SFmode element to SFmode scalar.
-(define_predicate "bfloat16_binary_operator"
+(define_predicate "fp16_binary_operator"
   (match_code "plus,minus,mult,smax,smin"))
 
 ;; Match bfloat16/float operands that can be optimized to do the
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index db38468df816..f677506b4aa0 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -260,17 +260,22 @@ extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
 extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
 
 /* From float16.cc.  */
-/* Optimize bfloat16 operations.  */
-enum bfloat16_operation {
-  BF16_BINARY,                         /* Bfloat16 binary op.  */
-  BF16_FMA,                            /* (a * b) + c.  */
-  BF16_FMS,                            /* (a * b) - c.  */
-  BF16_NFMA,                           /* - ((a * b) + c).  */
-  BF16_NFMS                            /* - ((a * b) - c).  */
+/* Optimize bfloat16 and float16 operations.  */
+enum fp16_operation {
+  FP16_UNARY,                          /* Bfloat16/float16 unary op.  */
+  FP16_BINARY,                         /* Bfloat16/float16 binary op.  */
+  FP16_ABS_BINARY,                     /* abs (binary op).  */
+  FP16_NEG_BINARY,                     /* - binary op.  */
+  FP16_FMA,                            /* (a * b) + c.  */
+  FP16_FMS,                            /* (a * b) - c.  */
+  FP16_NFMA,                           /* - ((a * b) + c).  */
+  FP16_NFMS                            /* - ((a * b) - c).  */
 };
 
 extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx,
-                                       enum bfloat16_operation);
+                                       enum fp16_operation);
+extern void float16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx,
+                                  enum fp16_operation);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index bf9e1e86203f..dbbce3c5338d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7111,6 +7111,15 @@ rs6000_expand_vector_init (rtx target, rtx vals)
       return;
     }
 
+  /* Special case splats of 16-bit floating point.  */
+  if (all_same && FP16_VECTOR_MODE_P (mode))
+    {
+      rtx op0 = force_reg (GET_MODE_INNER (mode), XVECEXP (vals, 0, 0));
+      rtx dup = gen_rtx_VEC_DUPLICATE (mode, op0);
+      emit_insn (gen_rtx_SET (target, dup));
+      return;
+    }
+                                                    
   /* Special case initializing vector short/char that are splats if we are on
      64-bit systems with direct move.  */
   if (all_same && TARGET_DIRECT_MOVE_64BIT
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 16aa2a6443b1..7850affe6afc 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -358,6 +358,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
   (((MODE) == HFmode && TARGET_FLOAT16_HW)                             \
    || ((MODE) == BFmode && TARGET_BFLOAT16_HW))
 
+/* Is this a valid 16-bit vector floating point mode?  */
+#define FP16_VECTOR_MODE_P(MODE)                                       \
+  (((MODE) == V8HFmode && TARGET_FLOAT16)                              \
+   || ((MODE) == V8BFmode && TARGET_BFLOAT16))
+
 /* Return true for floating point that does not use a vector register.  */
 #define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE)                           \
   (SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE))

[gcc(refs/users/meissner/heads/work222-float)] Upgrade float16 vector optimizations.

Reply via email to