work221-float)] Add initial _Float16 support.

Michael Meissner via Gcc-cvs Mon, 08 Sep 2025 11:46:54 -0700

https://gcc.gnu.org/g:261ee5c479248aecc4a5bf6771f272eebf531f2f


commit 261ee5c479248aecc4a5bf6771f272eebf531f2f
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Mon Sep 8 14:46:21 2025 -0400

    Add initial _Float16 support.
    
    2025-09-08  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/altivec.md (VM): Add V8HF.
            (VM2): Likewise.
            (VI_char): Likewise.
            (VI_scalar): Likewise.
            (VI_unit): Likewise.
            (VP_small): Likewise.
            (VP_small_lc): Likewise.
            (VU_char): Likewise.
            * config/rs6000/predicate.md (easy_fp_constant): Power10 can load
            _Float16 constants directly.
            (ieee16_xxspltiw_constant): New predicate.
            * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add _Float16
            support.
            * config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Likewise.
            * config/rs6000/rs6000-modes.def (HFmode): Likewise.
            * config/rs6000/rs6000-p8swap.cc (rs6000_gen_stvx): Drop V8HFmode
            support since V8HFmode doesn't exist on power8 or earlier.
            (rs6000_gen_lvx): Likewise.
            (replace_swapped_load_constant): Likewise.
            * config/rs6000/rs6000-protos.h (vec_const_128bit_type): Add mode 
field.
            * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add
            _Float16 support.
            (rs6000_modes_tieable_p): Likewise.
            (rs6000_debug_reg_global): Likewise.
            (rs6000_setup_reg_addr_masks): Likewise.
            (rs6000_init_hard_regno_mode_ok): Likewise.
            (rs6000_secondary_reload_simple_move): Likewise.
            (rs6000_preferred_reload_class): Likewise.
            (rs6000_can_change_mode_class): Likewise.
            (rs6000_function_value): Likewise.
            (rs6000_scalar_mode_supported_p): Likewise.
            (rs6000_floatn_mode): Likewise.
            (constant_fp_to_128bit_vector): Likewise.
            (vec_const_128bit_to_bytes): Likewise.
            (constant_generates_xxspltiw): Likewise.
            * config/rs6000/rs6000.h (TARGET_IEEE16): New macro.
            * config/rs6000/rs6000.md (FMOVE128_GPR): Add support for V8HFmode.
            (RELOAD): Likewise.
            (movhf): Likewise.
            (movhf_xxspltiw): Likewise.
            (movhf_internal): Likewise.
            * config/rs6000/rs6000.opt (-mieee16-gpr-args): New debug switch.
            * config/rs6000/vector.md (VEC_L): Add V8HFmode.
            (VEC_M): Likewise.
            (VEC_E): Likewise.
            (VEC_base): Likewise.
            (VEC_base_l): Likewise.
            (vec_pack_trunc_v4s): Likewise.
            (vec_unpacks_hi_v8h): Likewise.
            * config/rs600/vsx.md (V8HI_V8HF): New mode iterator.
            (VSX_L): Add V8HFmode.
            (VSX_XXBR): Likewise.
            (VSm): Likewise.
            (VSr): Likewise.
            (VSisa): Likewise.
            (??r): Likewise.
            (VSc): Likewise.
            (VM3): Likewise.
            (VM3_char): Likewise.
            (vsx_le_perm_load_<mode>): Likewise.
            (vsx_le_perm_store_<mode>): Likewise.
            (permute splits): Likewise.
            (vsx_ld_elemrev_<mode): Likewise.
            (vsx_st_elemrev_<mode>): Likewise.
            (vsx_st_elemrev_<mode>_internal): Likewise.
            (vsx_xvcvhpsp): Add comment.
            (vsx_xvcvhpsp_v8hf): New insn.
            (vsx_xvcvsphp): Add comment.
            (vsx_xvcvsphp_v8hf): New insn.
            (extendhf<mode>2): Likewise.
            (trunc<mode>hf2): Likewise.
            (xxswapd_<mode>): Add V8HFmode.
            (vsx_lxvd2x8_le_<MODE): Likewise.
            (vsx_stxvd2x8_le_<MODE): Likewise.
            (vsx_extract_<mode>_store_p9): Likewise.

Diff:
---
 gcc/config/rs6000/altivec.md        |  28 +++++-
 gcc/config/rs6000/predicates.md     |  19 ++++
 gcc/config/rs6000/rs6000-builtin.cc |   2 +
 gcc/config/rs6000/rs6000-call.cc    |  10 +-
 gcc/config/rs6000/rs6000-modes.def  |   3 +
 gcc/config/rs6000/rs6000-p8swap.cc  |  14 +--
 gcc/config/rs6000/rs6000-protos.h   |   1 +
 gcc/config/rs6000/rs6000.cc         | 130 ++++++++++++++++++++++----
 gcc/config/rs6000/rs6000.h          |   3 +
 gcc/config/rs6000/rs6000.md         |  60 +++++++++++-
 gcc/config/rs6000/rs6000.opt        |   4 +
 gcc/config/rs6000/vector.md         |  59 +++++++++++-
 gcc/config/rs6000/vsx.md            | 178 +++++++++++++++++++++++-------------
 13 files changed, 402 insertions(+), 109 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7edc288a6565..b6f92a71f963 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -191,6 +191,7 @@
 ;; otherwise handled by altivec (v2df, v2di, ti)
 (define_mode_iterator VM [V4SI
                          V8HI
+                         V8HF
                          V16QI
                          V4SF
                          V2DF
@@ -203,6 +204,7 @@
 ;; Like VM, except don't do TImode
 (define_mode_iterator VM2 [V4SI
                           V8HI
+                          V8HF
                           V16QI
                           V4SF
                           V2DF
@@ -222,18 +224,34 @@
                               V1TI
                               TI])
 
-(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
-(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
+(define_mode_attr VI_char [(V2DI "d")
+                          (V4SI "w")
+                          (V8HI "h")
+                          (V8HF "h")
+                          (V16QI "b")])
+(define_mode_attr VI_scalar [(V2DI "DI")
+                            (V4SI "SI")
+                            (V8HI "HI")
+                            (V8HF "HF")
+                            (V16QI "QI")])
 (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
                           (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
+                          (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)")
                           (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
                           (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
 
 ;; Vector pack/unpack
 (define_mode_iterator VP [V2DI V4SI V8HI])
-(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")])
-(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
-(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
+(define_mode_attr VP_small [(V2DI "V4SI")
+                           (V4SI "V8HI")
+                           (V8HI "V16QI")])
+(define_mode_attr VP_small_lc [(V2DI "v4si")
+                              (V4SI "v8hi")
+                              (V8HI "v16qi")])
+(define_mode_attr VU_char [(V2DI "w")
+                          (V4SI "h")
+                          (V8HI "b")
+                          (V8HF "b")])
 
 ;; Vector negate
 (define_mode_iterator VNEG [V4SI V2DI])
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a7..2a4b38838d20 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -601,6 +601,11 @@
   if (TARGET_VSX && op == CONST0_RTX (mode))
     return 1;
 
+  /* Power9 needs to load HFmode constants from memory, Power10 can use
+     XXSPLTIW.  */
+  if (mode == HFmode && !TARGET_POWER10)
+    return 0;
+
   /* Constants that can be generated with ISA 3.1 instructions are easy.  */
   vec_const_128bit_type vsx_const;
   if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
@@ -2166,3 +2171,17 @@
   (and (match_code "subreg")
        (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
                    == SUBREG_BYTE (op)")))
+
+;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW.
+(define_predicate "ieee16_xxspltiw_constant"
+  (match_code "const_double")
+{
+  if (!TARGET_POWER10 || mode != HFmode)
+    return false;
+
+  vec_const_128bit_type vsx_const;
+  if (!vec_const_128bit_to_bytes (op, mode, &vsx_const))
+    return false;
+
+  return constant_generates_xxspltiw (&vsx_const);
+})
diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
b/gcc/config/rs6000/rs6000-builtin.cc
index bc1580f051b0..05a730a8fdca 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -491,6 +491,8 @@ const char *rs6000_type_string (tree type_node)
     return "voidc*";
   else if (type_node == float128_type_node)
     return "_Float128";
+  else if (type_node == float16_type_node)
+    return "_Float16";
   else if (type_node == vector_pair_type_node)
     return "__vector_pair";
   else if (type_node == vector_quad_type_node)
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 8fe5652442e3..3872d742d159 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -82,10 +82,12 @@
 #endif
 
 /* Nonzero if we can use a floating-point register to pass this arg.  */
-#define USE_FP_FOR_ARG_P(CUM,MODE)             \
-  (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)               \
-   && (CUM)->fregno <= FP_ARG_MAX_REG          \
-   && TARGET_HARD_FLOAT)
+#define USE_FP_FOR_ARG_P(CUM,MODE)                             \
+  (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)                       \
+   && (CUM)->fregno <= FP_ARG_MAX_REG                          \
+   && TARGET_HARD_FLOAT                                                \
+   && ((MODE) != HFmode || !TARGET_IEEE16_GPR_ARGS))
+
 
 /* Nonzero if we can use an AltiVec register to pass this arg.  */
 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)                  \
diff --git a/gcc/config/rs6000/rs6000-modes.def 
b/gcc/config/rs6000/rs6000-modes.def
index f89e4ef403c1..04dc1d8c9194 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -45,6 +45,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format);
 /* IBM 128-bit floating point.  */
 FLOAT_MODE (IF, 16, ibm_extended_format);
 
+/* Explicit IEEE 16-bit floating point.  */
+FLOAT_MODE (HF, 2, ieee_half_format);
+
 /* Add any extra modes needed to represent the condition code.
 
    For the RS/6000, we need separate modes when unsigned (logical) comparisons
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
b/gcc/config/rs6000/rs6000-p8swap.cc
index e92f01031270..4fb107c60a47 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -1598,10 +1598,6 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, 
rtx src_exp)
     stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp);
   else if (mode == V8HImode)
     stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp);
-#ifdef HAVE_V8HFmode
-  else if (mode == V8HFmode)
-    stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp);
-#endif
   else if (mode == V4SImode)
     stvx = gen_altivec_stvx_v4si (src_exp, dest_exp);
   else if (mode == V4SFmode)
@@ -1722,10 +1718,6 @@ rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, 
rtx src_exp)
     lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp);
   else if (mode == V8HImode)
     lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp);
-#ifdef HAVE_V8HFmode
-  else if (mode == V8HFmode)
-    lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp);
-#endif
   else if (mode == V4SImode)
     lvx = gen_altivec_lvx_v4si (dest_exp, src_exp);
   else if (mode == V4SFmode)
@@ -1930,11 +1922,7 @@ replace_swapped_load_constant (swap_web_entry 
*insn_entry, rtx swap_insn)
       rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
       new_mem = force_const_mem (mode, new_const_vector);
     }
-  else if ((mode == V8HImode)
-#ifdef HAVE_V8HFmode
-          || (mode == V8HFmode)
-#endif
-          )
+  else if (mode == V8HImode)
     {
       rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
       int i;
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 4619142d197b..9bf971370d41 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -250,6 +250,7 @@ typedef struct {
   bool all_words_same;                 /* Are the words all equal?  */
   bool all_half_words_same;            /* Are the half words all equal?  */
   bool all_bytes_same;                 /* Are the bytes all equal?  */
+  machine_mode mode;                   /* Original constant mode.  */
 } vec_const_128bit_type;
 
 extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 04e688d243e1..7a6b16d30866 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1899,7 +1899,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
 
       if (ALTIVEC_REGNO_P (regno))
        {
-         if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
+         if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p
+             && mode != HFmode)
            return 0;
 
          return ALTIVEC_REGNO_P (last_regno);
@@ -1931,7 +1932,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
          if (TARGET_POPCNTD && mode == SImode)
            return 1;
 
-         if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
+         if (TARGET_P9_VECTOR
+             && (mode == QImode || mode == HImode || mode == HFmode))
            return 1;
        }
 
@@ -1989,7 +1991,8 @@ static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
   if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
-      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode
+      || mode1 == HFmode || mode2 == HFmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2255,6 +2258,7 @@ rs6000_debug_reg_global (void)
     DImode,
     TImode,
     PTImode,
+    HFmode,
     SFmode,
     DFmode,
     TFmode,
@@ -2275,6 +2279,7 @@ rs6000_debug_reg_global (void)
     V8SImode,
     V4DImode,
     V2TImode,
+    V8HFmode,
     V4SFmode,
     V2DFmode,
     V8SFmode,
@@ -2633,8 +2638,13 @@ rs6000_setup_reg_addr_masks (void)
 
       /* SDmode is special in that we want to access it only via REG+REG
         addressing on power7 and above, since we want to use the LFIWZX and
-        STFIWZX instructions to load it.  */
-      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
+        STFIWZX instructions to load it.
+
+        Never allow offset addressing for HFmode, since it is expected that
+        16-bit floating point should always go into the vector registers and
+        we only have indexed and indirect 16-bit loads to VSR registers.  */
+      bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK)
+                            || m == HFmode);
 
       any_addr_mask = 0;
       for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
@@ -2683,6 +2693,7 @@ rs6000_setup_reg_addr_masks (void)
                  && !complex_p
                  && (m != E_DFmode || !TARGET_VSX)
                  && (m != E_SFmode || !TARGET_P8_VECTOR)
+                 && m != E_HFmode
                  && !small_int_vsx_p)
                {
                  addr_mask |= RELOAD_REG_PRE_INCDEC;
@@ -2930,6 +2941,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[V1TImode] = 128;
     }
 
+  /* _Float16 support.  */
+  if (TARGET_IEEE16)
+    {
+      rs6000_vector_unit[V8HFmode] = VECTOR_VSX;
+      rs6000_vector_mem[V8HFmode] = VECTOR_VSX;
+      rs6000_vector_align[V8HFmode] = align64;
+    }
+
   /* DFmode, see if we want to use the VSX unit.  Memory is handled
      differently, so don't set rs6000_vector_mem.  */
   if (TARGET_VSX)
@@ -2952,6 +2971,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[TImode] = align64;
     }
 
+  /* Allow HFmode in VSX register and set the VSX memory macros.  */
+  if (TARGET_IEEE16)
+    {
+      rs6000_vector_mem[HImode] = VECTOR_VSX;
+      rs6000_vector_align[HFmode] = 16;
+    }
+
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
@@ -3011,6 +3037,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
          reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
          reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
          reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
+         reg_addr[V8HFmode].reload_store  = CODE_FOR_reload_v8hf_di_store;
+         reg_addr[V8HFmode].reload_load   = CODE_FOR_reload_v8hf_di_load;
          reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
          reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
          reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
@@ -3040,6 +3068,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
            }
 
+         if (TARGET_IEEE16)
+           {
+             reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_di_store;
+             reg_addr[HFmode].reload_load  = CODE_FOR_reload_hf_di_load;
+           }
+
          /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
             available.  */
          if (TARGET_NO_SDMODE_STACK)
@@ -3062,6 +3096,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[V2DImode].reload_gpr_vsx  = 
CODE_FOR_reload_gpr_from_vsxv2di;
              reg_addr[V4SFmode].reload_gpr_vsx  = 
CODE_FOR_reload_gpr_from_vsxv4sf;
              reg_addr[V4SImode].reload_gpr_vsx  = 
CODE_FOR_reload_gpr_from_vsxv4si;
+             reg_addr[V8HFmode].reload_gpr_vsx  = 
CODE_FOR_reload_gpr_from_vsxv8hf;
              reg_addr[V8HImode].reload_gpr_vsx  = 
CODE_FOR_reload_gpr_from_vsxv8hi;
              reg_addr[V16QImode].reload_gpr_vsx = 
CODE_FOR_reload_gpr_from_vsxv16qi;
              reg_addr[SFmode].reload_gpr_vsx    = 
CODE_FOR_reload_gpr_from_vsxsf;
@@ -3072,6 +3107,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[V2DImode].reload_vsx_gpr  = 
CODE_FOR_reload_vsx_from_gprv2di;
              reg_addr[V4SFmode].reload_vsx_gpr  = 
CODE_FOR_reload_vsx_from_gprv4sf;
              reg_addr[V4SImode].reload_vsx_gpr  = 
CODE_FOR_reload_vsx_from_gprv4si;
+             reg_addr[V8HFmode].reload_vsx_gpr  = 
CODE_FOR_reload_vsx_from_gprv8hf;
              reg_addr[V8HImode].reload_vsx_gpr  = 
CODE_FOR_reload_vsx_from_gprv8hi;
              reg_addr[V16QImode].reload_vsx_gpr = 
CODE_FOR_reload_vsx_from_gprv16qi;
              reg_addr[SFmode].reload_vsx_gpr    = 
CODE_FOR_reload_vsx_from_gprsf;
@@ -3109,6 +3145,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
          reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
          reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
          reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
+         reg_addr[V8HFmode].reload_store  = CODE_FOR_reload_v8hf_si_store;
+         reg_addr[V8HFmode].reload_load   = CODE_FOR_reload_v8hf_si_load;
          reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
          reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
          reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
@@ -3132,6 +3170,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
            }
 
+         if (TARGET_IEEE16)
+           {
+             reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_si_store;
+             reg_addr[HFmode].reload_load  = CODE_FOR_reload_hf_si_load;
+           }
+
          /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
             available.  */
          if (TARGET_NO_SDMODE_STACK)
@@ -12662,6 +12706,9 @@ rs6000_secondary_reload_simple_move (enum 
rs6000_reg_type to_type,
       && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
          || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
     {
+      if (TARGET_IEEE16 && mode == HFmode)
+       return true;
+
       if (TARGET_POWERPC64)
        {
          /* ISA 2.07: MTVSRD or MVFVSRD.  */
@@ -12679,7 +12726,8 @@ rs6000_secondary_reload_simple_move (enum 
rs6000_reg_type to_type,
          if (mode == SImode)
            return true;
 
-         if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
+         if (TARGET_P9_VECTOR
+             && (mode == HImode || mode == QImode || mode == HFmode))
            return true;
        }
 
@@ -13449,6 +13497,11 @@ rs6000_preferred_reload_class (rtx x, enum reg_class 
rclass)
          || mode_supports_dq_form (mode))
        return rclass;
 
+      /* IEEE 16-bit don't support offset addressing, but they can go in any
+        floating point/vector register.  */
+      if (mode == HFmode && TARGET_IEEE16)
+       return rclass;
+
       /* If this is a scalar floating point value and we don't have D-form
         addressing, prefer the traditional floating point registers so that we
         can use D-form (register+offset) addressing.  */
@@ -13678,6 +13731,9 @@ rs6000_can_change_mode_class (machine_mode from,
   unsigned from_size = GET_MODE_SIZE (from);
   unsigned to_size = GET_MODE_SIZE (to);
 
+  if (from == HFmode || to == HFmode)
+    return from_size == to_size;
+
   if (from_size != to_size)
     {
       enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
@@ -24019,6 +24075,8 @@ rs6000_function_value (const_tree valtype,
   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
     /* _Decimal128 must use an even/odd register pair.  */
     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
+  else if (mode == HFmode && TARGET_IEEE16_GPR_ARGS)
+    regno = GP_ARG_RETURN;
   else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
           && !FLOAT128_VECTOR_P (mode))
     regno = FP_ARG_RETURN;
@@ -24291,6 +24349,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
     return true;
+  else if (mode == HFmode)
+    return TARGET_IEEE16;
   else
     return default_scalar_mode_supported_p (mode);
 }
@@ -24342,6 +24402,9 @@ rs6000_floatn_mode (int n, bool extended)
     {
       switch (n)
        {
+       case 16:
+         return TARGET_IEEE16 ? SFmode : opt_scalar_float_mode ();
+
        case 32:
          return DFmode;
 
@@ -24363,6 +24426,9 @@ rs6000_floatn_mode (int n, bool extended)
     {
       switch (n)
        {
+       case 16:
+         return TARGET_IEEE16 ? HFmode : opt_scalar_float_mode ();
+
        case 32:
          return SFmode;
 
@@ -28903,24 +28969,43 @@ constant_fp_to_128bit_vector (rtx op,
   const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
   long real_words[VECTOR_128BIT_WORDS];
 
-  /* Make sure we don't overflow the real_words array and that it is
-     filled completely.  */
-  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
+  /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so
+     deal with it here.  */
+  if (mode == HFmode)
+    {
+      real_to_target (real_words, rtype, mode);
+      unsigned char hi = (unsigned char) (real_words[0] >> 8);
+      unsigned char lo = (unsigned char) real_words[0];
+
+      if (!BYTES_BIG_ENDIAN)
+       std::swap (hi, lo);
 
-  real_to_target (real_words, rtype, mode);
+      info->bytes[0] = hi;
+      info->bytes[1] = lo;
+    }
 
-  /* Iterate over each 32-bit word in the floating point constant.  The
-     real_to_target function puts out words in target endian fashion.  We need
-     to arrange the order so that the bytes are written in big endian order.  
*/
-  for (unsigned num = 0; num < num_words; num++)
+  else
     {
-      unsigned endian_num = (BYTES_BIG_ENDIAN
-                            ? num
-                            : num_words - 1 - num);
+      /* Make sure we don't overflow the real_words array and that it is filled
+        completely.  */
+      gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
+
+      real_to_target (real_words, rtype, mode);
+
+      /* Iterate over each 32-bit word in the floating point constant.  The
+        real_to_target function puts out words in target endian fashion.  We
+        need to arrange the order so that the bytes are written in big endian
+        order.  */
+      for (unsigned num = 0; num < num_words; num++)
+       {
+         unsigned endian_num = (BYTES_BIG_ENDIAN
+                                ? num
+                                : num_words - 1 - num);
 
-      unsigned uvalue = real_words[endian_num];
-      for (int shift = 32 - 8; shift >= 0; shift -= 8)
-       info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
+         unsigned uvalue = real_words[endian_num];
+         for (int shift = 32 - 8; shift >= 0; shift -= 8)
+           info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
+       }
     }
 
   /* Mark that this constant involves floating point.  */
@@ -28959,6 +29044,7 @@ vec_const_128bit_to_bytes (rtx op,
     return false;
 
   /* Set up the bits.  */
+  info->mode = mode;
   switch (GET_CODE (op))
     {
       /* Integer constants, default to double word.  */
@@ -29186,6 +29272,10 @@ constant_generates_xxspltiw (vec_const_128bit_type 
*vsx_const)
   if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
     return 0;
 
+  /* HFmode constants can always use XXSPLTIW.  */
+  if (vsx_const->mode == HFmode)
+    return 1;
+
   if (!vsx_const->all_words_same)
     return 0;
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index cffe2750ba9a..31c1d8f613a6 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -567,6 +567,9 @@ extern int rs6000_vector_align[];
    below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Support for IEEE 16-bit floating point.  */
+#define TARGET_IEEE16          TARGET_P9_VECTOR
+
 /* Whether the various reciprocal divide/square root estimate instructions
    exist, and whether we should automatically generate code for the instruction
    by default.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 04a6c0f7461d..7ba2088884d5 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -552,6 +552,7 @@
 (define_mode_iterator FMOVE128_GPR [TI
                                    V16QI
                                    V8HI
+                                   V8HF
                                    V4SI
                                    V4SF
                                    V2DI
@@ -837,8 +838,8 @@
 
 ;; Reload iterator for creating the function to allocate a base register to
 ;; supplement addressing modes.
-(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
-                             SF SD SI DF DD DI TI PTI KF IF TF
+(define_mode_iterator RELOAD [V16QI V8HI V8HF V4SI V2DI V4SF V2DF V1TI
+                             SF SD SI DF DD DI TI PTI KF IF TF HF
                              OO XO])
 
 ;; Iterate over smin, smax
@@ -8145,6 +8146,61 @@
                 p9v,       p9v,       p9v,       p9v,       p9v,       p9v,
                 p9v,       *,         *,         *")])
 
+
+(define_expand "movhf"
+  [(set (match_operand:HF 0 "nonimmediate_operand")
+       (match_operand:HF 1 "any_operand"))]
+  "TARGET_IEEE16"
+{
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (HFmode, operands[1]);
+})
+
+;; On power10, we can load up HFmode constants with xxspltiw or pli.
+(define_insn "*movhf_xxspltiw"
+  [(set (match_operand:HF 0 "gpc_reg_operand" "=wa,r")
+       (match_operand:HF 1 "ieee16_xxspltiw_constant" "eP,eP"))]
+  "TARGET_IEEE16 && TARGET_POWER10 && TARGET_PREFIXED"
+{
+  rtx op1 = operands[1];
+  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1);
+  long real_words[VECTOR_128BIT_WORDS];
+
+  real_to_target (real_words, rtype, HFmode);
+  operands[2] = GEN_INT (real_words[0]);
+  return (vsx_register_operand (operands[0], HFmode)
+         ? "xxspltiw %x0,%2"
+         : "li %0,%2");
+}
+  [(set_attr "type" "vecperm,*")
+   (set_attr "prefixed" "yes")])
+
+(define_insn "*movhf_internal"
+  [(set (match_operand:HF 0 "nonimmediate_operand"
+                     "=wa,       wa,       Z,         r,          r,
+                      m,         r,        wa,        wa,         r")
+
+       (match_operand:HF 1 "any_operand"
+                     "wa,        Z,        wa,        r,          m,
+                      r,         wa,       r,         j,          j"))]
+  "TARGET_IEEE16
+   && (gpc_reg_operand (operands[0], HFmode)
+       || gpc_reg_operand (operands[1], HFmode))"
+  "@
+   xxlor %x0,%x1,%x1
+   lxsiwzx %x0,%y1
+   stxsiwx %x1,%y0
+   mr %0,%1
+   lwz%U1%X1 %0,%1
+   stw%U0%X0 %1,%0
+   mfvsrwz %0,%x1
+   mtvsrwz %x0,%1
+   xxspltib %x0,0
+   li %0,0"
+  [(set_attr "type" "vecsimple, fpload,    fpstore,   *,         load,
+                       store,     mtvsr,     mfvsr,     vecsimple, *")])
+
+
 
 ;; Here is how to move condition codes around.  When we store CC data in
 ;; an integer register or memory, we store just the high-order 4 bits.
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 39ae7791c60a..5f81d3426a2c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -642,6 +642,10 @@ mieee128-constant
 Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
 Generate (do not generate) code that uses the LXVKQ instruction.
 
+mieee16-gpr-args
+Target Undocumented Var(TARGET_IEEE16_GPR_ARGS) Init(1) Save
+Pass _Float16 in GPR registers.
+
 ; Documented parameters
 
 -param=rs6000-vect-unroll-limit=
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index f5797387ca79..ed427ea05e9b 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -50,11 +50,29 @@
 (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
 
 ;; Vector logical modes
-(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF])
+(define_mode_iterator VEC_L [V16QI
+                            V8HI
+                            V8HF
+                            V4SI
+                            V2DI
+                            V4SF
+                            V2DF
+                            V1TI
+                            TI
+                            KF
+                            TF])
 
 ;; Vector modes for moves.  Don't do TImode or TFmode here, since their
 ;; moves are handled elsewhere.
-(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF])
+(define_mode_iterator VEC_M [V16QI
+                            V8HI
+                            V4SI
+                            V2DI
+                            V8HF
+                            V4SF
+                            V2DF
+                            V1TI
+                            KF])
 
 ;; Vector modes for types that don't need a realignment under VSX
 (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF])
@@ -63,7 +81,13 @@
 (define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
 
 ;; Vector init/extract modes
-(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VEC_E [V16QI
+                            V8HI
+                            V4SI
+                            V2DI
+                            V8HF
+                            V4SF
+                            V2DF])
 
 ;; Vector modes for 64-bit base types
 (define_mode_iterator VEC_64 [V2DI V2DF])
@@ -76,6 +100,7 @@
                            (V8HI  "HI")
                            (V4SI  "SI")
                            (V2DI  "DI")
+                           (V8HF  "HF")
                            (V4SF  "SF")
                            (V2DF  "DF")
                            (V1TI  "TI")
@@ -86,6 +111,7 @@
                              (V8HI  "hi")
                              (V4SI  "si")
                              (V2DI  "di")
+                             (V8HF  "hf")
                              (V4SF  "sf")
                              (V2DF  "df")
                              (V1TI  "ti")
@@ -1191,6 +1217,21 @@
   DONE;
 })
 
+(define_expand "vec_pack_trunc_v4sf"
+  [(match_operand:V8HF 0 "vfloat_operand")
+   (match_operand:V4SF 1 "vfloat_operand")
+   (match_operand:V4SF 2 "vfloat_operand")]
+  "TARGET_IEEE16"
+{
+  rtx r1 = gen_reg_rtx (V8HFmode);
+  rtx r2 = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_vsx_xvcvsphp_v8hf (r1, operands[1]));
+  emit_insn (gen_vsx_xvcvsphp_v8hf (r2, operands[2]));
+  rs6000_expand_extract_even (operands[0], r1, r2);
+  DONE;
+})
+
 ;; Convert single word types to double word
 (define_expand "vec_unpacks_hi_v4sf"
   [(match_operand:V2DF 0 "vfloat_operand")
@@ -1264,6 +1305,18 @@
   DONE;
 })
 
+(define_expand "vec_unpacks_hi_v8hf"
+  [(match_operand:V4SF 0 "vfloat_operand")
+   (match_operand:V8HF 1 "vfloat_operand")]
+  "TARGET_IEEE16"
+{
+  rtx reg = gen_reg_rtx (V8HFmode);
+
+  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
+  emit_insn (gen_vsx_xvcvhpsp (operands[0], reg));
+  DONE;
+})
+
 
 ;; Align vector loads with a permute.
 (define_expand "vec_realign_load_<mode>"
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dd3573b80868..218351447349 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -46,9 +46,14 @@
 ;; Iterator for vector floating point types supported by VSX
 (define_mode_iterator VSX_F [V4SF V2DF])
 
+;; Iterator for 8 element vectors
+(define_mode_iterator V8HI_V8HF [V8HI
+                                (V8HF "TARGET_IEEE16")])
+
 ;; Iterator for logical types supported by VSX
 (define_mode_iterator VSX_L [V16QI
                             V8HI
+                            (V8HF      "TARGET_IEEE16")
                             V4SI
                             V2DI
                             V4SF
@@ -61,6 +66,7 @@
 ;; Iterator for memory moves.
 (define_mode_iterator VSX_M [V16QI
                             V8HI
+                            (V8HF      "TARGET_IEEE16")
                             V4SI
                             V2DI
                             V4SF
@@ -71,6 +77,7 @@
                             TI])
 
 (define_mode_attr VSX_XXBR  [(V8HI  "h")
+                            (V8HF  "h")
                             (V4SI  "w")
                             (V4SF  "w")
                             (V2DF  "d")
@@ -80,6 +87,7 @@
 ;; Map into the appropriate load/store name based on the type
 (define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
+                       (V8HF  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
@@ -93,6 +101,7 @@
 ;; Map the register class used
 (define_mode_attr VSr  [(V16QI "v")
                         (V8HI  "v")
+                        (V8HF  "v")
                         (V4SI  "v")
                         (V4SF  "wa")
                         (V2DI  "wa")
@@ -108,6 +117,7 @@
 ;; What value we need in the "isa" field, to make the IEEE QP float work.
 (define_mode_attr VSisa        [(V16QI "*")
                         (V8HI  "*")
+                        (V8HF  "p9v")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
@@ -124,6 +134,7 @@
 ;; integer modes.
 (define_mode_attr ??r  [(V16QI "??r")
                         (V8HI  "??r")
+                        (V8HF  "??r")
                         (V4SI  "??r")
                         (V4SF  "??r")
                         (V2DI  "??r")
@@ -136,6 +147,7 @@
 ;; A mode attribute used for 128-bit constant values.
 (define_mode_attr nW   [(V16QI "W")
                         (V8HI  "W")
+                        (V8HF  "W")
                         (V4SI  "W")
                         (V4SF  "W")
                         (V2DI  "W")
@@ -163,6 +175,7 @@
 ;; operation
 (define_mode_attr VSv  [(V16QI "v")
                         (V8HI  "v")
+                        (V8HF  "v")
                         (V4SI  "v")
                         (V4SF  "v")
                         (V2DI  "v")
@@ -396,6 +409,7 @@
 ;; Like VM2 in altivec.md, just do char, short, int, long, float and double
 (define_mode_iterator VM3 [V4SI
                           V8HI
+                          V8HF
                           V16QI
                           V4SF
                           V2DF
@@ -407,6 +421,7 @@
 (define_mode_attr VM3_char [(V2DI "d")
                           (V4SI "w")
                           (V8HI "h")
+                          (V8HF "h")
                           (V16QI "b")
                           (V2DF  "d")
                           (V4SF  "w")])
@@ -541,21 +556,21 @@
   [(set_attr "type" "vecload")
    (set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_load_v8hi"
-  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
-        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
+  [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa")
+        (match_operand:V8HI_V8HF 1 "indexed_or_indirect_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "&& 1"
   [(set (match_dup 2)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 1)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
    (set (match_dup 0)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 2)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
@@ -802,27 +817,27 @@
                     (const_int 0) (const_int 1)])))]
   "")
 
-(define_insn "*vsx_le_perm_store_v8hi"
-  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
-        (match_operand:V8HI 1 "vsx_register_operand" "wa"))]
+(define_insn "*vsx_le_perm_store_<mode>"
+  [(set (match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand" "=Z")
+        (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 
 (define_split
-  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
-        (match_operand:V8HI 1 "vsx_register_operand"))]
+  [(set (match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand")
+        (match_operand:V8HI_V8HF 1 "vsx_register_operand"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
   [(set (match_dup 2)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 1)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
    (set (match_dup 0)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 2)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
@@ -861,25 +876,25 @@
 ;; The post-reload split requires that we re-permute the source
 ;; register in case it is still live.
 (define_split
-  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
-        (match_operand:V8HI 1 "vsx_register_operand"))]
+  [(set (match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand")
+        (match_operand:V8HI_V8HF 1 "vsx_register_operand"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
   [(set (match_dup 1)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 1)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
    (set (match_dup 0)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 1)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
    (set (match_dup 1)
-        (vec_select:V8HI
+        (vec_select:V8HI_V8HF
           (match_dup 1)
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
@@ -1434,15 +1449,15 @@
   "lxvw4x %x0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_expand "vsx_ld_elemrev_v8hi"
-  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
-        (vec_select:V8HI
-         (match_operand:V8HI 1 "memory_operand" "Z")
+(define_expand "vsx_ld_elemrev_<mode>"
+  [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa")
+        (vec_select:V8HI_V8HF
+         (match_operand:V8HI_V8HF 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
-  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN"
 {
   if (!TARGET_P9_VECTOR)
     {
@@ -1452,9 +1467,9 @@
       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
       int i;
 
-      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
+      subreg = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0);
       emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
-      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
+      subreg2 = simplify_gen_subreg (<MODE>mode, tmp, V4SImode, 0);
 
       for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);
@@ -1462,21 +1477,21 @@
       pcv = force_reg (V16QImode,
                        gen_rtx_CONST_VECTOR (V16QImode,
                                              gen_rtvec_v (16, perm)));
-      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
-                                                subreg2, pcv));
+      emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], subreg2,
+                                                  subreg2, pcv));
       DONE;
     }
 })
 
-(define_insn "*vsx_ld_elemrev_v8hi_internal"
-  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
-        (vec_select:V8HI
-          (match_operand:V8HI 1 "memory_operand" "Z")
+(define_insn "*vsx_ld_elemrev_<mode>_internal"
+  [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa")
+        (vec_select:V8HI_V8HF
+          (match_operand:V8HI_V8HF 1 "memory_operand" "Z")
           (parallel [(const_int 7) (const_int 6)
                      (const_int 5) (const_int 4)
                      (const_int 3) (const_int 2)
                      (const_int 1) (const_int 0)])))]
-  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
   "lxvh8x %x0,%y1"
   [(set_attr "type" "vecload")])
 
@@ -1584,20 +1599,20 @@
   "stxvw4x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
-(define_expand "vsx_st_elemrev_v8hi"
-  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
-        (vec_select:V8HI
-          (match_operand:V8HI 1 "vsx_register_operand" "wa")
+(define_expand "vsx_st_elemrev_<mode>"
+  [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z")
+        (vec_select:V8HI_V8HF
+          (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa")
           (parallel [(const_int 7) (const_int 6)
                      (const_int 5) (const_int 4)
                      (const_int 3) (const_int 2)
                      (const_int 1) (const_int 0)])))]
-  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN"
 {
   if (!TARGET_P9_VECTOR)
     {
       rtx mem_subreg, subreg, perm[16], pcv;
-      rtx tmp = gen_reg_rtx (V8HImode);
+      rtx tmp = gen_reg_rtx (<MODE>mode);
       /* 2 is leftmost element in register */
       unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
       int i;
@@ -1608,10 +1623,10 @@
       pcv = force_reg (V16QImode,
                        gen_rtx_CONST_VECTOR (V16QImode,
                                              gen_rtvec_v (16, perm)));
-      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
-                                                operands[1], pcv));
-      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
-      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
+      emit_insn (gen_altivec_vperm_<mode>_direct (tmp, operands[1],
+                                                  operands[1], pcv));
+      subreg = simplify_gen_subreg (V4SImode, tmp, <MODE>mode, 0);
+      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
       emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
       DONE;
     }
@@ -1626,15 +1641,15 @@
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
-(define_insn "*vsx_st_elemrev_v8hi_internal"
-  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
-        (vec_select:V8HI
-          (match_operand:V8HI 1 "vsx_register_operand" "wa")
+(define_insn "*vsx_st_elemrev_<mode>_internal"
+  [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z")
+        (vec_select:V8HI_V8HF
+          (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa")
           (parallel [(const_int 7) (const_int 6)
                      (const_int 5) (const_int 4)
                      (const_int 3) (const_int 2)
                      (const_int 1) (const_int 0)])))]
-  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
   "stxvh8x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
@@ -2420,6 +2435,7 @@
   [(set_attr "type" "fp")])
 
 ;; Generate xvcvhpsp instruction
+;; Used for the built-in function
 (define_insn "vsx_xvcvhpsp"
   [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
@@ -2428,7 +2444,17 @@
   "xvcvhpsp %x0,%x1"
   [(set_attr "type" "vecfloat")])
 
+;; Used for conversion to/from _Float16
+(define_insn "vsx_xvcvhpsp_v8hf"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+       (unspec:V4SF [(match_operand:V8HF 1 "vsx_register_operand" "wa")]
+                    UNSPEC_VSX_CVHPSP))]
+  "TARGET_P9_VECTOR"
+  "xvcvhpsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
 ;; Generate xvcvsphp
+;; Used for the built-in function
 (define_insn "vsx_xvcvsphp"
   [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
@@ -2437,6 +2463,15 @@
   "xvcvsphp %x0,%x1"
 [(set_attr "type" "vecfloat")])
 
+;; Used for conversion to/from _Float16
+(define_insn "vsx_xvcvsphp_v8hf"
+  [(set (match_operand:V8HF 0 "register_operand" "=wa")
+       (unspec:V8HF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
+                    UNSPEC_VSX_XVCVSPHP))]
+  "TARGET_P9_VECTOR"
+  "xvcvsphp %x0,%x1"
+[(set_attr "type" "vecfloat")])
+
 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
 ;; format of scalars is actually DF.
 (define_insn "vsx_xscvdpsp_scalar"
@@ -3120,6 +3155,25 @@
   "xvrdpiz %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+
+;; Convert IEEE 16-bit floating point to/from SF and DF modes.
+
+(define_insn "extendhf<mode>2"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+       (float_extend:SFDF
+        (match_operand:HF 1 "vsx_register_operand" "wa")))]
+  "TARGET_IEEE16"
+  "xscvhpdp %x0,%x1"
+  [(set_attr "type" "fpsimple")])
+
+(define_insn "trunc<mode>hf2"
+  [(set (match_operand:HF 0 "vsx_register_operand" "=wa")
+       (float_truncate:HF
+        (match_operand:SFDF 1 "vsx_register_operand" "wa")))]
+  "TARGET_IEEE16"
+  "xscvdphp %x0,%1"
+  [(set_attr "type" "fpsimple")])
+
 
 ;; Permute operations
 
@@ -3299,10 +3353,10 @@
   "xxpermdi %x0,%x1,%x1,2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "xxswapd_v8hi"
-  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
-       (vec_select:V8HI
-         (match_operand:V8HI 1 "vsx_register_operand" "wa")
+(define_insn "xxswapd_<mode>"
+  [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa")
+       (vec_select:V8HI_V8HF
+         (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
@@ -3402,15 +3456,15 @@
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "*vsx_lxvd2x8_le_V8HI"
-  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
-        (vec_select:V8HI
-          (match_operand:V8HI 1 "memory_operand" "Z")
+(define_insn "*vsx_lxvd2x8_le_<MODE>"
+  [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa")
+        (vec_select:V8HI_V8HF
+          (match_operand:V8HI_V8HF 1 "memory_operand" "Z")
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
@@ -3478,15 +3532,15 @@
   [(set_attr "type" "vecstore")
    (set_attr "length" "8")])
 
-(define_insn "*vsx_stxvd2x8_le_V8HI"
-  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
-        (vec_select:V8HI
-          (match_operand:V8HI 1 "vsx_register_operand" "wa")
+(define_insn "*vsx_stxvd2x8_le_<MODE>"
+  [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z")
+        (vec_select:V8HI_V8HF
+          (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa")
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
@@ -4060,7 +4114,7 @@
   if (which_alternative == 0
       && ((<MODE>mode == V16QImode
           && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
-         || (<MODE>mode == V8HImode
+         || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode)
              && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4))))
     {
       enum machine_mode dest_mode = GET_MODE (operands[0]);
@@ -4139,7 +4193,7 @@
       else
        vec_tmp = src;
     }
-  else if (<MODE>mode == V8HImode)
+  else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode)
     {
       if (value != 3)
        emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));

[gcc(refs/users/meissner/heads/work221-float)] Add initial _Float16 support.

Reply via email to