https://gcc.gnu.org/g:261ee5c479248aecc4a5bf6771f272eebf531f2f
commit 261ee5c479248aecc4a5bf6771f272eebf531f2f Author: Michael Meissner <meiss...@linux.ibm.com> Date: Mon Sep 8 14:46:21 2025 -0400 Add initial _Float16 support. 2025-09-08 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/altivec.md (VM): Add V8HF. (VM2): Likewise. (VI_char): Likewise. (VI_scalar): Likewise. (VI_unit): Likewise. (VP_small): Reformat. (VP_small_lc): Likewise. (VU_char): Add V8HF. * config/rs6000/predicates.md (easy_fp_constant): Power10 can load _Float16 constants directly. (ieee16_xxspltiw_constant): New predicate. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add _Float16 support. * config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Likewise. * config/rs6000/rs6000-modes.def (HFmode): Likewise. * config/rs6000/rs6000-p8swap.cc (rs6000_gen_stvx): Drop V8HFmode support since V8HFmode doesn't exist on power8 or earlier. (rs6000_gen_lvx): Likewise. (replace_swapped_load_constant): Likewise. * config/rs6000/rs6000-protos.h (vec_const_128bit_type): Add mode field. * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add _Float16 support. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_can_change_mode_class): Likewise. (rs6000_function_value): Likewise. (rs6000_scalar_mode_supported_p): Likewise. (rs6000_floatn_mode): Likewise. (constant_fp_to_128bit_vector): Likewise. (vec_const_128bit_to_bytes): Likewise. (constant_generates_xxspltiw): Likewise. * config/rs6000/rs6000.h (TARGET_IEEE16): New macro. * config/rs6000/rs6000.md (FMOVE128_GPR): Add support for V8HFmode. (RELOAD): Likewise. (movhf): Likewise. (movhf_xxspltiw): Likewise. (movhf_internal): Likewise. * config/rs6000/rs6000.opt (-mieee16-gpr-args): New debug switch. * config/rs6000/vector.md (VEC_L): Add V8HFmode. (VEC_M): Likewise. (VEC_E): Likewise. 
(VEC_base): Likewise. (VEC_base_l): Likewise. (vec_pack_trunc_v4sf): New expander. (vec_unpacks_hi_v8hf): Likewise. * config/rs6000/vsx.md (V8HI_V8HF): New mode iterator. (VSX_L): Add V8HFmode. (VSX_XXBR): Likewise. (VSm): Likewise. (VSr): Likewise. (VSisa): Likewise. (??r): Likewise. (VSc): Likewise. (VM3): Likewise. (VM3_char): Likewise. (vsx_le_perm_load_<mode>): Likewise. (vsx_le_perm_store_<mode>): Likewise. (permute splits): Likewise. (vsx_ld_elemrev_<mode>): Likewise. (vsx_st_elemrev_<mode>): Likewise. (vsx_st_elemrev_<mode>_internal): Likewise. (vsx_xvcvhpsp): Add comment. (vsx_xvcvhpsp_v8hf): New insn. (vsx_xvcvsphp): Add comment. (vsx_xvcvsphp_v8hf): New insn. (extendhf<mode>2): Likewise. (trunc<mode>hf2): Likewise. (xxswapd_<mode>): Add V8HFmode. (vsx_lxvd2x8_le_<MODE>): Likewise. (vsx_stxvd2x8_le_<MODE>): Likewise. (vsx_extract_<mode>_store_p9): Likewise. Diff: --- gcc/config/rs6000/altivec.md | 28 +++++- gcc/config/rs6000/predicates.md | 19 ++++ gcc/config/rs6000/rs6000-builtin.cc | 2 + gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-modes.def | 3 + gcc/config/rs6000/rs6000-p8swap.cc | 14 +-- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.cc | 130 ++++++++++++++++++++++---- gcc/config/rs6000/rs6000.h | 3 + gcc/config/rs6000/rs6000.md | 60 +++++++++++- gcc/config/rs6000/rs6000.opt | 4 + gcc/config/rs6000/vector.md | 59 +++++++++++- gcc/config/rs6000/vsx.md | 178 +++++++++++++++++++++++------------- 13 files changed, 402 insertions(+), 109 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7edc288a6565..b6f92a71f963 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -191,6 +191,7 @@ ;; otherwise handled by altivec (v2df, v2di, ti) (define_mode_iterator VM [V4SI V8HI + V8HF V16QI V4SF V2DF @@ -203,6 +204,7 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI + V8HF V16QI V4SF V2DF @@ -222,18 +224,34 @@ V1TI TI]) -(define_mode_attr VI_char 
[(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") + (V4SI "w") + (V8HI "h") + (V8HF "h") + (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") + (V4SI "SI") + (V8HI "HI") + (V8HF "HF") + (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)") (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) ;; Vector pack/unpack (define_mode_iterator VP [V2DI V4SI V8HI]) -(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) -(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) -(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) +(define_mode_attr VP_small [(V2DI "V4SI") + (V4SI "V8HI") + (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") + (V4SI "v8hi") + (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") + (V4SI "h") + (V8HI "b") + (V8HF "b")]) ;; Vector negate (define_mode_iterator VNEG [V4SI V2DI]) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..2a4b38838d20 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -601,6 +601,11 @@ if (TARGET_VSX && op == CONST0_RTX (mode)) return 1; + /* Power9 needs to load HFmode constants from memory, Power10 can use + XXSPLTIW. */ + if (mode == HFmode && !TARGET_POWER10) + return 0; + /* Constants that can be generated with ISA 3.1 instructions are easy. */ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) @@ -2166,3 +2171,17 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW. 
+(define_predicate "ieee16_xxspltiw_constant" + (match_code "const_double") +{ + if (!TARGET_POWER10 || mode != HFmode) + return false; + + vec_const_128bit_type vsx_const; + if (!vec_const_128bit_to_bytes (op, mode, &vsx_const)) + return false; + + return constant_generates_xxspltiw (&vsx_const); +}) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index bc1580f051b0..05a730a8fdca 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -491,6 +491,8 @@ const char *rs6000_type_string (tree type_node) return "voidc*"; else if (type_node == float128_type_node) return "_Float128"; + else if (type_node == float16_type_node) + return "_Float16"; else if (type_node == vector_pair_type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 8fe5652442e3..3872d742d159 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -82,10 +82,12 @@ #endif /* Nonzero if we can use a floating-point register to pass this arg. */ -#define USE_FP_FOR_ARG_P(CUM,MODE) \ - (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ - && (CUM)->fregno <= FP_ARG_MAX_REG \ - && TARGET_HARD_FLOAT) +#define USE_FP_FOR_ARG_P(CUM,MODE) \ + (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ + && (CUM)->fregno <= FP_ARG_MAX_REG \ + && TARGET_HARD_FLOAT \ + && ((MODE) != HFmode || !TARGET_IEEE16_GPR_ARGS)) + /* Nonzero if we can use an AltiVec register to pass this arg. */ #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index f89e4ef403c1..04dc1d8c9194 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -45,6 +45,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* IBM 128-bit floating point. */ FLOAT_MODE (IF, 16, ibm_extended_format); +/* Explicit IEEE 16-bit floating point. 
*/ +FLOAT_MODE (HF, 2, ieee_half_format); + /* Add any extra modes needed to represent the condition code. For the RS/6000, we need separate modes when unsigned (logical) comparisons diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc index e92f01031270..4fb107c60a47 100644 --- a/gcc/config/rs6000/rs6000-p8swap.cc +++ b/gcc/config/rs6000/rs6000-p8swap.cc @@ -1598,10 +1598,6 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp); else if (mode == V8HImode) stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp); -#endif else if (mode == V4SImode) stvx = gen_altivec_stvx_v4si (src_exp, dest_exp); else if (mode == V4SFmode) @@ -1722,10 +1718,6 @@ rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp); else if (mode == V8HImode) lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp); -#endif else if (mode == V4SImode) lvx = gen_altivec_lvx_v4si (dest_exp, src_exp); else if (mode == V4SFmode) @@ -1930,11 +1922,7 @@ replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn) rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); new_mem = force_const_mem (mode, new_const_vector); } - else if ((mode == V8HImode) -#ifdef HAVE_V8HFmode - || (mode == V8HFmode) -#endif - ) + else if (mode == V8HImode) { rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8)); int i; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197b..9bf971370d41 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -250,6 +250,7 @@ typedef struct { bool all_words_same; /* Are the words all equal? */ bool all_half_words_same; /* Are the half words all equal? 
*/ bool all_bytes_same; /* Are the bytes all equal? */ + machine_mode mode; /* Original constant mode. */ } vec_const_128bit_type; extern bool vec_const_128bit_to_bytes (rtx, machine_mode, diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 04e688d243e1..7a6b16d30866 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1899,7 +1899,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { - if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p) + if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p + && mode != HFmode) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1931,7 +1932,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (TARGET_POPCNTD && mode == SImode) return 1; - if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) + if (TARGET_P9_VECTOR + && (mode == QImode || mode == HImode || mode == HFmode)) return 1; } @@ -1989,7 +1991,8 @@ static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) + || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode + || mode1 == HFmode || mode2 == HFmode) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2255,6 +2258,7 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + HFmode, SFmode, DFmode, TFmode, @@ -2275,6 +2279,7 @@ rs6000_debug_reg_global (void) V8SImode, V4DImode, V2TImode, + V8HFmode, V4SFmode, V2DFmode, V8SFmode, @@ -2633,8 +2638,13 @@ rs6000_setup_reg_addr_masks (void) /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and - STFIWZX instructions to load it. */ - bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + STFIWZX instructions to load it. 
+ + Never allow offset addressing for HFmode, since it is expected that + 16-bit floating point should always go into the vector registers and + we only have indexed and indirect 16-bit loads to VSR registers. */ + bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) + || m == HFmode); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2683,6 +2693,7 @@ rs6000_setup_reg_addr_masks (void) && !complex_p && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) + && m != E_HFmode && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2930,6 +2941,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[V1TImode] = 128; } + /* _Float16 support. */ + if (TARGET_IEEE16) + { + rs6000_vector_unit[V8HFmode] = VECTOR_VSX; + rs6000_vector_mem[V8HFmode] = VECTOR_VSX; + rs6000_vector_align[V8HFmode] = align64; + } + /* DFmode, see if we want to use the VSX unit. Memory is handled differently, so don't set rs6000_vector_mem. */ if (TARGET_VSX) @@ -2952,6 +2971,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[TImode] = align64; } + /* Allow HFmode in VSX register and set the VSX memory macros. */ + if (TARGET_IEEE16) + { + rs6000_vector_mem[HImode] = VECTOR_VSX; + rs6000_vector_align[HFmode] = 16; + } + /* Add support for vector pairs and vector quad registers. 
*/ if (TARGET_MMA) { @@ -3011,6 +3037,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_di_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_di_load; reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; @@ -3040,6 +3068,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_di_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3062,6 +3096,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hf; reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; @@ -3072,6 +3107,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hf; reg_addr[V8HImode].reload_vsx_gpr = 
CODE_FOR_reload_vsx_from_gprv8hi; reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; @@ -3109,6 +3145,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_si_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; @@ -3132,6 +3170,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_si_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -12662,6 +12706,9 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { + if (TARGET_IEEE16 && mode == HFmode) + return true; + if (TARGET_POWERPC64) { /* ISA 2.07: MTVSRD or MVFVSRD. 
*/ @@ -12679,7 +12726,8 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, if (mode == SImode) return true; - if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + if (TARGET_P9_VECTOR + && (mode == HImode || mode == QImode || mode == HFmode)) return true; } @@ -13449,6 +13497,11 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; + /* IEEE 16-bit don't support offset addressing, but they can go in any + floating point/vector register. */ + if (mode == HFmode && TARGET_IEEE16) + return rclass; + /* If this is a scalar floating point value and we don't have D-form addressing, prefer the traditional floating point registers so that we can use D-form (register+offset) addressing. */ @@ -13678,6 +13731,9 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); + if (from == HFmode || to == HFmode) + return from_size == to_size; + if (from_size != to_size) { enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; @@ -24019,6 +24075,8 @@ rs6000_function_value (const_tree valtype, if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) /* _Decimal128 must use an even/odd register pair. */ regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + else if (mode == HFmode && TARGET_IEEE16_GPR_ARGS) + regno = GP_ARG_RETURN; else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && !FLOAT128_VECTOR_P (mode)) regno = FP_ARG_RETURN; @@ -24291,6 +24349,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; + else if (mode == HFmode) + return TARGET_IEEE16; else return default_scalar_mode_supported_p (mode); } @@ -24342,6 +24402,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? 
SFmode : opt_scalar_float_mode (); + case 32: return DFmode; @@ -24363,6 +24426,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? HFmode : opt_scalar_float_mode (); + case 32: return SFmode; @@ -28903,24 +28969,43 @@ constant_fp_to_128bit_vector (rtx op, const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op); long real_words[VECTOR_128BIT_WORDS]; - /* Make sure we don't overflow the real_words array and that it is - filled completely. */ - gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so + deal with it here. */ + if (mode == HFmode) + { + real_to_target (real_words, rtype, mode); + unsigned char hi = (unsigned char) (real_words[0] >> 8); + unsigned char lo = (unsigned char) real_words[0]; + + if (!BYTES_BIG_ENDIAN) + std::swap (hi, lo); - real_to_target (real_words, rtype, mode); + info->bytes[0] = hi; + info->bytes[1] = lo; + } - /* Iterate over each 32-bit word in the floating point constant. The - real_to_target function puts out words in target endian fashion. We need - to arrange the order so that the bytes are written in big endian order. */ - for (unsigned num = 0; num < num_words; num++) + else { - unsigned endian_num = (BYTES_BIG_ENDIAN - ? num - : num_words - 1 - num); + /* Make sure we don't overflow the real_words array and that it is filled + completely. */ + gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + + real_to_target (real_words, rtype, mode); + + /* Iterate over each 32-bit word in the floating point constant. The + real_to_target function puts out words in target endian fashion. We + need to arrange the order so that the bytes are written in big endian + order. */ + for (unsigned num = 0; num < num_words; num++) + { + unsigned endian_num = (BYTES_BIG_ENDIAN + ? 
num + : num_words - 1 - num); - unsigned uvalue = real_words[endian_num]; - for (int shift = 32 - 8; shift >= 0; shift -= 8) - info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + unsigned uvalue = real_words[endian_num]; + for (int shift = 32 - 8; shift >= 0; shift -= 8) + info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + } } /* Mark that this constant involves floating point. */ @@ -28959,6 +29044,7 @@ vec_const_128bit_to_bytes (rtx op, return false; /* Set up the bits. */ + info->mode = mode; switch (GET_CODE (op)) { /* Integer constants, default to double word. */ @@ -29186,6 +29272,10 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; + /* HFmode constants can always use XXSPLTIW. */ + if (vsx_const->mode == HFmode) + return 1; + if (!vsx_const->all_words_same) return 0; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index cffe2750ba9a..31c1d8f613a6 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -567,6 +567,9 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Support for IEEE 16-bit floating point. */ +#define TARGET_IEEE16 TARGET_P9_VECTOR + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 04a6c0f7461d..7ba2088884d5 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -552,6 +552,7 @@ (define_mode_iterator FMOVE128_GPR [TI V16QI V8HI + V8HF V4SI V4SF V2DI @@ -837,8 +838,8 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. 
-(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI KF IF TF +(define_mode_iterator RELOAD [V16QI V8HI V8HF V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI KF IF TF HF OO XO]) ;; Iterate over smin, smax @@ -8145,6 +8146,61 @@ p9v, p9v, p9v, p9v, p9v, p9v, p9v, *, *, *")]) + +(define_expand "movhf" + [(set (match_operand:HF 0 "nonimmediate_operand") + (match_operand:HF 1 "any_operand"))] + "TARGET_IEEE16" +{ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (HFmode, operands[1]); +}) + +;; On power10, we can load up HFmode constants with xxspltiw or pli. +(define_insn "*movhf_xxspltiw" + [(set (match_operand:HF 0 "gpc_reg_operand" "=wa,r") + (match_operand:HF 1 "ieee16_xxspltiw_constant" "eP,eP"))] + "TARGET_IEEE16 && TARGET_POWER10 && TARGET_PREFIXED" +{ + rtx op1 = operands[1]; + const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); + long real_words[VECTOR_128BIT_WORDS]; + + real_to_target (real_words, rtype, HFmode); + operands[2] = GEN_INT (real_words[0]); + return (vsx_register_operand (operands[0], HFmode) + ? "xxspltiw %x0,%2" + : "li %0,%2"); +} + [(set_attr "type" "vecperm,*") + (set_attr "prefixed" "yes")]) + +(define_insn "*movhf_internal" + [(set (match_operand:HF 0 "nonimmediate_operand" + "=wa, wa, Z, r, r, + m, r, wa, wa, r") + + (match_operand:HF 1 "any_operand" + "wa, Z, wa, r, m, + r, wa, r, j, j"))] + "TARGET_IEEE16 + && (gpc_reg_operand (operands[0], HFmode) + || gpc_reg_operand (operands[1], HFmode))" + "@ + xxlor %x0,%x1,%x1 + lxsiwzx %x0,%y1 + stxsiwx %x1,%y0 + mr %0,%1 + lwz%U1%X1 %0,%1 + stw%U0%X0 %1,%0 + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 + xxspltib %x0,0 + li %0,0" + [(set_attr "type" "vecsimple, fpload, fpstore, *, load, + store, mtvsr, mfvsr, vecsimple, *")]) + + ;; Here is how to move condition codes around. When we store CC data in ;; an integer register or memory, we store just the high-order 4 bits. 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 39ae7791c60a..5f81d3426a2c 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -642,6 +642,10 @@ mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. +mieee16-gpr-args +Target Undocumented Var(TARGET_IEEE16_GPR_ARGS) Init(1) Save +Pass _Float16 in GPR registers. + ; Documented parameters -param=rs6000-vect-unroll-limit= diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index f5797387ca79..ed427ea05e9b 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -50,11 +50,29 @@ (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) ;; Vector logical modes -(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF]) +(define_mode_iterator VEC_L [V16QI + V8HI + V8HF + V4SI + V2DI + V4SF + V2DF + V1TI + TI + KF + TF]) ;; Vector modes for moves. Don't do TImode or TFmode here, since their ;; moves are handled elsewhere. 
-(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF]) +(define_mode_iterator VEC_M [V16QI + V8HI + V4SI + V2DI + V8HF + V4SF + V2DF + V1TI + KF]) ;; Vector modes for types that don't need a realignment under VSX (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) @@ -63,7 +81,13 @@ (define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) ;; Vector init/extract modes -(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VEC_E [V16QI + V8HI + V4SI + V2DI + V8HF + V4SF + V2DF]) ;; Vector modes for 64-bit base types (define_mode_iterator VEC_64 [V2DI V2DF]) @@ -76,6 +100,7 @@ (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V8HF "HF") (V4SF "SF") (V2DF "DF") (V1TI "TI") @@ -86,6 +111,7 @@ (V8HI "hi") (V4SI "si") (V2DI "di") + (V8HF "hf") (V4SF "sf") (V2DF "df") (V1TI "ti") @@ -1191,6 +1217,21 @@ DONE; }) +(define_expand "vec_pack_trunc_v4sf" + [(match_operand:V8HF 0 "vfloat_operand") + (match_operand:V4SF 1 "vfloat_operand") + (match_operand:V4SF 2 "vfloat_operand")] + "TARGET_IEEE16" +{ + rtx r1 = gen_reg_rtx (V8HFmode); + rtx r2 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_vsx_xvcvsphp_v8hf (r1, operands[1])); + emit_insn (gen_vsx_xvcvsphp_v8hf (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + ;; Convert single word types to double word (define_expand "vec_unpacks_hi_v4sf" [(match_operand:V2DF 0 "vfloat_operand") @@ -1264,6 +1305,18 @@ DONE; }) +(define_expand "vec_unpacks_hi_v8hf" + [(match_operand:V4SF 0 "vfloat_operand") + (match_operand:V8HF 1 "vfloat_operand")] + "TARGET_IEEE16" +{ + rtx reg = gen_reg_rtx (V8HFmode); + + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvhpsp (operands[0], reg)); + DONE; +}) + ;; Align vector loads with a permute. 
(define_expand "vec_realign_load_<mode>" diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b80868..218351447349 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -46,9 +46,14 @@ ;; Iterator for vector floating point types supported by VSX (define_mode_iterator VSX_F [V4SF V2DF]) +;; Iterator for 8 element vectors +(define_mode_iterator V8HI_V8HF [V8HI + (V8HF "TARGET_IEEE16")]) + ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI + (V8HF "TARGET_IEEE16") V4SI V2DI V4SF @@ -61,6 +66,7 @@ ;; Iterator for memory moves. (define_mode_iterator VSX_M [V16QI V8HI + (V8HF "TARGET_IEEE16") V4SI V2DI V4SF @@ -71,6 +77,7 @@ TI]) (define_mode_attr VSX_XXBR [(V8HI "h") + (V8HF "h") (V4SI "w") (V4SF "w") (V2DF "d") @@ -80,6 +87,7 @@ ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") + (V8HF "vw4") (V4SI "vw4") (V4SF "vw4") (V2DF "vd2") @@ -93,6 +101,7 @@ ;; Map the register class used (define_mode_attr VSr [(V16QI "v") (V8HI "v") + (V8HF "v") (V4SI "v") (V4SF "wa") (V2DI "wa") @@ -108,6 +117,7 @@ ;; What value we need in the "isa" field, to make the IEEE QP float work. (define_mode_attr VSisa [(V16QI "*") (V8HI "*") + (V8HF "p9v") (V4SI "*") (V4SF "*") (V2DI "*") @@ -124,6 +134,7 @@ ;; integer modes. (define_mode_attr ??r [(V16QI "??r") (V8HI "??r") + (V8HF "??r") (V4SI "??r") (V4SF "??r") (V2DI "??r") @@ -136,6 +147,7 @@ ;; A mode attribute used for 128-bit constant values. 
(define_mode_attr nW [(V16QI "W") (V8HI "W") + (V8HF "W") (V4SI "W") (V4SF "W") (V2DI "W") @@ -163,6 +175,7 @@ ;; operation (define_mode_attr VSv [(V16QI "v") (V8HI "v") + (V8HF "v") (V4SI "v") (V4SF "v") (V2DI "v") @@ -396,6 +409,7 @@ ;; Like VM2 in altivec.md, just do char, short, int, long, float and double (define_mode_iterator VM3 [V4SI V8HI + V8HF V16QI V4SF V2DF @@ -407,6 +421,7 @@ (define_mode_attr VM3_char [(V2DI "d") (V4SI "w") (V8HI "h") + (V8HF "h") (V16QI "b") (V2DF "d") (V4SF "w")]) @@ -541,21 +556,21 @@ [(set_attr "type" "vecload") (set_attr "length" "8")]) -(define_insn_and_split "*vsx_le_perm_load_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa") + (match_operand:V8HI_V8HF 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" "&& 1" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 2) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -802,27 +817,27 @@ (const_int 0) (const_int 1)])))] "") -(define_insn "*vsx_le_perm_store_v8hi" - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") - (match_operand:V8HI 1 "vsx_register_operand" "wa"))] +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand" "=Z") + (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") (set_attr "length" "12")]) (define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set 
(match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand") + (match_operand:V8HI_V8HF 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 2) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -861,25 +876,25 @@ ;; The post-reload split requires that we re-permute the source ;; register in case it is still live. (define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set (match_operand:V8HI_V8HF 0 "indexed_or_indirect_operand") + (match_operand:V8HI_V8HF 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" [(set (match_dup 1) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 1) - (vec_select:V8HI + (vec_select:V8HI_V8HF (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -1434,15 +1449,15 @@ "lxvw4x %x0,%y1" [(set_attr "type" "vecload")]) -(define_expand "vsx_ld_elemrev_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_expand "vsx_ld_elemrev_<mode>" + [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) 
(const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { @@ -1452,9 +1467,9 @@ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; - subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); + subreg = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0); emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); - subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); + subreg2 = simplify_gen_subreg (<MODE>mode, tmp, V4SImode, 0); for (i = 0; i < 16; ++i) perm[i] = GEN_INT (reorder[i]); @@ -1462,21 +1477,21 @@ pcv = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, - subreg2, pcv)); + emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], subreg2, + subreg2, pcv)); DONE; } }) -(define_insn "*vsx_ld_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_ld_elemrev_<mode>_internal" + [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvh8x %x0,%y1" [(set_attr "type" "vecload")]) @@ -1584,20 +1599,20 @@ "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_expand "vsx_st_elemrev_v8hi" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_expand "vsx_st_elemrev_<mode>" + [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z") + 
(vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { rtx mem_subreg, subreg, perm[16], pcv; - rtx tmp = gen_reg_rtx (V8HImode); + rtx tmp = gen_reg_rtx (<MODE>mode); /* 2 is leftmost element in register */ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; @@ -1608,10 +1623,10 @@ pcv = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], - operands[1], pcv)); - subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); - mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); + emit_insn (gen_altivec_vperm_<mode>_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, <MODE>mode, 0); + mem_subreg = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0); emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); DONE; } @@ -1626,15 +1641,15 @@ "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "*vsx_st_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "*vsx_st_elemrev_<mode>_internal" + [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvh8x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -2420,6 +2435,7 @@ [(set_attr "type" "fp")]) ;; 
Generate xvcvhpsp instruction +;; Used for the built-in function; operand 1 is typed V16QI. (define_insn "vsx_xvcvhpsp" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] @@ -2428,7 +2444,17 @@ "xvcvhpsp %x0,%x1" [(set_attr "type" "vecfloat")]) +;; Used for conversion from _Float16: V8HF source, V4SF result. +(define_insn "vsx_xvcvhpsp_v8hf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVHPSP))] + "TARGET_P9_VECTOR" + "xvcvhpsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + ;; Generate xvcvsphp +;; Used for the built-in function; operand 0 is typed V4SI. (define_insn "vsx_xvcvsphp" [(set (match_operand:V4SI 0 "register_operand" "=wa") (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] @@ -2437,6 +2463,15 @@ "xvcvsphp %x0,%x1" [(set_attr "type" "vecfloat")]) +;; Used for conversion to _Float16: V4SF source, V8HF result. +(define_insn "vsx_xvcvsphp_v8hf" + [(set (match_operand:V8HF 0 "register_operand" "=wa") + (unspec:V8HF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVSPHP))] + "TARGET_P9_VECTOR" + "xvcvsphp %x0,%x1" + [(set_attr "type" "vecfloat")]) + ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF ;; format of scalars is actually DF. (define_insn "vsx_xscvdpsp_scalar" @@ -3120,6 +3155,25 @@ "xvrdpiz %x0,%x1" [(set_attr "type" "vecdouble")]) + +;; Convert IEEE 16-bit floating point to/from SF and DF modes. 
+ +(define_insn "extendhf<mode>2" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (float_extend:SFDF + (match_operand:HF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvhpdp %x0,%x1" ; SF scalars share the DF register format + [(set_attr "type" "fpsimple")]) + +(define_insn "trunc<mode>hf2" + [(set (match_operand:HF 0 "vsx_register_operand" "=wa") + (float_truncate:HF + (match_operand:SFDF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvdphp %x0,%x1" ; wa operands must be printed with %x + [(set_attr "type" "fpsimple")]) + ;; Permute operations @@ -3299,10 +3353,10 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) -(define_insn "xxswapd_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "xxswapd_<mode>" + [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) @@ -3402,15 +3456,15 @@ "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "*vsx_lxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_lxvd2x8_le_<MODE>" + [(set (match_operand:V8HI_V8HF 0 "vsx_register_operand" "=wa") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "memory_operand" "Z") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) @@ -3478,15 +3532,15 @@ [(set_attr "type" "vecstore") (set_attr "length" "8")]) -(define_insn "*vsx_stxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" 
"wa") +(define_insn "*vsx_stxvd2x8_le_<MODE>" + [(set (match_operand:V8HI_V8HF 0 "memory_operand" "=Z") + (vec_select:V8HI_V8HF + (match_operand:V8HI_V8HF 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -4060,7 +4114,7 @@ if (which_alternative == 0 && ((<MODE>mode == V16QImode && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) - || (<MODE>mode == V8HImode + || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode) && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4)))) { enum machine_mode dest_mode = GET_MODE (operands[0]); @@ -4139,7 +4193,7 @@ else vec_tmp = src; } - else if (<MODE>mode == V8HImode) + else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode) { if (value != 3) emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));