https://gcc.gnu.org/g:1ce89e9d7a4ba7737ad29ec49d64a8c3dae94bde

commit 1ce89e9d7a4ba7737ad29ec49d64a8c3dae94bde
Author: Michael Meissner <[email protected]>
Date:   Thu Sep 25 20:16:37 2025 -0400

    Add _Float16 and __bfloat16 support.

2025-09-25  Michael Meissner  <[email protected]>

gcc/

	* config/rs6000/altivec.md (VM): Add _Float16 and __bfloat16 support.
	(VM2): Likewise.
	(VI_char): Likewise.
	(VI_scalar): Likewise.
	(VI_unit): Likewise.
	(VP_small): Likewise.
	(VP_small_lc): Likewise.
	(VU_char): Likewise.
	(altivec_vsplth_v8bf): Likewise.
	* config/rs6000/predicates.md (easy_fp_constant): Likewise.
	(fp16_xxspltiw_constant): Likewise.
	* config/rs6000/rs6000-builtin.cc (rs6000_type_string): Likewise.
	(rs6000_init_builtins): Likewise.
	* config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Likewise.
	* config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Likewise.
	(OTHER_POWER10_MASKS): Likewise.
	(POWERPC_MASKS): Likewise.
	* config/rs6000/rs6000-modes.def (HFmode): Likewise.
	(BFmode): Likewise.
	* config/rs6000/rs6000-p8swap.cc (rs6000_gen_stvx): Drop V8HFmode
	support since V8HFmode doesn't exist on power8 or earlier.
	(rs6000_gen_lvx): Likewise.
	(replace_swapped_load_constant): Likewise.
	* config/rs6000/rs6000-protos.h (vec_const_128bit_type): Add field
	to store the mode.
	* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add
	_Float16 and __bfloat16 support.
	(rs6000_modes_tieable_p): Likewise.
	(rs6000_debug_reg_global): Likewise.
	(rs6000_setup_reg_addr_masks): Likewise.
	(rs6000_init_hard_regno_mode_ok): Likewise.
	(rs6000_option_override_internal): Likewise.
	(rs6000_secondary_reload_simple_move): Likewise.
	(rs6000_preferred_reload_class): Likewise.
	(rs6000_can_change_mode_class): Likewise.
	(rs6000_function_value): Likewise.
	(rs6000_scalar_mode_supported_p): Likewise.
	(rs6000_floatn_mode): Likewise.
	(rs6000_opt_masks): Likewise.
	(constant_fp_to_128bit_vector): Likewise.
	(vec_const_128bit_to_bytes): Likewise.
	(constant_generates_xxspltiw): Likewise.
	* config/rs6000/rs6000.h (FP16_SCALAR_MODE_P): Likewise.
	* config/rs6000/rs6000.md (FMOVE128_GPR): Likewise.
	(RELOAD): Likewise.
	(ALTIVEC_DFORM): Likewise.
	(FP16): Likewise.
	(FP16_CONVERT): Likewise.
	(extendhf<mode>2): Likewise.
	(trunc<mode>hf2): Likewise.
	(extendbf<mode>2): Likewise.
	(mov<mode>, FP16 iterator): Likewise.
	(mov<mode>_xxspltiw): Likewise.
	(mov<mode>_internal): Likewise.
	* config/rs6000/rs6000.opt (-mieee16): New switch.
	(-mieee16-gpr-args): Likewise.
	(-mbfloat16): Likewise.
	* config/rs6000/vector.md (VEC_L): Add _Float16 and __bfloat16
	support.
	(VEC_M): Likewise.
	(VEC_N): Likewise.
	(VEC_E): Likewise.
	(VEC_base): Likewise.
	(VEC_base_l): Likewise.
	(vec_pack_trunc_v4sf): Likewise.
	(vec_unpacks_hi_v8hf): Likewise.
	* config/rs6000/vsx.md (VECTOR_16BIT): Likewise.
	(VSX_L): Likewise.
	(VSX_M): Likewise.
	(VSX_XXBR): Likewise.
	(VSm): Likewise.
	(VSr): Likewise.
	(VSisa): Likewise.
	(??r): Likewise.
	(nW): Likewise.
	(VSv): Likewise.
	(VM3): Likewise.
	(VM3_char): Likewise.
	(vsx_le_perm_load_<mode>): Likewise.
	(vsx_le_perm_store_<mode>): Likewise.
	(split for vector with 16-bit elements): Likewise.
	(vsx_ld_elemrev_<mode>): Likewise.
	(vsx_ld_elemrev_<mode>_internal): Likewise.
	(vsx_st_elemrev_<mode>): Likewise.
	(vsx_st_elemrev_<mode>_internal): Likewise.
	(vsx_xscvdpsp_sf): Likewise.
	(vsx_xvcvhpsp): Likewise.
	(vsx_xvcvhpsp_v8hf): Likewise.
	(vsx_xvcvsphp): Likewise.
	(vsx_xvcvsphp_v8hf): Likewise.
	(vsx_xscvdpspn_sf): Likewise.
	(xxswapd_<mode>): Likewise.
	(vsx_lxvd2x8_le_<MODE>): Likewise.
	(vsx_stxvd2x8_le_<MODE>): Likewise.
	(vsx_extract_<mode>_store_p9): Likewise.
	(vsx_xvcvbf16spn_v8bf): Likewise.
(vsx_xvcvspbf16_b): Likewise. Diff: --- gcc/config/rs6000/altivec.md | 45 ++++++- gcc/config/rs6000/predicates.md | 20 +++ gcc/config/rs6000/rs6000-builtin.cc | 16 +++ gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-cpus.def | 4 + gcc/config/rs6000/rs6000-modes.def | 6 + gcc/config/rs6000/rs6000-p8swap.cc | 14 +-- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.cc | 176 +++++++++++++++++++++++--- gcc/config/rs6000/rs6000.h | 5 + gcc/config/rs6000/rs6000.md | 244 +++++++++++++++++++++++++++++++++++- gcc/config/rs6000/rs6000.opt | 12 ++ gcc/config/rs6000/vector.md | 64 +++++++++- gcc/config/rs6000/vsx.md | 213 ++++++++++++++++++++++--------- 14 files changed, 721 insertions(+), 109 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7edc288a6565..ba47c4d597ab 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -191,6 +191,8 @@ ;; otherwise handled by altivec (v2df, v2di, ti) (define_mode_iterator VM [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -203,6 +205,8 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -222,18 +226,38 @@ V1TI TI]) -(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") + (V4SI "w") + (V8HI "h") + (V8BF "h") + (V8HF "h") + (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") + (V4SI "SI") + (V8HI "HI") + (V8BF "BF") + (V8HF "HF") + (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V8BF "VECTOR_UNIT_ALTIVEC_P (V8BFmode)") + (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)") (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) ;; Vector pack/unpack (define_mode_iterator VP [V2DI V4SI V8HI]) -(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) -(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) -(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) +(define_mode_attr VP_small [(V2DI "V4SI") + (V4SI "V8HI") + (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") + (V4SI "v8hi") + (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") + (V4SI "h") + (V8HI "b") + (V8BF "b") + (V8HF "b")]) ;; Vector negate (define_mode_iterator VNEG [V4SI V2DI]) @@ -2454,6 +2478,17 @@ } [(set_attr "type" "vecperm")]) +;; Splat instruction needed to allow conversion of __bfloat16 +;; (i.e. BFmode) to SFmode/DFmode. +(define_insn "altivec_vsplth_v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=v") + (unspec:V8BF [(match_operand:BF 1 "register_operand" "v") + (match_operand:QI 2 "const_0_to_7_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_BFLOAT16" + "vsplth %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "altivec_vspltis<VI_char>" [(set (match_operand:VI 0 "register_operand" "=v") (vec_duplicate:VI diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..3dc9e020fd71 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -601,6 +601,11 @@ if (TARGET_VSX && op == CONST0_RTX (mode)) return 1; + /* Power9 needs to load HFmode constants from memory, Power10 can use + XXSPLTIW. */ + if (mode == HFmode && !TARGET_POWER10) + return 0; + /* Constants that can be generated with ISA 3.1 instructions are easy. 
*/ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) @@ -2166,3 +2171,18 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +;; Return 1 if this is a 16-bit floating point constant that can be +;; loaded with XXSPLTIW. +(define_predicate "fp16_xxspltiw_constant" + (match_code "const_double") +{ + if (!TARGET_POWER10 || !FP16_SCALAR_MODE_P (mode)) + return false; + + vec_const_128bit_type vsx_const; + if (!vec_const_128bit_to_bytes (op, mode, &vsx_const)) + return false; + + return constant_generates_xxspltiw (&vsx_const); +}) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index dfbb7d02157b..94a4441e8f9c 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -491,6 +491,10 @@ const char *rs6000_type_string (tree type_node) return "voidc*"; else if (type_node == float128_type_node) return "_Float128"; + else if (type_node == float16_type_node) + return "_Float16"; + else if (TARGET_BFLOAT16 && type_node == bfloat16_type_node) + return "__bfloat16"; else if (type_node == vector_pair_type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) @@ -756,6 +760,18 @@ rs6000_init_builtins (void) else ieee128_float_type_node = NULL_TREE; + /* __bfloat16 support. */ + if (TARGET_BFLOAT16) + { + bfloat16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (bfloat16_type_node) = 16; + SET_TYPE_MODE (bfloat16_type_node, BFmode); + layout_type (bfloat16_type_node); + t = build_qualified_type (bfloat16_type_node, TYPE_QUAL_CONST); + lang_hooks.types.register_builtin_type (bfloat16_type_node, + "__bfloat16"); + } + /* Vector pair and vector quad support. */ vector_pair_type_node = make_node (OPAQUE_TYPE); SET_TYPE_MODE (vector_pair_type_node, OOmode); diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 8fe5652442e3..a446897f842b 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -82,10 +82,12 @@ #endif /* Nonzero if we can use a floating-point register to pass this arg. */ -#define USE_FP_FOR_ARG_P(CUM,MODE) \ - (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ - && (CUM)->fregno <= FP_ARG_MAX_REG \ - && TARGET_HARD_FLOAT) +#define USE_FP_FOR_ARG_P(CUM,MODE) \ + (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ + && (CUM)->fregno <= FP_ARG_MAX_REG \ + && TARGET_HARD_FLOAT \ + && (!FP16_SCALAR_MODE_P (MODE) || !TARGET_IEEE16_GPR_ARGS)) + /* Nonzero if we can use an AltiVec register to pass this arg. */ #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 233f01e9c615..6b97a4f2d1e5 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -102,6 +102,7 @@ /* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add FLOAT128_HW here until we are ready to make -mfloat128 on by default. */ #define ISA_3_0_MASKS_SERVER ((ISA_2_7_MASKS_SERVER \ + /* | OPTION_MASK_IEEE16 */ \ | OPTION_MASK_ISEL \ | OPTION_MASK_MODULO \ | OPTION_MASK_P9_MINMAX \ @@ -113,6 +114,7 @@ performance was degraded by it. */ #define OTHER_POWER10_MASKS (OPTION_MASK_MMA \ | OPTION_MASK_PCREL \ + /* | OPTION_MASK_BFLOAT16 */ \ /* | OPTION_MASK_PCREL_OPT */ \ | OPTION_MASK_PREFIXED) @@ -153,6 +155,7 @@ /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. 
*/ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ + | OPTION_MASK_BFLOAT16 \ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ @@ -163,6 +166,7 @@ | OPTION_MASK_FPRND \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_HTM \ + | OPTION_MASK_IEEE16 \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ | OPTION_MASK_MMA \ diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index f89e4ef403c1..81de144a987f 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -45,6 +45,12 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* IBM 128-bit floating point. */ FLOAT_MODE (IF, 16, ibm_extended_format); +/* Explicit IEEE 16-bit floating point. */ +FLOAT_MODE (HF, 2, ieee_half_format); + +/* Explicit bfloat16 floating point. */ +FLOAT_MODE (BF, 2, arm_bfloat_half_format); + /* Add any extra modes needed to represent the condition code. For the RS/6000, we need separate modes when unsigned (logical) comparisons diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc index e92f01031270..4fb107c60a47 100644 --- a/gcc/config/rs6000/rs6000-p8swap.cc +++ b/gcc/config/rs6000/rs6000-p8swap.cc @@ -1598,10 +1598,6 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp); else if (mode == V8HImode) stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp); -#endif else if (mode == V4SImode) stvx = gen_altivec_stvx_v4si (src_exp, dest_exp); else if (mode == V4SFmode) @@ -1722,10 +1718,6 @@ rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp); else if (mode == V8HImode) lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp); -#endif else if (mode == V4SImode) lvx = gen_altivec_lvx_v4si (dest_exp, src_exp); else if (mode == V4SFmode) @@ -1930,11 +1922,7 @@ replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn) rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); new_mem = force_const_mem (mode, new_const_vector); } - else if ((mode == V8HImode) -#ifdef HAVE_V8HFmode - || (mode == V8HFmode) -#endif - ) + else if (mode == V8HImode) { rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8)); int i; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197b..9bf971370d41 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -250,6 +250,7 @@ typedef struct { bool all_words_same; /* Are the words all equal? */ bool all_half_words_same; /* Are the half words all equal? */ bool all_bytes_same; /* Are the bytes all equal? */ + machine_mode mode; /* Original constant mode. 
*/ } vec_const_128bit_type; extern bool vec_const_128bit_to_bytes (rtx, machine_mode, diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 484cd2b61e95..4c69eb05b2cb 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1898,7 +1898,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { - if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p) + if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p + && !FP16_SCALAR_MODE_P (mode)) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1930,7 +1931,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (TARGET_POWER7 && mode == SImode) return 1; - if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) + if (TARGET_P9_VECTOR + && (mode == QImode || mode == HImode)) return 1; } @@ -1988,7 +1990,8 @@ static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) + || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode + || FP16_SCALAR_MODE_P (mode1) || FP16_SCALAR_MODE_P (mode2)) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2254,6 +2257,8 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + BFmode, + HFmode, SFmode, DFmode, TFmode, @@ -2274,6 +2279,8 @@ rs6000_debug_reg_global (void) V8SImode, V4DImode, V2TImode, + V8BFmode, + V8HFmode, V4SFmode, V2DFmode, V8SFmode, @@ -2632,8 +2639,14 @@ rs6000_setup_reg_addr_masks (void) /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and - STFIWZX instructions to load it. */ - bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + STFIWZX instructions to load it. + + Never allow offset addressing for 16-bit floating point modes, since + it is expected that 16-bit floating point should always go into the + vector registers and we only have indexed and indirect 16-bit loads to + VSR registers. */ + bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) + || FP16_SCALAR_MODE_P (m)); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2682,6 +2695,7 @@ rs6000_setup_reg_addr_masks (void) && !complex_p && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) + && !FP16_SCALAR_MODE_P (m) && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2929,6 +2943,28 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[V1TImode] = 128; } + /* _Float16 support. */ + if (TARGET_IEEE16) + { + rs6000_vector_unit[V8HFmode] = VECTOR_VSX; + rs6000_vector_mem[V8HFmode] = VECTOR_VSX; + rs6000_vector_align[V8HFmode] = align64; + + rs6000_vector_mem[HFmode] = VECTOR_VSX; + rs6000_vector_align[HFmode] = 16; + } + + /* _bfloat16 support. */ + if (TARGET_BFLOAT16) + { + rs6000_vector_unit[V8BFmode] = VECTOR_VSX; + rs6000_vector_mem[V8BFmode] = VECTOR_VSX; + rs6000_vector_align[V8BFmode] = align64; + + rs6000_vector_mem[BFmode] = VECTOR_VSX; + rs6000_vector_align[BFmode] = 16; + } + /* DFmode, see if we want to use the VSX unit. Memory is handled differently, so don't set rs6000_vector_mem. 
*/ if (TARGET_VSX) @@ -3010,6 +3046,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_di_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_di_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_di_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_di_load; reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; @@ -3039,6 +3079,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_di_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; + } + + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_di_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3061,6 +3113,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8BFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8bf; + reg_addr[V8HFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hf; reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; @@ -3071,6 +3125,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8BFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8bf; + reg_addr[V8HFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hf; reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; @@ -3108,6 +3164,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_si_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_si_load; + reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_si_store; + reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; @@ -3131,6 +3191,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_si_store; + reg_addr[HFmode].reload_load = 
CODE_FOR_reload_hf_si_load; + } + + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_si_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3870,6 +3942,22 @@ rs6000_option_override_internal (bool global_init_p) } } + /* -mieee16 needs power9 at a minimum. */ + if (TARGET_IEEE16 && !TARGET_P9_VECTOR) + { + rs6000_isa_flags &= ~OPTION_MASK_IEEE16; + if (rs6000_isa_flags_explicit & OPTION_MASK_IEEE16) + error ("%qs requires at least %qs", "-mieee16", "-mcpu=power9"); + } + + /* -mbfloat16 needs power10 at a minimum. */ + if (TARGET_BFLOAT16 && !TARGET_POWER10) + { + rs6000_isa_flags &= ~OPTION_MASK_BFLOAT16; + if (rs6000_isa_flags_explicit & OPTION_MASK_BFLOAT16) + error ("%qs requires at least %qs", "-mbfloat16", "-mcpu=power10"); + } + /* If hard-float/altivec/vsx were explicitly turned off then don't allow the -mcpu setting to enable options that conflict. */ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) @@ -12661,6 +12749,9 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { + if (FP16_SCALAR_MODE_P (mode)) + return true; + if (TARGET_POWERPC64) { /* ISA 2.07: MTVSRD or MVFVSRD. */ @@ -12678,7 +12769,8 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, if (mode == SImode) return true; - if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + if (TARGET_P9_VECTOR + && (mode == HImode || mode == QImode)) return true; } @@ -13448,6 +13540,11 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; + /* IEEE 16-bit and bfloat16 don't support offset addressing, but they can + go in any floating point/vector register. */ + if (FP16_SCALAR_MODE_P (mode)) + return rclass; + /* If this is a scalar floating point value and we don't have D-form addressing, prefer the traditional floating point registers so that we can use D-form (register+offset) addressing. */ @@ -13677,6 +13774,9 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); + if (FP16_SCALAR_MODE_P (from) || FP16_SCALAR_MODE_P (to)) + return from_size == to_size; + if (from_size != to_size) { enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; @@ -24018,6 +24118,8 @@ rs6000_function_value (const_tree valtype, if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) /* _Decimal128 must use an even/odd register pair. */ regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; + else if (FP16_SCALAR_MODE_P (mode) && TARGET_IEEE16_GPR_ARGS) + regno = GP_ARG_RETURN; else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && !FLOAT128_VECTOR_P (mode)) regno = FP_ARG_RETURN; @@ -24290,6 +24392,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; + else if (FP16_SCALAR_MODE_P (mode)) + return true; else return default_scalar_mode_supported_p (mode); } @@ -24341,6 +24445,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? 
SFmode : opt_scalar_float_mode (); + case 32: return DFmode; @@ -24362,6 +24469,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? HFmode : opt_scalar_float_mode (); + case 32: return SFmode; @@ -24466,6 +24576,7 @@ struct rs6000_opt_mask { static struct rs6000_opt_mask const rs6000_opt_masks[] = { { "altivec", OPTION_MASK_ALTIVEC, false, true }, + { "bfloat16", OPTION_MASK_BFLOAT16, false, true }, { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX, false, true }, { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR, @@ -24481,6 +24592,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, + { "ieee16", OPTION_MASK_IEEE16, false, true }, { "isel", OPTION_MASK_ISEL, false, true }, { "mfcrf", OPTION_MASK_MFCRF, false, true }, { "mfpgpr", 0, false, true }, @@ -28912,24 +29024,43 @@ constant_fp_to_128bit_vector (rtx op, const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op); long real_words[VECTOR_128BIT_WORDS]; - /* Make sure we don't overflow the real_words array and that it is - filled completely. */ - gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so + deal with it here. */ + if (FP16_SCALAR_MODE_P (mode)) + { + real_to_target (real_words, rtype, mode); + unsigned char hi = (unsigned char) (real_words[0] >> 8); + unsigned char lo = (unsigned char) real_words[0]; + + if (!BYTES_BIG_ENDIAN) + std::swap (hi, lo); - real_to_target (real_words, rtype, mode); + info->bytes[0] = hi; + info->bytes[1] = lo; + } - /* Iterate over each 32-bit word in the floating point constant. The - real_to_target function puts out words in target endian fashion. We need - to arrange the order so that the bytes are written in big endian order. */ - for (unsigned num = 0; num < num_words; num++) + else { - unsigned endian_num = (BYTES_BIG_ENDIAN - ? num - : num_words - 1 - num); + /* Make sure we don't overflow the real_words array and that it is filled + completely. */ + gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + + real_to_target (real_words, rtype, mode); - unsigned uvalue = real_words[endian_num]; - for (int shift = 32 - 8; shift >= 0; shift -= 8) - info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + /* Iterate over each 32-bit word in the floating point constant. The + real_to_target function puts out words in target endian fashion. We + need to arrange the order so that the bytes are written in big endian + order. */ + for (unsigned num = 0; num < num_words; num++) + { + unsigned endian_num = (BYTES_BIG_ENDIAN + ? num + : num_words - 1 - num); + + unsigned uvalue = real_words[endian_num]; + for (int shift = 32 - 8; shift >= 0; shift -= 8) + info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + } } /* Mark that this constant involves floating point. */ @@ -28968,6 +29099,7 @@ vec_const_128bit_to_bytes (rtx op, return false; /* Set up the bits. */ + info->mode = mode; switch (GET_CODE (op)) { /* Integer constants, default to double word. */ @@ -29195,6 +29327,10 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; + /* HFmode/BFmode constants can always use XXSPLTIW. 
*/ + if (FP16_SCALAR_MODE_P (vsx_const->mode)) + return 1; + if (!vsx_const->all_words_same) return 0; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 643aa2449318..310551e87fbb 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -343,6 +343,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || ((MODE) == TDmode) \ || (!TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE))) +/* Is this a valid 16-bit scalar floating point mode? */ +#define FP16_SCALAR_MODE_P(MODE) \ + (((MODE) == HFmode && TARGET_IEEE16) \ + || ((MODE) == BFmode && TARGET_BFLOAT16)) + /* Return true for floating point that does not use a vector register. */ #define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE) \ (SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 71585dde89db..5e5bb46b78d8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -552,6 +552,8 @@ (define_mode_iterator FMOVE128_GPR [TI V16QI V8HI + V8BF + V8HF V4SI V4SF V2DI @@ -837,8 +839,8 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. -(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI KF IF TF +(define_mode_iterator RELOAD [V16QI V8HI V8BF V8HF V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI KF IF TF HF BF OO XO]) ;; Iterate over smin, smax @@ -857,6 +859,14 @@ (SF "TARGET_P8_VECTOR") (DI "TARGET_POWERPC64")]) +;; Mode iterator for 16-bit floating modes. +(define_mode_iterator FP16 [(BF "TARGET_BFLOAT16") + (HF "TARGET_IEEE16")]) + +;; Mode iterator for floating point modes other than SF/DFmode that we +;; convert to/from _Float16 (HFmode) via DFmode. +(define_mode_iterator FP16_CONVERT [TF KF IF SD DD TD]) + (include "darwin.md") ;; Start with fixed-point load and store insns. Here we put only the more @@ -5849,6 +5859,181 @@ "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) + +;; Convert IEEE 16-bit floating point to/from other floating point modes. + +(define_insn "extendhf<mode>2" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (float_extend:SFDF + (match_operand:HF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvhpdp %x0,%x1" + [(set_attr "type" "fpsimple")]) + +(define_insn "trunc<mode>hf2" + [(set (match_operand:HF 0 "vsx_register_operand" "=wa") + (float_truncate:HF + (match_operand:SFDF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvdphp %x0,%1" + [(set_attr "type" "fpsimple")]) + +;; Convert BFmode to SFmode/DFmode. +;; 3 instructions are generated: +;; VSPLTH -- duplicate BFmode into all elements +;; XVCVBF16SPN -- convert even BFmode elements to SFmode +;; XSCVSPNDP -- convert memory format of SFmode to DFmode. +(define_insn_and_split "extendbf<mode>2" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (float_extend:SFDF + (match_operand:BF 1 "vsx_register_operand" "v"))) + (clobber (match_scratch:V8BF 2 "=v"))] + "TARGET_BFLOAT16" + "#" + "&& 1" + [(pc)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2_v8bf = operands[2]; + + if (GET_CODE (op2_v8bf) == SCRATCH) + op2_v8bf = gen_reg_rtx (V8BFmode); + + rtx op2_v4sf = gen_lowpart (V4SFmode, op2_v8bf); + + /* VSPLTH -- duplicate BFmode into all elements. */ + emit_insn (gen_altivec_vsplth_v8bf (op2_v8bf, op1, GEN_INT (3))); + + /* XVCVBF16SPN -- convert even V8BFmode elements to V4SFmode. 
*/ + emit_insn (gen_vsx_xvcvbf16spn_v8bf (op2_v4sf, op2_v8bf)); + + /* XSCVSPNDP -- convert single V4SFmode element to DFmode. */ + emit_insn (GET_MODE (op0) == SFmode + ? gen_vsx_xscvspdpn_sf (op0, op2_v4sf) + : gen_vsx_xscvspdpn (op0, op2_v4sf)); + + DONE; +} + [(set_attr "type" "fpsimple") + (set_attr "length" "12")]) + +;; Convert SFmode/DFmode to BFmode. +;; 2 instructions are generated: +;; XSCVDPSPN -- convert SFmode/DFmode scalar to V4SFmode +;; XVCVSPBF16 -- convert V4SFmode to even V8BFmode + +(define_insn_and_split "trunc<mode>bf2" + [(set (match_operand:BF 0 "vsx_register_operand" "=wa") + (float_truncate:BF + (match_operand:SFDF 1 "vsx_register_operand" "wa"))) + (clobber (match_scratch:V4SF 2 "=wa"))] + "TARGET_BFLOAT16" + "#" + "&& 1" + [(pc)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + + if (GET_CODE (op2) == SCRATCH) + op2 = gen_reg_rtx (V4SFmode); + + emit_insn (GET_MODE (op1) == SFmode + ? gen_vsx_xscvdpspn_sf (op2, op1) + : gen_vsx_xscvdpspn (op2, op1)); + + emit_insn (gen_vsx_xvcvspbf16_bf (op0, op2)); + DONE; +} + [(set_attr "type" "fpsimple")]) + +;; Use DFmode to convert to/from 16-bit floating point types for +;; scalar floating point types other than SF/DFmode. +(define_expand "extend<FP16:mode><FP16_CONVERT:mode>2" + [(set (match_operand:FP16_CONVERT 0 "vsx_register_operand") + (float_extend:FP16_CONVERT + (match_operand:FP16 1 "vsx_register_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extend<FP16:mode>df2 (df_tmp, operands[1])); + + /* convert_move handles things like conversion to Decimal types that + we don't have extenddfdd2 insns, so a call is made to do the + conversion. */ + convert_move (operands[0], df_tmp, 0); + DONE; +}) + +(define_expand "trunc<FP16_CONVERT:mode><FP16:mode>2" + [(set (match_operand:FP16 0 "vsx_register_operand") + (float_truncate:FP16 + (match_operand:FP16_CONVERT 1 "vsx_register_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + + /* convert_move handles things like conversion from Decimal types + that we don't have truncdddf2 insns, so a call is made for + the conversion. */ + convert_move (df_tmp, operands[1], 0); + + emit_insn (gen_truncdf<FP16:mode>2 (operands[0], df_tmp)); + DONE; +}) + +;; Convert integers to 16-bit floating point modes. 
+(define_expand "float<GPR:mode><FP16:mode>2" + [(set (match_operand:FP16 0 "vsx_register_operand") + (float:FP16 + (match_operand:GPR 1 "nonimmediate_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + emit_insn (gen_float<GPR:mode>df2 (df_tmp, operands[1])); + emit_insn (gen_truncdf<FP16:mode>2 (operands[0], df_tmp)); + DONE; +}) + +(define_expand "floatuns<GPR:mode><FP16:mode>2" + [(set (match_operand:FP16 0 "vsx_register_operand") + (unsigned_float:FP16 + (match_operand:GPR 1 "nonimmediate_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + emit_insn (gen_floatuns<GPR:mode>df2 (df_tmp, operands[1])); + emit_insn (gen_truncdf<FP16:mode>2 (operands[0], df_tmp)); + DONE; +}) + +;; Convert 16-bit floating point modes to integers +(define_expand "fix_trunc<FP16:mode><GPR:mode>2" + [(set (match_operand:GPR 0 "vsx_register_operand") + (fix:GPR + (match_operand:FP16 1 "vsx_register_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extend<FP16:mode>df2 (df_tmp, operands[1])); + emit_insn (gen_fix_truncdf<GPR:mode>2 (operands[0], df_tmp)); + DONE; +}) + +(define_expand "fixuns_trunc<FP16:mode><GPR:mode>2" + [(set (match_operand:GPR 0 "vsx_register_operand") + (unsigned_fix:GPR + (match_operand:FP16 1 "vsx_register_operand")))] + "" +{ + rtx df_tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extend<FP16:mode>df2 (df_tmp, operands[1])); + emit_insn (gen_fixuns_truncdf<GPR:mode>2 (operands[0], df_tmp)); + DONE; +}) + ;; Conversions to and from floating-point. @@ -8145,6 +8330,61 @@ p9v, p9v, p9v, p9v, p9v, p9v, p9v, *, *, *")]) + +(define_expand "mov<mode>" + [(set (match_operand:FP16 0 "nonimmediate_operand") + (match_operand:FP16 1 "any_operand"))] + "" +{ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +;; On power10, we can load up HFmode and BFmode constants with xxspltiw +;; or pli. +(define_insn "*mov<mode>_xxspltiw" + [(set (match_operand:FP16 0 "gpc_reg_operand" "=wa,r") + (match_operand:FP16 1 "fp16_xxspltiw_constant" "eP,eP"))] + "TARGET_POWER10 && TARGET_PREFIXED" +{ + rtx op1 = operands[1]; + const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); + long real_words[VECTOR_128BIT_WORDS]; + + real_to_target (real_words, rtype, <MODE>mode); + operands[2] = GEN_INT (real_words[0]); + return (vsx_register_operand (operands[0], <MODE>mode) + ? "xxspltiw %x0,%2" + : "li %0,%2"); +} + [(set_attr "type" "vecperm,*") + (set_attr "prefixed" "yes")]) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:FP16 0 "nonimmediate_operand" + "=wa, wa, Z, r, r, + m, r, wa, wa, r") + + (match_operand:FP16 1 "any_operand" + "wa, Z, wa, r, m, + r, wa, r, j, j"))] + "gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)" + "@ + xxlor %x0,%x1,%x1 + lxsihzx %x0,%y1 + stxsihx %x1,%y0 + mr %0,%1 + lhz%U1%X1 %0,%1 + sth%U0%X0 %1,%0 + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 + xxspltib %x0,0 + li %0,0" + [(set_attr "type" "vecsimple, fpload, fpstore, *, load, + store, mtvsr, mfvsr, vecsimple, *")]) + + ;; Here is how to move condition codes around. When we store CC data in ;; an integer register or memory, we store just the high-order 4 bits. diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 31852e02aa0f..1db4e28508af 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -638,6 +638,18 @@ mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. 
+mieee16 +Target Undocumented Mask(IEEE16) Var(rs6000_isa_flags) +Enable or disable _Float16 support. + +mieee16-gpr-args +Target Undocumented Var(TARGET_IEEE16_GPR_ARGS) Init(1) Save +Pass _Float16 in GPR registers. + +mbfloat16 +Target Undocumented Mask(BFLOAT16) Var(rs6000_isa_flags) +Enable or disable __bfloat16 support. + ; Documented parameters -param=rs6000-vect-unroll-limit= diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index f5797387ca79..0a9f092c1951 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -50,11 +50,31 @@ (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) ;; Vector logical modes -(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI KF TF]) +(define_mode_iterator VEC_L [V16QI + V8HI + V8BF + V8HF + V4SI + V2DI + V4SF + V2DF + V1TI + TI + KF + TF]) ;; Vector modes for moves. Don't do TImode or TFmode here, since their ;; moves are handled elsewhere. -(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI KF]) +(define_mode_iterator VEC_M [V16QI + V8HI + V4SI + V2DI + V8BF + V8HF + V4SF + V2DF + V1TI + KF]) ;; Vector modes for types that don't need a realignment under VSX (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) @@ -63,7 +83,14 @@ (define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) ;; Vector init/extract modes -(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VEC_E [V16QI + V8HI + V4SI + V2DI + V8BF + V8HF + V4SF + V2DF]) ;; Vector modes for 64-bit base types (define_mode_iterator VEC_64 [V2DI V2DF]) @@ -76,6 +103,8 @@ (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V8BF "BF") + (V8HF "HF") (V4SF "SF") (V2DF "DF") (V1TI "TI") @@ -86,6 +115,8 @@ (V8HI "hi") (V4SI "si") (V2DI "di") + (V8BF "bf") + (V8HF "hf") (V4SF "sf") (V2DF "df") (V1TI "ti") @@ -1191,6 +1222,21 @@ DONE; }) +(define_expand "vec_pack_trunc_v4sf" + [(match_operand:V8HF 0 "vfloat_operand") + (match_operand:V4SF 1 "vfloat_operand") + (match_operand:V4SF 2 "vfloat_operand")] + "TARGET_IEEE16" +{ + rtx r1 = gen_reg_rtx (V8HFmode); + rtx r2 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_vsx_xvcvsphp_v8hf (r1, operands[1])); + emit_insn (gen_vsx_xvcvsphp_v8hf (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + ;; Convert single word types to double word (define_expand "vec_unpacks_hi_v4sf" [(match_operand:V2DF 0 "vfloat_operand") @@ -1264,6 +1310,18 @@ DONE; }) +(define_expand "vec_unpacks_hi_v8hf" + [(match_operand:V4SF 0 "vfloat_operand") + (match_operand:V8HF 1 "vfloat_operand")] + "TARGET_IEEE16" +{ + rtx reg = gen_reg_rtx (V8HFmode); + + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvhpsp (operands[0], reg)); + DONE; +}) + ;; Align vector loads with a permute. (define_expand "vec_realign_load_<mode>" diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b80868..111457b8fe2e 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -46,9 +46,16 @@ ;; Iterator for vector floating point types supported by VSX (define_mode_iterator VSX_F [V4SF V2DF]) +;; Iterator for 8 element vectors +(define_mode_iterator VECTOR_16BIT [V8HI + (V8BF "TARGET_BFLOAT16") + (V8HF "TARGET_IEEE16")]) + ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI + (V8BF "TARGET_BFLOAT16") + (V8HF "TARGET_IEEE16") V4SI V2DI V4SF @@ -61,6 +68,8 @@ ;; Iterator for memory moves. 
(define_mode_iterator VSX_M [V16QI V8HI + (V8BF "TARGET_BFLOAT16") + (V8HF "TARGET_IEEE16") V4SI V2DI V4SF @@ -71,6 +80,8 @@ TI]) (define_mode_attr VSX_XXBR [(V8HI "h") + (V8BF "h") + (V8HF "h") (V4SI "w") (V4SF "w") (V2DF "d") @@ -80,6 +91,8 @@ ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") + (V8BF "vw4") + (V8HF "vw4") (V4SI "vw4") (V4SF "vw4") (V2DF "vd2") @@ -93,6 +106,8 @@ ;; Map the register class used (define_mode_attr VSr [(V16QI "v") (V8HI "v") + (V8BF "v") + (V8HF "v") (V4SI "v") (V4SF "wa") (V2DI "wa") @@ -108,6 +123,8 @@ ;; What value we need in the "isa" field, to make the IEEE QP float work. (define_mode_attr VSisa [(V16QI "*") (V8HI "*") + (V8BF "p10") + (V8HF "p9v") (V4SI "*") (V4SF "*") (V2DI "*") @@ -124,6 +141,8 @@ ;; integer modes. (define_mode_attr ??r [(V16QI "??r") (V8HI "??r") + (V8BF "??r") + (V8HF "??r") (V4SI "??r") (V4SF "??r") (V2DI "??r") @@ -136,6 +155,8 @@ ;; A mode attribute used for 128-bit constant values. (define_mode_attr nW [(V16QI "W") (V8HI "W") + (V8BF "W") + (V8HF "W") (V4SI "W") (V4SF "W") (V2DI "W") @@ -163,6 +184,8 @@ ;; operation (define_mode_attr VSv [(V16QI "v") (V8HI "v") + (V8BF "v") + (V8HF "v") (V4SI "v") (V4SF "v") (V2DI "v") @@ -396,6 +419,8 @@ ;; Like VM2 in altivec.md, just do char, short, int, long, float and double (define_mode_iterator VM3 [V4SI V8HI + V8BF + V8HF V16QI V4SF V2DF @@ -407,6 +432,8 @@ (define_mode_attr VM3_char [(V2DI "d") (V4SI "w") (V8HI "h") + (V8BF "h") + (V8HF "h") (V16QI "b") (V2DF "d") (V4SF "w")]) @@ -541,21 +568,21 @@ [(set_attr "type" "vecload") (set_attr "length" "8")]) -(define_insn_and_split "*vsx_le_perm_load_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] +(define_insn_and_split "*vsx_le_perm_load_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (match_operand:VECTOR_16BIT 1 "indexed_or_indirect_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" "&& 1" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 2) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -802,27 +829,27 @@ (const_int 0) (const_int 1)])))] "") -(define_insn "*vsx_le_perm_store_v8hi" - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") - (match_operand:V8HI 1 "vsx_register_operand" "wa"))] +(define_insn "*vsx_le_perm_store_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand" "=Z") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" [(set_attr "type" "vecstore") (set_attr "length" "12")]) (define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" [(set (match_dup 2) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 2) 
(parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -861,25 +888,25 @@ ;; The post-reload split requires that we re-permute the source ;; register in case it is still live. (define_split - [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") - (match_operand:V8HI 1 "vsx_register_operand"))] + [(set (match_operand:VECTOR_16BIT 0 "indexed_or_indirect_operand") + (match_operand:VECTOR_16BIT 1 "vsx_register_operand"))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" [(set (match_dup 1) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 0) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)]))) (set (match_dup 1) - (vec_select:V8HI + (vec_select:VECTOR_16BIT (match_dup 1) (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) @@ -1434,15 +1461,15 @@ "lxvw4x %x0,%y1" [(set_attr "type" "vecload")]) -(define_expand "vsx_ld_elemrev_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_expand "vsx_ld_elemrev_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { @@ -1452,9 +1479,9 @@ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; - subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); + subreg = simplify_gen_subreg (V4SImode, operands[1], <MODE>mode, 0); emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); - subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); + subreg2 = simplify_gen_subreg (<MODE>mode, tmp, V4SImode, 0); for (i = 0; i < 16; ++i) perm[i] = GEN_INT (reorder[i]); @@ -1462,21 +1489,21 @@ pcv = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, - subreg2, pcv)); + emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], subreg2, + subreg2, pcv)); DONE; } }) -(define_insn "*vsx_ld_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_ld_elemrev_<mode>_internal" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvh8x %x0,%y1" [(set_attr "type" "vecload")]) @@ -1584,20 +1611,20 @@ "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_expand "vsx_st_elemrev_v8hi" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_expand "vsx_st_elemrev_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" 
"=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN" { if (!TARGET_P9_VECTOR) { rtx mem_subreg, subreg, perm[16], pcv; - rtx tmp = gen_reg_rtx (V8HImode); + rtx tmp = gen_reg_rtx (<MODE>mode); /* 2 is leftmost element in register */ unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; int i; @@ -1608,10 +1635,10 @@ pcv = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); - emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], - operands[1], pcv)); - subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); - mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); + emit_insn (gen_altivec_vperm_<mode>_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, <MODE>mode, 0); + mem_subreg = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0); emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); DONE; } @@ -1626,15 +1653,15 @@ "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "*vsx_st_elemrev_v8hi_internal" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "*vsx_st_elemrev_<mode>_internal" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" "=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 7) (const_int 6) (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] - "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" + "VECTOR_MEM_VSX_P (<MODE>mode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvh8x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -2363,6 +2390,14 @@ "xscvdpsp %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "vsx_xscvdpsp_sf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f,wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (DFmode)" + "xscvdpsp %x0,%x1" + [(set_attr "type" "fp")]) + (define_insn "vsx_xvcvspdp_be" [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa") (float_extend:V2DF @@ -2420,6 +2455,7 @@ [(set_attr "type" "fp")]) ;; Generate xvcvhpsp instruction +;; Used for the built-in function (define_insn "vsx_xvcvhpsp" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] @@ -2428,7 +2464,17 @@ "xvcvhpsp %x0,%x1" [(set_attr "type" "vecfloat")]) +;; Used for conversion to/from _Float16 +(define_insn "vsx_xvcvhpsp_v8hf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVHPSP))] + "TARGET_P9_VECTOR" + "xvcvhpsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + ;; Generate xvcvsphp +;; Used for the built-in function (define_insn "vsx_xvcvsphp" [(set (match_operand:V4SI 0 "register_operand" "=wa") (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] @@ -2437,6 +2483,15 @@ "xvcvsphp %x0,%x1" [(set_attr "type" "vecfloat")]) +;; Used for conversion to/from _Float16 +(define_insn "vsx_xvcvsphp_v8hf" + [(set (match_operand:V8HF 0 "register_operand" "=wa") + (unspec:V8HF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + 
UNSPEC_VSX_XVCVSPHP))] + "TARGET_P9_VECTOR" + "xvcvsphp %x0,%x1" +[(set_attr "type" "vecfloat")]) + ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF ;; format of scalars is actually DF. (define_insn "vsx_xscvdpsp_scalar" @@ -2456,6 +2511,14 @@ "xscvdpspn %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "vsx_xscvdpspn_sf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + (define_insn "vsx_xscvspdpn" [(set (match_operand:DF 0 "vsx_register_operand" "=wa") (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] @@ -2464,6 +2527,14 @@ "xscvspdpn %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "vsx_xscvspdpn_sf" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + (define_insn "vsx_xscvdpspn_scalar" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] @@ -3299,10 +3370,10 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) -(define_insn "xxswapd_v8hi" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "xxswapd_<mode>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) @@ -3402,15 +3473,15 @@ "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "*vsx_lxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") - (vec_select:V8HI - (match_operand:V8HI 1 "memory_operand" "Z") +(define_insn "*vsx_lxvd2x8_le_<MODE>" + [(set (match_operand:VECTOR_16BIT 0 "vsx_register_operand" "=wa") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "memory_operand" "Z") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "lxvd2x %x0,%y1" [(set_attr "type" "vecload")]) @@ -3478,15 +3549,15 @@ [(set_attr "type" "vecstore") (set_attr "length" "8")]) -(define_insn "*vsx_stxvd2x8_le_V8HI" - [(set (match_operand:V8HI 0 "memory_operand" "=Z") - (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") +(define_insn "*vsx_stxvd2x8_le_<MODE>" + [(set (match_operand:VECTOR_16BIT 0 "memory_operand" "=Z") + (vec_select:VECTOR_16BIT + (match_operand:VECTOR_16BIT 1 "vsx_register_operand" "wa") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7) (const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" + "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -4060,7 +4131,8 @@ if (which_alternative == 0 && ((<MODE>mode == V16QImode && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) - || (<MODE>mode == V8HImode + || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
3 : 4)))) { enum machine_mode dest_mode = GET_MODE (operands[0]); @@ -4139,7 +4211,8 @@ else vec_tmp = src; } - else if (<MODE>mode == V8HImode) + else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) { if (value != 3) emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); @@ -6448,6 +6521,22 @@ "<xvcvbf16> %x0,%x1" [(set_attr "type" "vecfloat")]) +(define_insn "vsx_xvcvbf16spn_v8bf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:V8BF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVBF16SPN))] + "TARGET_BFLOAT16" + "xvcvbf16spn %x0,%x1" + [(set_attr "type" "vecfloat")]) + +(define_insn "vsx_xvcvspbf16_bf" + [(set (match_operand:BF 0 "vsx_register_operand" "=wa") + (unspec:BF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVSPBF16))] + "TARGET_BFLOAT16" + "xvcvspbf16 %x0,%x1" + [(set_attr "type" "vecfloat")]) + (define_insn "vec_mtvsrbmi" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]

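For readers following the new extendbf<mode>2 and trunc<mode>bf2 patterns above: __bfloat16 occupies the upper 16 bits of an IEEE-754 binary32 value, which is why the expansion only needs a splat (vsplth) plus the xvcvbf16spn / xvcvspbf16 conversions. Below is a minimal C sketch of the same data movement. It is a software model only, not code from this patch; the helper names bf16_bits_to_float and float_to_bf16_bits are invented for illustration, and it assumes round-to-nearest-even while glossing over NaN special cases.

#include <stdint.h>
#include <string.h>

/* Widen a __bfloat16 bit pattern to float: place the 16 bits in the
   high half of a binary32 and zero the low half.  */
static float
bf16_bits_to_float (uint16_t bits)
{
  uint32_t u = (uint32_t) bits << 16;
  float f;
  memcpy (&f, &u, sizeof (f));
  return f;
}

/* Narrow a float to a __bfloat16 bit pattern: keep the high 16 bits of
   the binary32 pattern, rounding to nearest even.  NaNs would need a
   special case that is omitted here.  */
static uint16_t
float_to_bf16_bits (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof (u));
  uint32_t round_bias = 0x7fffu + ((u >> 16) & 1);
  return (uint16_t) ((u + round_bias) >> 16);
}

The generated code itself uses the vsplth/xvcvbf16spn/xscvspdpn and xscvdpspn/xvcvspbf16 sequences shown in the rs6000.md hunks; the sketch is only meant to make the underlying bit layout and rounding easier to follow.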