This patch adds a series of built-in functions to allow users to write code to do a number of simple operations where the loop is done using the __vector_pair type. The __vector_pair type is an opaque type. These built-in functions keep the two 128-bit vectors within the __vector_pair together, and split the operation after register allocation.
This patch provides vector pair operations for loading up a vector pair with all 0's, duplicated (splat) from a scalar type, or combining two vectors in a vector pair. This patch also provides vector pair builtins to extract one of the two vectors of a vector pair. I have built and tested these patches on: * A little endian power10 server using --with-cpu=power10 * A little endian power9 server using --with-cpu=power9 * A big endian power9 server using --with-cpu=power9. Can I check this patch into the master branch after the preceding patches have been checked in? 2023-11-09 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/predicates.md (mma_assemble_input_operand): Allow any 16-byte vector, not just V16QImode. * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New vector pair initialization built-in functions. (__builtin_vpair_*_assemble): Likewise. (__builtin_vpair_*_splat): Likewise. (__builtin_vpair_*_extract_vector): New vector pair extraction built-in functions. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_V32QI): New unspec. (UNSPEC_VPAIR_V16HI): Likewise. (UNSPEC_VPAIR_V8SI): Likewise. (UNSPEC_VPAIR_V4DI): Likewise. (VP_INT_BINARY): New iterator for integer vector pair. (vp_insn): Add support for integer vector pairs. (vp_ireg): New code attribute for integer vector pairs. (vp_ipredicate): Likewise. (VP_INT): New int iterator for integer vector pairs. (VP_VEC_MODE): Likewise. (vp_pmode): Likewise. (vp_vmode): Likewise. (vp_neg_reg): New int iterator for integer vector pairs. (vpair_neg_<vp_pmode>): Add integer vector pair support insns. (vpair_not_<vp_pmode>2): Likewise. (vpair_<vp_insn>_<vp_pmode>3): Likewise. (vpair_andc_<vp_pmode>): Likewise. (vpair_iorc_<vp_pmode>): Likewise. (vpair_nand_<vp_pmode>_1): Likewise. (vpair_nand_<vp_pmode>_2): Likewise. (vpair_nor_<vp_pmode>_1): Likewise. (vpair_nor_<vp_pmode>_2): Likewise. * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document the integer vector pair built-in functions. 
gcc/testsuite/ * gcc.target/powerpc/vector-pair-9.c: New test. * gcc.target/powerpc/vector-pair-10.c: New test. * gcc.target/powerpc/vector-pair-11.c: New test. * gcc.target/powerpc/vector-pair-12.c: New test. * gcc.target/powerpc/vector-pair-13.c: New test. * gcc.target/powerpc/vector-pair-14.c: New test. * gcc.target/powerpc/vector-pair-15.c: New test. --- gcc/config/rs6000/predicates.md | 2 +- gcc/config/rs6000/rs6000-builtins.def | 95 +++++++++ gcc/config/rs6000/vector-pair.md | 185 ++++++++++++++++++ gcc/doc/extend.texi | 44 +++++ .../gcc.target/powerpc/vector-pair-10.c | 86 ++++++++ .../gcc.target/powerpc/vector-pair-11.c | 84 ++++++++ .../gcc.target/powerpc/vector-pair-12.c | 156 +++++++++++++++ .../gcc.target/powerpc/vector-pair-13.c | 139 +++++++++++++ .../gcc.target/powerpc/vector-pair-14.c | 141 +++++++++++++ .../gcc.target/powerpc/vector-pair-15.c | 139 +++++++++++++ .../gcc.target/powerpc/vector-pair-9.c | 13 ++ 11 files changed, 1083 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-10.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-12.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-13.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-14.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-15.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vector-pair-9.c diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ef7d3f214c4..922a77716c4 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1301,7 +1301,7 @@ (define_predicate "splat_input_operand" ;; Return 1 if this operand is valid for a MMA assemble accumulator insn. 
(define_special_predicate "mma_assemble_input_operand" - (match_test "(mode == V16QImode + (match_test "(VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16 && (vsx_register_operand (op, mode) || (MEM_P (op) && (indexed_or_indirect_address (XEXP (op, 0), mode) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 3b2db39c1ab..fbd416ceb87 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4132,6 +4132,11 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; General vector pair built-in functions + + v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + ;; vector pair built-in functions for 8 32-bit float values v256 __builtin_vpair_f32_abs (v256); @@ -4140,6 +4145,12 @@ v256 __builtin_vpair_f32_add (v256, v256); VPAIR_F32_ADD vpair_add_v8sf3 {mma,pair} + v256 __builtin_vpair_f32_assemble (vf, vf); + VPAIR_F32_ASSEMBLE vpair_assemble_v8sf {mma,pair} + + vf __builtin_vpair_f32_extract_vector (v256, const int<1>); + VPAIR_F32_EXTRACT_VECTOR vpair_extract_vector_v8sf {mma,pair} + v256 __builtin_vpair_f32_fma (v256, v256, v256); VPAIR_F32_FMA vpair_fma_v8sf4 {mma,pair} @@ -4155,6 +4166,9 @@ v256 __builtin_vpair_f32_neg (v256); VPAIR_F32_NEG vpair_neg_v8sf2 {mma,pair} + v256 __builtin_vpair_f32_splat (float); + VPAIR_F32_SPLAT vpair_splat_v8sf {mma,pair} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma,pair} @@ -4166,6 +4180,12 @@ v256 __builtin_vpair_f64_add (v256, v256); VPAIR_F64_ADD vpair_add_v4df3 {mma,pair} +v256 __builtin_vpair_f64_assemble (vd, vd); + VPAIR_F64_ASSEMBLE vpair_assemble_v4df {mma,pair} + + vd __builtin_vpair_f64_extract_vector (v256, const int<1>); + VPAIR_F64_EXTRACT_VECTOR vpair_extract_vector_v4df {mma,pair} + v256 __builtin_vpair_f64_fma (v256, v256, v256); VPAIR_F64_FMA vpair_fma_v4df4 {mma,pair} @@ -4181,6 +4201,9 @@ v256 __builtin_vpair_f64_neg (v256); VPAIR_F64_NEG 
vpair_neg_v4df2 {mma,pair} + v256 __builtin_vpair_f64_splat (double); + VPAIR_F64_SPLAT vpair_splat_v4df {mma,pair} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma,pair} @@ -4193,6 +4216,12 @@ v256 __builtin_vpair_i8_and (v256, v256); VPAIR_I8_AND vpair_and_v32qi3 {mma,pair} + v256 __builtin_vpair_i8_assemble (vsc, vsc); + VPAIR_I8_ASSEMBLE vpair_assemble_v32qi {mma,pair} + + vsc __builtin_vpair_i8_extract_vector (v256, const int<1>); + VPAIR_I8_EXTRACT_VECTOR vpair_extract_vector_v32qi {mma,pair} + v256 __builtin_vpair_i8_ior (v256, v256); VPAIR_I8_IOR vpair_ior_v32qi3 {mma,pair} @@ -4208,18 +4237,30 @@ v256 __builtin_vpair_i8_not (v256); VPAIR_I8_NOT vpair_not_v32qi2 {mma,pair} + v256 __builtin_vpair_i8_splat (signed char); + VPAIR_I8_SPLAT vpair_splat_v32qi {mma,pair} + v256 __builtin_vpair_i8_sub (v256, v256); VPAIR_I8_SUB vpair_sub_v32qi3 {mma,pair} v256 __builtin_vpair_i8_xor (v256, v256); VPAIR_I8_XOR vpair_xor_v32qi3 {mma,pair} + v256 __builtin_vpair_i8u_assemble (vuc, vuc); + VPAIR_I8U_ASSEMBLE vpair_assemble_v32qi {mma,pair} + + vuc __builtin_vpair_i8u_extract_vector (v256, const int<1>); + VPAIR_I8U_EXTRACT_VECTOR vpair_extract_vector_v32qi {mma,pair} + v256 __builtin_vpair_i8u_max (v256, v256); VPAIR_I8U_MAX vpair_umax_v32qi3 {mma,pair} v256 __builtin_vpair_i8u_min (v256, v256); VPAIR_I8U_MIN vpair_umin_v32qi3 {mma,pair} + v256 __builtin_vpair_i8u_splat (unsigned char); + VPAIR_I8U_SPLAT vpair_splat_v32qi {mma,pair} + ;; vector pair built-in functions for 16 16-bit unsigned short or ;; signed short values @@ -4229,6 +4270,12 @@ v256 __builtin_vpair_i16_and (v256, v256); VPAIR_I16_AND vpair_and_v16hi3 {mma,pair} + v256 __builtin_vpair_i16_assemble (vss, vss); + VPAIR_I16_ASSEMBLE vpair_assemble_v16hi {mma,pair} + + vss __builtin_vpair_i16_extract_vector (v256, const int<1>); + VPAIR_I16_EXTRACT_VECTOR vpair_extract_vector_v16hi {mma,pair} + v256 __builtin_vpair_i16_ior (v256, v256); VPAIR_I16_IOR vpair_ior_v16hi3 
{mma,pair} @@ -4244,18 +4291,30 @@ v256 __builtin_vpair_i16_not (v256); VPAIR_I16_NOT vpair_not_v16hi2 {mma,pair} + v256 __builtin_vpair_i16_splat (short); + VPAIR_I16_SPLAT vpair_splat_v16hi {mma,pair} + v256 __builtin_vpair_i16_sub (v256, v256); VPAIR_I16_SUB vpair_sub_v16hi3 {mma,pair} v256 __builtin_vpair_i16_xor (v256, v256); VPAIR_I16_XOR vpair_xor_v16hi3 {mma,pair} + v256 __builtin_vpair_i16u_assemble (vus, vus); + VPAIR_I16U_ASSEMBLE vpair_assemble_v16hi {mma,pair} + + vus __builtin_vpair_i16u_extract_vector (v256, const int<1>); + VPAIR_I16U_EXTRACT_VECTOR vpair_extract_vector_v16hi {mma,pair} + v256 __builtin_vpair_i16u_max (v256, v256); VPAIR_I16U_MAX vpair_umax_v16hi3 {mma,pair} v256 __builtin_vpair_i16u_min (v256, v256); VPAIR_I16U_MIN vpair_umin_v16hi3 {mma,pair} + v256 __builtin_vpair_i16u_splat (unsigned short); + VPAIR_I16U_SPLAT vpair_splat_v16hi {mma,pair} + ;; vector pair built-in functions for 8 32-bit unsigned int or ;; signed int values @@ -4265,6 +4324,12 @@ v256 __builtin_vpair_i32_and (v256, v256); VPAIR_I32_AND vpair_and_v8si3 {mma,pair} + v256 __builtin_vpair_i32_assemble (vsi, vsi); + VPAIR_I32_ASSEMBLE vpair_assemble_v8si {mma,pair} + + vsi __builtin_vpair_i32_extract_vector (v256, const int<1>); + VPAIR_I32_EXTRACT_VECTOR vpair_extract_vector_v8si {mma,pair} + v256 __builtin_vpair_i32_ior (v256, v256); VPAIR_I32_IOR vpair_ior_v8si3 {mma,pair} @@ -4280,18 +4345,30 @@ v256 __builtin_vpair_i32_not (v256); VPAIR_I32_NOT vpair_not_v8si2 {mma,pair} + v256 __builtin_vpair_i32_splat (int); + VPAIR_I32_SPLAT vpair_splat_v8si {mma,pair} + v256 __builtin_vpair_i32_sub (v256, v256); VPAIR_I32_SUB vpair_sub_v8si3 {mma,pair} v256 __builtin_vpair_i32_xor (v256, v256); VPAIR_I32_XOR vpair_xor_v8si3 {mma,pair} + v256 __builtin_vpair_i32u_assemble (vui, vui); + VPAIR_I32U_ASSEMBLE vpair_assemble_v8si {mma,pair} + + vui __builtin_vpair_i32u_extract_vector (v256, const int<1>); + VPAIR_I32U_EXTRACT_VECTOR vpair_extract_vector_v8si {mma,pair} + v256 
__builtin_vpair_i32u_max (v256, v256); VPAIR_I32U_MAX vpair_umax_v8si3 {mma,pair} v256 __builtin_vpair_i32u_min (v256, v256); VPAIR_I32U_MIN vpair_umin_v8si3 {mma,pair} + v256 __builtin_vpair_i32u_splat (unsigned int); + VPAIR_I32U_SPLAT vpair_splat_v8si {mma,pair} + ;; vector pair built-in functions for 4 64-bit unsigned long long or ;; signed long long values @@ -4301,6 +4378,12 @@ v256 __builtin_vpair_i64_and (v256, v256); VPAIR_I64_AND vpair_and_v4di3 {mma,pair} + v256 __builtin_vpair_i64_assemble (vsll, vsll); + VPAIR_I64_ASSEMBLE vpair_assemble_v4di {mma,pair} + + vsll __builtin_vpair_i64_extract_vector (v256, const int<1>); + VPAIR_I64_EXTRACT_VECTOR vpair_extract_vector_v4di {mma,pair} + v256 __builtin_vpair_i64_ior (v256, v256); VPAIR_I64_IOR vpair_ior_v4di3 {mma,pair} @@ -4316,14 +4399,26 @@ v256 __builtin_vpair_i64_not (v256); VPAIR_I64_NOT vpair_not_v4di2 {mma,pair} + v256 __builtin_vpair_i64_splat (long long); + VPAIR_I64_SPLAT vpair_splat_v4di {mma,pair} + v256 __builtin_vpair_i64_sub (v256, v256); VPAIR_I64_SUB vpair_sub_v4di3 {mma,pair} v256 __builtin_vpair_i64_xor (v256, v256); VPAIR_I64_XOR vpair_xor_v4di3 {mma,pair} + v256 __builtin_vpair_i64u_assemble (vull, vull); + VPAIR_I64U_ASSEMBLE vpair_assemble_v4di {mma,pair} + + vull __builtin_vpair_i64u_extract_vector (v256, const int<1>); + VPAIR_I64U_EXTRACT_VECTOR vpair_extract_vector_v4di {mma,pair} + v256 __builtin_vpair_i64u_max (v256, v256); VPAIR_I64U_MAX vpair_umax_v4di3 {mma,pair} v256 __builtin_vpair_i64u_min (v256, v256); VPAIR_I64U_MIN vpair_umin_v4di3 {mma,pair} + + v256 __builtin_vpair_i64u_splat (unsigned long long); + VPAIR_I64U_SPLAT vpair_splat_v4di {mma,pair} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index cd14430f47a..f6d0b2a39fc 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -33,6 +33,8 @@ (define_c_enum "unspec" UNSPEC_VPAIR_V16HI UNSPEC_VPAIR_V8SI UNSPEC_VPAIR_V4DI + UNSPEC_VPAIR_ZERO + 
UNSPEC_VPAIR_SPLAT ]) ;; Iterator doing unary/binary arithmetic on vector pairs @@ -93,6 +95,13 @@ (define_int_iterator VP_INT [UNSPEC_VPAIR_V4DI UNSPEC_VPAIR_V16HI UNSPEC_VPAIR_V32QI]) +(define_int_iterator VP_ALL [UNSPEC_VPAIR_V4DF + UNSPEC_VPAIR_V8SF + UNSPEC_VPAIR_V4DI + UNSPEC_VPAIR_V8SI + UNSPEC_VPAIR_V16HI + UNSPEC_VPAIR_V32QI]) + ;; Map VP_* to vector mode of the arguments after they are split (define_int_attr VP_VEC_MODE [(UNSPEC_VPAIR_V4DF "V2DF") (UNSPEC_VPAIR_V8SF "V4SF") @@ -126,6 +135,182 @@ (define_int_attr vp_neg_reg [(UNSPEC_VPAIR_V32QI "&v") (UNSPEC_VPAIR_V8SI "X") (UNSPEC_VPAIR_V4DI "X")]) +;; Moddes of the vector element to splat to vector pair +(define_mode_iterator VP_SPLAT [DF SF DI SI HI QI]) + +;; Moddes of the vector to splat to vector pair +(define_mode_iterator VP_SPLAT_VEC [V2DF V4SF V2DI V4SI V8HI V16QI]) + +;; MAP VP_SPLAT and VP_SPLAT_VEC to the mode of the vector pair operation +(define_mode_attr vp_splat_pmode [(DF "v4df") + (V2DF "v4df") + (SF "v8sf") + (V4SF "v8sf") + (DI "v4di") + (V2DI "v4di") + (SI "v8si") + (V4SI "v8si") + (HI "v16hi") + (V8HI "v16hi") + (QI "v32qi") + (V16QI "v32qi")]) + +;; MAP VP_SPLAT to the mode of the vector containing the element +(define_mode_attr VP_SPLAT_VMODE [(DF "V2DF") + (SF "V4SF") + (DI "V2DI") + (SI "V4SI") + (HI "V8HI") + (QI "V16QI")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + unsigned offset_hi = (WORDS_BIG_ENDIAN) ? 0 : 16; + unsigned offset_lo = (WORDS_BIG_ENDIAN) ? 
16 : 0; + + operands[1] = simplify_gen_subreg (V2DImode, op0, OOmode, offset_hi); + operands[2] = simplify_gen_subreg (V2DImode, op0, OOmode, offset_lo); + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8")]) + +;; Assemble a vector pair from two vectors. Unlike +;; __builtin_mma_assemble_pair, this function produces a vector pair output +;; directly and it takes all of the vector types. +;; +;; We cannot update the two output registers atomically, so mark the output as +;; an early clobber so we don't accidentally clobber the input operands. */ + +(define_insn_and_split "vpair_assemble_<vp_pmode>" + [(set (match_operand:OO 0 "vsx_register_operand" "=&wa") + (unspec:OO + [(match_operand:<VP_VEC_MODE> 1 "mma_assemble_input_operand" "mwa") + (match_operand:<VP_VEC_MODE> 2 "mma_assemble_input_operand" "mwa")] + VP_ALL))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx src = gen_rtx_UNSPEC (OOmode, + gen_rtvec (2, operands[1], operands[2]), + UNSPEC_VSX_ASSEMBLE); + rs6000_split_multireg_move (operands[0], src); + DONE; +} + [(set_attr "length" "8")]) + +;; Extract one of the two 128-bit vectors from a vector pair. +(define_insn_and_split "vpair_extract_vector_<vp_pmode>" + [(set (match_operand:<VP_VEC_MODE> 0 "vsx_register_operand" "=wa") + (unspec:<VP_VEC_MODE> + [(match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand 2 "const_0_to_1_operand" "n")] + VP_ALL))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] +{ + machine_mode vmode = <VP_VEC_MODE>mode; + unsigned reg_num = UINTVAL (operands[2]); + if (!WORDS_BIG_ENDIAN) + reg_num = 1 - reg_num; + + operands[3] = simplify_gen_subreg (vmode, operands[1], OOmode, reg_num * 16); +}) + +;; Optimize extracting an 128-bit vector from a vector pair in memory. 
+(define_insn_and_split "*vpair_extract_vector_<vp_pmode>_mem" + [(set (match_operand:<VP_VEC_MODE> 0 "vsx_register_operand" "=wa") + (unspec:<VP_VEC_MODE> + [(match_operand:OO 1 "memory_operand" "o") + (match_operand 2 "const_0_to_1_operand" "n")] + VP_ALL))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] +{ + operands[3] = adjust_address (operands[1], <VP_VEC_MODE>mode, + 16 * INTVAL (operands[2])); +} + [(set_attr "type" "vecload")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_<vp_splat_pmode>" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:VP_SPLAT 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = <MODE>mode; + machine_mode vector_mode = <VP_SPLAT_VMODE>mode; + + if (op1 == CONST0_RTX (element_mode)) + { + emit_insn (gen_vpair_zero (op0)); + DONE; + } + + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_t i = 0; i < num_elements; i++) + RTVEC_ELT (elements, i) = copy_rtx (op1); + + rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); + emit_insn (gen_vpair_splat_<vp_splat_pmode>_internal (op0, vec)); + DONE; +}) + +;; Inner splat support. Operand1 is the vector splat created above. Allow +;; operand 1 to overlap with the output registers to eliminate one move +;; instruction. 
+(define_insn_and_split "vpair_splat_<vp_splat_pmode>_internal" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:VP_SPLAT_VEC 1 "vsx_register_operand" "0,wa")] + UNSPEC_VPAIR_SPLAT))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op0_vector0 = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0); + rtx op0_vector1 = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16); + + /* Check if the input is one of the output registers. */ + if (rtx_equal_p (op0_vector0, op1)) + emit_move_insn (op0_vector1, op1); + + else if (rtx_equal_p (op0_vector1, op1)) + emit_move_insn (op0_vector0, op1); + + else + { + emit_move_insn (op0_vector0, op1); + emit_move_insn (op0_vector1, op1); + } + + DONE; +} + [(set_attr "length" "*,8") + (set_attr "type" "vecmove")]) + ;; Vector pair floating point unary operations (define_insn_and_split "vpair_<vp_insn>_<vp_pmode>2" diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index ff7918c7a58..600e2c393db 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21386,17 +21386,27 @@ two 128-bit vectors stored in the vector pair. The @code{__vector_pair} type is usually stored with a single vector pair store instruction. 
+The following built-in functions are independent of the type of the +underlying vector: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + The following built-in functions operate on pairs of @code{vector float} values: @smallexample __vector_pair __builtin_vpair_f32_abs (__vector_pair); __vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_assemble (vector float, vector float); +vector float __builtin_vpair_f32_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_min (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_neg (__vector_pair); +__vector_pair __builtin_vpair_f32_splat (float); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -21406,11 +21416,14 @@ The following built-in functions operate on pairs of @smallexample __vector_pair __builtin_vpair_f64_abs (__vector_pair); __vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_assemble (vector double, vector double); +vector double __builtin_vpair_f64_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_neg (__vector_pair); __vector_pair __builtin_vpair_f64_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_splat (double); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample @@ -21420,16 +21433,24 @@ The following built-in functions operate on pairs of @smallexample __vector_pair 
__builtin_vpair_i64_add (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_assemble (vector long long, + vector long long); +vector long long __builtin_vpair_i64_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i64_ior (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64_min (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64_neg (__vector_pair); __vector_pair __builtin_vpair_i64_not (__vector_pair); +__vector_pair __builtin_vpair_i64_splat (long long); __vector_pair __builtin_vpair_i64_sub (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64_xor (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64u_assemble (vector unsigned long long, + vector unsigned long long); +vector unsigned long long __builtin_vpair_i64u_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i64u_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i64u_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64u_splat (unsigned long long); @end smallexample The following built-in functions operate on pairs of @@ -21438,16 +21459,23 @@ The following built-in functions operate on pairs of @smallexample __vector_pair __builtin_vpair_i32_add (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i32_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_assemble (vector int, vector int); +vector int __builtin_vpair_i32_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i32_ior (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i32_neg (__vector_pair); __vector_pair __builtin_vpair_i32_not (__vector_pair); __vector_pair __builtin_vpair_i32_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i32_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_splat 
(int); __vector_pair __builtin_vpair_i32_sub (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i32_xor (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32u_assemble (vector unsigned int, + vector unsigned int); +vector unsigned int __builtin_vpair_i32u_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i32u_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i32u_min (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32u_splat (unsigned int); @end smallexample The following built-in functions operate on pairs of @@ -21456,6 +21484,10 @@ The following built-in functions operate on pairs of @smallexample __vector_pair __builtin_vpair_i16_add (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i16_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_assemble (vector short, + vector short); +__vector_pair __builtin_vpair_i16_splat (short); +vector short __builtin_vpair_i16_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i16_ior (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i16_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i16_min (__vector_pair, __vector_pair); @@ -21464,6 +21496,10 @@ __vector_pair __builtin_vpair_i16_not (__vector_pair); __vector_pair __builtin_vpair_i16_sub (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i16_xor (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16u_assemble (vector unsigned short, + vector unsigned short); +vector unsigned short __builtin_vpair_i16u_extract_vector (__vector_pair, int); +__vector_pair __builtin_vpair_i16u_splat (unsigned short); __vector_pair __builtin_vpair_i16u_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i16u_min (__vector_pair, __vector_pair); @end smallexample @@ -21474,6 +21510,10 @@ The following built-in functions operate on pairs of @smallexample __vector_pair __builtin_vpair_i8_add (__vector_pair, 
__vector_pair); __vector_pair __builtin_vpair_i8_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_assemble (vector signed char, + vector signed char); +vector signed char __builtin_vpair_i8_extract_vector (__vector_pair, int); +__vector_pair __builtin_vpair_i8_splat (signed char); __vector_pair __builtin_vpair_i8_ior (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i8_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i8_min (__vector_pair, __vector_pair); @@ -21482,8 +21522,12 @@ __vector_pair __builtin_vpair_i8_not (__vector_pair); __vector_pair __builtin_vpair_i8_sub (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i8_xor (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8u_assemble (vector unsigned char, + vector unsigned char); +vector unsigned char __builtin_vpair_i8u_extract_vector (__vector_pair, int); __vector_pair __builtin_vpair_i8_umax (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_i8_umin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8u_splat (unsigned char); @end smallexample @node PowerPC Hardware Transactional Memory Built-in Functions diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c new file mode 100644 index 00000000000..df1c4019245 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c @@ -0,0 +1,86 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 32-bit floats. */ + +void +test_f32_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_f32_splat (0.0f); +} + +void +test_f32_splat_1 (__vector_pair *p) +{ + /* 1 xxspltiw, 1 xxlor, 1 stxvp. 
*/ + *p = __builtin_vpair_f32_splat (1.0f); +} + +void +test_f32_splat_var (__vector_pair *p, + float f) +{ + /* 1 xscvdpspn, 1 xxspltw, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_f32_splat (f); +} + +void +test_f32_splat_mem (__vector_pair *p, + float *q) +{ + /* 1 lxvwsx, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_f32_splat (*q); +} + +void +test_f32_assemble (__vector_pair *p, + vector float v1, + vector float v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_f32_assemble (v1, v2); +} + +vector float +test_f32_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_f32_extract_vector (vp, 0); +} + +vector float +test_f32_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_f32_extract_vector (vp, 0); +} + +vector float +test_f32_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_f32_extract_vector (p[1], 0); +} + +vector float +test_f32_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. 
*/ + return __builtin_vpair_f32_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c new file mode 100644 index 00000000000..397d7f60f45 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c @@ -0,0 +1,84 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 64-bit doubles. */ + +void +test_f64_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib. */ + *p = __builtin_vpair_f64_splat (0.0); +} + +void +test_f64_splat_1 (__vector_pair *p) +{ + /* 1 xxspltidp, 1 xxlor. */ + *p = __builtin_vpair_f64_splat (1.0); +} + +void +test_f64_splat_var (__vector_pair *p, + double d) +{ + /* 1 xxpermdi, 1 xxlor. */ + *p = __builtin_vpair_f64_splat (d); +} + +void +test_f64_splat_mem (__vector_pair *p, + double *q) +{ + /* 1 lxvdsx, 1 xxlor. */ + *p = __builtin_vpair_f64_splat (*q); +} + +void +test_f64_assemble (__vector_pair *p, + vector double v1, + vector double v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_f64_assemble (v1, v2); +} + +vector double +test_f64_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. 
*/ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_f64_extract_vector (vp, 0); +} + +vector double +test_f64_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_f64_extract_vector (vp, 0); +} + +vector double +test_f64_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_f64_extract_vector (p[1], 0); +} + +vector double +test_f64_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_f64_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c new file mode 100644 index 00000000000..0990dfe28d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c @@ -0,0 +1,156 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 64-bit integers. */ + +void +test_i64_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i64_splat (0); +} + +void +test_i64_splat_1 (__vector_pair *p) +{ + /* 1 xxspltib, 1 vextsb2d, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64_splat (1); +} + +void +test_i64_splat_var (__vector_pair *p, + long long ll) +{ + /* 1 xscvdpspn, 1 xxspltw, 1 xxlor, 1 stxvp. 
*/ + *p = __builtin_vpair_i64_splat (ll); +} + +void +test_i64_splat_mem (__vector_pair *p, + long long *q) +{ + /* 1 lxvwsx, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64_splat (*q); +} + +void +test_i64_assemble (__vector_pair *p, + vector long long v1, + vector long long v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64_assemble (v1, v2); +} + +vector long long +test_i64_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i64_extract_vector (vp, 0); +} + +vector long long +test_i64_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i64_extract_vector (vp, 0); +} + +vector long long +test_i64_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i64_extract_vector (p[1], 0); +} + +vector long long +test_i64_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i64_extract_vector (p[2], 1); +} + +void +test_i64u_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i64u_splat (0); +} + +void +test_i64u_splat_1 (__vector_pair *p) +{ + /* 1 xxspltib, 1 vextsb2d, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64u_splat (1); +} + +void +test_i64u_splat_var (__vector_pair *p, + unsigned long long ull) +{ + /* 1 xscvdpspn, 1 xxspltw, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64u_splat (ull); +} + +void +test_i64u_splat_mem (__vector_pair *p, + unsigned long long *q) +{ + /* 1 lxvwsx, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64u_splat (*q); +} + +void +test_i64u_assemble (__vector_pair *p, + vector unsigned long long v1, + vector unsigned long long v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i64u_assemble (v1, v2); +} + +vector unsigned long long +test_i64u_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. 
*/ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i64u_extract_vector (vp, 0); +} + +vector unsigned long long +test_i64u_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i64u_extract_vector (vp, 1); +} + +vector unsigned long long +test_i64u_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i64u_extract_vector (p[1], 0); +} + +vector unsigned long long +test_i64u_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i64u_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvdsx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-13.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-13.c new file mode 100644 index 00000000000..8174f6b1cc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-13.c @@ -0,0 +1,139 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 32-bit integers. */ + +void +test_i32_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i32_splat (0); +} + +void +test_i32_splat_1 (__vector_pair *p) +{ + /* 1 vspltisw, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i32_splat (1); +} + +void +test_i32_splat_mem (__vector_pair *p, + int *q) +{ + /* 1 lxvwsx, 1 xxlor, 1 stxvp. 
*/ + *p = __builtin_vpair_i32_splat (*q); +} + +void +test_i32_assemble (__vector_pair *p, + vector int v1, + vector int v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i32_assemble (v1, v2); +} + +vector int +test_i32_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i32_extract_vector (vp, 0); +} + +vector int +test_i32_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i32_extract_vector (vp, 1); +} + +vector int +test_i32_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i32_extract_vector (p[1], 0); +} + +vector int +test_i32_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i32_extract_vector (p[2], 1); +} + +void +test_i32u_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i32u_splat (0); +} + +void +test_i32u_splat_1 (__vector_pair *p) +{ + /* 1 vspltisw, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i32u_splat (1); +} + +void +test_i32u_splat_mem (__vector_pair *p, + unsigned int *q) +{ + /* 1 lxvwsx, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i32u_splat (*q); +} + +void +test_i32u_assemble (__vector_pair *p, + vector unsigned int v1, + vector unsigned int v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i32u_assemble (v1, v2); +} + +vector unsigned int +test_i32u_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i32u_extract_vector (vp, 0); +} + +vector unsigned int +test_i32u_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. 
*/ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i32u_extract_vector (vp, 1); +} + +vector unsigned int +test_i32u_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i32u_extract_vector (p[1], 0); +} + +vector unsigned int +test_i32u_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i32u_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvwsx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-14.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-14.c new file mode 100644 index 00000000000..fe63df795d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-14.c @@ -0,0 +1,141 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 16-bit integers. */ + +void +test_i16_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i16_splat (0); +} + +void +test_i16_splat_1 (__vector_pair *p) +{ + /* 1 vspltish, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16_splat (1); +} + +void +test_i16_splat_mem (__vector_pair *p, + short *q) +{ + /* 1 lxsihzx, 1 vsplth, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16_splat (*q); +} + +void +test_i16_assemble (__vector_pair *p, + vector short v1, + vector short v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16_assemble (v1, v2); +} + +vector short +test_i16_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. 
*/ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i16_extract_vector (vp, 0); +} + +vector short +test_i16_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i16_extract_vector (vp, 1); +} + +vector short +test_i16_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i16_extract_vector (p[1], 0); +} + +vector short +test_i16_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i16_extract_vector (p[2], 1); +} + +void +test_i16u_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i16u_splat (0); +} + +void +test_i16u_splat_1 (__vector_pair *p) +{ + /* 1 vspltish, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16u_splat (1); +} + +void +test_i16u_splat_mem (__vector_pair *p, + unsigned short *q) +{ + /* 1 lxsihzx, 1 vsplth, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16u_splat (*q); +} + +void +test_i16u_assemble (__vector_pair *p, + vector unsigned short v1, + vector unsigned short v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i16u_assemble (v1, v2); +} + +vector unsigned short +test_i16u_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i16u_extract_vector (vp, 0); +} + +vector unsigned short +test_i16u_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i16u_extract_vector (vp, 1); +} + +vector unsigned short +test_i16u_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i16u_extract_vector (p[1], 0); +} + +vector unsigned short +test_i16u_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. 
*/ + return __builtin_vpair_i16u_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxsihzx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mvsplth\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxlor\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-15.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-15.c new file mode 100644 index 00000000000..bd494327af6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-15.c @@ -0,0 +1,139 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test the vector pair built-in functions for creation and extraction of + vector pair operations using 8-bit integers. */ + +void +test_i8_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i8_splat (0); +} + +void +test_i8_splat_1 (__vector_pair *p) +{ + /* 1 xxspltib, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8_splat (1); +} + +void +test_i8_splat_mem (__vector_pair *p, + signed char *q) +{ + /* 1 lxsibzx, 1 vspltb, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8_splat (*q); +} + +void +test_i8_assemble (__vector_pair *p, + vector signed char v1, + vector signed char v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8_assemble (v1, v2); +} + +vector signed char +test_i8_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i8_extract_vector (vp, 0); +} + +vector signed char +test_i8_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. 
*/ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i8_extract_vector (vp, 1); +} + +vector signed char +test_i8_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i8_extract_vector (p[1], 0); +} + +vector signed char +test_i8_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i8_extract_vector (p[2], 1); +} + +void +test_i8u_splat_0 (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_i8u_splat (0); +} + +void +test_i8u_splat_1 (__vector_pair *p) +{ + /* 1 xxspltib, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8u_splat (1); +} + +void +test_i8u_splat_mem (__vector_pair *p, + unsigned char *q) +{ + /* 1 lxsibzx, 1 vspltb, 1 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8u_splat (*q); +} + +void +test_i8u_assemble (__vector_pair *p, + vector unsigned char v1, + vector unsigned char v2) +{ + /* 2 xxlor, 1 stxvp. */ + *p = __builtin_vpair_i8u_assemble (v1, v2); +} + +vector unsigned char +test_i8u_extract_0_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i8u_extract_vector (vp, 0); +} + +vector unsigned char +test_i8u_extract_1_reg (__vector_pair *p) +{ + /* 1 lxvp, 1 xxlor. */ + __vector_pair vp = *p; + __asm__ (" # extract in register %x0" : "+wa" (vp)); + return __builtin_vpair_i8u_extract_vector (vp, 1); +} + +vector unsigned char +test_i8u_extract_0_mem (__vector_pair *p) +{ + /* 1 lxv. */ + return __builtin_vpair_i8u_extract_vector (p[1], 0); +} + +vector unsigned char +test_i8u_extract_1_mem (__vector_pair *p) +{ + /* 1 lxv. 
*/ + return __builtin_vpair_i8u_extract_vector (p[2], 1); +} + +/* { dg-final { scan-assembler-times {\mlxsibzx\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mvspltb\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c new file mode 100644 index 00000000000..95504a5afd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib, 1 stxvp. */ + *p = __builtin_vpair_zero (); +} + +/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ -- 2.41.0 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: meiss...@linux.ibm.com