On Mon, 2020-06-15 at 16:37 -0700, Carl Love via Gcc-patches wrote: > v2 fixes: > > change log entries config/rs6000/vsx.md, config/rs6000/rs6000-builtin.def, > config/rs6000/rs6000-call.c. > > gcc/config/rs6000/rs6000-call.c: fixed if check for 3rd arg between 0 and 3 > fixed if check for 3rd arg between 0 and 12 > > gcc/config/rs6000/vsx.md: removed REPLACE_ELT_atr definition and used > VS_scalar instead. > removed REPLACE_ELT_inst definition and used <mode> > i\ > nstead
bad word break. > fixed spelling mistake on Endianness. > fixed indenting for vreplace_elt_<mode> > > ----------------------------------- > > GCC maintainers: > > The following patch adds support for builtins vec_replace_elt and > vec_replace_unaligned. > > The patch has been compiled and tested on > > powerpc64le-unknown-linux-gnu (Power 9 LE) > > and mambo with no regression errors. > > Please let me know if this patch is acceptable for the pu > branch. Thanks. What branch? > > Carl Love > > ------------------------------------------------------- > > gcc/ChangeLog > > 2020-06-15 Carl Love <c...@us.ibm.com> > > * config/rs6000/altivec.h: Add define for vec_replace_elt and > vec_replace_unaligned. > * config/rs6000/vsx.md (UNSPEC_REPLACE_ELT, UNSPEC_REPLACE_UN): New. > (REPLACE_ELT): New mode iterator. > (REPLACE_ELT_atr, REPLACE_ELT_inst, REPLACE_ELT_char, > REPLACE_ELT_sh, REPLACE_ELT_max): New mode attributes. > (vreplace_un_<mode>, vreplace_elt_<mode>_inst): New. > * config/rs6000/rs6000-builtin.def (VREPLACE_ELT_V4SI, > VREPLACE_ELT_UV4\ > SI, > VREPLACE_ELT_V4SF, VREPLACE_ELT_UV2DI, VREPLACE_ELT_V2DF, > VREPLACE_UN_V4SI, VREPLACE_UN_UV4SI, VREPLACE_UN_V4SF, > VREPLACE_UN_V2DI, VREPLACE_UN_UV2DI, VREPLACE_UN_V2DF): New. > (REPLACE_ELT, REPLACE_UN): New. > * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_REPLACE_ELT, > FUTURE_BUILTIN_VEC_REPLACE_UN): New. > (rs6000_expand_ternop_builtin): Add 3rd argument checks for > CODE_FOR_vreplace_elt_v4si, CODE_FOR_vreplace_elt_v4sf, > CODE_FOR_vreplace_un_v4si, CODE_FOR_vreplace_un_v4sf. > (builtin_function_type) [FUTURE_BUILTIN_VREPLACE_ELT_UV4SI, > FUTURE_BUIL\ > TIN_VREPLACE_ELT_UV2DI, > FUTURE_BUILTIN_VREPLACE_UN_UV4SI, FUTURE_BUILTIN_VREPLACE_UN_UV2DI]: > Ne\ > w cases. Multiple bad wordbreaks. > * doc/extend.texi: Add description for vec_replace_elt and > vec_replace_unaligned builtins. > > > gcc/testsuite/ChangeLog > > 2020-06-15 Carl Love <c...@us.ibm.com> > * gcc.target/powerpc/vec-replace-word.c: Add new test. 
> --- > gcc/config/rs6000/altivec.h | 2 + > gcc/config/rs6000/rs6000-builtin.def | 16 + > gcc/config/rs6000/rs6000-call.c | 61 ++++ > gcc/config/rs6000/vsx.md | 60 ++++ > gcc/doc/extend.texi | 50 +++ > .../powerpc/vec-replace-word-runnable.c | 289 ++++++++++++++++++ > 6 files changed, 478 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c > > diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h > index 936aeb1ee09..435ffb8158f 100644 > --- a/gcc/config/rs6000/altivec.h > +++ b/gcc/config/rs6000/altivec.h > @@ -701,6 +701,8 @@ __altivec_scalar_pred(vec_any_nle, > #define vec_extracth(a, b, c) __builtin_vec_extracth (a, b, c) > #define vec_insertl(a, b, c) __builtin_vec_insertl (a, b, c) > #define vec_inserth(a, b, c) __builtin_vec_inserth (a, b, c) > +#define vec_replace_elt(a, b, c) __builtin_vec_replace_elt (a, b, c) > +#define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c) > I don't think the parameters are necessary unless there is remapping going on. But existing nearby content has them, so match the existing style, I guess. 
:-) > #define vec_gnb(a, b) __builtin_vec_gnb (a, b) > #define vec_clrl(a, b) __builtin_vec_clrl (a, b) > diff --git a/gcc/config/rs6000/rs6000-builtin.def > b/gcc/config/rs6000/rs6000-builtin.def > index c5bd4f86555..91821f29a6f 100644 > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -2643,6 +2643,20 @@ BU_FUTURE_V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, > vinsertvr_v16qi) > BU_FUTURE_V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi) > BU_FUTURE_V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si) > > +BU_FUTURE_V_3 (VREPLACE_ELT_V4SI, "vreplace_v4si", CONST, vreplace_elt_v4si) > +BU_FUTURE_V_3 (VREPLACE_ELT_UV4SI, "vreplace_uv4si", CONST, > vreplace_elt_v4si) > +BU_FUTURE_V_3 (VREPLACE_ELT_V4SF, "vreplace_v4sf", CONST, vreplace_elt_v4sf) > +BU_FUTURE_V_3 (VREPLACE_ELT_V2DI, "vreplace_v2di", CONST, vreplace_elt_v2di) > +BU_FUTURE_V_3 (VREPLACE_ELT_UV2DI, "vreplace_uv2di", CONST, > vreplace_elt_v2di) > +BU_FUTURE_V_3 (VREPLACE_ELT_V2DF, "vreplace_v2df", CONST, vreplace_elt_v2df) > + > +BU_FUTURE_V_3 (VREPLACE_UN_V4SI, "vreplace_un_v4si", CONST, vreplace_un_v4si) > +BU_FUTURE_V_3 (VREPLACE_UN_UV4SI, "vreplace_un_uv4si", CONST, > vreplace_un_v4si) > +BU_FUTURE_V_3 (VREPLACE_UN_V4SF, "vreplace_un_v4sf", CONST, vreplace_un_v4sf) > +BU_FUTURE_V_3 (VREPLACE_UN_V2DI, "vreplace_un_v2di", CONST, vreplace_un_v2di) > +BU_FUTURE_V_3 (VREPLACE_UN_UV2DI, "vreplace_un_uv2di", CONST, > vreplace_un_v2di) > +BU_FUTURE_V_3 (VREPLACE_UN_V2DF, "vreplace_un_v2df", CONST, vreplace_un_v2df) > + > BU_FUTURE_V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi) > BU_FUTURE_V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) > BU_FUTURE_V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi) > @@ -2664,6 +2678,8 @@ BU_FUTURE_OVERLOAD_3 (EXTRACTL, "extractl") > BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth") > BU_FUTURE_OVERLOAD_3 (INSERTL, "insertl") > BU_FUTURE_OVERLOAD_3 (INSERTH, "inserth") > +BU_FUTURE_OVERLOAD_3 (REPLACE_ELT, "replace_elt") > 
+BU_FUTURE_OVERLOAD_3 (REPLACE_UN, "replace_un") > > BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir") > BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril") > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index abbe00030ea..2653222ced0 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -5624,6 +5624,36 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI }, > > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_UINTSI, RS6000_BTI_UINTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V4SF, > + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_UV2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_UINTDI, RS6000_BTI_UINTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_ELT, FUTURE_BUILTIN_VREPLACE_ELT_V2DF, > + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI }, > + > + { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_UV4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_UINTSI, RS6000_BTI_UINTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V4SF, > + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_UN, 
FUTURE_BUILTIN_VREPLACE_UN_UV2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_UINTDI, RS6000_BTI_UINTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI }, > + { FUTURE_BUILTIN_VEC_REPLACE_UN, FUTURE_BUILTIN_VREPLACE_UN_V2DF, > + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI }, > + > { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL, > RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, > { FUTURE_BUILTIN_VEC_VSTRIL, FUTURE_BUILTIN_VSTRIBL, > @@ -9987,6 +10017,33 @@ rs6000_expand_ternop_builtin (enum insn_code icode, > tree exp, rtx target) > return CONST0_RTX (tmode); > } > } > + else if (icode == CODE_FOR_vreplace_elt_v4si > + || icode == CODE_FOR_vreplace_elt_v4sf) > + { > + /* Check whether the 3rd argument is an integer constant in the range > + 0 to 3 inclusive. */ > + STRIP_NOPS (arg2); > + if (TREE_CODE (arg2) != INTEGER_CST > + || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 3)) > + { > + error ("argument 3 must be in the range 0 to 3"); > + return CONST0_RTX (tmode); > + } > + } > + > + else if (icode == CODE_FOR_vreplace_un_v4si > + || icode == CODE_FOR_vreplace_un_v4sf) > + { > + /* Check whether the 3rd argument is an integer constant in the range > + 0 to 12 inclusive. 
*/ > + STRIP_NOPS (arg2); > + if (TREE_CODE (arg2) != INTEGER_CST > + || !IN_RANGE(TREE_INT_CST_LOW (arg2), 0, 12)) > + { > + error ("argument 3 must be in the range 0 to 12"); > + return CONST0_RTX (tmode); > + } > + } > > if (target == 0 > || GET_MODE (target) != tmode > @@ -13342,6 +13399,10 @@ builtin_function_type (machine_mode mode_ret, > machine_mode mode_arg0, > case FUTURE_BUILTIN_VINSERTVPRBL: > case FUTURE_BUILTIN_VINSERTVPRHL: > case FUTURE_BUILTIN_VINSERTVPRWL: > + case FUTURE_BUILTIN_VREPLACE_ELT_UV4SI: > + case FUTURE_BUILTIN_VREPLACE_ELT_UV2DI: > + case FUTURE_BUILTIN_VREPLACE_UN_UV4SI: > + case FUTURE_BUILTIN_VREPLACE_UN_UV2DI: > h.uns_p[0] = 1; > h.uns_p[1] = 1; > h.uns_p[2] = 1; > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 6ce93f14dec..57607998c42 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -348,11 +348,22 @@ > UNSPEC_EXTRACTR > UNSPEC_INSERTL > UNSPEC_INSERTR > + UNSPEC_REPLACE_ELT > + UNSPEC_REPLACE_UN > ]) > > ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops > (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) > > +;; Vector extract_elt iterator/attr for 32-bit and 64-bit elements > +(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF]) > +(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w") > + (V2DI "d") (V2DF "d")]) > +(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2") > + (V2DI "3") (V2DF "3")]) > +(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12") > + (V2DI "8") (V2DF "8")]) > + > ;; VSX moves > > ;; The patterns for LE permuted loads and stores come before the general > @@ -3957,6 +3968,55 @@ > "vins<wd>rx %0,%1,%2" > [(set_attr "type" "vecsimple")]) > > +(define_expand "vreplace_elt_<mode>" > + [(set (match_operand:REPLACE_ELT 0 "register_operand") > + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") > + (match_operand:<VS_scalar> 2 "register_operand") > + (match_operand:QI 3 "const_0_to_3_operand")] > + 
UNSPEC_REPLACE_ELT))] > + "TARGET_FUTURE" > +{ > + int index; > + /* Immediate value is the word index, convert to byte index and adjust for > + Endianness if needed. */ > + if (BYTES_BIG_ENDIAN) > + index = INTVAL (operands[3]) << <REPLACE_ELT_sh>; > + > + else > + index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>); > + > + emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], > + operands[2], > + GEN_INT (index))); > + DONE; > + } > +[(set_attr "type" "vecsimple")]) > + > +(define_expand "vreplace_un_<mode>" > + [(set (match_operand:REPLACE_ELT 0 "register_operand") > + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") > + (match_operand:<VS_scalar> 2 "register_operand") > + (match_operand:QI 3 "const_0_to_12_operand")] > + UNSPEC_REPLACE_UN))] > + "TARGET_FUTURE" > +{ > + /* Immediate value is the byte index Big Endian numbering. */ > + emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], > + operands[2], operands[3])); > + DONE; > + } > +[(set_attr "type" "vecsimple")]) > + > +(define_insn "vreplace_elt_<mode>_inst" > + [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v") > + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0") > + (match_operand:<VS_scalar> 2 "register_operand" "r") > + (match_operand:QI 3 "const_0_to_12_operand" "n")] > + UNSPEC_REPLACE_ELT))] > + "TARGET_FUTURE" > + "vins<REPLACE_ELT_char> %0,%2,%3" > + [(set_attr "type" "vecsimple")]) > + > ;; VSX_EXTRACT optimizations > ;; Optimize double d = (double) vec_extract (vi, <n>) > ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 8931c7950f6..00c17be1851 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -21045,6 +21045,56 @@ This is a limitation of the bi-endian vector > programming model consistent with > the limitation on vec_perm, for example. 
> @findex vec_inserth > > +Vector Replace Element > +@smallexample > +@exdent vector signed int vec_replace_elt (vector signed int, signed int, > +const int); > +@exdent vector unsigned int vec_replace_elt (vector unsigned int, > +unsigned int, const int); > +@exdent vector float vec_replace_elt (vector float, float, const int); > +@exdent vector signed long long vec_replace_elt (vector signed long long, > +signed long long, const int); > +@exdent vector unsigned long long vec_replace_elt (vector unsigned long long, > +unsigned long long, const int); > +@exdent vector double rec_replace_elt (vector double, double, const int); > +@end smallexample > +The third argument (constrained to [0,3]) identifies the natural-endian > +element number of the first argument that will be replaced by the second > +argument to produce the result. The other elements of the first argument > will > +remain unchanged in the result. > + > +If it's desirable to insert a word at an unaligned position, use > +vec_replace_unaligned instead. > + > +@findex vec_replace_element > + > +Vector Replace Unaligned > +@smallexample > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +signed int, const int); > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +unsigned int, const int); > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +float, const int); > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +signed long long, const int); > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +unsigned long long, const int); > +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, > +double, const int); > +@end smallexample > + > +The second argument replaces a portion of the first argument to produce the > +result, with the rest of the first argument unchanged in the result. 
The > +third argument identifies the byte index (using left-to-right, or big-endian > +order) where the high-order byte of the second argument will be placed, with > +the remaining bytes of the second argument placed naturally "to the right" > +of the high-order byte. > + > +The programmer is responsible for understanding the endianness issues > involved > +with the first argument and the result. > +@findex vec_replace_unaligned > + > @smallexample > @exdent vector unsigned long long int > @exdent vec_pdep (vector unsigned long long int, vector unsigned long long > int) > diff --git a/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c > b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c > new file mode 100644 > index 00000000000..1fe23d5f912 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c > @@ -0,0 +1,289 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target powerpc_future_hw } */ > +/* { dg-options "-mdejagnu-cpu=future" } */ > + > +#include <altivec.h> > + > +#define DEBUG 1 > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +extern void abort (void); > + > +int > +main (int argc, char *argv []) > +{ > + int i; > + unsigned char ch; > + unsigned int index; > + > + vector unsigned int vresult_uint; > + vector unsigned int expected_vresult_uint; > + vector unsigned int src_va_uint; > + vector unsigned int src_vb_uint; > + unsigned int src_a_uint; > + > + vector int vresult_int; > + vector int expected_vresult_int; > + vector int src_va_int; > + vector int src_vb_int; > + int src_a_int; > + > + vector unsigned long long int vresult_ullint; > + vector unsigned long long int expected_vresult_ullint; > + vector unsigned long long int src_va_ullint; > + vector unsigned long long int src_vb_ullint; > + unsigned int long long src_a_ullint; > + > + vector long long int vresult_llint; > + vector long long int expected_vresult_llint; > + vector long long int src_va_llint; > + vector long long int 
src_vb_llint; > + long long int src_a_llint; > + > + vector float vresult_float; > + vector float expected_vresult_float; > + vector float src_va_float; > + float src_a_float; > + > + vector double vresult_double; > + vector double expected_vresult_double; > + vector double src_va_double; > + double src_a_double; > + > + /* Vector replace 32-bit element */ > + src_a_uint = 345; > + src_va_uint = (vector unsigned int) { 0, 1, 2, 3 }; > + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; > + expected_vresult_uint = (vector unsigned int) { 0, 1, 345, 3 }; > + > + vresult_uint = vec_replace_elt (src_va_uint, src_a_uint, 2); > + > + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_uint, src_va_uint, index)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", > + i, vresult_uint[i], i, expected_vresult_uint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_int = 234; > + src_va_int = (vector int) { 0, 1, 2, 3 }; > + vresult_int = (vector int) { 0, 0, 0, 0 }; > + expected_vresult_int = (vector int) { 0, 234, 2, 3 }; > + > + vresult_int = vec_replace_elt (src_va_int, src_a_int, 1); > + > + if (!vec_all_eq (vresult_int, expected_vresult_int)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_int, src_va_int, index)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", > + i, vresult_int[i], i, expected_vresult_int[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_float = 34.0; > + src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 }; > + vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 }; > + expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 }; > + > + vresult_float = vec_replace_elt (src_va_float, src_a_float, 1); > + > + if (!vec_all_eq (vresult_float, expected_vresult_float)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_float, src_va_float, index)\n"); > + 
for(i = 0; i < 4; i++) > + printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n", > + i, vresult_float[i], i, expected_vresult_float[i]); > +#else > + abort(); > +#endif > + } > + > + /* Vector replace 64-bit element */ > + src_a_ullint = 456; > + src_va_ullint = (vector unsigned long long int) { 0, 1 }; > + vresult_ullint = (vector unsigned long long int) { 0, 0 }; > + expected_vresult_ullint = (vector unsigned long long int) { 0, 456 }; > + > + vresult_ullint = vec_replace_elt (src_va_ullint, src_a_ullint, 1); > + > + if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_ullint, src_va_ullint, index)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n", > + i, vresult_ullint[i], i, expected_vresult_ullint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_llint = 678; > + src_va_llint = (vector long long int) { 0, 1 }; > + vresult_llint = (vector long long int) { 0, 0 }; > + expected_vresult_llint = (vector long long int) { 0, 678 }; > + > + vresult_llint = vec_replace_elt (src_va_llint, src_a_llint, 1); > + > + if (!vec_all_eq (vresult_llint, expected_vresult_llint)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_llint, src_va_llint, index)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n", > + i, vresult_llint[i], i, expected_vresult_llint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_double = 678.0; > + src_va_double = (vector double) { 0.0, 50.0 }; > + vresult_double = (vector double) { 0.0, 0.0 }; > + expected_vresult_double = (vector double) { 0.0, 678.0 }; > + > + vresult_double = vec_replace_elt (src_va_double, src_a_double, 1); > + > + if (!vec_all_eq (vresult_double, expected_vresult_double)) { > +#if DEBUG > + printf("ERROR, vec_replace_elt (src_vb_double, src_va_double, index)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_double[%d] = 
%f, expected_vresult_double[%d] = %f\n", > + i, vresult_double[i], i, expected_vresult_double[i]); > +#else > + abort(); > +#endif > + } > + > + > + /* Vector replace 32-bit element, unaligned */ > + src_a_uint = 345; > + src_va_uint = (vector unsigned int) { 1, 2, 0, 0 }; > + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; > + /* Byte index 7 will overwrite part of elements 2 and 3 */ > + expected_vresult_uint = (vector unsigned int) { 1, 2, 345*256, 0 }; > + > + vresult_uint = vec_replace_unaligned (src_va_uint, src_a_uint, 3); > + > + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_uint, src_va_uint, > index)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", > + i, vresult_uint[i], i, expected_vresult_uint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_int = 234; > + src_va_int = (vector int) { 1, 0, 3, 4 }; > + vresult_int = (vector int) { 0, 0, 0, 0 }; > + /* Byte index 7 will over write part of elements 1 and 2 */ > + expected_vresult_int = (vector int) { 1, 234*256, 0, 4 }; > + > + vresult_int = vec_replace_unaligned (src_va_int, src_a_int, 7); > + > + if (!vec_all_eq (vresult_int, expected_vresult_int)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_int, src_va_int, index)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", > + i, vresult_int[i], i, expected_vresult_int[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_float = 34.0; > + src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 }; > + vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 }; > + expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 }; > + > + vresult_float = vec_replace_unaligned (src_va_float, src_a_float, 8); > + > + if (!vec_all_eq (vresult_float, expected_vresult_float)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_float, src_va_float, > 
index)\n"); > + for(i = 0; i < 4; i++) > + printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n", > + i, vresult_float[i], i, expected_vresult_float[i]); > +#else > + abort(); > +#endif > + } > + > + /* Vector replace 64-bit element, unaligned */ > + src_a_ullint = 456; > + src_va_ullint = (vector unsigned long long int) { 0, 0x222 }; > + vresult_ullint = (vector unsigned long long int) { 0, 0 }; > + expected_vresult_ullint = (vector unsigned long long int) { 456*256, > + 0x200 }; > + > + /* Byte index 7 will over write least significant byte of element 0 */ > + vresult_ullint = vec_replace_unaligned (src_va_ullint, src_a_ullint, 7); > + > + if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_ullint, src_va_ullint, > index)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n", > + i, vresult_ullint[i], i, expected_vresult_ullint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_llint = 678; > + src_va_llint = (vector long long int) { 0, 0x101 }; > + vresult_llint = (vector long long int) { 0, 0 }; > + /* Byte index 7 will over write least significant byte of element 0 */ > + expected_vresult_llint = (vector long long int) { 678*256, 0x100 }; > + > + vresult_llint = vec_replace_unaligned (src_va_llint, src_a_llint, 7); > + > + if (!vec_all_eq (vresult_llint, expected_vresult_llint)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_llint, src_va_llint, > index)\n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n", > + i, vresult_llint[i], i, expected_vresult_llint[i]); > +#else > + abort(); > +#endif > + } > + > + src_a_double = 678.0; > + src_va_double = (vector double) { 0.0, 50.0 }; > + vresult_double = (vector double) { 0.0, 0.0 }; > + expected_vresult_double = (vector double) { 0.0, 678.0 }; > + > + vresult_double = vec_replace_unaligned (src_va_double, 
src_a_double, 0); > + > + if (!vec_all_eq (vresult_double, expected_vresult_double)) { > +#if DEBUG > + printf("ERROR, vec_replace_unaligned (src_vb_double, src_va_double, > index)\ > +n"); > + for(i = 0; i < 2; i++) > + printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n", > + i, vresult_double[i], i, expected_vresult_double[i]); > +#else > + abort(); > +#endif > + } > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {\mvinsw\M} 6 } } */ > +/* { dg-final { scan-assembler-times {\mvinsd\M} 6 } } */ > + > +