On Jun 29, 2018, at 9:38 AM, Carl Love <c...@us.ibm.com> wrote: > > GCC Maintainers: > > The vec_unpackh, vec_unpackl builtins with vector float arguments > unpack the high or low half of a floating point vector and convert the > elements to a vector of doubles. The current implementation of the > builtin for the vector float argument is incorrectly using the vupklsh > instructions vupkhsh that unpack a vector of pixel type. The following > path fixes this issue by having the vec_unpackh and vec_unpackl > builtins use the xvcvspdp instruction to do the vector float to double > conversion. > > Additionally, runnable tests were added to verify that the builtins > work correctly for all of the valid argument types. > > The patch has been tested on > > powerpc64le-unknown-linux-gnu (Power 8 LE) > powerpc64-unknown-linux-gnu (Power 8 BE) > powerpc64le-unknown-linux-gnu (Power 9 LE) > > With no regressions. > > Please let me know if the patch looks OK for GCC mainline. The patch > also needs to be backported to GCC 8. > > Carl Love > ---------------------------------------------------------------------- > > gcc/ChangeLog: > > 2018-06-29 Carl Love <c...@us.ibm.com> > > * config/rs6000/altivec.md: Add define_expand altivec_unpackh_v4sf, > and define_expand altivec_unpackl_v4sf expansions. > * config/rs6000/rs6000-builtin.def: Add UNPACKH_V4SF and > UNPACKL_V4SF definitions. > * config/rs6000/rs6000-c.c: Map ALTIVEC_BUILTIN_VEC_UNPACKH for > float argument to ALTIVEC_BUILTIN_UNPACKH_V4SF. > Map ALTIVEC_BUILTIN_VEC_UNPACKL for float argument to > ALTIVEC_BUILTIN_UNPACKH_V4SF.
UNPACKL That's all I see; will leave to Segher to approve, of course. Thanks, Bill > > gcc/testsuite/ChangeLog: > > 2018-06-29 Carl Love <c...@us.ibm.com> > * gcc.target/altivec-1-runnable.c: New test file. > * gcc.target/altivec-2-runnable.c: New test file. > * gcc.target/vsx-7.c (main2): Change expected instruction > for tests. > --- > gcc/config/rs6000/altivec.md | 22 ++ > gcc/config/rs6000/rs6000-builtin.def | 2 + > gcc/config/rs6000/rs6000-c.c | 4 +- > .../gcc.target/powerpc/altivec-1-runnable.c | 257 +++++++++++++++++++++ > .../gcc.target/powerpc/altivec-2-runnable.c | 94 ++++++++ > gcc/testsuite/gcc.target/powerpc/vsx-7.c | 7 +- > 6 files changed, 380 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 8ee42ae..e0ab588 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -2311,6 +2311,28 @@ > } > [(set_attr "type" "vecperm")]) > > +;; Unpack high elements of float vector to vector of doubles > +(define_expand "altivec_unpackh_v4sf" > + [(set (match_operand:V2DF 0 "register_operand" "=v") > + (match_operand:V4SF 1 "register_operand" "v"))] > + "TARGET_VSX" > +{ > + emit_insn (gen_doublehv4sf2 (operands[0], operands[1])); > + DONE; > +} > + [(set_attr "type" "veccomplex")]) > + > +;; Unpack low elements of float vector to vector of doubles > +(define_expand "altivec_unpackl_v4sf" > + [(set (match_operand:V2DF 0 "register_operand" "=v") > + (match_operand:V4SF 1 "register_operand" "v"))] > + "TARGET_VSX" > +{ > + emit_insn (gen_doublelv4sf2 (operands[0], operands[1])); > + DONE; > +} > + [(set_attr "type" "veccomplex")]) > + > ;; Compare vectors producing a vector result and a predicate, setting CR6 to > ;; indicate a combined status > (define_insn "*altivec_vcmpequ<VI_char>_p" > diff --git a/gcc/config/rs6000/rs6000-builtin.def > b/gcc/config/rs6000/rs6000-builtin.def > index f799681..8c9fd4d 100644 > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -1144,6 +1144,8 @@ BU_ALTIVEC_1 (VUPKHSH, "vupkhsh", CONST, > altivec_vupkhsh) > BU_ALTIVEC_1 (VUPKLSB, "vupklsb", CONST, altivec_vupklsb) > BU_ALTIVEC_1 (VUPKLPX, "vupklpx", CONST, altivec_vupklpx) > BU_ALTIVEC_1 (VUPKLSH, "vupklsh", CONST, altivec_vupklsh) > +BU_ALTIVEC_1 (UNPACKH_V4SF, "unpackh_v4sf", CONST, > altivec_unpackh_v4sf) > +BU_ALTIVEC_1 (UNPACKL_V4SF, "unpackl_v4sf", CONST, > altivec_unpackl_v4sf) > > BU_ALTIVEC_1 (VREVE_V2DI, "vreve_v2di", CONST, altivec_vrevev2di2) > BU_ALTIVEC_1 (VREVE_V4SI, "vreve_v4si", CONST, altivec_vrevev4si2) > diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c > index f4b1bf7..cd50d4e 100644 > --- a/gcc/config/rs6000/rs6000-c.c > +++ b/gcc/config/rs6000/rs6000-c.c > @@ -865,7 +865,7 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, > { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, > RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, > - { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, > + { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_UNPACKH_V4SF, > RS6000_BTI_V2DF, RS6000_BTI_V4SF, 0, 0 }, > { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, > RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, > @@ -897,7 +897,7 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, > { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, > RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, > - { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLPX, > + { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_UNPACKL_V4SF, > RS6000_BTI_V2DF, RS6000_BTI_V4SF, 0, 0 }, > { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, > RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, > diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c > b/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c > new file mode 100644 > index 0000000..938fd36 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c > @@ -0,0 +1,257 @@ > +/* { dg-do compile { target powerpc*-*-* } } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-mpower8-vector -maltivec" } */ > + > +#include <altivec.h> > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +/* Endian considerations: The "high" half of a vector with n elements is the > + first n/2 elements of the vector. For little endian, these elements are in > + the rightmost half of the vector. For big endian, these elements are in > the > + leftmost half of the vector. */ > + > + > +void abort (void); > + > +int main () > +{ > + int i; > + vector bool short vec_bs_arg; > + vector bool short vec_bs_result, vec_bs_expected; > + vector bool int vec_bi_arg; > + vector bool int vec_bi_result, vec_bi_expected; > + vector bool char vec_bc_arg; > + vector bool char vec_bc_result, vec_bc_expected; > + vector signed short vec_ss_arg; > + vector signed short vec_ss_result, vec_ss_expected; > + vector signed int vec_si_arg; > + vector signed int vec_si_result, vec_si_expected; > + vector signed char vec_sc_arg; > + vector signed char vec_sc_result, vec_sc_expected; > + vector float vec_float_arg; > + vector double vec_double_result, vec_double_expected; > + vector pixel vec_pixel_arg; > + vector unsigned int vec_ui_result, vec_ui_expected; > + > + union conv { > + double d; > + unsigned long long l; > + } conv_exp, conv_val; > + > + vec_bs_arg = (vector bool short){ 0, 101, 202, 303, > + 404, 505, 606, 707 }; > + vec_bi_expected = (vector bool int){ 0, 101, 202, 303 }; > + > + vec_bi_result = vec_unpackh (vec_bs_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_bi_expected[i] != vec_bi_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_bi_expected[%d] = %d does not > match vec_bi_result[%d] = %d\n", > + i, vec_bi_expected[i], i, vec_bi_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_bi_expected = (vector bool int){ 404, 505, 606, 707 }; > + vec_bi_result = vec_unpackl (vec_bs_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_bi_expected[i] != vec_bi_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl(), vec_bi_expected[%d] = %d does not match > vec_bi_result[%d] = %d\n", > + i, vec_bi_expected[i], i, vec_bi_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_ss_arg = (vector signed short){ 0, 101, 202, 303, > + 404, 505, 606, 707 }; > + vec_si_expected = (vector signed int){ 0, 101, 202, 303 }; > + > + vec_si_result = vec_unpackh (vec_ss_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_si_expected[i] != vec_si_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_si_expected[%d] = %d does not match > vec_si_result[%d] = %d\n", > + i, vec_si_expected[i], i, vec_si_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_si_expected = (vector signed int){ 404, 505, 606, 707 }; > + > + vec_si_result = vec_unpackl (vec_ss_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_si_expected[i] != vec_si_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl(), vec_si_expected[%d] = %d does not match > vec_si_result[%d] = %d\n", > + i, vec_si_expected[i], i, vec_si_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_pixel_arg = (vector pixel){ 0x0, 0x65, 0xca, 0x12f, > + 0x194, 0x1f9, 0x25e, 0x2c3 }; > + vec_ui_expected = (vector unsigned int){ 0x0, 0x305, 0x60a, 0x90f }; > + > + vec_ui_result = vec_unpackh (vec_pixel_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_ui_expected[i] != vec_ui_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_ui_expected[%d] = 0x%x does not > match vec_ui_result[%d] = 0x%x\n", > + i, vec_ui_expected[i], i, vec_ui_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_ui_expected = (vector unsigned int){ 0xc14, 0xf19, 0x121e, 0x1603 }; > + > + vec_ui_result = vec_unpackl (vec_pixel_arg); > + > + for (i = 0; i < 4; i++) { > + if (vec_ui_expected[i] != vec_ui_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl(), vec_ui_expected[%d] = 0x%x does not > match vec_ui_result[%d] = 0x%x\n", > + i, vec_ui_expected[i], i, vec_ui_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_bc_arg = (vector bool char){ 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 0, 1, 1, 0, 0, 1, 1 }; > + > + vec_bs_expected = (vector bool short){ 0, 1, 0, 1, 0, 1, 0, 1 }; > + > + vec_bs_result = vec_unpackh (vec_bc_arg); > + > + for (i = 0; i < 8; i++) { > + if (vec_bs_expected[i] != vec_bs_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_bs_expected[%d] = %d does not match > vec_bs_result[%d] = %d\n", > + i, vec_bs_expected[i], i, vec_bs_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_bs_expected = (vector bool short){ 0, 0, 1, 1, 0, 0, 1, 1 }; > + > + vec_bs_result = vec_unpackl (vec_bc_arg); > + > + for (i = 0; i < 8; i++) { > + if (vec_bs_expected[i] != vec_bs_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_bs_expected[%d] = %d does not match > vec_bs_result[%d] = %d\n", > + i, vec_bs_expected[i], i, vec_bs_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_bs_expected = (vector bool short){ 0, 0, 1, 1, 0, 0, 1, 1 }; > + > + vec_bs_result = vec_unpackl (vec_bc_arg); > + > + for (i = 0; i < 8; i++) { > + if (vec_bs_expected[i] != vec_bs_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl(), vec_bs_expected[%d] = %d does not match > vec_bs_result[%d] = %d\n", > + i, vec_bs_expected[i], i, vec_bs_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_sc_arg = (vector signed char){ 0, 1, 2, 3, 4, 5, 6, 7, > + 8, 9, 10, 11, 12, 13, 14, 15 }; > + > + vec_ss_expected = (vector signed short){ 0, 1, 2, 3, 4, 5, 6, 7 }; > + > + vec_ss_result = vec_unpackh (vec_sc_arg); > + > + for (i = 0; i < 8; i++) { > + if (vec_ss_expected[i] != vec_ss_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_ss_expected[%d] = %d does not match > vec_ss_result[%d] = %d\n", > + i, vec_ss_expected[i], i, vec_ss_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_ss_expected = (vector signed short){ 8, 9, 10, 11, 12, 13, 14, 15 }; > + > + vec_ss_result = vec_unpackl (vec_sc_arg); > + > + for (i = 0; i < 8; i++) { > + if (vec_ss_expected[i] != vec_ss_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl(), vec_ss_expected[%d] = %d does not match > vec_ss_result[%d] = %d\n", > + i, vec_ss_expected[i], i, vec_ss_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_float_arg = (vector float){ 0.0, 1.5, 2.5, 3.5 }; > + > + vec_double_expected = (vector double){ 0.0, 1.5 }; > + > + vec_double_result = vec_unpackh (vec_float_arg); > + > + for (i = 0; i < 2; i++) { > + if (vec_double_expected[i] != vec_double_result[i]) > + { > +#if DEBUG > + printf("ERROR: vec_unpackh(), vec_double_expected[%d] = %f does not > match vec_double_result[%d] = %f\n", > + i, vec_double_expected[i], i, vec_double_result[i]); > + conv_val.d = vec_double_result[i]; > + conv_exp.d = vec_double_expected[i]; > + printf(" vec_unpackh(), vec_double_expected[%d] = 0x%llx does not > match vec_double_result[%d] = 0x%llx\n", > + i, conv_exp.l, i,conv_val.l); > +#else > + abort(); > +#endif > + } > + } > + > + vec_double_expected = (vector double){ 2.5, 3.5 }; > + > + vec_double_result = vec_unpackl (vec_float_arg); > + > + for (i = 0; i < 2; i++) { > + if (vec_double_expected[i] != vec_double_result[i]) > + { > +#if DEBUG > + printf("ERROR: vec_unpackl() vec_double_expected[%d] = %f does not > match vec_double_result[%d] = %f\n", > + i, vec_double_expected[i], i, vec_double_result[i]); > + conv_val.d = vec_double_result[i]; > + conv_exp.d = vec_double_expected[i]; > + printf(" vec_unpackh(), vec_double_expected[%d] = 0x%llx does not > match vec_double_result[%d] = 0x%llx\n", > + i, conv_exp.l, i,conv_val.l); > +#else > + abort(); > +#endif > + } > + } > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c > b/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c > new file mode 100644 > index 0000000..eaae84a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c > @@ -0,0 +1,94 @@ > +/* { dg-do compile { target powerpc*-*-* } } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-mpower8-vector -mvsx" } */ > + > +#include <altivec.h> > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +void abort (void); > + > +/* Endian considerations: The "high" half of a vector with n elements is the > + first n/2 elements of the vector. For little endian, these elements are in > + the rightmost half of the vector. For big endian, these elements are in > the > + leftmost half of the vector. */ > + > +int main () > +{ > + int i; > + vector bool int vec_bi_arg; > + vector bool long long vec_bll_result, vec_bll_expected; > + > + vector signed int vec_si_arg; > + vector signed long long int vec_slli_result, vec_slli_expected; > + > + /* use of ‘long long’ in AltiVec types requires -mvsx */ > + /* __builtin_altivec_vupkhsw and __builtin_altivec_vupklsw > + requires the -mpower8-vector option */ > + > + vec_bi_arg = (vector bool int){ 0, 1, 1, 0 }; > + > + vec_bll_expected = (vector bool long long){ 0, 1 }; > + > + vec_bll_result = vec_unpackh (vec_bi_arg); > + > + for (i = 0; i < 2; i++) { > + if (vec_bll_expected[i] != vec_bll_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh, vec_bll_expected[%d] = %d does not match > vec_bll_result[%d] = %d\n", > + i, vec_bll_expected[i], i, vec_bll_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_bll_expected = (vector bool long long){ 1, 0 }; > + > + vec_bll_result = vec_unpackl (vec_bi_arg); > + > + for (i = 0; i < 2; i++) { > + if (vec_bll_expected[i] != vec_bll_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl, vec_bll_expected[%d] = %d does not match > vec_bll_result[%d] = %d\n", > + i, vec_bll_expected[i], i, vec_bll_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + vec_si_arg = (vector signed int){ 0, 101, 202, 303 }; > + > + vec_slli_expected = (vector signed long long int){ 0, 101 }; > + > + vec_slli_result = vec_unpackh (vec_si_arg); > + > + for (i = 0; i < 2; i++) { > + if (vec_slli_expected[i] != vec_slli_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackh, vec_slli_expected[%d] = %d does not match > vec_slli_result[%d] = %d\n", > + i, vec_slli_expected[i], i, vec_slli_result[i]); > +#else > + abort(); > +#endif > + } > + > + vec_slli_result = vec_unpackl (vec_si_arg); > + vec_slli_expected = (vector signed long long int){ 202, 303 }; > + > + for (i = 0; i < 2; i++) { > + if (vec_slli_expected[i] != vec_slli_result[i]) > +#if DEBUG > + printf("ERROR: vec_unpackl, vec_slli_expected[%d] = %d does not match > vec_slli_result[%d] = %d\n", > + i, vec_slli_expected[i], i, vec_slli_result[i]); > +#else > + abort(); > +#endif > + } > + > + > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-7.c > b/gcc/testsuite/gcc.target/powerpc/vsx-7.c > index 94cb69e..ffeaf51 100644 > --- a/gcc/testsuite/gcc.target/powerpc/vsx-7.c > +++ b/gcc/testsuite/gcc.target/powerpc/vsx-7.c > @@ -17,10 +17,9 @@ int main2 () > } > > /* Expected results: > - vec_unpackl vupkhsh > - vec_unpackh vupklsh > + vec_unpackl xvcvspdp > + vec_unpackh xvcvspdp > */ > > -/* { dg-final { scan-assembler-times "vupkhpx" 1 } } */ > -/* { dg-final { scan-assembler-times "vupklpx" 1 } } */ > +/* { dg-final { scan-assembler-times "xvcvspdp" 2 } } */ > > -- > 2.7.4 >