On Jun 29, 2018, at 9:38 AM, Carl Love <c...@us.ibm.com> wrote:
> 
> GCC Maintainers:
> 
> The vec_unpackh, vec_unpackl builtins with vector float arguments
> unpack the high or low half of a floating point vector and convert the
> elements to a vector of doubles.  The current implementation of the
> builtin for the vector float argument is incorrectly using the vupklsh 
> instructions vupkhsh that unpack a vector of pixel type.  The following
> path fixes this issue by having the vec_unpackh and vec_unpackl
> builtins use the xvcvspdp instruction to do the vector float to double
> conversion.
> 
> Additionally, runnable tests were added to verify that the builtins
> work correctly for all of the valid argument types.
> 
> The patch has been tested on 
> 
>     powerpc64le-unknown-linux-gnu (Power 8 LE)  
>     powerpc64-unknown-linux-gnu (Power 8 BE)
>    powerpc64le-unknown-linux-gnu (Power 9 LE)
> 
> With no regressions.
> 
> Please let me know if the patch looks OK for GCC mainline. The patch
> also needs to be backported to GCC 8.
> 
>                          Carl Love
> ----------------------------------------------------------------------
> 
> gcc/ChangeLog:
> 
> 2018-06-29  Carl Love  <c...@us.ibm.com>
> 
>       * config/rs6000/altivec.md: Add define_expand altivec_unpackh_v4sf,
>       and define_expand altivec_unpackl_v4sf expansions.
>       * config/rs6000/rs6000-builtin.def: Add UNPACKH_V4SF and
>       UNPACKL_V4SF definitions.
>       * config/rs6000/rs6000-c.c: Map ALTIVEC_BUILTIN_VEC_UNPACKH for
>       float argument to ALTIVEC_BUILTIN_UNPACKH_V4SF.
>       Map ALTIVEC_BUILTIN_VEC_UNPACKL for float argument to
>       ALTIVEC_BUILTIN_UNPACKH_V4SF.

UNPACKL

That's all I see; will leave to Segher to approve, of course. 

Thanks,
Bill
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-06-29  Carl Love  <c...@us.ibm.com>
>       * gcc.target/altivec-1-runnable.c: New test file.
>       * gcc.target/altivec-2-runnable.c: New test file.
>       * gcc.target/vsx-7.c (main2): Change expected instruction
>       for tests.
> ---
> gcc/config/rs6000/altivec.md                       |  22 ++
> gcc/config/rs6000/rs6000-builtin.def               |   2 +
> gcc/config/rs6000/rs6000-c.c                       |   4 +-
> .../gcc.target/powerpc/altivec-1-runnable.c        | 257 +++++++++++++++++++++
> .../gcc.target/powerpc/altivec-2-runnable.c        |  94 ++++++++
> gcc/testsuite/gcc.target/powerpc/vsx-7.c           |   7 +-
> 6 files changed, 380 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c
> create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 8ee42ae..e0ab588 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2311,6 +2311,28 @@
> }
>   [(set_attr "type" "vecperm")])
> 
> +;; Unpack high elements of float vector to vector of doubles
> +(define_expand "altivec_unpackh_v4sf"
> +  [(set (match_operand:V2DF 0 "register_operand" "=v")
> +        (match_operand:V4SF 1 "register_operand" "v"))]
> +  "TARGET_VSX"
> +{
> +  emit_insn (gen_doublehv4sf2 (operands[0], operands[1]));
> +  DONE;
> +}
> +  [(set_attr "type" "veccomplex")])
> +
> +;; Unpack low elements of float vector to vector of doubles
> +(define_expand "altivec_unpackl_v4sf"
> +  [(set (match_operand:V2DF 0 "register_operand" "=v")
> +        (match_operand:V4SF 1 "register_operand" "v"))]
> +  "TARGET_VSX"
> +{
> +  emit_insn (gen_doublelv4sf2 (operands[0], operands[1]));
> +  DONE;
> +}
> +  [(set_attr "type" "veccomplex")])
> +
> ;; Compare vectors producing a vector result and a predicate, setting CR6 to
> ;; indicate a combined status
> (define_insn "*altivec_vcmpequ<VI_char>_p"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index f799681..8c9fd4d 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -1144,6 +1144,8 @@ BU_ALTIVEC_1 (VUPKHSH,        "vupkhsh",        CONST,  
> altivec_vupkhsh)
> BU_ALTIVEC_1 (VUPKLSB,              "vupklsb",        CONST,  altivec_vupklsb)
> BU_ALTIVEC_1 (VUPKLPX,              "vupklpx",        CONST,  altivec_vupklpx)
> BU_ALTIVEC_1 (VUPKLSH,              "vupklsh",        CONST,  altivec_vupklsh)
> +BU_ALTIVEC_1 (UNPACKH_V4SF,   "unpackh_v4sf",        CONST,  
> altivec_unpackh_v4sf)
> +BU_ALTIVEC_1 (UNPACKL_V4SF,   "unpackl_v4sf",        CONST,  
> altivec_unpackl_v4sf)
> 
> BU_ALTIVEC_1 (VREVE_V2DI,      "vreve_v2di", CONST,  altivec_vrevev2di2)
> BU_ALTIVEC_1 (VREVE_V4SI,      "vreve_v4si", CONST,  altivec_vrevev4si2)
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index f4b1bf7..cd50d4e 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -865,7 +865,7 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>     RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
>   { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
>     RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
> -  { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
> +  { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_UNPACKH_V4SF,
>     RS6000_BTI_V2DF, RS6000_BTI_V4SF, 0, 0 },
>   { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
>     RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
> @@ -897,7 +897,7 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>     RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
>   { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
>     RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
> -  { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLPX,
> +  { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_UNPACKL_V4SF,
>     RS6000_BTI_V2DF, RS6000_BTI_V4SF, 0, 0 },
>   { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
>     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
> diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c
> new file mode 100644
> index 0000000..938fd36
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/altivec-1-runnable.c
> @@ -0,0 +1,257 @@
> +/* { dg-do compile { target powerpc*-*-* } } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-mpower8-vector -maltivec" } */
> +
> +#include <altivec.h>
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +/* Endian considerations: The "high" half of a vector with n elements is the
> +   first n/2 elements of the vector. For little endian, these elements are in
> +   the rightmost half of the vector. For big endian, these elements are in 
> the
> +   leftmost half of the vector.  */
> +
> +
> +void abort (void);
> +
> +int main ()
> +{
> +  int i;
> +  vector bool short vec_bs_arg;
> +  vector bool short vec_bs_result, vec_bs_expected;
> +  vector bool int vec_bi_arg;
> +  vector bool int vec_bi_result, vec_bi_expected;
> +  vector bool char vec_bc_arg;
> +  vector bool char vec_bc_result, vec_bc_expected;
> +  vector signed short vec_ss_arg;
> +  vector signed short vec_ss_result, vec_ss_expected;
> +  vector signed int vec_si_arg;
> +  vector signed int vec_si_result, vec_si_expected;
> +  vector signed char vec_sc_arg;
> +  vector signed char vec_sc_result, vec_sc_expected;
> +  vector float vec_float_arg;
> +  vector double vec_double_result, vec_double_expected;
> +  vector pixel vec_pixel_arg;
> +  vector unsigned int vec_ui_result, vec_ui_expected;
> +
> +  union conv {
> +       double d;
> +       unsigned long long l;
> +  } conv_exp, conv_val;
> +
> +  vec_bs_arg = (vector bool short){ 0, 101, 202, 303,
> +                                 404, 505, 606, 707 };
> +  vec_bi_expected = (vector bool int){ 0, 101, 202, 303 };
> +
> +  vec_bi_result = vec_unpackh (vec_bs_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_bi_expected[i] != vec_bi_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(),  vec_bi_expected[%d] = %d does not 
> match vec_bi_result[%d] = %d\n",
> +           i, vec_bi_expected[i], i, vec_bi_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_bi_expected = (vector bool int){ 404, 505, 606, 707 };
> +  vec_bi_result = vec_unpackl (vec_bs_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_bi_expected[i] != vec_bi_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl(), vec_bi_expected[%d] = %d does not match 
> vec_bi_result[%d] = %d\n",
> +           i, vec_bi_expected[i], i, vec_bi_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  
> +  vec_ss_arg = (vector signed short){ 0, 101, 202, 303,
> +                                 404, 505, 606, 707 };
> +  vec_si_expected = (vector signed int){ 0, 101, 202, 303 };
> +
> +  vec_si_result = vec_unpackh (vec_ss_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_si_expected[i] != vec_si_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(), vec_si_expected[%d] = %d does not match 
> vec_si_result[%d] = %d\n",
> +           i, vec_si_expected[i], i, vec_si_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_si_expected = (vector signed int){ 404, 505, 606, 707 };
> +
> +  vec_si_result = vec_unpackl (vec_ss_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_si_expected[i] != vec_si_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl(), vec_si_expected[%d] = %d does not match 
> vec_si_result[%d] = %d\n",
> +           i, vec_si_expected[i], i, vec_si_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +
> +  vec_pixel_arg = (vector pixel){ 0x0, 0x65, 0xca, 0x12f,
> +                               0x194, 0x1f9, 0x25e, 0x2c3 };
> +  vec_ui_expected = (vector unsigned int){ 0x0, 0x305, 0x60a, 0x90f };
> +
> +  vec_ui_result = vec_unpackh (vec_pixel_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_ui_expected[i] != vec_ui_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(), vec_ui_expected[%d] = 0x%x does not 
> match vec_ui_result[%d] = 0x%x\n",
> +           i, vec_ui_expected[i], i, vec_ui_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_ui_expected = (vector unsigned int){ 0xc14, 0xf19, 0x121e, 0x1603 };
> +
> +  vec_ui_result = vec_unpackl (vec_pixel_arg);
> +
> +  for (i = 0; i < 4; i++) {
> +    if (vec_ui_expected[i] != vec_ui_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl(), vec_ui_expected[%d] = 0x%x does not 
> match vec_ui_result[%d] = 0x%x\n",
> +           i, vec_ui_expected[i], i, vec_ui_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +
> +  vec_bc_arg = (vector bool char){ 0, 1, 0, 1, 0, 1, 0, 1,
> +                                0, 0, 1, 1, 0, 0, 1, 1 };
> +
> +  vec_bs_expected = (vector bool short){ 0, 1, 0, 1, 0, 1, 0, 1 };
> +
> +  vec_bs_result = vec_unpackh (vec_bc_arg);
> +
> +  for (i = 0; i < 8; i++) {
> +    if (vec_bs_expected[i] != vec_bs_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(), vec_bs_expected[%d] = %d does not match 
> vec_bs_result[%d] = %d\n",
> +           i, vec_bs_expected[i], i, vec_bs_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_bs_expected = (vector bool short){ 0, 0, 1, 1, 0, 0, 1, 1 };
> +
> +  vec_bs_result = vec_unpackl (vec_bc_arg);
> +
> +  for (i = 0; i < 8; i++) {
> +    if (vec_bs_expected[i] != vec_bs_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(), vec_bs_expected[%d] = %d does not match 
> vec_bs_result[%d] = %d\n",
> +           i, vec_bs_expected[i], i, vec_bs_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_bs_expected = (vector bool short){ 0, 0, 1, 1, 0, 0, 1, 1 };
> +
> +  vec_bs_result = vec_unpackl (vec_bc_arg);
> +
> +  for (i = 0; i < 8; i++) {
> +    if (vec_bs_expected[i] != vec_bs_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl(), vec_bs_expected[%d] = %d does not match 
> vec_bs_result[%d] = %d\n",
> +           i, vec_bs_expected[i], i, vec_bs_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +
> +  vec_sc_arg = (vector signed char){ 0, 1, 2, 3, 4, 5, 6, 7,
> +                                  8, 9, 10, 11, 12, 13, 14, 15 };
> +
> +  vec_ss_expected = (vector signed short){ 0, 1, 2, 3, 4, 5, 6, 7 };
> +
> +  vec_ss_result = vec_unpackh (vec_sc_arg);
> +
> +  for (i = 0; i < 8; i++) {
> +    if (vec_ss_expected[i] != vec_ss_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh(), vec_ss_expected[%d] = %d does not match 
> vec_ss_result[%d] = %d\n",
> +           i, vec_ss_expected[i], i, vec_ss_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_ss_expected = (vector signed short){ 8, 9, 10, 11, 12, 13, 14, 15 };
> +
> +  vec_ss_result = vec_unpackl (vec_sc_arg);
> +
> +  for (i = 0; i < 8; i++) {
> +    if (vec_ss_expected[i] != vec_ss_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl(), vec_ss_expected[%d] = %d does not match 
> vec_ss_result[%d] = %d\n",
> +           i, vec_ss_expected[i], i, vec_ss_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +  
> +
> +  vec_float_arg = (vector float){ 0.0, 1.5, 2.5, 3.5 };
> +
> +  vec_double_expected = (vector double){ 0.0, 1.5 };
> +
> +  vec_double_result = vec_unpackh (vec_float_arg);
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_double_expected[i] != vec_double_result[i])
> +      {
> +#if DEBUG
> +      printf("ERROR: vec_unpackh(), vec_double_expected[%d] = %f does not 
> match vec_double_result[%d] = %f\n",
> +             i, vec_double_expected[i], i, vec_double_result[i]);
> +      conv_val.d = vec_double_result[i];
> +      conv_exp.d = vec_double_expected[i];
> +      printf("     vec_unpackh(), vec_double_expected[%d] = 0x%llx does not 
> match vec_double_result[%d] = 0x%llx\n",
> +             i, conv_exp.l, i,conv_val.l);
> +#else
> +      abort();
> +#endif
> +    }
> +  }
> +
> +  vec_double_expected = (vector double){ 2.5, 3.5 };
> +
> +  vec_double_result = vec_unpackl (vec_float_arg);
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_double_expected[i] != vec_double_result[i])
> +      {
> +#if DEBUG
> +         printf("ERROR: vec_unpackl() vec_double_expected[%d] = %f does not 
> match vec_double_result[%d] = %f\n",
> +             i, vec_double_expected[i], i, vec_double_result[i]);
> +      conv_val.d = vec_double_result[i];
> +      conv_exp.d = vec_double_expected[i];
> +      printf("     vec_unpackh(), vec_double_expected[%d] = 0x%llx does not 
> match vec_double_result[%d] = 0x%llx\n",
> +             i, conv_exp.l, i,conv_val.l);
> +#else
> +         abort();
> +#endif
> +      }
> +  }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c
> new file mode 100644
> index 0000000..eaae84a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/altivec-2-runnable.c
> @@ -0,0 +1,94 @@
> +/* { dg-do compile { target powerpc*-*-* } } */
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-mpower8-vector -mvsx" } */
> +
> +#include <altivec.h>
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +void abort (void);
> +
> +/* Endian considerations: The "high" half of a vector with n elements is the
> +   first n/2 elements of the vector. For little endian, these elements are in
> +   the rightmost half of the vector. For big endian, these elements are in 
> the
> +   leftmost half of the vector.  */
> +
> +int main ()
> +{
> +  int i;
> +  vector bool int vec_bi_arg;
> +  vector bool long long vec_bll_result, vec_bll_expected;
> +
> +  vector signed int vec_si_arg;
> +  vector signed long long int vec_slli_result, vec_slli_expected;
> +
> +  /*  use of ‘long long’ in AltiVec types requires -mvsx */
> +  /* __builtin_altivec_vupkhsw and __builtin_altivec_vupklsw
> +     requires the -mpower8-vector option */
> +
> +  vec_bi_arg = (vector bool int){ 0, 1, 1, 0 };
> +
> +  vec_bll_expected = (vector bool long long){ 0, 1 };
> +
> +  vec_bll_result = vec_unpackh (vec_bi_arg);
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_bll_expected[i] != vec_bll_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh, vec_bll_expected[%d] = %d does not match 
> vec_bll_result[%d] = %d\n",
> +           i, vec_bll_expected[i], i, vec_bll_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_bll_expected = (vector bool long long){ 1, 0 };
> +
> +  vec_bll_result = vec_unpackl (vec_bi_arg);
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_bll_expected[i] != vec_bll_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl, vec_bll_expected[%d] = %d does not match 
> vec_bll_result[%d] = %d\n",
> +           i, vec_bll_expected[i], i, vec_bll_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +
> +  vec_si_arg = (vector signed int){ 0, 101, 202, 303 };
> +
> +  vec_slli_expected = (vector signed long long int){ 0, 101 };
> +
> +  vec_slli_result = vec_unpackh (vec_si_arg);
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_slli_expected[i] != vec_slli_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackh, vec_slli_expected[%d] = %d does not match 
> vec_slli_result[%d] = %d\n",
> +           i, vec_slli_expected[i], i, vec_slli_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +  vec_slli_result = vec_unpackl (vec_si_arg);
> +  vec_slli_expected = (vector signed long long int){ 202, 303 };
> +
> +  for (i = 0; i < 2; i++) {
> +    if (vec_slli_expected[i] != vec_slli_result[i])
> +#if DEBUG
> +       printf("ERROR: vec_unpackl, vec_slli_expected[%d] = %d does not match 
> vec_slli_result[%d] = %d\n",
> +           i, vec_slli_expected[i], i, vec_slli_result[i]);
> +#else
> +       abort();
> +#endif
> +  }
> +
> +
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-7.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-7.c
> index 94cb69e..ffeaf51 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-7.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-7.c
> @@ -17,10 +17,9 @@ int main2 ()
> }
> 
> /* Expected results:
> -     vec_unpackl                    vupkhsh
> -     vec_unpackh                    vupklsh
> +     vec_unpackl                    xvcvspdp
> +     vec_unpackh                    xvcvspdp
> */
> 
> -/* { dg-final { scan-assembler-times "vupkhpx" 1 } } */
> -/* { dg-final { scan-assembler-times "vupklpx" 1 } } */
> +/* { dg-final { scan-assembler-times "xvcvspdp" 2 } } */
> 
> -- 
> 2.7.4
> 

Reply via email to