> On 6/27/25 8:09 PM, Juergen Christ wrote:
> > The s390 backend missed constant vector permutation cases that can be
> > implemented with the vector pack instruction or by changing the size of
> > the vector elements during a vector merge.  This patch enables some more
> > patterns that do not need to load a constant vector for the permutation.
> > 
> > Bootstrapped and regtested on s390.  Okay for trunk?
> > 
> > gcc/ChangeLog:
> > 
> >     * config/s390/s390.cc (expand_perm_with_merge): Add size change cases.
> >     (expand_perm_with_pack): New function.
> >     (vectorize_vec_perm_const_1): Wire up new function.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> >     * gcc.target/s390/vector/vec-perm-merge-1.c: New test.
> >     * gcc.target/s390/vector/vec-perm-pack-1.c: New test.
> > 
> > Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
> 
> Ok. Thanks!
> 
> 
> Andreas

I guess that, after the recent change set from Jakub, I should add
-fno-stack-protector to the new test files.  Is the patch still okay
with this change?

> 
> 
> > ---
> >   gcc/config/s390/s390.cc                       | 169 +++++++++++-
> >   .../gcc.target/s390/vector/vec-perm-merge-1.c | 242 ++++++++++++++++++
> >   .../gcc.target/s390/vector/vec-perm-pack-1.c  | 133 ++++++++++
> >   3 files changed, 542 insertions(+), 2 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c
> >   create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c
> > 
> > diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> > index 38267202f668..de9c15c7bd42 100644
> > --- a/gcc/config/s390/s390.cc
> > +++ b/gcc/config/s390/s390.cc
> > @@ -18041,9 +18041,34 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
> >     static const unsigned char lo_perm_qi_swap[16]
> >       = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
> > +  static const unsigned char hi_perm_qi_di[16]
> > +    = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
> > +  static const unsigned char hi_perm_qi_si[16]
> > +    = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
> > +  static const unsigned char hi_perm_qi_hi[16]
> > +    = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23};
> > +
> > +  static const unsigned char lo_perm_qi_di[16]
> > +    = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
> > +  static const unsigned char lo_perm_qi_si[16]
> > +    = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
> > +  static const unsigned char lo_perm_qi_hi[16]
> > +    = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
> > +
> > +  static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11};
> > +  static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11};
> > +
> > +  static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, 15};
> > +  static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 15};
> > +
> > +  static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5};
> > +
> > +  static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7};
> > +
> >     bool merge_lo_p = false;
> >     bool merge_hi_p = false;
> >     bool swap_operands_p = false;
> > +  machine_mode mergemode = d.vmode;
> >     if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
> >         || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
> > @@ -18075,6 +18100,75 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
> >         merge_lo_p = true;
> >         swap_operands_p = true;
> >       }
> > +  else if (d.nelt == 16)
> > +    {
> > +      if (memcmp (d.perm, hi_perm_qi_di, 16) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +      else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V4SImode;
> > +   }
> > +      else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V8HImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V4SImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V8HImode;
> > +   }
> > +    }
> > +  else if (d.nelt == 8)
> > +    {
> > +      if (memcmp (d.perm, hi_perm_hi_di, 8) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +      else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V4SImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V4SImode;
> > +   }
> > +    }
> > +  else if (d.nelt == 4)
> > +    {
> > +      if (memcmp (d.perm, hi_perm_si_di, 4) == 0)
> > +   {
> > +     merge_hi_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +      else if (memcmp (d.perm, lo_perm_si_di, 4) == 0)
> > +   {
> > +     merge_lo_p = true;
> > +     mergemode = E_V2DImode;
> > +   }
> > +    }
> >     if (!merge_lo_p && !merge_hi_p)
> >       return false;
> > @@ -18082,7 +18176,7 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
> >     if (d.testing_p)
> >       return merge_lo_p || merge_hi_p;
> > -  rtx op0, op1;
> > +  rtx op0, op1, target = d.target;
> >     if (swap_operands_p)
> >       {
> >         op0 = d.op1;
> > @@ -18093,9 +18187,77 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
> >         op0 = d.op0;
> >         op1 = d.op1;
> >       }
> > +  if (mergemode != d.vmode)
> > +    {
> > +      target = simplify_gen_subreg (mergemode, target, d.vmode, 0);
> > +      op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0);
> > +      op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0);
> > +    }
> > +
> > +  s390_expand_merge (target, op0, op1, merge_hi_p);
> > +
> > +  return true;
> > +}
> > +
> > +/* Try to expand the vector permute operation described by D using the vector
> > +   pack instruction vpk.  Return true if vector pack could be used.  */
> > +static bool
> > +expand_perm_with_pack (const struct expand_vec_perm_d &d)
> > +{
> > +  static const unsigned char qi_hi[16]
> > +    = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
> > +  static const unsigned char qi_si[16]
> > +    = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
> > +  static const unsigned char qi_di[16]
> > +    = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
> > +
> > +  static const unsigned char hi_si[8]
> > +    = {1, 3, 5, 7, 9, 11, 13, 15};
> > +  static const unsigned char hi_di[8]
> > +    = {2, 3, 6, 7, 10, 11, 14, 15};
> > +
> > +  static const unsigned char si_di[4]
> > +    = {1, 3, 5, 7};
> > +
> > +  machine_mode packmode, resmode;
> > +  enum insn_code code = CODE_FOR_nothing;
> > +
> > +  if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0)
> > +    {
> > +      packmode = E_V8HImode;
> > +      resmode = E_V16QImode;
> > +      code = CODE_FOR_vec_pack_trunc_v8hi;
> > +    }
> > +  else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0)
> > +      || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0))
> > +    {
> > +      packmode = E_V4SImode;
> > +      resmode = E_V8HImode;
> > +      code = CODE_FOR_vec_pack_trunc_v4si;
> > +    }
> > +  else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0)
> > +      || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0)
> > +      || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0))
> > +    {
> > +      packmode = E_V2DImode;
> > +      resmode = E_V4SImode;
> > +      code = CODE_FOR_vec_pack_trunc_v2di;
> > +    }
> > -  s390_expand_merge (d.target, op0, op1, merge_hi_p);
> > +  if (code == CODE_FOR_nothing)
> > +    return false;
> > +  if (d.testing_p)
> > +    return true;
> > +  rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0);
> > +  rtx op0 = simplify_gen_subreg (packmode,
> > +                            force_reg (GET_MODE (d.op0), d.op0),
> > +                            d.vmode, 0);
> > +  rtx op1 = simplify_gen_subreg (packmode,
> > +                            force_reg (GET_MODE (d.op1), d.op1),
> > +                            d.vmode, 0);
> > +  rtx pat = GEN_FCN (code) (target, op0, op1);
> > +  emit_insn (pat);
> >     return true;
> >   }
> > @@ -18322,6 +18484,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
> >     if (expand_perm_with_merge (d))
> >       return true;
> > +  if (expand_perm_with_pack (d))
> > +    return true;
> > +
> >     if (expand_perm_with_vpdi (d))
> >       return true;
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c
> > new file mode 100644
> > index 000000000000..2b639e306888
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c
> > @@ -0,0 +1,242 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
> > +/* { dg-do run { target { s390_z14_hw } } } */
> > +/* { dg-final {check-function-bodies "**" "" } } */
> > +
> > +#include "vec-types.h"
> > +
> > +/*
> > +** qi_via_hi_hi:
> > +** vmrhh   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_hi_hi (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3],
> > +            a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** qi_via_hi_lo:
> > +** vmrlh   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_hi_lo (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[8], a[9], b[8], b[9], a[10], a[11], b[10], b[11],
> > +            a[12], a[13], b[12], b[13], a[14], a[15], b[14], b[15]};
> > +}
> > +
> > +/*
> > +** qi_via_si_hi:
> > +** vmrhf   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_si_hi (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3],
> > +            a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** qi_via_si_lo:
> > +** vmrlf   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_si_lo (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[8], a[9], a[10], a[11], b[8], b[9], b[10], b[11],
> > +            a[12], a[13], a[14], a[15], b[12], b[13], b[14], b[15]};
> > +}
> > +
> > +/*
> > +** qi_via_di_hi:
> > +** vmrhg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_di_hi (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
> > +            b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** qi_via_di_lo:
> > +** vmrlg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_di_lo (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15],
> > +            b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]};
> > +}
> > +
> > +/*
> > +** hi_via_si_hi:
> > +** vmrhf   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_si_hi (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3]};
> > +}
> > +
> > +/*
> > +** hi_via_si_lo:
> > +** vmrlf   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_si_lo (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** hi_via_di_hi:
> > +** vmrhg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_di_hi (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]};
> > +}
> > +
> > +/*
> > +** hi_via_di_lo:
> > +** vmrlg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_di_lo (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** si_via_di_hi:
> > +** vmrhg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v4si __attribute__((noinline,noipa))
> > +si_via_di_hi (v4si a, v4si b)
> > +{
> > +  return (v4si){a[0], a[1], b[0], b[1]};
> > +}
> > +
> > +/*
> > +** si_via_di_lo:
> > +** vmrlg   %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v4si __attribute__((noinline,noipa))
> > +si_via_di_lo (v4si a, v4si b)
> > +{
> > +  return (v4si){a[2], a[3], b[2], b[3]};
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  static const signed char e_qi_via_hi_hi[16]
> > +    = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23};
> > +  static const signed char e_qi_via_hi_lo[16]
> > +    = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
> > +  static const signed char e_qi_via_si_hi[16]
> > +    = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
> > +  static const signed char e_qi_via_si_lo[16]
> > +    = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
> > +  static const signed char e_qi_via_di_hi[16]
> > +    = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
> > +  static const signed char e_qi_via_di_lo[16]
> > +    = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
> > +
> > +  static const short e_hi_via_si_hi[8] = {0, 1, 8, 9, 2, 3, 10, 11};
> > +  static const short e_hi_via_si_lo[8] = {4, 5, 12, 13, 6, 7, 14, 15};
> > +  static const short e_hi_via_di_hi[8] = {0, 1, 2, 3, 8, 9, 10, 11};
> > +  static const short e_hi_via_di_lo[8] = {4, 5, 6, 7, 12, 13, 14, 15};
> > +
> > +  static const int e_si_via_di_hi[4] = {0, 1, 4, 5};
> > +  static const int e_si_via_di_lo[4] = {2, 3, 6, 7};
> > +
> > +  v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
> > +  v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
> > +  v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7};
> > +  v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15};
> > +  v4si a_si = {0, 1, 2, 3};
> > +  v4si b_si = {4, 5, 6, 7};
> > +  v16qi r_qi;
> > +  v8hi r_hi;
> > +  v4si r_si;
> > +  int i;
> > +
> > +  r_qi = qi_via_hi_hi (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_hi_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_hi_lo (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_hi_lo[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_si_hi (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_si_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_si_lo (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_si_lo[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_di_hi (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_di_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_di_lo (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_di_lo[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_si_hi (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_si_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_si_lo (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_si_lo[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_di_hi (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_di_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_di_lo (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_di_lo[i])
> > +      __builtin_abort ();
> > +
> > +  r_si = si_via_di_hi (a_si, b_si);
> > +  for (i = 0; i < 4; ++i)
> > +    if (r_si[i] != e_si_via_di_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_si = si_via_di_lo (a_si, b_si);
> > +  for (i = 0; i < 4; ++i)
> > +    if (r_si[i] != e_si_via_di_lo[i])
> > +      __builtin_abort ();
> > +
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c
> > new file mode 100644
> > index 000000000000..74aedfce6c88
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c
> > @@ -0,0 +1,133 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
> > +/* { dg-do run { target { s390_z14_hw } } } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +
> > +#include "vec-types.h"
> > +
> > +/*
> > +** qi_via_hi:
> > +** vpkh    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_hi (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[1], a[3], a[5], a[7], a[9], a[11], a[13], a[15],
> > +            b[1], b[3], b[5], b[7], b[9], b[11], b[13], b[15]};
> > +}
> > +
> > +/*
> > +** qi_via_si:
> > +** vpkf    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_si (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[2], a[3], a[6], a[7], a[10], a[11], a[14], a[15],
> > +            b[2], b[3], b[6], b[7], b[10], b[11], b[14], b[15]};
> > +}
> > +
> > +/*
> > +** qi_via_di:
> > +** vpkg    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v16qi __attribute__((noinline,noipa))
> > +qi_via_di (v16qi a, v16qi b)
> > +{
> > +  return (v16qi){a[4], a[5], a[6], a[7], a[12], a[13], a[14], a[15],
> > +            b[4], b[5], b[6], b[7], b[12], b[13], b[14], b[15]};
> > +}
> > +
> > +/*
> > +** hi_via_si:
> > +** vpkf    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_si (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7]};
> > +}
> > +
> > +/*
> > +** hi_via_di:
> > +** vpkg    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v8hi __attribute__((noinline,noipa))
> > +hi_via_di (v8hi a, v8hi b)
> > +{
> > +  return (v8hi){a[2], a[3], a[6], a[7], b[2], b[3], b[6], b[7]};
> > +}
> > +
> > +/*
> > +** si_via_di:
> > +** vpkg    %v24,%v24,%v26
> > +** br      %r14
> > +*/
> > +v4si __attribute__((noinline,noipa))
> > +si_via_di (v4si a, v4si b)
> > +{
> > +  return (v4si){a[1], a[3], b[1], b[3]};
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  static const signed char e_qi_via_hi[16]
> > +    = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
> > +  static const signed char e_qi_via_si[16]
> > +    = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
> > +  static const signed char e_qi_via_di[16]
> > +    = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
> > +
> > +  static const short e_hi_via_si[8] = {1, 3, 5, 7, 9, 11, 13, 15};
> > +  static const short e_hi_via_di[8] = {2, 3, 6, 7, 10, 11, 14, 15};
> > +
> > +  static const int e_si_via_di[4] = {1, 3, 5, 7};
> > +
> > +  v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
> > +  v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
> > +  v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7};
> > +  v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15};
> > +  v4si a_si = {0, 1, 2, 3};
> > +  v4si b_si = {4, 5, 6, 7};
> > +  v16qi r_qi;
> > +  v8hi r_hi;
> > +  v4si r_si;
> > +  int i;
> > +
> > +  r_qi = qi_via_hi (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_hi[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_si (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_si[i])
> > +      __builtin_abort ();
> > +
> > +  r_qi = qi_via_di (a_qi, b_qi);
> > +  for (i = 0; i < 16; ++i)
> > +    if (r_qi[i] != e_qi_via_di[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_si (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_si[i])
> > +      __builtin_abort ();
> > +
> > +  r_hi = hi_via_di (a_hi, b_hi);
> > +  for (i = 0; i < 8; ++i)
> > +    if (r_hi[i] != e_hi_via_di[i])
> > +      __builtin_abort ();
> > +
> > +  r_si = si_via_di (a_si, b_si);
> > +  for (i = 0; i < 4; ++i)
> > +    if (r_si[i] != e_si_via_di[i])
> > +      __builtin_abort ();
> > +  return 0;
> > +}

Reply via email to