> On 6/27/25 8:09 PM, Juergen Christ wrote: > > s390 missed constant vector permutation cases based on the vector pack > > instruction or changing the size of the vector elements during vector > > merge. This enables some more patterns that do not need to load a > > constant vector for permutation. > > > > Bootstrapped and regtested on s390. Okay for trunk? > > > > gcc/ChangeLog: > > > > * config/s390/s390.cc (expand_perm_with_merge): Add size change cases. > > (expand_perm_with_pack): New function. > > (vectorize_vec_perm_const_1): Wire up new function. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/s390/vector/vec-perm-merge-1.c: New test. > > * gcc.target/s390/vector/vec-perm-pack-1.c: New test. > > > > Signed-off-by: Juergen Christ <jchr...@linux.ibm.com> > > Ok. Thanks! > > > Andreas
I guess that, after the recent change set from Jakub, I should add -fno-stack-protector to the new test files. Is the patch still okay with this change? > > > > --- > > gcc/config/s390/s390.cc | 169 +++++++++++- > > .../gcc.target/s390/vector/vec-perm-merge-1.c | 242 ++++++++++++++++++ > > .../gcc.target/s390/vector/vec-perm-pack-1.c | 133 ++++++++++ > > 3 files changed, 542 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c > > create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c > > > > diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc > > index 38267202f668..de9c15c7bd42 100644 > > --- a/gcc/config/s390/s390.cc > > +++ b/gcc/config/s390/s390.cc > > @@ -18041,9 +18041,34 @@ expand_perm_with_merge (const struct > > expand_vec_perm_d &d) > > static const unsigned char lo_perm_qi_swap[16] > > = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}; > > + static const unsigned char hi_perm_qi_di[16] > > + = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}; > > + static const unsigned char hi_perm_qi_si[16] > > + = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}; > > + static const unsigned char hi_perm_qi_hi[16] > > + = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}; > > + > > + static const unsigned char lo_perm_qi_di[16] > > + = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; > > + static const unsigned char lo_perm_qi_si[16] > > + = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}; > > + static const unsigned char lo_perm_qi_hi[16] > > + = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}; > > + > > + static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11}; > > + static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11}; > > + > > + static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, > > 15}; > > + static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 
> > 15}; > > + > > + static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5}; > > + > > + static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7}; > > + > > bool merge_lo_p = false; > > bool merge_hi_p = false; > > bool swap_operands_p = false; > > + machine_mode mergemode = d.vmode; > > if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0) > > || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0) > > @@ -18075,6 +18100,75 @@ expand_perm_with_merge (const struct > > expand_vec_perm_d &d) > > merge_lo_p = true; > > swap_operands_p = true; > > } > > + else if (d.nelt == 16) > > + { > > + if (memcmp (d.perm, hi_perm_qi_di, 16) == 0) > > + { > > + merge_hi_p = true; > > + mergemode = E_V2DImode; > > + } > > + else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0) > > + { > > + merge_hi_p = true; > > + mergemode = E_V4SImode; > > + } > > + else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0) > > + { > > + merge_hi_p = true; > > + mergemode = E_V8HImode; > > + } > > + else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V2DImode; > > + } > > + else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V4SImode; > > + } > > + else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V8HImode; > > + } > > + } > > + else if (d.nelt == 8) > > + { > > + if (memcmp (d.perm, hi_perm_hi_di, 8) == 0) > > + { > > + merge_hi_p = true; > > + mergemode = E_V2DImode; > > + } > > + else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0) > > + { > > + merge_hi_p = true; > > + mergemode = E_V4SImode; > > + } > > + else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V2DImode; > > + } > > + else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V4SImode; > > + } > > + } > > + else if (d.nelt == 4) > > + { > > + if (memcmp (d.perm, hi_perm_si_di, 4) == 0) > > + { 
> > + merge_hi_p = true; > > + mergemode = E_V2DImode; > > + } > > + else if (memcmp (d.perm, lo_perm_si_di, 4) == 0) > > + { > > + merge_lo_p = true; > > + mergemode = E_V2DImode; > > + } > > + } > > if (!merge_lo_p && !merge_hi_p) > > return false; > > @@ -18082,7 +18176,7 @@ expand_perm_with_merge (const struct > > expand_vec_perm_d &d) > > if (d.testing_p) > > return merge_lo_p || merge_hi_p; > > - rtx op0, op1; > > + rtx op0, op1, target = d.target; > > if (swap_operands_p) > > { > > op0 = d.op1; > > @@ -18093,9 +18187,77 @@ expand_perm_with_merge (const struct > > expand_vec_perm_d &d) > > op0 = d.op0; > > op1 = d.op1; > > } > > + if (mergemode != d.vmode) > > + { > > + target = simplify_gen_subreg (mergemode, target, d.vmode, 0); > > + op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0); > > + op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0); > > + } > > + > > + s390_expand_merge (target, op0, op1, merge_hi_p); > > + > > + return true; > > +} > > + > > +/* Try to expand the vector permute operation described by D using the > > vector > > + pack instruction vpk. Return true if vector pack could be used. 
*/ > > +static bool > > +expand_perm_with_pack (const struct expand_vec_perm_d &d) > > +{ > > + static const unsigned char qi_hi[16] > > + = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; > > + static const unsigned char qi_si[16] > > + = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; > > + static const unsigned char qi_di[16] > > + = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; > > + > > + static const unsigned char hi_si[8] > > + = {1, 3, 5, 7, 9, 11, 13, 15}; > > + static const unsigned char hi_di[8] > > + = {2, 3, 6, 7, 10, 11, 14, 15}; > > + > > + static const unsigned char si_di[4] > > + = {1, 3, 5, 7}; > > + > > + machine_mode packmode, resmode; > > + enum insn_code code = CODE_FOR_nothing; > > + > > + if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0) > > + { > > + packmode = E_V8HImode; > > + resmode = E_V16QImode; > > + code = CODE_FOR_vec_pack_trunc_v8hi; > > + } > > + else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0) > > + || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0)) > > + { > > + packmode = E_V4SImode; > > + resmode = E_V8HImode; > > + code = CODE_FOR_vec_pack_trunc_v4si; > > + } > > + else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0) > > + || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0) > > + || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0)) > > + { > > + packmode = E_V2DImode; > > + resmode = E_V4SImode; > > + code = CODE_FOR_vec_pack_trunc_v2di; > > + } > > - s390_expand_merge (d.target, op0, op1, merge_hi_p); > > + if (code == CODE_FOR_nothing) > > + return false; > > + if (d.testing_p) > > + return true; > > + rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0); > > + rtx op0 = simplify_gen_subreg (packmode, > > + force_reg (GET_MODE (d.op0), d.op0), > > + d.vmode, 0); > > + rtx op1 = simplify_gen_subreg (packmode, > > + force_reg (GET_MODE (d.op1), d.op1), > > + d.vmode, 0); > > + rtx pat = GEN_FCN (code) (target, op0, op1); > > + emit_insn (pat); > > 
return true; > > } > > @@ -18322,6 +18484,9 @@ vectorize_vec_perm_const_1 (const struct > > expand_vec_perm_d &d) > > if (expand_perm_with_merge (d)) > > return true; > > + if (expand_perm_with_pack (d)) > > + return true; > > + > > if (expand_perm_with_vpdi (d)) > > return true; > > diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c > > b/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c > > new file mode 100644 > > index 000000000000..2b639e306888 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/s390/vector/vec-perm-merge-1.c > > @@ -0,0 +1,242 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */ > > +/* { dg-do run { target { s390_z14_hw } } } */ > > +/* { dg-final {check-function-bodies "**" "" } } */ > > + > > +#include "vec-types.h" > > + > > +/* > > +** qi_via_hi_hi: > > +** vmrhh %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_hi_hi (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3], > > + a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]}; > > +} > > + > > +/* > > +** qi_via_hi_lo: > > +** vmrlh %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_hi_lo (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[8], a[9], b[8], b[9], a[10], a[11], b[10], b[11], > > + a[12], a[13], b[12], b[13], a[14], a[15], b[14], b[15]}; > > +} > > + > > +/* > > +** qi_via_si_hi: > > +** vmrhf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_si_hi (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3], > > + a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]}; > > +} > > + > > +/* > > +** qi_via_si_lo: > > +** vmrlf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_si_lo (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[8], a[9], a[10], a[11], b[8], b[9], 
b[10], b[11], > > + a[12], a[13], a[14], a[15], b[12], b[13], b[14], b[15]}; > > +} > > + > > +/* > > +** qi_via_di_hi: > > +** vmrhg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_di_hi (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], > > + b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]}; > > +} > > + > > +/* > > +** qi_via_di_lo: > > +** vmrlg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_di_lo (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15], > > + b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]}; > > +} > > + > > +/* > > +** hi_via_si_hi: > > +** vmrhf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_si_hi (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3]}; > > +} > > + > > +/* > > +** hi_via_si_lo: > > +** vmrlf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_si_lo (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]}; > > +} > > + > > +/* > > +** hi_via_di_hi: > > +** vmrhg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_di_hi (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; > > +} > > + > > +/* > > +** hi_via_di_lo: > > +** vmrlg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_di_lo (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]}; > > +} > > + > > +/* > > +** si_via_di_hi: > > +** vmrhg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v4si __attribute__((noinline,noipa)) > > +si_via_di_hi (v4si a, v4si b) > > +{ > > + return (v4si){a[0], a[1], b[0], b[1]}; > > +} > > + > > +/* > > +** si_via_di_lo: > > +** vmrlg %v24,%v24,%v26 > > 
+** br %r14 > > +*/ > > +v4si __attribute__((noinline,noipa)) > > +si_via_di_lo (v4si a, v4si b) > > +{ > > + return (v4si){a[2], a[3], b[2], b[3]}; > > +} > > + > > +int > > +main () > > +{ > > + static const signed char e_qi_via_hi_hi[16] > > + = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}; > > + static const signed char e_qi_via_hi_lo[16] > > + = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}; > > + static const signed char e_qi_via_si_hi[16] > > + = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}; > > + static const signed char e_qi_via_si_lo[16] > > + = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}; > > + static const signed char e_qi_via_di_hi[16] > > + = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}; > > + static const signed char e_qi_via_di_lo[16] > > + = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; > > + > > + static const short e_hi_via_si_hi[8] = {0, 1, 8, 9, 2, 3, 10, 11}; > > + static const short e_hi_via_si_lo[8] = {4, 5, 12, 13, 6, 7, 14, 15}; > > + static const short e_hi_via_di_hi[8] = {0, 1, 2, 3, 8, 9, 10, 11}; > > + static const short e_hi_via_di_lo[8] = {4, 5, 6, 7, 12, 13, 14, 15}; > > + > > + static const int e_si_via_di_hi[4] = {0, 1, 4, 5}; > > + static const int e_si_via_di_lo[4] = {2, 3, 6, 7}; > > + > > + v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; > > + v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, > > 30, 31}; > > + v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7}; > > + v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15}; > > + v4si a_si = {0, 1, 2, 3}; > > + v4si b_si = {4, 5, 6, 7}; > > + v16qi r_qi; > > + v8hi r_hi; > > + v4si r_si; > > + int i; > > + > > + r_qi = qi_via_hi_hi (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_hi_hi[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_hi_lo (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_hi_lo[i]) > > + 
__builtin_abort (); > > + > > + r_qi = qi_via_si_hi (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_si_hi[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_si_lo (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_si_lo[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_di_hi (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_di_hi[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_di_lo (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_di_lo[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_si_hi (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_si_hi[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_si_lo (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_si_lo[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_di_hi (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_di_hi[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_di_lo (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_di_lo[i]) > > + __builtin_abort (); > > + > > + r_si = si_via_di_hi (a_si, b_si); > > + for (i = 0; i < 4; ++i) > > + if (r_si[i] != e_si_via_di_hi[i]) > > + __builtin_abort (); > > + > > + r_si = si_via_di_lo (a_si, b_si); > > + for (i = 0; i < 4; ++i) > > + if (r_si[i] != e_si_via_di_lo[i]) > > + __builtin_abort (); > > + > > + return 0; > > +} > > diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c > > b/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c > > new file mode 100644 > > index 000000000000..74aedfce6c88 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/s390/vector/vec-perm-pack-1.c > > @@ -0,0 +1,133 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */ > > +/* { dg-do run { target { s390_z14_hw } } } */ > > +/* { dg-final { check-function-bodies "**" "" } } */ > > + > > 
+#include "vec-types.h" > > + > > +/* > > +** qi_via_hi: > > +** vpkh %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_hi (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[1], a[3], a[5], a[7], a[9], a[11], a[13], a[15], > > + b[1], b[3], b[5], b[7], b[9], b[11], b[13], b[15]}; > > +} > > + > > +/* > > +** qi_via_si: > > +** vpkf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_si (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[2], a[3], a[6], a[7], a[10], a[11], a[14], a[15], > > + b[2], b[3], b[6], b[7], b[10], b[11], b[14], b[15]}; > > +} > > + > > +/* > > +** qi_via_di: > > +** vpkg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v16qi __attribute__((noinline,noipa)) > > +qi_via_di (v16qi a, v16qi b) > > +{ > > + return (v16qi){a[4], a[5], a[6], a[7], a[12], a[13], a[14], a[15], > > + b[4], b[5], b[6], b[7], b[12], b[13], b[14], b[15]}; > > +} > > + > > +/* > > +** hi_via_si: > > +** vpkf %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_si (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7]}; > > +} > > + > > +/* > > +** hi_via_di: > > +** vpkg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v8hi __attribute__((noinline,noipa)) > > +hi_via_di (v8hi a, v8hi b) > > +{ > > + return (v8hi){a[2], a[3], a[6], a[7], b[2], b[3], b[6], b[7]}; > > +} > > + > > +/* > > +** si_via_di: > > +** vpkg %v24,%v24,%v26 > > +** br %r14 > > +*/ > > +v4si __attribute__((noinline,noipa)) > > +si_via_di (v4si a, v4si b) > > +{ > > + return (v4si){a[1], a[3], b[1], b[3]}; > > +} > > + > > +int > > +main () > > +{ > > + static const signed char e_qi_via_hi[16] > > + = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; > > + static const signed char e_qi_via_si[16] > > + = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; > > + static const signed char e_qi_via_di[16] > > + = {4, 5, 6, 7, 12, 13, 14, 15, 
20, 21, 22, 23, 28, 29, 30, 31}; > > + > > + static const short e_hi_via_si[8] = {1, 3, 5, 7, 9, 11, 13, 15}; > > + static const short e_hi_via_di[8] = {2, 3, 6, 7, 10, 11, 14, 15}; > > + > > + static const int e_si_via_di[4] = {1, 3, 5, 7}; > > + > > + v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; > > + v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, > > 30, 31}; > > + v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7}; > > + v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15}; > > + v4si a_si = {0, 1, 2, 3}; > > + v4si b_si = {4, 5, 6, 7}; > > + v16qi r_qi; > > + v8hi r_hi; > > + v4si r_si; > > + int i; > > + > > + r_qi = qi_via_hi (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_hi[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_si (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_si[i]) > > + __builtin_abort (); > > + > > + r_qi = qi_via_di (a_qi, b_qi); > > + for (i = 0; i < 16; ++i) > > + if (r_qi[i] != e_qi_via_di[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_si (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_si[i]) > > + __builtin_abort (); > > + > > + r_hi = hi_via_di (a_hi, b_hi); > > + for (i = 0; i < 8; ++i) > > + if (r_hi[i] != e_hi_via_di[i]) > > + __builtin_abort (); > > + > > + r_si = si_via_di (a_si, b_si); > > + for (i = 0; i < 4; ++i) > > + if (r_si[i] != e_si_via_di[i]) > > + __builtin_abort (); > > + return 0; > > +}