On 7 November 2011 20:35, Jakub Jelinek <ja...@redhat.com> wrote: > Hi!
Hi, > > Here is an updated patch, which handles both modifier == NONE > and modifier == NARROW for SLP, after all it wasn't that hard. > Additionally it checks that the fndecls and various call flags > match, and adds some testcases. > > Bootstrapped/regtested on x86_64-linux and i686-linux, > ok for trunk? > @@ -1723,6 +1764,55 @@ vectorizable_call (gimple stmt, gimple_s > else > VEC_truncate (tree, vargs, 0); > > + if (slp_node) > + { > + VEC (slp_void_p, heap) *vec_defs > + = VEC_alloc (slp_void_p, heap, nargs); > + VEC (tree, heap) *vec_oprnds0; > + > + for (i = 0; i < nargs; i++) > + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); > + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); > + vec_oprnds0 > + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); > + > + /* Arguments are ready. Create the new vector stmt. */ > + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) Was this line left by mistake? > + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0); > + i += 2) > + { > + size_t k; > + VEC_truncate (tree, vargs, 0); > + for (k = 0; k < nargs; k++) > + { > + VEC (tree, heap) *vec_oprndsk > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, k); > + VEC_quick_push (tree, vargs, > + VEC_index (tree, vec_oprndsk, i)); > + VEC_quick_push (tree, vargs, > + VEC_index (tree, vec_oprndsk, i + 1)); > + } > + new_stmt = gimple_build_call_vec (fndecl, vargs); > + new_temp = make_ssa_name (vec_dest, new_stmt); > + gimple_call_set_lhs (new_stmt, new_temp); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + mark_symbols_for_renaming (new_stmt); > + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), > + new_stmt); > + } > + > + for (i = 0; i < nargs; i++) > + { > + VEC (tree, heap) *vec_oprndsi > + = (VEC (tree, heap) *) > + VEC_index (slp_void_p, vec_defs, i); > + VEC_free (tree, heap, vec_oprndsi); > + } > + VEC_free (slp_void_p, heap, vec_defs); > + continue; > + } > + > for (i = 0; i < nargs; i++) > { > op = 
gimple_call_arg (stmt, i); Could you please rearrange the tests (separate basic blocks and loops) and make them actually test that basic blocks/loops were vectorized? Also there is no need for dg-do run. OK otherwise. Thanks, Ira > --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-07 > 15:05:36.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-07 > 15:07:10.000000000 +0100 > @@ -0,0 +1,100 @@ > +/* { dg-do run } */ > + > +#include "tree-vect.h" > + > +extern float copysignf (float, float); > +extern float sqrtf (float); > +extern float fabsf (float); > +extern void abort (void); > +float a[64], b[64], c[64], d[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (void) > +{ > + a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]); > + a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]); > + a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]); > + a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]); > + a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]); > + a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]); > + a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]); > + a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]); > +} > + > +__attribute__((noinline, noclone)) void > +f2 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf > (d[4 * i + 0]); > + a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf > (d[4 * i + 1]); > + a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf > (d[4 * i + 2]); > + a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf > (d[4 * i + 3]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f3 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf > (d[2 * i + 0]); > + a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf > (d[2 * i + 1]); > + } > +} > + > 
+__attribute__((noinline, noclone)) void > +f4 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 64; i++) > + { > + asm (""); > + b[i] = (i & 1) ? -4 * i : 4 * i; > + c[i] = (i & 2) ? -8 * i : 8 * i; > + d[i] = i * i; > + } > + f1 (); > + for (i = 0; i < 8; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f) > + abort (); > + else > + a[i] = 131.25; > + f2 (16); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= > 0.0001f) > + abort (); > + else > + a[i] = 131.25; > + f3 (16); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= > 0.0001f) > + abort (); > + else > + a[i] = 131.25; > + f4 (); > + for (i = 0; i < 64; i++) > + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj 2011-11-07 > 15:09:00.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c 2011-11-07 > 15:11:58.000000000 +0100 > @@ -0,0 +1,166 @@ > +/* { dg-do run } */ > + > +#include "tree-vect.h" > + > +extern long int lrint (double); > +extern void abort (void); > +long int a[64]; > +double b[64]; > + > +__attribute__((noinline, noclone)) void > +f1 (void) > +{ > + a[0] = lrint (b[0]) + 1; > + a[1] = lrint (b[1]) + 2; > + a[2] = lrint (b[2]) + 3; > + a[3] = lrint (b[3]) + 4; > + a[4] = lrint (b[4]) + 5; > + a[5] = lrint (b[5]) + 6; > + a[6] = lrint (b[6]) + 7; > + a[7] = lrint (b[7]) + 8; > +} > + > +__attribute__((noinline, noclone)) void > +f2 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = lrint (b[4 * i + 0]) + 1; > + a[4 * i + 1] = lrint (b[4 
* i + 1]) + 2; > + a[4 * i + 2] = lrint (b[4 * i + 2]) + 3; > + a[4 * i + 3] = lrint (b[4 * i + 3]) + 4; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f3 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = lrint (b[2 * i + 0]) + 1; > + a[2 * i + 1] = lrint (b[2 * i + 1]) + 2; > + } > +} > + > +__attribute__((noinline, noclone)) void > +f4 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = lrint (b[i]) + 1; > +} > + > +__attribute__((noinline, noclone)) void > +f5 (void) > +{ > + a[0] = lrint (b[0]); > + a[1] = lrint (b[1]); > + a[2] = lrint (b[2]); > + a[3] = lrint (b[3]); > + a[4] = lrint (b[4]); > + a[5] = lrint (b[5]); > + a[6] = lrint (b[6]); > + a[7] = lrint (b[7]); > +} > + > +__attribute__((noinline, noclone)) void > +f6 (int n) > +{ > + int i; > + for (i = 0; i < n; i++) > + { > + a[4 * i + 0] = lrint (b[4 * i + 0]); > + a[4 * i + 1] = lrint (b[4 * i + 1]); > + a[4 * i + 2] = lrint (b[4 * i + 2]); > + a[4 * i + 3] = lrint (b[4 * i + 3]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f7 (int n) > +{ > + int i; > + for (i = 0; i < 2 * n; i++) > + { > + a[2 * i + 0] = lrint (b[2 * i + 0]); > + a[2 * i + 1] = lrint (b[2 * i + 1]); > + } > +} > + > +__attribute__((noinline, noclone)) void > +f8 (void) > +{ > + int i; > + for (i = 0; i < 64; i++) > + a[i] = lrint (b[i]); > +} > + > +__attribute__((noinline, noclone)) int > +main1 () > +{ > + int i; > + > + for (i = 0; i < 64; i++) > + { > + asm (""); > + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; > + } > + f1 (); > + for (i = 0; i < 8; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i) > + abort (); > + else > + a[i] = 131.25; > + f2 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3)) > + abort (); > + else > + a[i] = 131.25; > + f3 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? 
-4 * i : 4 * i) + 1 + (i & 1)) > + abort (); > + else > + a[i] = 131.25; > + f4 (); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1) > + abort (); > + else > + a[i] = 131.25; > + f5 (); > + for (i = 0; i < 8; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + else > + a[i] = 131.25; > + f6 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + else > + a[i] = 131.25; > + f7 (16); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + else > + a[i] = 131.25; > + f8 (); > + for (i = 0; i < 64; i++) > + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) > + abort (); > + return 0; > +} > + > +int > +main () > +{ > + check_vect (); > + return main1 (); > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > > > Jakub >