On 7 November 2011 20:35, Jakub Jelinek ja...@redhat.com wrote:
Hi!
Hi,
Here is an updated patch, which handles both modifier == NONE
and modifier == NARROW for SLP, after all it wasn't that hard.
Additionally it checks that the fndecls and various call flags
match, and adds some testcases.
Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?
@@ -1723,6 +1764,55 @@ vectorizable_call (gimple stmt, gimple_s
else
VEC_truncate (tree, vargs, 0);
+ if (slp_node)
+ {
+ VEC (slp_void_p, heap) *vec_defs
+ = VEC_alloc (slp_void_p, heap, nargs);
+ VEC (tree, heap) *vec_oprnds0;
+
+ for (i = 0; i < nargs; i++)
+ VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
+ vect_get_slp_defs (vargs, slp_node, vec_defs, -1);
+ vec_oprnds0
+ = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
+
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
Was this line left by mistake?
+ for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
+ i += 2)
+ {
+ size_t k;
+ VEC_truncate (tree, vargs, 0);
+ for (k = 0; k < nargs; k++)
+ {
+ VEC (tree, heap) *vec_oprndsk
+ = (VEC (tree, heap) *)
+ VEC_index (slp_void_p, vec_defs, k);
+ VEC_quick_push (tree, vargs,
+ VEC_index (tree, vec_oprndsk, i));
+ VEC_quick_push (tree, vargs,
+ VEC_index (tree, vec_oprndsk, i + 1));
+ }
+ new_stmt = gimple_build_call_vec (fndecl, vargs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_call_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ mark_symbols_for_renaming (new_stmt);
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+ new_stmt);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ VEC (tree, heap) *vec_oprndsi
+ = (VEC (tree, heap) *)
+ VEC_index (slp_void_p, vec_defs, i);
+ VEC_free (tree, heap, vec_oprndsi);
+ }
+ VEC_free (slp_void_p, heap, vec_defs);
+ continue;
+ }
+
for (i = 0; i < nargs; i++)
{
op = gimple_call_arg (stmt, i);
Could you please rearrange the tests (separate basic blocks and loops)
and make them actually test that bbs/loops were vectorized?
Also, there is no need for dg-do run.
OK otherwise.
Thanks,
Ira
--- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-07 15:05:36.0 +0100
+++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-07 15:07:10.0 +0100
@@ -0,0 +1,100 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+extern float copysignf (float, float);
+extern float sqrtf (float);
+extern float fabsf (float);
+extern void abort (void);
+float a[64], b[64], c[64], d[64];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
+ a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
+ a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
+ a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
+ a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
+ a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
+ a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
+ a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
+}
+
+__attribute__((noinline, noclone)) void
+f2 (int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]);
+ a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]);
+ a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]);
+ a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]);
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int n)
+{
+ int i;
+ for (i = 0; i < 2 * n; i++)
+ {
+ a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]);
+ a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]);
+ }
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+ int i;
+ for (i = 0; i < 64; i++)
+ a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
+}
+
+__attribute__((noinline, noclone)) int
+main1 ()