Hi! On Fri, Oct 21, 2011 at 03:44:11PM +0200, Ira Rosen wrote: > There are vectorizable_type_promotion/demotion, and for the rest the > copies are "hidden" inside multiple vector operands that you get from > vect_get_vec_defs. But, of course, there is no need to handle > modifier == NARROW for SLP at the moment. I was just wondering out > loud.
Here is an updated patch, which handles both modifier == NONE and modifier == NARROW for SLP, after all it wasn't that hard. Additionally it checks that the fndecls and various call flags match, and adds some testcases. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2011-11-07 Jakub Jelinek <ja...@redhat.com> * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument. Handle vectorization of SLP calls. (vect_analyze_stmt): Adjust caller, add call to it for SLP too. (vect_transform_stmt): Adjust vectorizable_call caller, remove assertion. * tree-vect-slp.c (vect_get_and_check_slp_defs): For calls start with op_idx 3. (vect_build_slp_tree): Allow CALL_EXPR. * gcc.dg/vect/fast-math-vect-call-1.c: New test. * gcc.dg/vect/fast-math-vect-call-2.c: New test. --- gcc/tree-vect-slp.c.jj 2011-11-07 12:40:56.000000000 +0100 +++ gcc/tree-vect-slp.c 2011-11-07 12:45:06.000000000 +0100 @@ -202,7 +202,10 @@ vect_get_and_check_slp_defs (loop_vec_in loop = LOOP_VINFO_LOOP (loop_vinfo); if (is_gimple_call (stmt)) - number_of_oprnds = gimple_call_num_args (stmt); + { + number_of_oprnds = gimple_call_num_args (stmt); + op_idx = 3; + } else if (is_gimple_assign (stmt)) { number_of_oprnds = gimple_num_ops (stmt) - 1; @@ -558,7 +561,25 @@ vect_build_slp_tree (loop_vec_info loop_ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); if (is_gimple_call (stmt)) - rhs_code = CALL_EXPR; + { + rhs_code = CALL_EXPR; + if (gimple_call_internal_p (stmt) + || gimple_call_tail_p (stmt) + || gimple_call_noreturn_p (stmt) + || !gimple_call_nothrow_p (stmt) + || gimple_call_chain (stmt)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: unsupported call type "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + vect_free_oprnd_info (&oprnds_info, true); + return false; + } + } else rhs_code = gimple_assign_rhs_code (stmt); @@ -653,6 +674,27 @@ vect_build_slp_tree (loop_vec_info loop_ vect_free_oprnd_info 
(&oprnds_info, true); return false; } + + if (rhs_code == CALL_EXPR) + { + gimple first_stmt = VEC_index (gimple, stmts, 0); + if (gimple_call_num_args (stmt) != nops + || !operand_equal_p (gimple_call_fn (first_stmt), + gimple_call_fn (stmt), 0) + || gimple_call_fntype (first_stmt) + != gimple_call_fntype (stmt)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: different calls in "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + vect_free_oprnd_info (&oprnds_info, true); + return false; + } + } } /* Strided store or load. */ @@ -786,7 +828,8 @@ vect_build_slp_tree (loop_vec_info loop_ /* Not memory operation. */ if (TREE_CODE_CLASS (rhs_code) != tcc_binary && TREE_CODE_CLASS (rhs_code) != tcc_unary - && rhs_code != COND_EXPR) + && rhs_code != COND_EXPR + && rhs_code != CALL_EXPR) { if (vect_print_dump_info (REPORT_SLP)) { --- gcc/tree-vect-stmts.c.jj 2011-11-07 12:40:56.000000000 +0100 +++ gcc/tree-vect-stmts.c 2011-11-07 14:39:54.000000000 +0100 @@ -1505,7 +1505,8 @@ vectorizable_function (gimple call, tree Return FALSE if not a vectorizable STMT, TRUE otherwise. */ static bool -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, + slp_tree slp_node) { tree vec_dest; tree scalar_dest; @@ -1516,6 +1517,7 @@ vectorizable_call (gimple stmt, gimple_s int nunits_in; int nunits_out; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); tree fndecl, new_temp, def, rhs_type; gimple def_stmt; enum vect_def_type dt[3] @@ -1527,19 +1529,12 @@ vectorizable_call (gimple stmt, gimple_s size_t i, nargs; tree lhs; - /* FORNOW: unsupported in basic block SLP. 
*/ - gcc_assert (loop_vinfo); - - if (!STMT_VINFO_RELEVANT_P (stmt_info)) + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) return false; - /* FORNOW: SLP not supported. */ - if (STMT_SLP_TYPE (stmt_info)) - return false; - /* Is STMT a vectorizable call? */ if (!is_gimple_call (stmt)) return false; @@ -1580,7 +1575,7 @@ vectorizable_call (gimple stmt, gimple_s if (!rhs_type) rhs_type = TREE_TYPE (op); - if (!vect_is_simple_use_1 (op, loop_vinfo, NULL, + if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[i], &opvectype)) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -1642,7 +1637,9 @@ vectorizable_call (gimple stmt, gimple_s gcc_assert (!gimple_vuse (stmt)); - if (modifier == NARROW) + if (slp_node || PURE_SLP_STMT (stmt_info)) + ncopies = 1; + else if (modifier == NARROW) ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; @@ -1681,6 +1678,50 @@ vectorizable_call (gimple stmt, gimple_s else VEC_truncate (tree, vargs, 0); + if (slp_node) + { + VEC (slp_void_p, heap) *vec_defs + = VEC_alloc (slp_void_p, heap, nargs); + VEC (tree, heap) *vec_oprnds0; + + for (i = 0; i < nargs; i++) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vec_oprnds0 + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); + + /* Arguments are ready. Create the new vector stmt. 
*/ + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) + { + size_t k; + for (k = 0; k < nargs; k++) + { + VEC (tree, heap) *vec_oprndsk + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, k); + VEC_replace (tree, vargs, k, + VEC_index (tree, vec_oprndsk, i)); + } + new_stmt = gimple_build_call_vec (fndecl, vargs); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), + new_stmt); + } + + for (i = 0; i < nargs; i++) + { + VEC (tree, heap) *vec_oprndsi + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, i); + VEC_free (tree, heap, vec_oprndsi); + } + VEC_free (slp_void_p, heap, vec_defs); + continue; + } + for (i = 0; i < nargs; i++) { op = gimple_call_arg (stmt, i); @@ -1723,6 +1764,55 @@ vectorizable_call (gimple stmt, gimple_s else VEC_truncate (tree, vargs, 0); + if (slp_node) + { + VEC (slp_void_p, heap) *vec_defs + = VEC_alloc (slp_void_p, heap, nargs); + VEC (tree, heap) *vec_oprnds0; + + for (i = 0; i < nargs; i++) + VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); + vect_get_slp_defs (vargs, slp_node, &vec_defs, -1); + vec_oprnds0 + = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); + + /* Arguments are ready. Create the new vector stmt. 
*/ + FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0) + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0); + i += 2) + { + size_t k; + VEC_truncate (tree, vargs, 0); + for (k = 0; k < nargs; k++) + { + VEC (tree, heap) *vec_oprndsk + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, k); + VEC_quick_push (tree, vargs, + VEC_index (tree, vec_oprndsk, i)); + VEC_quick_push (tree, vargs, + VEC_index (tree, vec_oprndsk, i + 1)); + } + new_stmt = gimple_build_call_vec (fndecl, vargs); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), + new_stmt); + } + + for (i = 0; i < nargs; i++) + { + VEC (tree, heap) *vec_oprndsi + = (VEC (tree, heap) *) + VEC_index (slp_void_p, vec_defs, i); + VEC_free (tree, heap, vec_oprndsi); + } + VEC_free (slp_void_p, heap, vec_defs); + continue; + } + for (i = 0; i < nargs; i++) { op = gimple_call_arg (stmt, i); @@ -1788,7 +1878,8 @@ vectorizable_call (gimple stmt, gimple_s lhs = gimple_call_lhs (stmt); new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); set_vinfo_for_stmt (new_stmt, stmt_info); - set_vinfo_for_stmt (stmt, NULL); + if (!slp_node) + set_vinfo_for_stmt (stmt, NULL); STMT_VINFO_STMT (stmt_info) = new_stmt; gsi_replace (gsi, new_stmt, false); SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; @@ -5058,7 +5149,7 @@ vect_analyze_stmt (gimple stmt, bool *ne || vectorizable_operation (stmt, NULL, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL, NULL) || vectorizable_load (stmt, NULL, NULL, NULL, NULL) - || vectorizable_call (stmt, NULL, NULL) + || vectorizable_call (stmt, NULL, NULL, NULL) || vectorizable_store (stmt, NULL, NULL, NULL) || vectorizable_reduction (stmt, NULL, NULL, NULL) || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); @@ -5070,6 +5161,7 @@ vect_analyze_stmt (gimple 
stmt, bool *ne || vectorizable_operation (stmt, NULL, NULL, node) || vectorizable_assignment (stmt, NULL, NULL, node) || vectorizable_load (stmt, NULL, NULL, node, NULL) + || vectorizable_call (stmt, NULL, NULL, node) || vectorizable_store (stmt, NULL, NULL, node) || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); } @@ -5184,8 +5276,7 @@ vect_transform_stmt (gimple stmt, gimple break; case call_vec_info_type: - gcc_assert (!slp_node); - done = vectorizable_call (stmt, gsi, &vec_stmt); + done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node); stmt = gsi_stmt (*gsi); break; --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c.jj 2011-11-07 15:05:36.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c 2011-11-07 15:07:10.000000000 +0100 @@ -0,0 +1,100 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +extern float copysignf (float, float); +extern float sqrtf (float); +extern float fabsf (float); +extern void abort (void); +float a[64], b[64], c[64], d[64]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]); + a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]); + a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]); + a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]); + a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]); + a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]); + a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]); + a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]); +} + +__attribute__((noinline, noclone)) void +f2 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]); + a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]); + a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]); + a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]); + } +} + +__attribute__((noinline, noclone)) void 
+f3 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]); + a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]); + } +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i = 0; i < 64; i++) + { + asm (""); + b[i] = (i & 1) ? -4 * i : 4 * i; + c[i] = (i & 2) ? -8 * i : 8 * i; + d[i] = i * i; + } + f1 (); + for (i = 0; i < 8; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i + i - a[i]) >= 0.0001f) + abort (); + else + a[i] = 131.25; + f2 (16); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= 0.0001f) + abort (); + else + a[i] = 131.25; + f3 (16); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= 0.0001f) + abort (); + else + a[i] = 131.25; + f4 (); + for (i = 0; i < 64; i++) + if (fabsf (((i & 2) ? 
-4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c.jj 2011-11-07 15:09:00.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c 2011-11-07 15:11:58.000000000 +0100 @@ -0,0 +1,166 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +extern long int lrint (double); +extern void abort (void); +long int a[64]; +double b[64]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + a[0] = lrint (b[0]) + 1; + a[1] = lrint (b[1]) + 2; + a[2] = lrint (b[2]) + 3; + a[3] = lrint (b[3]) + 4; + a[4] = lrint (b[4]) + 5; + a[5] = lrint (b[5]) + 6; + a[6] = lrint (b[6]) + 7; + a[7] = lrint (b[7]) + 8; +} + +__attribute__((noinline, noclone)) void +f2 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = lrint (b[4 * i + 0]) + 1; + a[4 * i + 1] = lrint (b[4 * i + 1]) + 2; + a[4 * i + 2] = lrint (b[4 * i + 2]) + 3; + a[4 * i + 3] = lrint (b[4 * i + 3]) + 4; + } +} + +__attribute__((noinline, noclone)) void +f3 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + a[2 * i + 0] = lrint (b[2 * i + 0]) + 1; + a[2 * i + 1] = lrint (b[2 * i + 1]) + 2; + } +} + +__attribute__((noinline, noclone)) void +f4 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = lrint (b[i]) + 1; +} + +__attribute__((noinline, noclone)) void +f5 (void) +{ + a[0] = lrint (b[0]); + a[1] = lrint (b[1]); + a[2] = lrint (b[2]); + a[3] = lrint (b[3]); + a[4] = lrint (b[4]); + a[5] = lrint (b[5]); + a[6] = lrint (b[6]); + a[7] = lrint (b[7]); +} + +__attribute__((noinline, noclone)) void +f6 (int n) +{ + int i; + for (i = 0; i < n; i++) + { + a[4 * i + 0] = lrint (b[4 * i + 0]); + a[4 * i + 1] = lrint (b[4 * i + 1]); + a[4 * i + 2] = lrint (b[4 * i + 2]); + a[4 * i + 3] = lrint (b[4 * i + 3]); + } +} + +__attribute__((noinline, noclone)) void +f7 (int n) +{ + int i; + for (i = 0; i < 2 * n; i++) + { + 
a[2 * i + 0] = lrint (b[2 * i + 0]); + a[2 * i + 1] = lrint (b[2 * i + 1]); + } +} + +__attribute__((noinline, noclone)) void +f8 (void) +{ + int i; + for (i = 0; i < 64; i++) + a[i] = lrint (b[i]); +} + +__attribute__((noinline, noclone)) int +main1 () +{ + int i; + + for (i = 0; i < 64; i++) + { + asm (""); + b[i] = ((i & 1) ? -4 * i : 4 * i) + 0.25; + } + f1 (); + for (i = 0; i < 8; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + i) + abort (); + else + a[i] = 131.25; + f2 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 3)) + abort (); + else + a[i] = 131.25; + f3 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1 + (i & 1)) + abort (); + else + a[i] = 131.25; + f4 (); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i) + 1) + abort (); + else + a[i] = 131.25; + f5 (); + for (i = 0; i < 8; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + else + a[i] = 131.25; + f6 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + else + a[i] = 131.25; + f7 (16); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + else + a[i] = 131.25; + f8 (); + for (i = 0; i < 64; i++) + if (a[i] != ((i & 1) ? -4 * i : 4 * i)) + abort (); + return 0; +} + +int +main () +{ + check_vect (); + return main1 (); +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ Jakub