On 20 October 2011 23:50, Jakub Jelinek <ja...@redhat.com> wrote:
> Hi!

Hi,

>
> While looking at *.vect dumps from Polyhedron, I've noticed the lack
> of SLP vectorization of builtin calls.
>
> This patch is an attempt to handle at least 1 and 2 operand builtin calls
> (SLP doesn't handle ternary stmts either yet),

This is at the top of my todo list :).

> where all the types are the
> same.  E.g. it can handle
> extern float copysignf (float, float);
> extern float sqrtf (float);
> float a[8], b[8], c[8], d[8];
>
> void
> foo (void)
> {
>  a[0] = copysignf (b[0], c[0]) + 1.0f + sqrtf (d[0]);
>  a[1] = copysignf (b[1], c[1]) + 2.0f + sqrtf (d[1]);
>  a[2] = copysignf (b[2], c[2]) + 3.0f + sqrtf (d[2]);
>  a[3] = copysignf (b[3], c[3]) + 4.0f + sqrtf (d[3]);
>  a[4] = copysignf (b[4], c[4]) + 5.0f + sqrtf (d[4]);
>  a[5] = copysignf (b[5], c[5]) + 6.0f + sqrtf (d[5]);
>  a[6] = copysignf (b[6], c[6]) + 7.0f + sqrtf (d[6]);
>  a[7] = copysignf (b[7], c[7]) + 8.0f + sqrtf (d[7]);
> }
> and compile it into:
>        vmovaps .LC0(%rip), %ymm0
>        vandnps b(%rip), %ymm0, %ymm1
>        vandps  c(%rip), %ymm0, %ymm0
>        vorps   %ymm0, %ymm1, %ymm0
>        vsqrtps d(%rip), %ymm1
>        vaddps  %ymm1, %ymm0, %ymm0
>        vaddps  .LC1(%rip), %ymm0, %ymm0
>        vmovaps %ymm0, a(%rip)
> I've bootstrapped/regtested it on x86_64-linux and i686-linux, but
> am not 100% sure about all the changes, e.g. that
> || PURE_SLP_STMT (stmt_info) part.
>
> 2011-10-20  Jakub Jelinek  <ja...@redhat.com>
>
>        * tree-vect-stmts.c (vectorizable_call): Add SLP_NODE argument.
>        Handle vectorization of SLP calls.
>        (vect_analyze_stmt): Adjust caller, add call to it for SLP too.
>        (vect_transform_stmt): Adjust vectorizable_call caller, remove
>        assertion.
>        * tree-vect-slp.c (vect_get_and_check_slp_defs): Handle one
>        and two argument calls too.
>        (vect_build_slp_tree): Allow CALL_EXPR.
>        (vect_get_slp_defs): Handle calls.
>
> --- gcc/tree-vect-stmts.c.jj    2011-10-20 14:13:34.000000000 +0200
> +++ gcc/tree-vect-stmts.c       2011-10-20 18:02:43.000000000 +0200
> @@ -1483,7 +1483,8 @@ vectorizable_function (gimple call, tree
>    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
>
>  static bool
> -vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
> +vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> +                  slp_tree slp_node)
>  {
>   tree vec_dest;
>   tree scalar_dest;
> @@ -1494,6 +1495,7 @@ vectorizable_call (gimple stmt, gimple_s
>   int nunits_in;
>   int nunits_out;
>   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
>   tree fndecl, new_temp, def, rhs_type;
>   gimple def_stmt;
>   enum vect_def_type dt[3]
> @@ -1505,19 +1507,12 @@ vectorizable_call (gimple stmt, gimple_s
>   size_t i, nargs;
>   tree lhs;
>
> -  /* FORNOW: unsupported in basic block SLP.  */
> -  gcc_assert (loop_vinfo);
> -
> -  if (!STMT_VINFO_RELEVANT_P (stmt_info))
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>     return false;
>
>   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
>     return false;
>
> -  /* FORNOW: SLP not supported.  */
> -  if (STMT_SLP_TYPE (stmt_info))
> -    return false;
> -
>   /* Is STMT a vectorizable call?   */
>   if (!is_gimple_call (stmt))
>     return false;
> @@ -1558,7 +1553,7 @@ vectorizable_call (gimple stmt, gimple_s
>       if (!rhs_type)
>        rhs_type = TREE_TYPE (op);
>
> -      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
> +      if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
>                                 &def_stmt, &def, &dt[i], &opvectype))
>        {
>          if (vect_print_dump_info (REPORT_DETAILS))
> @@ -1620,7 +1615,13 @@ vectorizable_call (gimple stmt, gimple_s
>
>   gcc_assert (!gimple_vuse (stmt));
>
> -  if (modifier == NARROW)
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +    {
> +      if (modifier != NONE)
> +       return false;
> +      ncopies = 1;
> +    }

If you want to bail out when the stmt is SLP and modifier != NONE, this
check is not enough. PURE_SLP means the stmt is not used outside the SLP
instance, so for hybrid SLP stmts (those that have uses outside SLP)
this check will not catch the case. You need

  if (modifier != NONE && STMT_SLP_TYPE (stmt_info))
     return false;

But I wonder: why not allow different type sizes? I see that we also
fail in such cases in vectorizable_conversion, but I think we should
support this as well.

> +  else if (modifier == NARROW)
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>   else
>     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -1659,6 +1660,43 @@ vectorizable_call (gimple stmt, gimple_s
>          else
>            VEC_truncate (tree, vargs, 0);
>
> +         if (slp_node)
> +           {
> +             VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
> +
> +             gcc_assert (j == 0);
> +             if (nargs == 1)
> +               vect_get_vec_defs (gimple_call_arg (stmt, 0), NULL_TREE, stmt,
> +                                  &vec_oprnds0, &vec_oprnds1, slp_node);
> +             else if (nargs == 2)
> +               vect_get_vec_defs (gimple_call_arg (stmt, 0),
> +                                  gimple_call_arg (stmt, 1), stmt,
> +                                  &vec_oprnds0, &vec_oprnds1, slp_node);
> +             else
> +               gcc_unreachable ();
> +
> +             /* Arguments are ready.  Create the new vector stmt.  */
> +             FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
> +               {
> +                 vec_oprnd1 = nargs == 2 ? VEC_index (tree, vec_oprnds1, i)
> +                                         : NULL_TREE;
> +                 new_stmt = gimple_build_call (fndecl, nargs, vec_oprnd0,
> +                                               vec_oprnd1);
> +                 new_temp = make_ssa_name (vec_dest, new_stmt);
> +                 gimple_call_set_lhs (new_stmt, new_temp);
> +                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                 mark_symbols_for_renaming (new_stmt);
> +                 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
> +                                 new_stmt);
> +               }
> +
> +             VEC_free (tree, heap, vec_oprnds0);
> +             if (vec_oprnds1)
> +               VEC_free (tree, heap, vec_oprnds1);
> +
> +             continue;
> +           }
> +
>          for (i = 0; i < nargs; i++)
>            {
>              op = gimple_call_arg (stmt, i);
> @@ -5099,7 +5137,7 @@ vect_analyze_stmt (gimple stmt, bool *ne
>             || vectorizable_operation (stmt, NULL, NULL, NULL)
>             || vectorizable_assignment (stmt, NULL, NULL, NULL)
>             || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> -            || vectorizable_call (stmt, NULL, NULL)
> +            || vectorizable_call (stmt, NULL, NULL, NULL)
>             || vectorizable_store (stmt, NULL, NULL, NULL)
>             || vectorizable_reduction (stmt, NULL, NULL, NULL)
>             || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> @@ -5108,10 +5146,11 @@ vect_analyze_stmt (gimple stmt, bool *ne
>         if (bb_vinfo)
>           ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
>                 || vectorizable_type_demotion (stmt, NULL, NULL, node)
> -               || vectorizable_shift (stmt, NULL, NULL, node)
> +                || vectorizable_shift (stmt, NULL, NULL, node)
>                 || vectorizable_operation (stmt, NULL, NULL, node)
>                 || vectorizable_assignment (stmt, NULL, NULL, node)
>                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
> +                || vectorizable_call (stmt, NULL, NULL, node)
>                 || vectorizable_store (stmt, NULL, NULL, node));
>       }
>
> @@ -5234,8 +5273,7 @@ vect_transform_stmt (gimple stmt, gimple
>       break;
>
>     case call_vec_info_type:
> -      gcc_assert (!slp_node);
> -      done = vectorizable_call (stmt, gsi, &vec_stmt);
> +      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
>       stmt = gsi_stmt (*gsi);
>       break;
>
> --- gcc/tree-vect-slp.c.jj      2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-slp.c 2011-10-20 18:06:55.000000000 +0200
> @@ -129,12 +129,30 @@ vect_get_and_check_slp_defs (loop_vec_in
>   if (loop_vinfo)
>     loop = LOOP_VINFO_LOOP (loop_vinfo);
>
> -  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
> -  number_of_oprnds = gimple_num_ops (stmt) - 1;        /* RHS only */
> +  if (is_gimple_call (stmt))
> +    {
> +      number_of_oprnds = gimple_call_num_args (stmt);
> +      if (number_of_oprnds != 1 && number_of_oprnds != 2)
> +       {
> +         if (vect_print_dump_info (REPORT_SLP))
> +           fprintf (vect_dump, "Build SLP failed: calls with %d "
> +                               "operands unhandled\n", number_of_oprnds);

No need for the \n.

> +         return false;
> +       }
> +      rhs_class = number_of_oprnds == 1 ? GIMPLE_UNARY_RHS : 
> GIMPLE_BINARY_RHS;
> +    }
> +  else
> +    {
> +      rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
> +      number_of_oprnds = gimple_num_ops (stmt) - 1;    /* RHS only */
> +    }
>
>   for (i = 0; i < number_of_oprnds; i++)
>     {
> -      oprnd = gimple_op (stmt, i + 1);
> +      if (is_gimple_call (stmt))
> +       oprnd = gimple_call_arg (stmt, i);
> +      else
> +       oprnd = gimple_op (stmt, i + 1);
>
>       if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, 
> &def[i],
>                                &dt[i])

I think you forgot to check that all the calls are to the same function.

Thanks,
Ira

> @@ -660,7 +678,8 @@ vect_build_slp_tree (loop_vec_info loop_
>
>          /* Not memory operation.  */
>          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
> -             && TREE_CODE_CLASS (rhs_code) != tcc_unary)
> +             && TREE_CODE_CLASS (rhs_code) != tcc_unary
> +             && rhs_code != CALL_EXPR)
>            {
>              if (vect_print_dump_info (REPORT_SLP))
>                {
> @@ -2308,9 +2327,19 @@ vect_get_slp_defs (tree op0, tree op1, s
>   if (reduc_index != -1)
>     return;
>
> -  code = gimple_assign_rhs_code (first_stmt);
> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || 
> !op1)
> +  if (!vec_oprnds1 || !op1)
>     return;
> +  if (is_gimple_call (first_stmt))
> +    {
> +      if (gimple_call_num_args (first_stmt) < 2)
> +        return;
> +    }
> +  else
> +    {
> +      code = gimple_assign_rhs_code (first_stmt);
> +      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> +       return;
> +    }
>
>   /* The number of vector defs is determined by the number of vector 
> statements
>      in the node from which we get those statements.  */
>
>        Jakub
>

Reply via email to