The following patch optimizes the case where we decide to build up
all operands of an SLP node from scalars: instead, we just build up
the node's result from scalars.  That's usually less expensive, and
for PR65961 it works around a case that we don't handle correctly (yet).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-06-01  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/65961
        * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove bogus
        check and clarify dump message.
        (vect_build_slp_tree): If all children are built up from scalars
        build up the parent from scalars instead.
        * tree-vect-stmts.c (vect_is_simple_use): Cleanup.

        * gcc.dg/torture/pr65961.c: New testcase.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 223974)
--- gcc/tree-vect-slp.c (working copy)
*************** again:
*** 301,313 ****
        oprnd_info = (*oprnds_info)[i];
  
        if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
!                              &def, &dt)
!         || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                              "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
--- 301,312 ----
        oprnd_info = (*oprnds_info)[i];
  
        if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
!                              &def, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                              "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
*************** vect_build_slp_tree (loop_vec_info loop_
*** 1092,1097 ****
--- 1091,1125 ----
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
+         /* If we have all children of child built up from scalars then just
+            throw that away and build up this node from scalars.  */
+         if (!SLP_TREE_CHILDREN (child).is_empty ())
+           {
+             unsigned int j;
+             slp_tree grandchild;
+ 
+             FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+               if (grandchild != NULL)
+                 break;
+             if (!grandchild)
+               {
+                 /* Roll back.  */
+                 *max_nunits = old_max_nunits;
+                 loads->truncate (old_nloads);
+                 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+                     vect_free_slp_tree (grandchild);
+                 SLP_TREE_CHILDREN (child).truncate (0);
+ 
+                 dump_printf_loc (MSG_NOTE, vect_location,
+                                  "Building parent vector operands from "
+                                  "scalars instead\n");
+                 oprnd_info->def_stmts = vNULL;
+                 vect_free_slp_tree (child);
+                 SLP_TREE_CHILDREN (*node).quick_push (NULL);
+                 continue;
+               }
+           }
+ 
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c       (revision 223974)
--- gcc/tree-vect-stmts.c       (working copy)
*************** vect_is_simple_use (tree operand, gimple
*** 7878,7892 ****
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
  {
-   basic_block bb;
-   stmt_vec_info stmt_vinfo;
-   struct loop *loop = NULL;
- 
-   if (loop_vinfo)
-     loop = LOOP_VINFO_LOOP (loop_vinfo);
- 
    *def_stmt = NULL;
    *def = NULL_TREE;
  
    if (dump_enabled_p ())
      {
--- 7878,7886 ----
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
  {
    *def_stmt = NULL;
    *def = NULL_TREE;
+   *dt = vect_unknown_def_type;
  
    if (dump_enabled_p ())
      {
*************** vect_is_simple_use (tree operand, gimple
*** 7909,7921 ****
        return true;
      }
  
-   if (TREE_CODE (operand) == PAREN_EXPR)
-     {
-       if (dump_enabled_p ())
-         dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
-       operand = TREE_OPERAND (operand, 0);
-     }
- 
    if (TREE_CODE (operand) != SSA_NAME)
      {
        if (dump_enabled_p ())
--- 7903,7908 ----
*************** vect_is_simple_use (tree operand, gimple
*** 7924,7963 ****
        return false;
      }
  
!   *def_stmt = SSA_NAME_DEF_STMT (operand);
!   if (*def_stmt == NULL)
      {
!       if (dump_enabled_p ())
!         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                          "no def_stmt.\n");
!       return false;
      }
  
    if (dump_enabled_p ())
      {
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
        dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      }
  
!   /* Empty stmt is expected only in case of a function argument.
!      (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
!   if (gimple_nop_p (*def_stmt))
!     {
!       *def = operand;
!       *dt = vect_external_def;
!       return true;
!     }
! 
!   bb = gimple_bb (*def_stmt);
! 
!   if ((loop && !flow_bb_inside_loop_p (loop, bb))
!       || (!loop && bb != BB_VINFO_BB (bb_vinfo))
!       || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
      *dt = vect_external_def;
    else
      {
!       stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
        *dt = vect_external_def;
        else
        *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
--- 7911,7940 ----
        return false;
      }
  
!   if (SSA_NAME_IS_DEFAULT_DEF (operand))
      {
!       *def = operand;
!       *dt = vect_external_def;
!       return true;
      }
  
+   *def_stmt = SSA_NAME_DEF_STMT (operand);
    if (dump_enabled_p ())
      {
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
        dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      }
  
!   basic_block bb = gimple_bb (*def_stmt);
!   if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
!       || (bb_vinfo
!         && (bb != BB_VINFO_BB (bb_vinfo)
!             || gimple_code (*def_stmt) == GIMPLE_PHI)))
      *dt = vect_external_def;
    else
      {
!       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
        *dt = vect_external_def;
        else
        *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
Index: gcc/testsuite/gcc.dg/torture/pr65961.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr65961.c      (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr65961.c      (working copy)
***************
*** 0 ****
--- 1,20 ----
+ /* { dg-do compile } */
+ 
+ int *a;
+ void
+ foo ()
+ {
+   do
+     {
+       a[16] = (a[1] ^ a[0]) << 1 | a[1];
+       a[17] = (a[0] ^ a[1]) << 1 | a[0];
+       a[18] = (a[0] ^ a[1]) << 1 | a[0];
+       a[19] = (a[0] ^ a[1]) << 1 | a[0];
+       a[20] = (a[0] ^ a[1]) << 1 | a[0];
+       a[21] = (a[0] ^ a[1]) << 1 | a[0];
+       a[22] = (a[0] ^ a[1]) << 1 | a[0];
+       a[23] = (a[20] ^ a[1]) << 1 | a[9];
+       a += 8;
+     }
+   while (1);
+ }

Reply via email to