When I fixed PR64909 I knew it wasn't a 100% precise fix.  This bites
us now in PR65660 (which also shows other issues).  Thus the following
precise fix for PR64909 (which didn't turn out to be too involved): compute
a cost vector for the scalar iteration cost and apply it to the peeled
iterations.
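
In other words, the scalar body is recorded statement by statement (kind
and count) and the peeled prologue/epilogue iterations are charged by
replaying that vector, instead of multiplying a single rounded-down number
of scalar_stmt's.  A rough standalone sketch of the scheme - all types,
names and numbers below are made up for illustration only, the patch itself
uses stmt_info_for_cost, record_stmt_cost and add_stmt_cost:

  #include <vector>

  /* Hypothetical per-statement cost record, loosely modelled on
     stmt_info_for_cost: how often a statement of a given kind occurs
     in one scalar iteration.  */
  enum stmt_kind { SCALAR_LOAD, SCALAR_STORE, SCALAR_STMT };

  struct scalar_cost_entry
  {
    int count;        /* occurrences per scalar iteration */
    stmt_kind kind;   /* what kind of statement it is */
  };

  /* Hypothetical target hook: cost of one statement of KIND.  */
  static int
  target_cost (stmt_kind kind)
  {
    return kind == SCALAR_STMT ? 1 : 2;   /* made-up numbers */
  }

  /* Build the cost vector for one scalar iteration and return its total,
     mirroring what the patched vect_get_single_scalar_iteration_cost
     does with record_stmt_cost.  */
  static int
  scalar_iteration_cost (const std::vector<stmt_kind> &body,
                         std::vector<scalar_cost_entry> &cost_vec)
  {
    int total = 0;
    for (stmt_kind k : body)
      {
        cost_vec.push_back ({1, k});
        total += target_cost (k);
      }
    return total;
  }

  /* Account for NPEEL peeled iterations by replaying the cost vector
     entry by entry, instead of multiplying one rounded number.  */
  static int
  peeled_iterations_cost (const std::vector<scalar_cost_entry> &cost_vec,
                          int npeel)
  {
    int total = 0;
    for (const scalar_cost_entry &e : cost_vec)
      total += e.count * npeel * target_cost (e.kind);
    return total;
  }

  int
  main ()
  {
    std::vector<stmt_kind> body = { SCALAR_LOAD, SCALAR_STMT, SCALAR_STORE };
    std::vector<scalar_cost_entry> cost_vec;
    int iter_cost = scalar_iteration_cost (body, cost_vec);  /* 2 + 1 + 2 = 5 */
    int peel_cost = peeled_iterations_cost (cost_vec, 3);    /* 3 * 5 = 15 */
    return (iter_cost == 5 && peel_cost == 15) ? 0 : 1;
  }

Because each entry keeps its own kind, the target cost hooks can still
weight loads and stores differently for the peeled iterations.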

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

I plan to apply this to trunk and to the 4.9 branch, where I backported
the original fix, as it fixes round-off errors which can lead to
very unprofitable vectorizations.
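
To illustrate the round-off issue with made-up target costs: if the scalar
body cost sums to 7 but vect_get_stmt_cost (scalar_stmt) is 4, the old code
modelled each peeled iteration as 7 / 4 = 1 scalar_stmt, i.e. cost 4 instead
of 7, so peeling looked roughly half as expensive as it really is and the
vectorized loop correspondingly more profitable than it is.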

Richard.

2015-04-02  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/64909
        PR tree-optimization/65660
        * tree-vectorizer.h (vect_get_known_peeling_cost): Adjust
        to take a cost vector for scalar iteration cost.
        (vect_get_single_scalar_iteration_cost): Likewise.
        * tree-vect-loop.c (vect_get_single_scalar_iteration_cost):
        Compute the scalar iteration cost into a cost vector.
        (vect_get_known_peeling_cost): Use the scalar cost vector to
        account for the cost of the peeled iterations.
        (vect_estimate_min_profitable_iters): Likewise.
        * tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
        Likewise.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h       (revision 220580)
+++ gcc/tree-vectorizer.h       (working copy)
@@ -1101,10 +1101,12 @@ extern bool vectorizable_reduction (gimp
 extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *);
 extern tree get_initial_def_for_reduction (gimple, tree, tree *);
 extern int vect_min_worthwhile_factor (enum tree_code);
-extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int,
+extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
+                                       stmt_vector_for_cost *,
                                        stmt_vector_for_cost *,
                                        stmt_vector_for_cost *);
-extern int vect_get_single_scalar_iteration_cost (loop_vec_info);
+extern int vect_get_single_scalar_iteration_cost (loop_vec_info,
+                                                 stmt_vector_for_cost *);
 
 /* In tree-vect-slp.c.  */
 extern void vect_free_slp_instance (slp_instance);
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 220580)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -2653,12 +2653,13 @@ vect_force_simple_reduction (loop_vec_in
 
 /* Calculate the cost of one scalar iteration of the loop.  */
 int
-vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
+vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo,
+                                      stmt_vector_for_cost *scalar_cost_vec)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
-  int innerloop_iters, i, stmt_cost;
+  int innerloop_iters, i;
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -2699,17 +2700,20 @@ vect_get_single_scalar_iteration_cost (l
              && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
+         vect_cost_for_stmt kind;
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
             {
               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
-               stmt_cost = vect_get_stmt_cost (scalar_load);
+               kind = scalar_load;
              else
-               stmt_cost = vect_get_stmt_cost (scalar_store);
+               kind = scalar_store;
             }
           else
-            stmt_cost = vect_get_stmt_cost (scalar_stmt);
+            kind = scalar_stmt;
 
-          scalar_single_iter_cost += stmt_cost * factor;
+         scalar_single_iter_cost
+           += record_stmt_cost (scalar_cost_vec, factor, kind,
+                                NULL, 0, vect_prologue);
         }
     }
   return scalar_single_iter_cost;
@@ -2719,7 +2723,7 @@ vect_get_single_scalar_iteration_cost (l
 int
 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
                              int *peel_iters_epilogue,
-                             int scalar_single_iter_cost,
+                             stmt_vector_for_cost *scalar_cost_vec,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *epilogue_cost_vec)
 {
@@ -2736,8 +2740,10 @@ vect_get_known_peeling_cost (loop_vec_in
 
       /* If peeled iterations are known but number of scalar loop
          iterations are unknown, count a taken branch per peeled loop.  */
-      retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
+      retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
                                 NULL, 0, vect_prologue);
+      retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
+                                NULL, 0, vect_epilogue);
     }
   else
     {
@@ -2751,14 +2757,21 @@ vect_get_known_peeling_cost (loop_vec_in
         *peel_iters_epilogue = vf;
     }
 
+  stmt_info_for_cost *si;
+  int j;
   if (peel_iters_prologue)
-    retval += record_stmt_cost (prologue_cost_vec,
-                               peel_iters_prologue * scalar_single_iter_cost,
-                               scalar_stmt, NULL, 0, vect_prologue);
+    FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
+      retval += record_stmt_cost (prologue_cost_vec,
+                                 si->count * peel_iters_prologue,
+                                 si->kind, NULL, si->misalign,
+                                 vect_prologue);
   if (*peel_iters_epilogue)
-    retval += record_stmt_cost (epilogue_cost_vec,
-                               *peel_iters_epilogue * scalar_single_iter_cost,
-                               scalar_stmt, NULL, 0, vect_epilogue);
+    FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
+      retval += record_stmt_cost (epilogue_cost_vec,
+                                 si->count * *peel_iters_epilogue,
+                                 si->kind, NULL, si->misalign,
+                                 vect_epilogue);
+
   return retval;
 }
 
@@ -2833,12 +2846,9 @@ vect_estimate_min_profitable_iters (loop
      TODO: Consider assigning different costs to different scalar
      statements.  */
 
-  scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
-  /* ???  Below we use this cost as number of stmts with scalar_stmt cost,
-     thus divide by that.  This introduces rounding errors, thus better
-     introduce a new cost kind (raw_cost?  scalar_iter_cost?). */
-  int scalar_single_iter_stmts
-    = scalar_single_iter_cost / vect_get_stmt_cost (scalar_stmt);
+  auto_vec<stmt_info_for_cost> scalar_cost_vec;
+  scalar_single_iter_cost
+     = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec);
 
   /* Add additional cost for the peeled instructions in prologue and epilogue
      loop.
@@ -2866,18 +2876,29 @@ vect_estimate_min_profitable_iters (loop
          branch per peeled loop. Even if scalar loop iterations are known,
          vector iterations are not known since peeled prologue iterations are
          not known. Hence guards remain the same.  */
-      (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
+      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
                            NULL, 0, vect_prologue);
-      (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
+      (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
                            NULL, 0, vect_prologue);
-      /* FORNOW: Don't attempt to pass individual scalar instructions to
-        the model; just assume linear cost for scalar iterations.  */
-      (void) add_stmt_cost (target_cost_data,
-                           peel_iters_prologue * scalar_single_iter_stmts,
-                           scalar_stmt, NULL, 0, vect_prologue);
-      (void) add_stmt_cost (target_cost_data, 
-                           peel_iters_epilogue * scalar_single_iter_stmts,
-                           scalar_stmt, NULL, 0, vect_epilogue);
+      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
+                           NULL, 0, vect_epilogue);
+      (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
+                           NULL, 0, vect_epilogue);
+      stmt_info_for_cost *si;
+      int j;
+      FOR_EACH_VEC_ELT (scalar_cost_vec, j, si)
+       {
+         struct _stmt_vec_info *stmt_info
+           = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+         (void) add_stmt_cost (target_cost_data,
+                               si->count * peel_iters_prologue,
+                               si->kind, stmt_info, si->misalign,
+                               vect_prologue);
+         (void) add_stmt_cost (target_cost_data,
+                               si->count * peel_iters_epilogue,
+                               si->kind, stmt_info, si->misalign,
+                               vect_epilogue);
+       }
     }
   else
     {
@@ -2892,7 +2913,7 @@ vect_estimate_min_profitable_iters (loop
 
       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
                                          &peel_iters_epilogue,
-                                         scalar_single_iter_stmts,
+                                         &scalar_cost_vec,
                                          &prologue_cost_vec,
                                          &epilogue_cost_vec);
 
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c   (revision 220580)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -1183,14 +1183,12 @@ vect_peeling_hash_get_lowest_cost (_vect
       SET_DR_MISALIGNMENT (dr, save_misalignment);
     }
 
-  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
+  auto_vec<stmt_info_for_cost> scalar_cost_vec;
+  single_iter_cost
+    = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec);
   outside_cost += vect_get_known_peeling_cost
     (loop_vinfo, elem->npeel, &dummy,
-     /* ???  We use this cost as number of stmts with scalar_stmt cost,
-       thus divide by that.  This introduces rounding errors, thus better 
-       introduce a new cost kind (raw_cost?  scalar_iter_cost?). */
-     single_iter_cost / vect_get_stmt_cost (scalar_stmt),
-     &prologue_cost_vec, &epilogue_cost_vec);
+     &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec);
 
   /* Prologue and epilogue costs are added to the target model later.
      These costs depend only on the scalar iteration cost, the
