On Fri, Aug 30, 2013 at 1:23 AM, Richard Biener
<richard.guent...@gmail.com> wrote:
> On Fri, Aug 30, 2013 at 1:28 AM, Xinliang David Li <davi...@google.com> wrote:
>> I was debugging a runtime failure of SPEC06 xalancbmk built with LIPO.
>> Using -fdisable-<pass> option pinpoints the problem in slp vectorize
>> pass on a particular function. dbgcnt support is added to to track
>> down the individual BB, but it  fails even when the dbg count is set
>> to 0.
>>
>> It turns out that no BB was actually vectorized for that function, but
>> turning on/off slp-vectorize does make a difference in generated code
>> -- the only difference between the good and bad case is stack layout.
>>  The problem is  in the alignment analysis phase -- which
>> speculatively changes the base declaration's alignment regardless
>> whether the vectorization transformation will be performed or not
>> later.
>>
>> The attached patch fixes the problem. Testing is ok. Ok for trunk?
>
> Not in this form.  I'd rather not put extra fields in the data-refs this way.
> (As for the xalancbmk runtime problem - doesn't this patch just paper
> over the real issue?)

I believe it is stack-limit related. This program has some recursive
call chains that can generate a call frame ~9k deep. The vectorizer
side effect causes the affected function in the call frame to grow
~100 byte in stack size. Since this function appears lots of times in
the callstack, the overall stack consumption increase a lot. Combined
with the aggressive cross module inlining, it ends up blowing up the
stack limit.


>
> For BB SLP you still adjust DR bases that do not take part in
> vectorization - the DR vector contains all refs in the BB, not just
> those in the SLP instances we are going to vectorize.  So the
> commit of the re-aligning decision should be done from where
> we vectorize the DR, in vectorizable_load/store in its transform
> phase.
>
> If we decide to integrate the fields into the data-ref then the
> analysis and commit parts should go into the data-ref machinery
> as well.  Otherwise the vectorizer should use data-ref->aux or some
> other way to hang off its private data.
>

Good point.

> Other than that, modifying alignment of variables that are not
> vectorized is indeed a bug worth fixing.

The new version of the patch is attached. Ok for trunk after testing?

thanks,

David

>
> Richard.
>
>> thanks,
>>
>> David
Index: ChangeLog
===================================================================
--- ChangeLog   (revision 202088)
+++ ChangeLog   (working copy)
@@ -1,5 +1,25 @@
 2013-08-29  Xinliang David Li  <davi...@google.com>
 
+       * tree-vect-slp.c (destroy_bb_vec_info): Data ref cleanup.
+       * tree-vect-loop.c (destroy_bb_vec_info): Ditto.
+       * tree-vect-data-refs.c (vect_compute_data_ref_alignment):
+       Delay base decl alignment adjustment.
+       * tree-vectorizer.c (destroy_datarefs): New function.
+       * tree-vectorizer.h: New data structure.
+       (set_dr_misalignment): New function.
+       (dr_misalignment): Ditto.
+       * tree-vect-stmts.c (vectorizable_store_1): Name change.
+       (vectorizable_load_1): Ditto.
+       (vectorizable_store): New function.
+       (vectorizable_load): Ditto.
+       (ensure_base_align): Ditto.
+       (vectorize_loops): Add dbg_cnt support.
+       (execute_vect_slp): Ditto.
+       * dbgcnt.def: New debug counter.
+       * Makefile: New dependency.
+
+2013-08-29  Xinliang David Li  <davi...@google.com>
+
        * loop-unroll.c (report_unroll_peel): Minor message
        change.
        * tree-vect-loop-manip.c (vect_do_peeling_for_alignment):
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 202088)
+++ tree-vect-loop.c    (working copy)
@@ -957,7 +957,7 @@ destroy_loop_vec_info (loop_vec_info loo
     }
 
   free (LOOP_VINFO_BBS (loop_vinfo));
-  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+  destroy_datarefs (loop_vinfo, NULL);
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
Index: dbgcnt.def
===================================================================
--- dbgcnt.def  (revision 202088)
+++ dbgcnt.def  (working copy)
@@ -172,6 +172,8 @@ DEBUG_COUNTER (pre_insn)
 DEBUG_COUNTER (treepre_insert)
 DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (eipa_sra)
+DEBUG_COUNTER (vect_loop)
+DEBUG_COUNTER (vect_slp)
 DEBUG_COUNTER (sched2_func)
 DEBUG_COUNTER (sched_block)
 DEBUG_COUNTER (sched_func)
Index: Makefile.in
===================================================================
--- Makefile.in (revision 202088)
+++ Makefile.in (working copy)
@@ -2645,7 +2645,7 @@ tree-vect-data-refs.o: tree-vect-data-re
 tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(DUMPFILE_H) $(TM_H) $(GGC_H) $(TREE_H) $(TREE_FLOW_H) \
    $(CFGLOOP_H) $(TREE_PASS_H) $(TREE_VECTORIZER_H) \
-   $(TREE_PRETTY_PRINT_H)
+   $(TREE_PRETTY_PRINT_H) $(DBGCNT_H)
 vtable-verify.o: vtable-verify.c vtable-verify.h $(CONFIG_H) \
    $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) cp/cp-tree.h $(TM_P_H) \
    $(BASIC_BLOCK_H) output.h $(TREE_FLOW_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c     (revision 202088)
+++ tree-vect-slp.c     (working copy)
@@ -1825,7 +1825,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinf
         free_stmt_vec_info (stmt);
     }
 
-  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
+  destroy_datarefs (NULL, bb_vinfo);
   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c   (revision 202088)
+++ tree-vectorizer.c   (working copy)
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "hash-table.h"
 #include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
 
 /* Loop or bb location.  */
 LOC vect_location;
@@ -279,6 +280,31 @@ note_simd_array_uses (hash_table <simd_a
       }
 }
 
+/* A helper function to free data refs.  */
+
+void
+destroy_datarefs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+{
+  vec<data_reference_p> datarefs;
+  struct data_reference *dr;
+  unsigned int i;
+
+ if (loop_vinfo)
+    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  else
+    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    if (dr->aux)
+      {
+        free (dr->aux);
+        dr->aux = NULL;
+      }
+
+  free_data_refs (datarefs);
+}
+
+
 /* Function vectorize_loops.
 
    Entry point to loop vectorization phase.  */
@@ -331,6 +357,9 @@ vectorize_loops (void)
        if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
          continue;
 
+        if (!dbg_cnt (vect_loop))
+         break;
+
         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
            && dump_enabled_p ())
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
@@ -431,6 +460,7 @@ static unsigned int
 execute_vect_slp (void)
 {
   basic_block bb;
+  bb_vec_info bb_vinfo;
 
   init_stmt_vec_info_vec ();
 
@@ -438,8 +468,11 @@ execute_vect_slp (void)
     {
       vect_location = find_bb_location (bb);
 
-      if (vect_slp_analyze_bb (bb))
+      if ((bb_vinfo = vect_slp_analyze_bb (bb)))
         {
+          if (!dbg_cnt (vect_slp))
+            break;
+
           vect_slp_transform_bb (bb);
           if (dump_enabled_p ())
             dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h   (revision 202088)
+++ tree-vectorizer.h   (working copy)
@@ -629,6 +629,12 @@ typedef struct _stmt_vec_info {
 #define PURE_SLP_STMT(S)                  ((S)->slp_type == pure_slp)
 #define STMT_SLP_TYPE(S)                   (S)->slp_type
 
+typedef struct _dataref_aux {
+  tree base_decl;
+  bool base_misaligned;
+  int misalignment;
+} dataref_aux;
+
 #define VECT_MAX_COST 1000
 
 /* The maximum number of intermediate steps required in multi-step type
@@ -831,11 +837,31 @@ destroy_cost_data (void *data)
 /*-----------------------------------------------------------------*/
 /* Info on data references alignment.                              */
 /*-----------------------------------------------------------------*/
+inline void
+set_dr_misalignment (struct data_reference *dr, int val)
+{
+  dataref_aux *data_aux = (dataref_aux *) dr->aux;
+
+  if (!data_aux)
+    {
+      data_aux = XCNEW (dataref_aux);
+      dr->aux = data_aux;
+    }
+
+  data_aux->misalignment = val;
+}
+
+inline int
+dr_misalignment (struct data_reference *dr)
+{
+  gcc_assert (dr->aux);
+  return ((dataref_aux *) dr->aux)->misalignment;
+}
 
 /* Reflects actual alignment of first access in the vectorized loop,
    taking into account peeling/versioning if applied.  */
-#define DR_MISALIGNMENT(DR)   ((int) (size_t) (DR)->aux)
-#define SET_DR_MISALIGNMENT(DR, VAL)   ((DR)->aux = (void *) (size_t) (VAL))
+#define DR_MISALIGNMENT(DR) dr_misalignment (DR)
+#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
 
 /* Return TRUE if the data access is aligned, and FALSE otherwise.  */
 
@@ -1014,5 +1040,6 @@ void vect_pattern_recog (loop_vec_info,
 
 /* In tree-vectorizer.c.  */
 unsigned vectorize_loops (void);
+void destroy_datarefs (loop_vec_info, bb_vec_info);
 
 #endif  /* GCC_TREE_VECTORIZER_H  */
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c       (revision 202088)
+++ tree-vect-data-refs.c       (working copy)
@@ -763,15 +763,11 @@ vect_compute_data_ref_alignment (struct
           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
         }
 
-      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
-      DECL_USER_ALIGN (base) = 1;
+      gcc_assert (dr->aux);
+      ((dataref_aux *)dr->aux)->base_decl = base;
+      ((dataref_aux *)dr->aux)->base_misaligned = true;
     }
 
-  /* At this point we assume that the base is aligned.  */
-  gcc_assert (base_aligned
-             || (TREE_CODE (base) == VAR_DECL
-                 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
-
   /* If this is a backward running DR then first access in the larger
      vectype actually is N-1 elements before the address in the DR.
      Adjust misalign accordingly.  */
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c   (revision 202088)
+++ tree-vect-stmts.c   (working copy)
@@ -3819,15 +3819,16 @@ vectorizable_operation (gimple stmt, gim
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
-                   slp_tree slp_node)
+vectorizable_store_1 (gimple stmt, stmt_vec_info stmt_info,
+                      struct data_reference *dr,
+                      gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                      slp_tree slp_node)
 {
   tree scalar_dest;
   tree data_ref;
   tree op;
   tree vec_oprnd = NULL_TREE;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = 
NULL;
+  struct data_reference *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -4284,6 +4285,43 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* A helper function to ensure data reference DR's base alignment
+   for STMT_INFO.  */
+
+static void
+ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
+{
+  if (!dr->aux)
+    return;
+
+  if (((dataref_aux *)dr->aux)->base_misaligned)
+    {
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
+
+      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
+      DECL_USER_ALIGN (base_decl) = 1;
+      ((dataref_aux *)dr->aux)->base_misaligned = false;
+    }
+}
+
+/* A wrapper function to vectorizable_store.  */
+
+static bool
+vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                    slp_tree slp_node)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  bool ret;
+
+  if ((ret = vectorizable_store_1 (stmt, stmt_info, dr, gsi,
+                                   vec_stmt, slp_node)))
+    ensure_base_align (stmt_info, dr);
+
+  return ret;
+}
+
 /* Given a vector type VECTYPE and permutation SEL returns
    the VECTOR_CST mask that implements the permutation of the
    vector elements.  If that is impossible to do, returns NULL.  */
@@ -4354,7 +4392,7 @@ permute_vec_elements (tree x, tree y, tr
   return data_ref;
 }
 
-/* vectorizable_load.
+/* vectorizable_load_1.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
    can be vectorized.
@@ -4363,19 +4401,20 @@ permute_vec_elements (tree x, tree y, tr
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
-                  slp_tree slp_node, slp_instance slp_node_instance)
+vectorizable_load_1 (gimple stmt, stmt_vec_info stmt_info,
+                     struct data_reference *dr,
+                     gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                     slp_tree slp_node, slp_instance slp_node_instance)
 {
   tree scalar_dest;
   tree vec_dest = NULL;
   tree data_ref = NULL;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   stmt_vec_info prev_stmt_info;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
   bool nested_in_vect_loop = false;
-  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
+  struct data_reference *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   tree new_temp;
@@ -5299,6 +5338,23 @@ vectorizable_load (gimple stmt, gimple_s
   return true;
 }
 
+/* A wrapper function to vectorizable_load_1.  */
+
+static bool
+vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                   slp_tree slp_node, slp_instance slp_node_instance)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  bool ret;
+
+  if ((ret = vectorizable_load_1 (stmt, stmt_info, dr, gsi,
+                                  vec_stmt, slp_node,
+                                  slp_node_instance)))
+    ensure_base_align (stmt_info, dr);
+  return ret;
+}
+
 /* Function vect_is_simple_cond.
 
    Input:

Reply via email to