This is the access stride computation change.  Apart from the
stride extraction I adjusted the cost model to handle non-constant
strides by checking if either is a multiple of the other and
simply fail interchanging if it's the wrong way around for one
ref or if the simple method using multiple_of_p fails to determine
either case.

This still handles the bwaves case.

I think we want additional testcases with variable strides for each
case we add - I believe this is the most conservative way to treat
variable strides.

It may be inconsistent with the constant stride handling where you
allow for many OK DRs to outweight a few not OK DRs, but as it
worked for bwaves it must be good enough ;)

Tested on x86_64-unknown-linux-gnu (just the interchange testcases).

Currently running a bootstrap with -O3 -g -floop-interchange.

Ok for the branch?

Richard.

2017-12-01  Richard Biener  <rguent...@suse.de>

        * gimple-loop-interchange.cc (estimate_val_by_simplify_replace):
        Remove.
        (compute_access_stride): Rewrite using instantiate_scev,
        remove constant substitution.
        (should_interchange_loops): Adjust for non-constant strides.

Index: gcc/gimple-loop-interchange.cc
===================================================================
--- gcc/gimple-loop-interchange.cc      (revision 255303)
+++ gcc/gimple-loop-interchange.cc      (working copy)
@@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne
     }
 }
 
-/* Estimate and return the value of EXPR by replacing variables in EXPR
-   with CST_TREE and simplifying.  */
-
-static tree
-estimate_val_by_simplify_replace (tree expr, tree cst_tree)
-{
-  unsigned i, n;
-  tree ret = NULL_TREE, e, se;
-
-  if (!expr)
-    return NULL_TREE;
-
-  /* Do not bother to replace constants.  */
-  if (CONSTANT_CLASS_P (expr))
-    return expr;
-
-  if (!EXPR_P (expr))
-    return cst_tree;
-
-  n = TREE_OPERAND_LENGTH (expr);
-  for (i = 0; i < n; i++)
-    {
-      e = TREE_OPERAND (expr, i);
-      se = estimate_val_by_simplify_replace (e, cst_tree);
-      if (e == se)
-       continue;
-
-      if (!ret)
-       ret = copy_node (expr);
-
-      TREE_OPERAND (ret, i) = se;
-    }
-
-  return (ret ? fold (ret) : expr);
-}
-
 /* Given data reference DR in LOOP_NEST, the function computes DR's access
    stride at each level of loop from innermost LOOP to outer.  On success,
    it saves access stride at each level loop in a vector which is pointed
@@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop
 
   tree ref = DR_REF (dr);
   tree scev_base = build_fold_addr_expr (ref);
-  tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
-  tree niters = build_int_cst (sizetype, AVG_LOOP_NITER);
-  access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size);
-
-  do {
-    tree scev_fn = analyze_scalar_evolution (loop, scev_base);
-    if (chrec_contains_undetermined (scev_fn)
-       || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num))
-      break;
-
-    if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
-      {
-       scev_base = scev_fn;
-       strides->safe_push (build_int_cst (sizetype, 0));
-       continue;
-      }
-
-    scev_base = CHREC_LEFT (scev_fn);
-    if (tree_contains_chrecs (scev_base, NULL))
-      break;
-
-    tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn));
-
-    enum ev_direction scev_dir = scev_direction (scev_fn);
-    /* Estimate if step isn't constant.  */
-    if (scev_dir == EV_DIR_UNKNOWN)
-      {
-       scev_step = estimate_val_by_simplify_replace (scev_step, niters);
-       if (TREE_CODE (scev_step) != INTEGER_CST
-           || tree_int_cst_lt (scev_step, access_size))
-         scev_step = access_size;
-      }
-    /* Compute absolute value of scev step.  */
-    else if (scev_dir == EV_DIR_DECREASES)
-      scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step);
-
-    strides->safe_push (scev_step);
-  } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
+  tree scev = analyze_scalar_evolution (loop, scev_base);
+  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
+  if (! chrec_contains_undetermined (scev))
+    {
+      tree sl = scev;
+      struct loop *expected = loop;
+      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
+       {
+         struct loop *sl_loop = get_chrec_loop (sl);
+         while (sl_loop != expected)
+           {
+             strides->safe_push (size_int (0));
+             expected = loop_outer (expected);
+           }
+         strides->safe_push (CHREC_RIGHT (sl));
+         sl = CHREC_LEFT (sl);
+         expected = loop_outer (expected);
+       }
+      if (! tree_contains_chrecs (sl, NULL))
+       while (expected != loop_outer (loop_nest))
+         {
+           strides->safe_push (size_int (0));
+           expected = loop_outer (expected);
+         }
+    }
 
   dr->aux = strides;
 }
@@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx
   struct data_reference *dr;
   bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
   widest_int iloop_strides = 0, oloop_strides = 0;
+  unsigned num_unresolved_drs = 0;
+  unsigned num_resolved_ok_drs = 0;
+  unsigned num_resolved_not_ok_drs = 0;
 
   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
@@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx
     {
       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
       tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
-      gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST);
-      gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST);
 
       bool subloop_stride_p = false;
       /* Data ref can't be invariant or sequential access at current loop if
         its address changes with respect to any subloops.  */
       for (j = i_idx + 1; j < stride->length (); ++j)
-       if (integer_nonzerop ((*stride)[j]))
+       if (!integer_zerop ((*stride)[j]))
          {
            subloop_stride_p = true;
            break;
          }
 
-      if (integer_nonzerop (iloop_stride))
-       iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
-      else if (!subloop_stride_p)
-       num_old_inv_drs++;
-
-      if (integer_nonzerop (oloop_stride))
-       oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
-      else if (!subloop_stride_p)
-       num_new_inv_drs++;
+      if (integer_zerop (iloop_stride))
+       {
+         if (!subloop_stride_p)
+           num_old_inv_drs++;
+       }
+      if (integer_zerop (oloop_stride))
+       {
+         if (!subloop_stride_p)
+           num_new_inv_drs++;
+       }
+
+      if (TREE_CODE (iloop_stride) == INTEGER_CST
+         && TREE_CODE (oloop_stride) == INTEGER_CST)
+       {
+         iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
+         oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
+       }
+      else if (multiple_of_p (TREE_TYPE (iloop_stride),
+                             iloop_stride, oloop_stride))
+       num_resolved_ok_drs++;
+      else if (multiple_of_p (TREE_TYPE (iloop_stride),
+                             oloop_stride, iloop_stride))
+       num_resolved_not_ok_drs++;
+      else
+       num_unresolved_drs++;
 
       /* Data ref can't be sequential access if its address changes in sub
         loop.  */
@@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx
         interchange.  Note invariant is considered sequential here.  */
       tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
       if (all_seq_dr_before_p
-         && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide (access_size)))
+         && ! (integer_zerop (iloop_stride)
+               || operand_equal_p (access_size, iloop_stride, 0)))
        all_seq_dr_before_p = false;
       if (all_seq_dr_after_p
-         && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide (access_size)))
+         && ! (integer_zerop (oloop_stride)
+               || operand_equal_p (access_size, oloop_stride, 0)))
        all_seq_dr_after_p = false;
     }
 
@@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx
       fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
               all_seq_dr_before_p ? "true" : "false",
               all_seq_dr_after_p ? "true" : "false");
+      fprintf (dump_file, "OK to interchage with variable strides: %d\n",
+              num_resolved_ok_drs);
+      fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
+              num_resolved_not_ok_drs);
+      fprintf (dump_file, "Variable strides we cannot decide: %d\n",
+              num_unresolved_drs);
     }
 
+  if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
+    return false;
+
   /* We use different stride comparison ratio for interchanging innermost
      two loops or not.  The idea is to be conservative in interchange for
      the innermost loops.  */

Reply via email to