I've noticed we perform FP reduction association without the required
checks for associative math.  I've added 
gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.

I also noticed we happily interchange a loop with a reduction like

 sum = a[i] - sum;

where a change in order of elements isn't ok.  Unfortunately bwaves
is exactly a case where single_use != next_def (tried to simply remove
that case for now), because reassoc didn't have a chance to fix the
operand order.  Thus this patch exports the relevant handling from
the vectorizer (for stage1 having a separate infrastructure gathering /
analyzing of reduction/induction infrastructure would be nice...)
and uses it from interchange.  We then don't handle
gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer 
missed-opt is PR65930).  I didn't bother to split up the vectorizer
code further to implement relaxed validity checking but simply XFAILed
this testcase.

Earlier I simplified allocation stuff in the main loop which is why
this part is included in this patch.

Bootstrap running on x86_64-unknown-linux-gnu.

I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.

Ok?

Thanks,
Richard.

2017-12-04  Richard Biener  <rguent...@suse.de>

        * tree-vectorizer.h (check_reduction_path): Declare.
        * tree-vect-loop.c (check_reduction_path): New function, split out
        from ...
        (vect_is_simple_reduction): ... here.
        * gimple-loop-interchange.cc: Include tree-vectorizer.h.
        (loop_cand::analyze_iloop_reduction_var): Use single_imm_use.
        Properly check for a supported reduction operation and a
        valid expression if the reduction covers multiple stmts.
        (prepare_perfect_loop_nest): Simpify allocation.
        (pass_linterchange::execute): Likewise.

        * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags.
        * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant.
        * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL.


Index: gcc/gimple-loop-interchange.cc
===================================================================
--- gcc/gimple-loop-interchange.cc      (revision 255375)
+++ gcc/gimple-loop-interchange.cc      (working copy)
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-dce.h"
 #include "tree-data-ref.h"
+#include "tree-vectorizer.h"
 
 /* This pass performs loop interchange: for example, the loop nest
 
@@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (
      in a way that reduction operation is seen as black box.  In general,
      we can ignore reassociation of reduction operator; we can handle fake
      reductions in which VAR is not even used to compute NEXT.  */
-  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
-    {
-      stmt = USE_STMT (use_p);
-      if (is_gimple_debug (stmt))
-       continue;
-
-      if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
-       return false;
-
-      if (single_use != NULL)
-       return false;
+  if (! single_imm_use (var, &use_p, &single_use)
+      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
+    return false;
 
-      single_use = stmt;
-    }
+  /* Check the reduction operation.  We require a commutative or
+     left-associative operation.  For FP math we also need to be allowed
+     to associate operations.  */
+  if (! is_gimple_assign (single_use)
+      || ! (commutative_tree_code (gimple_assign_rhs_code (single_use))
+           || (commutative_ternary_tree_code
+                 (gimple_assign_rhs_code (single_use))
+               && (use_p->use == gimple_assign_rhs1_ptr (single_use)
+                   || use_p->use == gimple_assign_rhs2_ptr (single_use)))
+           || (gimple_assign_rhs_code (single_use) == MINUS_EXPR
+               && use_p->use == gimple_assign_rhs1_ptr (single_use)))
+      || (FLOAT_TYPE_P (TREE_TYPE (var))
+         && ! flag_associative_math))
+    return false;
 
+  /* Handle and verify a series of stmts feeding the reduction op.  */
   if (single_use != next_def
-      && !stmt_dominates_stmt_p (single_use, next_def))
+      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
+                               gimple_assign_rhs_code (single_use)))
     return false;
 
   /* Only support cases in which INIT is used in inner loop.  */
@@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *
                           vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
 {
   struct loop *start_loop = NULL, *innermost = loop;
-  struct loop *outermost = superloop_at_depth (loop, 0);
+  struct loop *outermost = loops_for_fn (cfun)->tree_root;
 
   /* Find loop nest from the innermost loop.  The outermost is the innermost
      outer*/
@@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *
   if (!start_loop || !start_loop->inner)
     return false;
 
+  /* Prepare the data reference vector for the loop nest, pruning outer
+     loops we cannot handle.  */
   datarefs->create (20);
-  if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL
+  start_loop = prepare_data_references (start_loop, datarefs);
+  if (!start_loop
       /* Check if there is no data reference.  */
       || datarefs->is_empty ()
       /* Check if there are too many of data references.  */
-      || (int) datarefs->length () > MAX_DATAREFS
-      /* Compute access strides for all data references.  */
-      || ((start_loop = compute_access_strides (start_loop, innermost,
-                                               *datarefs)) == NULL)
-      /* Check if loop nest should be interchanged.  */
-      || !should_interchange_loop_nest (start_loop, innermost, *datarefs))
-    {
-      free_data_refs_with_aux (*datarefs);
-      return false;
-    }
+      || (int) datarefs->length () > MAX_DATAREFS)
+    return false;
+
+  /* Compute access strides for all data references, pruning outer
+     loops we cannot analyze refs in.  */
+  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
+  if (!start_loop)
+    return false;
+
+  /* Check if any interchange is profitable in the loop nest.  */
+  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
+    return false;
 
   /* Check if data dependences can be computed for loop nest starting from
      start_loop.  */
@@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *
        return true;
       }
 
+    /* ???  We should be able to adjust DDRs we already computed by
+       truncating distance vectors.  */
     free_dependence_relations (*ddrs);
+    *ddrs = vNULL;
     /* Try to compute data dependences with the outermost loop stripped.  */
     loop = start_loop;
     start_loop = start_loop->inner;
   } while (start_loop && start_loop->inner);
 
-  loop_nest->release ();
-  free_data_refs_with_aux (*datarefs);
   return false;
 }
 
@@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu
 
   bool changed_p = false;
   struct loop *loop;
-  vec<loop_p> loop_nest;
-  vec<data_reference_p> datarefs;
-  vec<ddr_p> ddrs;
-
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
-    if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
-      {
-       tree_loop_interchange loop_interchange (loop_nest);
-       changed_p |= loop_interchange.interchange (datarefs, ddrs);
-       free_dependence_relations (ddrs);
-       free_data_refs_with_aux (datarefs);
-       loop_nest.release ();
-      }
+    {
+      vec<loop_p> loop_nest = vNULL;
+      vec<data_reference_p> datarefs = vNULL;
+      vec<ddr_p> ddrs = vNULL;
+      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
+       {
+         tree_loop_interchange loop_interchange (loop_nest);
+         changed_p |= loop_interchange.interchange (datarefs, ddrs);
+       }
+      free_dependence_relations (ddrs);
+      free_data_refs_with_aux (datarefs);
+      loop_nest.release ();
+    }
 
   if (changed_p)
     scev_reset_htab ();
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (revision 255375)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros 
-fno-trapping-math -fdump-tree-linterchange-details" } */
 
 /* Copied from graphite/interchange-4.c */
 
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy)
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+
+/* Copied from graphite/interchange-4.c */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+double u[1782225];
+
+static void __attribute__((noinline))
+foo (int N, double *res)
+{
+  int i, j;
+  double sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum = sum + u[i + 1335 * j];
+
+  *res = sum;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  double res;
+
+  for (i = 0; i < 1782225; i++)
+    u[i] = 0;
+  u[0] = __DBL_MAX__;
+  u[1335] = -__DBL_MAX__;
+  u[1] = __DBL_MAX__;
+  u[1336] = -__DBL_MAX__;
+
+  foo (1335, &res);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 0.0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (revision 255375)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (working copy)
@@ -47,4 +47,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is 
interchanged" 1 "linterchange"} } */
+/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is 
interchanged" 1 "linterchange" { xfail *-*-* } } } */
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h       (revision 255375)
+++ gcc/tree-vectorizer.h       (working copy)
@@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve
 /* FORNOW: Used in tree-parloops.c.  */
 extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *,
                                            bool *, bool);
+/* Used in gimple-loop-interchange.c.  */
+extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
+                                 enum tree_code);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 255375)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo
 }
 
 
+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
+   reduction operation CODE has a handled computation expression.  */
+
+bool
+check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
+                     enum tree_code code)
+{
+  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
+  auto_bitmap visited;
+  tree lookfor = PHI_RESULT (phi);
+  ssa_op_iter curri;
+  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
+  while (USE_FROM_PTR (curr) != loop_arg)
+    curr = op_iter_next_use (&curri);
+  curri.i = curri.numops;
+  do
+    {
+      path.safe_push (std::make_pair (curri, curr));
+      tree use = USE_FROM_PTR (curr);
+      if (use == lookfor)
+       break;
+      gimple *def = SSA_NAME_DEF_STMT (use);
+      if (gimple_nop_p (def)
+         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
+       {
+pop:
+         do
+           {
+             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
+             curri = x.first;
+             curr = x.second;
+             do
+               curr = op_iter_next_use (&curri);
+             /* Skip already visited or non-SSA operands (from iterating
+                over PHI args).  */
+             while (curr != NULL_USE_OPERAND_P
+                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
+                        || ! bitmap_set_bit (visited,
+                                             SSA_NAME_VERSION
+                                               (USE_FROM_PTR (curr)))));
+           }
+         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
+         if (curr == NULL_USE_OPERAND_P)
+           break;
+       }
+      else
+       {
+         if (gimple_code (def) == GIMPLE_PHI)
+           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
+         else
+           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
+         while (curr != NULL_USE_OPERAND_P
+                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
+                    || ! bitmap_set_bit (visited,
+                                         SSA_NAME_VERSION
+                                           (USE_FROM_PTR (curr)))))
+           curr = op_iter_next_use (&curri);
+         if (curr == NULL_USE_OPERAND_P)
+           goto pop;
+       }
+    }
+  while (1);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
+      unsigned i;
+      std::pair<ssa_op_iter, use_operand_p> *x;
+      FOR_EACH_VEC_ELT (path, i, x)
+       {
+         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
+         dump_printf (MSG_NOTE, " ");
+       }
+      dump_printf (MSG_NOTE, "\n");
+    }
+
+  /* Check whether the reduction path detected is valid.  */
+  bool fail = path.length () == 0;
+  bool neg = false;
+  for (unsigned i = 1; i < path.length (); ++i)
+    {
+      gimple *use_stmt = USE_STMT (path[i].second);
+      tree op = USE_FROM_PTR (path[i].second);
+      if (! has_single_use (op)
+         || ! is_gimple_assign (use_stmt))
+       {
+         fail = true;
+         break;
+       }
+      if (gimple_assign_rhs_code (use_stmt) != code)
+       {
+         if (code == PLUS_EXPR
+             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+           {
+             /* Track whether we negate the reduction value each iteration.  */
+             if (gimple_assign_rhs2 (use_stmt) == op)
+               neg = ! neg;
+           }
+         else
+           {
+             fail = true;
+             break;
+           }
+       }
+    }
+  return ! fail && ! neg;
+}
+
+
 /* Function vect_is_simple_reduction
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
@@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   /* Look for the expression computing loop_arg from loop PHI result.  */
-  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
-  auto_bitmap visited;
-  tree lookfor = PHI_RESULT (phi);
-  ssa_op_iter curri;
-  use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi),
-                                           SSA_OP_USE);
-  while (USE_FROM_PTR (curr) != loop_arg)
-    curr = op_iter_next_use (&curri);
-  curri.i = curri.numops;
-  do
-    {
-      path.safe_push (std::make_pair (curri, curr));
-      tree use = USE_FROM_PTR (curr);
-      if (use == lookfor)
-       break;
-      gimple *def = SSA_NAME_DEF_STMT (use);
-      if (gimple_nop_p (def)
-         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
-       {
-pop:
-         do
-           {
-             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
-             curri = x.first;
-             curr = x.second;
-             do
-               curr = op_iter_next_use (&curri);
-             /* Skip already visited or non-SSA operands (from iterating
-                over PHI args).  */
-             while (curr != NULL_USE_OPERAND_P
-                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-                        || ! bitmap_set_bit (visited,
-                                             SSA_NAME_VERSION
-                                               (USE_FROM_PTR (curr)))));
-           }
-         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
-         if (curr == NULL_USE_OPERAND_P)
-           break;
-       }
-      else
-       {
-         if (gimple_code (def) == GIMPLE_PHI)
-           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
-         else
-           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
-         while (curr != NULL_USE_OPERAND_P
-                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-                    || ! bitmap_set_bit (visited,
-                                         SSA_NAME_VERSION
-                                           (USE_FROM_PTR (curr)))))
-           curr = op_iter_next_use (&curri);
-         if (curr == NULL_USE_OPERAND_P)
-           goto pop;
-       }
-    }
-  while (1);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    {
-      dump_printf_loc (MSG_NOTE, vect_location,
-                      "reduction path: ");
-      unsigned i;
-      std::pair<ssa_op_iter, use_operand_p> *x;
-      FOR_EACH_VEC_ELT (path, i, x)
-       {
-         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
-         dump_printf (MSG_NOTE, " ");
-       }
-      dump_printf (MSG_NOTE, "\n");
-    }
-
-  /* Check whether the reduction path detected is valid.  */
-  bool fail = path.length () == 0;
-  bool neg = false;
-  for (unsigned i = 1; i < path.length (); ++i)
-    {
-      gimple *use_stmt = USE_STMT (path[i].second);
-      tree op = USE_FROM_PTR (path[i].second);
-      if (! has_single_use (op)
-         || ! is_gimple_assign (use_stmt))
-       {
-         fail = true;
-         break;
-       }
-      if (gimple_assign_rhs_code (use_stmt) != code)
-       {
-         if (code == PLUS_EXPR
-             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
-           {
-             /* Track whether we negate the reduction value each iteration.  */
-             if (gimple_assign_rhs2 (use_stmt) == op)
-               neg = ! neg;
-           }
-         else
-           {
-             fail = true;
-             break;
-           }
-       }
-    }
-  if (! fail && ! neg)
+  if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg,
+                           code))
     return def_stmt;
 
   if (dump_enabled_p ())

Reply via email to