This patch fixes a bug in the RTL doloop pass that showed as timeouts
of gcc.c-torture/execute/961017-1.c execution on slow targets because
a 256-iteration loop was replaced with a 2^32-iteration loop (if the
test did not time out, it would still pass as it didn't contain any
checks on the number of iterations).  The testcases included with the
patch are self-checking testcases that will reliably fail on affected
targets (if the rest of the patch is not applied), aborting if they do
not time out.  Affected targets include sh-linux-gnu and
powerpc-linux-gnu.

The replacement occurs in the RTL doloop pass (loop-doloop.c).  Recall
that RTL CONST_INTs do not have modes.  The number of iterations of
the loop (appropriately defined) is calculated as (const_int -1) -
implicitly QImode.  It might seem appropriate for
loop-iv.c:iv_number_of_iterations, where it does

  if (CONST_INT_P (desc->niter_expr))
    {
      unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);

      desc->const_iter = true;
      desc->niter_max = desc->niter = val & GET_MODE_MASK
      (desc->mode);
    }

to adjust desc->niter_expr using the mask in the same way (i.e.
desc->niter_expr = GEN_INT (desc->niter);).  But that is neither
necessary nor sufficient to fix the bug.  It changes the number of
iterations to the correct (const_int 255).  But whether the number is
given as 255 or -1, doloop_modify is entered with zero_extend_p ==
true and from_mode == QImode.  The code there then determines that it
needs to increment the count - and does so in QImode, which in either
case produces 0, before then zero-extending to SImode.

This code for doing the increment in from_mode comes from the fix for
PR 37451 and the follow-up fix for PR 37782
<http://gcc.gnu.org/ml/gcc-patches/2008-09/msg01070.html>
<http://gcc.gnu.org/ml/gcc-patches/2008-10/msg01321.html>.  As far as
I can tell the idea of those changes - which were an attempt to
improve optimization - is simply broken when the loop might have
maximum length like this (which in the original PR 37451 case it
can't, but telling that in this code would be nontrivial) - including
the case of nonconstant length as well as that of constant length.

So this patch reverts both those previous patches and adds testcases
to demonstrate the problem they caused.  Bootstrapped with no
regressions on powerpc-linux-gnu.  OK to commit?

(If the patch holds up on trunk I'd propose it for 4.6 and 4.5 branches as 
well, as a wrong-code regression fix.)

2011-11-25  Joseph Myers  <jos...@codesourcery.com>

        Revert:

        2008-09-18  Andrew Pinski  <andrew_pin...@playstation.sony.com>

        PR rtl-opt/37451
        * loop-doloop.c (doloop_modify): New argument zero_extend_p and
        zero extend count after the correction to it is done.
        (doloop_optimize): Update call to doloop_modify, don't zero extend
        count before call.

        2008-11-03  Andrew Pinski  <andrew_pin...@playstation.sony.com>

        PR rtl-opt/37782
        * loop-doloop.c (doloop_modify): Add from_mode argument that says what
        mode count is in.
        (doloop_optimize): Update call to doloop_modify.

testsuite:
2011-11-25  Joseph Myers  <jos...@codesourcery.com>

        * gcc.c-torture/execute/doloop-1.c,
        gcc.c-torture/execute/doloop-2.c: New tests.

Index: testsuite/gcc.c-torture/execute/doloop-1.c
===================================================================
--- testsuite/gcc.c-torture/execute/doloop-1.c  (revision 0)
+++ testsuite/gcc.c-torture/execute/doloop-1.c  (revision 0)
@@ -0,0 +1,18 @@
+#include <limits.h>
+
+extern void exit (int);
+extern void abort (void);
+
+volatile unsigned int i;
+
+int
+main (void)
+{
+  unsigned char z = 0;
+
+  do ++i;
+  while (--z > 0);
+  if (i != UCHAR_MAX + 1U)
+    abort ();
+  exit (0);
+}
Index: testsuite/gcc.c-torture/execute/doloop-2.c
===================================================================
--- testsuite/gcc.c-torture/execute/doloop-2.c  (revision 0)
+++ testsuite/gcc.c-torture/execute/doloop-2.c  (revision 0)
@@ -0,0 +1,18 @@
+#include <limits.h>
+
+extern void exit (int);
+extern void abort (void);
+
+volatile unsigned int i;
+
+int
+main (void)
+{
+  unsigned short z = 0;
+
+  do ++i;
+  while (--z > 0);
+  if (i != USHRT_MAX + 1U)
+    abort ();
+  exit (0);
+}
Index: loop-doloop.c
===================================================================
--- loop-doloop.c       (revision 181697)
+++ loop-doloop.c       (working copy)
@@ -394,14 +394,11 @@ add_test (rtx cond, edge *e, basic_block
    describes the loop, DESC describes the number of iterations of the
    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
    end of the loop.  CONDITION is the condition separated from the
-   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.
-   ZERO_EXTEND_P says to zero extend COUNT after the increment of it to
-   word_mode from FROM_MODE.  */
+   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
 
 static void
 doloop_modify (struct loop *loop, struct niter_desc *desc,
-              rtx doloop_seq, rtx condition, rtx count,
-              bool zero_extend_p, enum machine_mode from_mode)
+              rtx doloop_seq, rtx condition, rtx count)
 {
   rtx counter_reg;
   rtx tmp, noloop = NULL_RTX;
@@ -475,11 +472,7 @@ doloop_modify (struct loop *loop, struct
     }
 
   if (increment_count)
-    count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx);
-
-  if (zero_extend_p)
-    count = simplify_gen_unary (ZERO_EXTEND, word_mode,
-                               count, from_mode);
+    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
 
   /* Insert initialization of the count register into the loop header.  */
   start_sequence ();
@@ -615,7 +608,6 @@ doloop_optimize (struct loop *loop)
   struct niter_desc *desc;
   unsigned word_mode_size;
   unsigned HOST_WIDE_INT word_mode_max;
-  bool zero_extend_p = false;
 
   if (dump_file)
     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
@@ -690,7 +682,8 @@ doloop_optimize (struct loop *loop)
     {
       if (word_mode_size > GET_MODE_PRECISION (mode))
        {
-         zero_extend_p = true;
+         count = simplify_gen_unary (ZERO_EXTEND, word_mode,
+                                     count, mode);
          iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
                                           iterations, mode);
          iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
@@ -734,8 +727,7 @@ doloop_optimize (struct loop *loop)
       return false;
     }
 
-  doloop_modify (loop, desc, doloop_seq, condition, count,
-                zero_extend_p, mode);
+  doloop_modify (loop, desc, doloop_seq, condition, count);
   return true;
 }
 

-- 
Joseph S. Myers
jos...@codesourcery.com

Reply via email to