This patch clamps the number of statements to copy when not threading across a loop backedge in the FSM/backwards threader. This fixes the bulk of the regression in 69196. I'm still evaluating whether or not 69196 can be closed, but at the least it's a P2 now.

Bootstrapped and regression tested on x86_64-linux-gnu. Also verified the size of the testcase in the BZ was drastically reduced. Installing on the trunk.

The best idea I've got for further improvements would be to sort the FSM threads based on those which are perceived to be the most profitable. That would cause certain less profitable jump threads to be abandoned.

It also appears that the threads through the switch statement ought to be realized -- my recollection was those were profitable both from a codesize and a speed standpoint. It may be the case that the path to realize those is just too long after the other jump threads are handled.

But my overall advice would be that if you care about codesize, use -Os which will disable most jump threading optimizations. While the specific test included in the BZ wasn't improved by jump threading, in general jump threading has been shown to be profitable in an of itself with nice secondary benefits as well.

Jeff
commit 3d54e848883bc7c510c41b718aabe1771cb16fbf
Author: law <law@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Mar 1 23:12:10 2016 +0000

        PR tree-optimization/69196
        * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
        Appropriately clamp the number of statements to copy when the
        thread path does not traverse a loop backedge.
    
        PR tree-optimization/69196
        * gcc.dg/tree-ssa/pr69196.c: New test.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 982a7c0..88f0806 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -8,6 +8,11 @@
 
        PR tree-optimization/69196
        * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+       Appropriately clamp the number of statements to copy when the
+       thread path does not traverse a loop backedge.
+
+       PR tree-optimization/69196
+       * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
        Do count some PHIs in the thread path against the insn count.  Decrease
        final statement count by one as the control statement in the last
        block will get removed.  Remove special cased code for handling PHIs    
        in the last block.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bdc4b11..1e6a850 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -5,6 +5,9 @@
 2016-03-01  Jeff Law  <l...@redhat.com>
 
        PR tree-optimization/69196
+       * gcc.dg/tree-ssa/pr69196.c: New test.
+
+       PR tree-optimization/69196
        * gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from
        duplicating code and spoiling the expected output.
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
new file mode 100644
index 0000000..11c7cf5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
@@ -0,0 +1,138 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+
+/* { dg-final { scan-tree-dump "FSM did not thread around loop and would copy 
too many statements" "vrp1" } } */
+
+
+typedef __builtin_va_list __gnuc_va_list;
+typedef __gnuc_va_list va_list;
+extern void rtems_putc(char c);
+
+void vprintk(
+  const char *fmt,
+  va_list ap
+)
+{
+  for (; *fmt != '\0'; fmt++) {
+    unsigned base = 0;
+    unsigned width = 0;
+    enum {
+      LFLAG_INT,
+      LFLAG_LONG,
+      LFLAG_LONG_LONG
+    } lflag = LFLAG_INT;
+    _Bool minus = 0;
+    _Bool sign = 0;
+    char lead = ' ';
+    char c = *fmt;
+    long long num;
+
+    if (c != '%') {
+      rtems_putc(c);
+      continue;
+    }
+
+    ++fmt; c = *fmt;
+
+    if (c == '0') {
+      lead = '0';
+      ++fmt; c = *fmt;
+    }
+
+    if (c == '-') {
+      minus = 1;
+      ++fmt; c = *fmt;
+    }
+
+    while (c >= '0' && c <= '9' ) {
+      width *= 10;
+      width += ((unsigned) c - '0');
+      ++fmt; c = *fmt;
+    }
+
+    if (c == 'l') {
+      lflag = LFLAG_LONG;
+      ++fmt; c = *fmt;
+
+      if (c == 'l') {
+        lflag = LFLAG_LONG_LONG;
+        ++fmt; c = *fmt;
+      }
+    }
+
+    if ( c == 'c' ) {
+
+      char chr = (char) __builtin_va_arg(ap,int);
+      rtems_putc(chr);
+      continue;
+    }
+
+    if ( c == 's' ) {
+      unsigned i, len;
+      char *s, *str;
+
+      str = __builtin_va_arg(ap,char *);
+
+      if ( str == ((void *)0) ) {
+        str = "";
+      }
+
+
+      for ( len=0, s=str ; *s ; len++, s++ )
+        ;
+
+
+      if ( !minus )
+        for ( i=len ; i<width ; i++ )
+          rtems_putc(' ');
+
+
+      if (width == 0) {
+          width = len;
+      }
+
+
+      for ( i=0 ; i<width && *str ; str++ )
+        rtems_putc(*str);
+
+
+      if ( minus )
+        for ( i=len ; i<width ; i++ )
+          rtems_putc(' ');
+
+      continue;
+    }
+
+
+    if ( c == 'o' || c == 'O' ) {
+      base = 8; sign = 0;
+    } else if ( c == 'i' || c == 'I' ||
+                c == 'd' || c == 'D' ) {
+      base = 10; sign = 1;
+    } else if ( c == 'u' || c == 'U' ) {
+      base = 10; sign = 0;
+    } else if ( c == 'x' || c == 'X' ) {
+      base = 16; sign = 0;
+    } else if ( c == 'p' ) {
+      base = 16; sign = 0; lflag = LFLAG_LONG;
+    } else {
+      rtems_putc(c);
+      continue;
+    }
+
+    switch (lflag) {
+      case LFLAG_LONG:
+        num = sign ? (long long) __builtin_va_arg(ap,long)
+          : (long long) __builtin_va_arg(ap,unsigned long);
+        break;
+      case LFLAG_LONG_LONG:
+        num = __builtin_va_arg(ap,long long);
+        break;
+      case LFLAG_INT:
+      default:
+        num = sign ? (long long) __builtin_va_arg(ap,int)
+          : (long long) __builtin_va_arg(ap,unsigned int);
+        break;
+    }
+  }
+}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 3028504..747296b 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -429,6 +429,26 @@ fsm_find_control_statement_thread_paths (tree name,
              continue;
            }
 
+
+         /* If this path does not thread through the loop latch, then we are
+            using the FSM threader to find old style jump threads.  This
+            is good, except the FSM threader does not re-use an existing
+            threading path to reduce code duplication.
+
+            So for that case, drastically reduce the number of statements
+            we are allowed to copy.  */
+         if (!threaded_through_latch
+             && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS)
+                 >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file,
+                        "FSM did not thread around loop and would copy too "
+                        "many statements.\n");
+             path->pop ();
+             continue;
+           }
+
          /* When there is a multi-way branch on the path, then threading can
             explode the CFG due to duplicating the edges for that multi-way
             branch.  So like above, only allow a multi-way branch on the path

Reply via email to