Re: [PATCH][PR tree-optimization/pr69196] Clamp number of statements to copy when not threading across a loop backedge

2016-03-02 Thread Christophe Lyon
On 2 March 2016 at 00:12, Jeff Law  wrote:
>
> This patch clamps the number of statements to copy when not threading across
> a loop backedge in the FSM/backwards threader.  This fixes the bulk of the
> regression in 69196.  I'm still evaluating whether or not 69196 can be
> closed, but at the least it's a P2 now.
>
> Bootstrapped and regression tested on x86_64-linux-gnu.  Also verified the
> size of the testcase in the BZ was drastically reduced.  Installing on the
> trunk.
>
> The best idea I've got for further improvements would be to sort the FSM
> threads based on those which are perceived to be the most profitable. That
> would cause certain less profitable jump threads to be abandoned.
>
> It also appears that the threads through the switch statement ought to be
> realized -- my recollection was those were profitable both from a codesize
> and a speed standpoint.  It may be the case that the path to realize those
> is just too long after the other jump threads are handled.
>
> But my overall advice would be that if you care about codesize, use -Os
> which will disable most jump threading optimizations.  While the specific
> test included in the BZ wasn't improved by jump threading, in general jump
> threading has been shown to be profitable in an of itself with nice
> secondary benefits as well.
>
> Jeff
>
> commit 3d54e848883bc7c510c41b718aabe1771cb16fbf
> Author: law 
> Date:   Tue Mar 1 23:12:10 2016 +
>
> PR tree-optimization/69196
> * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> Appropriately clamp the number of statements to copy when the
> thread path does not traverse a loop backedge.
>
> PR tree-optimization/69196
> * gcc.dg/tree-ssa/pr69196.c: New test.
>
Hi,

This new test (the name is pr69196-1.c BTW), fails
for target arm-none-linux-gnueabihf
--with-cpu cortex-a5

FAIL: gcc.dg/tree-ssa/pr69196-1.c scan-tree-dump vrp1 "FSM did not
thread around loop and would copy too many statements

Christophe.


> git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870
> 138bc75d-0d04-0410-961f-82ee72b054a4
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 982a7c0..88f0806 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -8,6 +8,11 @@
>
> PR tree-optimization/69196
> * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> +   Appropriately clamp the number of statements to copy when the
> +   thread path does not traverse a loop backedge.
> +
> +   PR tree-optimization/69196
> +   * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> Do count some PHIs in the thread path against the insn count.
> Decrease
> final statement count by one as the control statement in the last
> block will get removed.  Remove special cased code for handling PHIs
> in the last block.
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index bdc4b11..1e6a850 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -5,6 +5,9 @@
>  2016-03-01  Jeff Law  
>
> PR tree-optimization/69196
> +   * gcc.dg/tree-ssa/pr69196.c: New test.
> +
> +   PR tree-optimization/69196
> * gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from
> duplicating code and spoiling the expected output.
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
> new file mode 100644
> index 000..11c7cf5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
> @@ -0,0 +1,138 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +
> +/* { dg-final { scan-tree-dump "FSM did not thread around loop and would
> copy too many statements" "vrp1" } } */
> +
> +
> +typedef __builtin_va_list __gnuc_va_list;
> +typedef __gnuc_va_list va_list;
> +extern void rtems_putc(char c);
> +
> +void vprintk(
> +  const char *fmt,
> +  va_list ap
> +)
> +{
> +  for (; *fmt != '\0'; fmt++) {
> +unsigned base = 0;
> +unsigned width = 0;
> +enum {
> +  LFLAG_INT,
> +  LFLAG_LONG,
> +  LFLAG_LONG_LONG
> +} lflag = LFLAG_INT;
> +_Bool minus = 0;
> +_Bool sign = 0;
> +char lead = ' ';
> +char c = *fmt;
> +long long num;
> +
> +if (c != '%') {
> +  rtems_putc(c);
> +  continue;
> +}
> +
> +++fmt; c = *fmt;
> +
> +if (c == '0') {
> +  lead = '0';
> +  ++fmt; c = *fmt;
> +}
> +
> +if (c == '-') {
> +  minus = 1;
> +  ++fmt; c = *fmt;
> +}
> +
> +while (c >= '0' && c <= '9' ) {
> +  width *= 10;
> +  width += ((unsigned) c - '0');
> +  ++fmt; c = *fmt;
> +}
> +
> +if (c == 'l') {
> +  lflag = LFLAG_LONG;
> +  ++fmt; c = *fmt;
> +
> +  if (c == 'l') {
> +lflag = LFLAG_LONG_LONG;
> +++fmt; c = *fmt;
> +  }
> +}
> +
> +if ( c == 'c' ) {
> +
> +  char chr = (char) __bui

[PATCH][PR tree-optimization/pr69196] Clamp number of statements to copy when not threading across a loop backedge

2016-03-01 Thread Jeff Law


This patch clamps the number of statements to copy when not threading 
across a loop backedge in the FSM/backwards threader.  This fixes the 
bulk of the regression in 69196.  I'm still evaluating whether or not 
69196 can be closed, but at the least it's a P2 now.


Bootstrapped and regression tested on x86_64-linux-gnu.  Also verified 
the size of the testcase in the BZ was drastically reduced.  Installing 
on the trunk.


The best idea I've got for further improvements would be to sort the FSM 
threads based on those which are perceived to be the most profitable. 
That would cause certain less profitable jump threads to be abandoned.


It also appears that the threads through the switch statement ought to 
be realized -- my recollection was those were profitable both from a 
codesize and a speed standpoint.  It may be the case that the path to 
realize those is just too long after the other jump threads are handled.


But my overall advice would be that if you care about codesize, use -Os 
which will disable most jump threading optimizations.  While the 
specific test included in the BZ wasn't improved by jump threading, in 
general jump threading has been shown to be profitable in an of itself 
with nice secondary benefits as well.


Jeff
commit 3d54e848883bc7c510c41b718aabe1771cb16fbf
Author: law 
Date:   Tue Mar 1 23:12:10 2016 +

PR tree-optimization/69196
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Appropriately clamp the number of statements to copy when the
thread path does not traverse a loop backedge.

PR tree-optimization/69196
* gcc.dg/tree-ssa/pr69196.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 982a7c0..88f0806 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -8,6 +8,11 @@
 
PR tree-optimization/69196
* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+   Appropriately clamp the number of statements to copy when the
+   thread path does not traverse a loop backedge.
+
+   PR tree-optimization/69196
+   * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Do count some PHIs in the thread path against the insn count.  Decrease
final statement count by one as the control statement in the last
block will get removed.  Remove special cased code for handling PHIs
in the last block.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bdc4b11..1e6a850 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -5,6 +5,9 @@
 2016-03-01  Jeff Law  
 
PR tree-optimization/69196
+   * gcc.dg/tree-ssa/pr69196.c: New test.
+
+   PR tree-optimization/69196
* gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from
duplicating code and spoiling the expected output.
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
new file mode 100644
index 000..11c7cf5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c
@@ -0,0 +1,138 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+
+/* { dg-final { scan-tree-dump "FSM did not thread around loop and would copy 
too many statements" "vrp1" } } */
+
+
+typedef __builtin_va_list __gnuc_va_list;
+typedef __gnuc_va_list va_list;
+extern void rtems_putc(char c);
+
+void vprintk(
+  const char *fmt,
+  va_list ap
+)
+{
+  for (; *fmt != '\0'; fmt++) {
+unsigned base = 0;
+unsigned width = 0;
+enum {
+  LFLAG_INT,
+  LFLAG_LONG,
+  LFLAG_LONG_LONG
+} lflag = LFLAG_INT;
+_Bool minus = 0;
+_Bool sign = 0;
+char lead = ' ';
+char c = *fmt;
+long long num;
+
+if (c != '%') {
+  rtems_putc(c);
+  continue;
+}
+
+++fmt; c = *fmt;
+
+if (c == '0') {
+  lead = '0';
+  ++fmt; c = *fmt;
+}
+
+if (c == '-') {
+  minus = 1;
+  ++fmt; c = *fmt;
+}
+
+while (c >= '0' && c <= '9' ) {
+  width *= 10;
+  width += ((unsigned) c - '0');
+  ++fmt; c = *fmt;
+}
+
+if (c == 'l') {
+  lflag = LFLAG_LONG;
+  ++fmt; c = *fmt;
+
+  if (c == 'l') {
+lflag = LFLAG_LONG_LONG;
+++fmt; c = *fmt;
+  }
+}
+
+if ( c == 'c' ) {
+
+  char chr = (char) __builtin_va_arg(ap,int);
+  rtems_putc(chr);
+  continue;
+}
+
+if ( c == 's' ) {
+  unsigned i, len;
+  char *s, *str;
+
+  str = __builtin_va_arg(ap,char *);
+
+  if ( str == ((void *)0) ) {
+str = "";
+  }
+
+
+  for ( len=0, s=str ; *s ; len++, s++ )
+;
+
+
+  if ( !minus )
+for ( i=len ; i= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS)))
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+