Re: [PATCH][PR tree-optimization/pr69196] Clamp number of statements to copy when not threading across a loop backedge
On 2 March 2016 at 00:12, Jeff Law wrote: > > This patch clamps the number of statements to copy when not threading across > a loop backedge in the FSM/backwards threader. This fixes the bulk of the > regression in 69196. I'm still evaluating whether or not 69196 can be > closed, but at the least it's a P2 now. > > Bootstrapped and regression tested on x86_64-linux-gnu. Also verified the > size of the testcase in the BZ was drastically reduced. Installing on the > trunk. > > The best idea I've got for further improvements would be to sort the FSM > threads based on those which are perceived to be the most profitable. That > would cause certain less profitable jump threads to be abandoned. > > It also appears that the threads through the switch statement ought to be > realized -- my recollection was those were profitable both from a codesize > and a speed standpoint. It may be the case that the path to realize those > is just too long after the other jump threads are handled. > > But my overall advice would be that if you care about codesize, use -Os > which will disable most jump threading optimizations. While the specific > test included in the BZ wasn't improved by jump threading, in general jump > threading has been shown to be profitable in an of itself with nice > secondary benefits as well. > > Jeff > > commit 3d54e848883bc7c510c41b718aabe1771cb16fbf > Author: law > Date: Tue Mar 1 23:12:10 2016 + > > PR tree-optimization/69196 > * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > Appropriately clamp the number of statements to copy when the > thread path does not traverse a loop backedge. > > PR tree-optimization/69196 > * gcc.dg/tree-ssa/pr69196.c: New test. > Hi, This new test (the name is pr69196-1.c BTW), fails for target arm-none-linux-gnueabihf --with-cpu cortex-a5 FAIL: gcc.dg/tree-ssa/pr69196-1.c scan-tree-dump vrp1 "FSM did not thread around loop and would copy too many statements Christophe. > git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870 > 138bc75d-0d04-0410-961f-82ee72b054a4 > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 982a7c0..88f0806 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -8,6 +8,11 @@ > > PR tree-optimization/69196 > * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > + Appropriately clamp the number of statements to copy when the > + thread path does not traverse a loop backedge. > + > + PR tree-optimization/69196 > + * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > Do count some PHIs in the thread path against the insn count. > Decrease > final statement count by one as the control statement in the last > block will get removed. Remove special cased code for handling PHIs > in the last block. > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index bdc4b11..1e6a850 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -5,6 +5,9 @@ > 2016-03-01 Jeff Law > > PR tree-optimization/69196 > + * gcc.dg/tree-ssa/pr69196.c: New test. > + > + PR tree-optimization/69196 > * gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from > duplicating code and spoiling the expected output. > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > new file mode 100644 > index 000..11c7cf5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > @@ -0,0 +1,138 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ > + > +/* { dg-final { scan-tree-dump "FSM did not thread around loop and would > copy too many statements" "vrp1" } } */ > + > + > +typedef __builtin_va_list __gnuc_va_list; > +typedef __gnuc_va_list va_list; > +extern void rtems_putc(char c); > + > +void vprintk( > + const char *fmt, > + va_list ap > +) > +{ > + for (; *fmt != '\0'; fmt++) { > +unsigned base = 0; > +unsigned width = 0; > +enum { > + LFLAG_INT, > + LFLAG_LONG, > + LFLAG_LONG_LONG > +} lflag = LFLAG_INT; > +_Bool minus = 0; > +_Bool sign = 0; > +char lead = ' '; > +char c = *fmt; > +long long num; > + > +if (c != '%') { > + rtems_putc(c); > + continue; > +} > + > +++fmt; c = *fmt; > + > +if (c == '0') { > + lead = '0'; > + ++fmt; c = *fmt; > +} > + > +if (c == '-') { > + minus = 1; > + ++fmt; c = *fmt; > +} > + > +while (c >= '0' && c <= '9' ) { > + width *= 10; > + width += ((unsigned) c - '0'); > + ++fmt; c = *fmt; > +} > + > +if (c == 'l') { > + lflag = LFLAG_LONG; > + ++fmt; c = *fmt; > + > + if (c == 'l') { > +lflag = LFLAG_LONG_LONG; > +++fmt; c = *fmt; > + } > +} > + > +if ( c == 'c' ) { > + > + char chr = (char) __bui
[PATCH][PR tree-optimization/pr69196] Clamp number of statements to copy when not threading across a loop backedge
This patch clamps the number of statements to copy when not threading across a loop backedge in the FSM/backwards threader. This fixes the bulk of the regression in 69196. I'm still evaluating whether or not 69196 can be closed, but at the least it's a P2 now. Bootstrapped and regression tested on x86_64-linux-gnu. Also verified the size of the testcase in the BZ was drastically reduced. Installing on the trunk. The best idea I've got for further improvements would be to sort the FSM threads based on those which are perceived to be the most profitable. That would cause certain less profitable jump threads to be abandoned. It also appears that the threads through the switch statement ought to be realized -- my recollection was those were profitable both from a codesize and a speed standpoint. It may be the case that the path to realize those is just too long after the other jump threads are handled. But my overall advice would be that if you care about codesize, use -Os which will disable most jump threading optimizations. While the specific test included in the BZ wasn't improved by jump threading, in general jump threading has been shown to be profitable in an of itself with nice secondary benefits as well. Jeff commit 3d54e848883bc7c510c41b718aabe1771cb16fbf Author: law Date: Tue Mar 1 23:12:10 2016 + PR tree-optimization/69196 * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths): Appropriately clamp the number of statements to copy when the thread path does not traverse a loop backedge. PR tree-optimization/69196 * gcc.dg/tree-ssa/pr69196.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 982a7c0..88f0806 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -8,6 +8,11 @@ PR tree-optimization/69196 * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths): + Appropriately clamp the number of statements to copy when the + thread path does not traverse a loop backedge. + + PR tree-optimization/69196 + * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths): Do count some PHIs in the thread path against the insn count. Decrease final statement count by one as the control statement in the last block will get removed. Remove special cased code for handling PHIs in the last block. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bdc4b11..1e6a850 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -5,6 +5,9 @@ 2016-03-01 Jeff Law PR tree-optimization/69196 + * gcc.dg/tree-ssa/pr69196.c: New test. + + PR tree-optimization/69196 * gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from duplicating code and spoiling the expected output. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c new file mode 100644 index 000..11c7cf5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c @@ -0,0 +1,138 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ + +/* { dg-final { scan-tree-dump "FSM did not thread around loop and would copy too many statements" "vrp1" } } */ + + +typedef __builtin_va_list __gnuc_va_list; +typedef __gnuc_va_list va_list; +extern void rtems_putc(char c); + +void vprintk( + const char *fmt, + va_list ap +) +{ + for (; *fmt != '\0'; fmt++) { +unsigned base = 0; +unsigned width = 0; +enum { + LFLAG_INT, + LFLAG_LONG, + LFLAG_LONG_LONG +} lflag = LFLAG_INT; +_Bool minus = 0; +_Bool sign = 0; +char lead = ' '; +char c = *fmt; +long long num; + +if (c != '%') { + rtems_putc(c); + continue; +} + +++fmt; c = *fmt; + +if (c == '0') { + lead = '0'; + ++fmt; c = *fmt; +} + +if (c == '-') { + minus = 1; + ++fmt; c = *fmt; +} + +while (c >= '0' && c <= '9' ) { + width *= 10; + width += ((unsigned) c - '0'); + ++fmt; c = *fmt; +} + +if (c == 'l') { + lflag = LFLAG_LONG; + ++fmt; c = *fmt; + + if (c == 'l') { +lflag = LFLAG_LONG_LONG; +++fmt; c = *fmt; + } +} + +if ( c == 'c' ) { + + char chr = (char) __builtin_va_arg(ap,int); + rtems_putc(chr); + continue; +} + +if ( c == 's' ) { + unsigned i, len; + char *s, *str; + + str = __builtin_va_arg(ap,char *); + + if ( str == ((void *)0) ) { +str = ""; + } + + + for ( len=0, s=str ; *s ; len++, s++ ) +; + + + if ( !minus ) +for ( i=len ; i= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, +