On 2 March 2016 at 00:12, Jeff Law <l...@redhat.com> wrote: > > This patch clamps the number of statements to copy when not threading across > a loop backedge in the FSM/backwards threader. This fixes the bulk of the > regression in 69196. I'm still evaluating whether or not 69196 can be > closed, but at the least it's a P2 now. > > Bootstrapped and regression tested on x86_64-linux-gnu. Also verified the > size of the testcase in the BZ was drastically reduced. Installing on the > trunk. > > The best idea I've got for further improvements would be to sort the FSM > threads based on those which are perceived to be the most profitable. That > would cause certain less profitable jump threads to be abandoned. > > It also appears that the threads through the switch statement ought to be > realized -- my recollection was those were profitable both from a codesize > and a speed standpoint. It may be the case that the path to realize those > is just too long after the other jump threads are handled. > > But my overall advice would be that if you care about codesize, use -Os > which will disable most jump threading optimizations. While the specific > test included in the BZ wasn't improved by jump threading, in general jump > threading has been shown to be profitable in an of itself with nice > secondary benefits as well. > > Jeff > > commit 3d54e848883bc7c510c41b718aabe1771cb16fbf > Author: law <law@138bc75d-0d04-0410-961f-82ee72b054a4> > Date: Tue Mar 1 23:12:10 2016 +0000 > > PR tree-optimization/69196 > * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > Appropriately clamp the number of statements to copy when the > thread path does not traverse a loop backedge. > > PR tree-optimization/69196 > * gcc.dg/tree-ssa/pr69196.c: New test. > Hi,
This new test (the name is pr69196-1.c BTW), fails for target arm-none-linux-gnueabihf --with-cpu cortex-a5 FAIL: gcc.dg/tree-ssa/pr69196-1.c scan-tree-dump vrp1 "FSM did not thread around loop and would copy too many statements Christophe. > git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233870 > 138bc75d-0d04-0410-961f-82ee72b054a4 > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 982a7c0..88f0806 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -8,6 +8,11 @@ > > PR tree-optimization/69196 > * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > + Appropriately clamp the number of statements to copy when the > + thread path does not traverse a loop backedge. > + > + PR tree-optimization/69196 > + * tree-ssa-threadbackward.c > (fsm_find_control_statement_thread_paths): > Do count some PHIs in the thread path against the insn count. > Decrease > final statement count by one as the control statement in the last > block will get removed. Remove special cased code for handling PHIs > in the last block. > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index bdc4b11..1e6a850 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -5,6 +5,9 @@ > 2016-03-01 Jeff Law <l...@redhat.com> > > PR tree-optimization/69196 > + * gcc.dg/tree-ssa/pr69196.c: New test. > + > + PR tree-optimization/69196 > * gcc.dg/tree-ssa/vrp46.c: Twiddle threading params to keep it from > duplicating code and spoiling the expected output. > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > new file mode 100644 > index 0000000..11c7cf5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69196-1.c > @@ -0,0 +1,138 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ > + > +/* { dg-final { scan-tree-dump "FSM did not thread around loop and would > copy too many statements" "vrp1" } } */ > + > + > +typedef __builtin_va_list __gnuc_va_list; > +typedef __gnuc_va_list va_list; > +extern void rtems_putc(char c); > + > +void vprintk( > + const char *fmt, > + va_list ap > +) > +{ > + for (; *fmt != '\0'; fmt++) { > + unsigned base = 0; > + unsigned width = 0; > + enum { > + LFLAG_INT, > + LFLAG_LONG, > + LFLAG_LONG_LONG > + } lflag = LFLAG_INT; > + _Bool minus = 0; > + _Bool sign = 0; > + char lead = ' '; > + char c = *fmt; > + long long num; > + > + if (c != '%') { > + rtems_putc(c); > + continue; > + } > + > + ++fmt; c = *fmt; > + > + if (c == '0') { > + lead = '0'; > + ++fmt; c = *fmt; > + } > + > + if (c == '-') { > + minus = 1; > + ++fmt; c = *fmt; > + } > + > + while (c >= '0' && c <= '9' ) { > + width *= 10; > + width += ((unsigned) c - '0'); > + ++fmt; c = *fmt; > + } > + > + if (c == 'l') { > + lflag = LFLAG_LONG; > + ++fmt; c = *fmt; > + > + if (c == 'l') { > + lflag = LFLAG_LONG_LONG; > + ++fmt; c = *fmt; > + } > + } > + > + if ( c == 'c' ) { > + > + char chr = (char) __builtin_va_arg(ap,int); > + rtems_putc(chr); > + continue; > + } > + > + if ( c == 's' ) { > + unsigned i, len; > + char *s, *str; > + > + str = __builtin_va_arg(ap,char *); > + > + if ( str == ((void *)0) ) { > + str = ""; > + } > + > + > + for ( len=0, s=str ; *s ; len++, s++ ) > + ; > + > + > + if ( !minus ) > + for ( i=len ; i<width ; i++ ) > + rtems_putc(' '); > + > + > + if (width == 0) { > + width = len; > + } > + > + > + for ( i=0 ; i<width && *str ; str++ ) > + rtems_putc(*str); > + > + > + if ( minus ) > + for ( i=len ; i<width ; i++ ) > + rtems_putc(' '); > + > + continue; > + } > + > + > + if ( c == 'o' || c == 'O' ) { > + base = 8; sign = 0; > + } else if ( c == 'i' || c == 'I' || > + c == 'd' || c == 'D' ) { > + base = 10; sign = 1; > + } else if ( c == 'u' || c == 'U' ) { > + base = 10; sign = 0; > + } else if ( c == 'x' || c == 'X' ) { > + base = 16; sign = 0; > + } else if ( c == 'p' ) { > + base = 16; sign = 0; lflag = LFLAG_LONG; > + } else { > + rtems_putc(c); > + continue; > + } > + > + switch (lflag) { > + case LFLAG_LONG: > + num = sign ? (long long) __builtin_va_arg(ap,long) > + : (long long) __builtin_va_arg(ap,unsigned long); > + break; > + case LFLAG_LONG_LONG: > + num = __builtin_va_arg(ap,long long); > + break; > + case LFLAG_INT: > + default: > + num = sign ? (long long) __builtin_va_arg(ap,int) > + : (long long) __builtin_va_arg(ap,unsigned int); > + break; > + } > + } > +} > diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c > index 3028504..747296b 100644 > --- a/gcc/tree-ssa-threadbackward.c > +++ b/gcc/tree-ssa-threadbackward.c > @@ -429,6 +429,26 @@ fsm_find_control_statement_thread_paths (tree name, > continue; > } > > + > + /* If this path does not thread through the loop latch, then we > are > + using the FSM threader to find old style jump threads. This > + is good, except the FSM threader does not re-use an existing > + threading path to reduce code duplication. > + > + So for that case, drastically reduce the number of statements > + we are allowed to copy. */ > + if (!threaded_through_latch > + && (n_insns * PARAM_VALUE (PARAM_FSM_SCALE_PATH_STMTS) > + >= PARAM_VALUE (PARAM_MAX_JUMP_THREAD_DUPLICATION_STMTS))) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + "FSM did not thread around loop and would copy too > " > + "many statements.\n"); > + path->pop (); > + continue; > + } > + > /* When there is a multi-way branch on the path, then threading > can > explode the CFG due to duplicating the edges for that multi-way > branch. So like above, only allow a multi-way branch on the > path >