On Wed, 22 May 2019, Richard Biener wrote: > > This enables -ftree-loop-distribute-patterns at -O[2s] and also > arranges for cold loops to still be processed, but only for pattern > recognition, to save code size. > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > Martin has done extensive compile-time testing on SPEC, > identifying only a single regression, which I'll have a look into.
The reason for the compile-time regression is the complexity heuristic in LRA no longer choosing "simple" algorithms and the LIVE problem in particular being awfully slow. Unsurprisingly testing has also revealed loads of testsuite fallout which I deal with in the patch as committed below. Sorry for any further fallout on other targets (which I do expect). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2019-05-23 Richard Biener <rguent...@suse.de> PR tree-optimization/88440 * opts.c (default_options_table): Enable -ftree-loop-distribute-patterns at -O[2s]+. * tree-loop-distribution.c (generate_memset_builtin): Fold the generated call. (generate_memcpy_builtin): Likewise. (distribute_loop): Pass in whether to only distribute patterns. (prepare_perfect_loop_nest): Also allow size optimization. (pass_loop_distribution::execute): When optimizing a loop nest for size allow pattern replacement. * gcc.dg/tree-ssa/ldist-37.c: New testcase. * gcc.dg/tree-ssa/ldist-38.c: Likewise. * gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns. * gcc.dg/tree-ssa/ldist-37.c: Adjust. * gcc.dg/tree-ssa/ldist-38.c: Likewise. * g++.dg/tree-ssa/pr78847.C: Likewise. * gcc.dg/autopar/pr39500-1.c: Likewise. * gcc.dg/autopar/reduc-1char.c: Likewise. * gcc.dg/autopar/reduc-7.c: Likewise. * gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise. * gcc.dg/tree-ssa/ivopts-lt.c: Likewise. * gcc.dg/tree-ssa/predcom-dse-1.c: Likewise. * gcc.dg/tree-ssa/predcom-dse-2.c: Likewise. * gcc.dg/tree-ssa/predcom-dse-3.c: Likewise. * gcc.dg/tree-ssa/predcom-dse-4.c: Likewise. * gcc.dg/tree-ssa/prefetch-7.c: Likewise. * gcc.dg/tree-ssa/prefetch-8.c: Likewise. * gcc.dg/tree-ssa/prefetch-9.c: Likewise. * gcc.dg/tree-ssa/scev-11.c: Likewise. * gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise. * gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise. * gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise. * gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise. 
* gcc.target/i386/pr30970.c: Likewise. * gcc.target/i386/vect-double-1.c: Likewise. * gcc.target/i386/vect-double-2.c: Likewise. * gcc.dg/tree-ssa/gen-vect-2.c: Likewise. * gcc.dg/tree-ssa/gen-vect-26.c: Likewise. * gcc.dg/tree-ssa/gen-vect-28.c: Likewise. * gcc.dg/tree-ssa/gen-vect-32.c: Likewise. * gfortran.dg/vect/vect-5.f90: Likewise. * gfortran.dg/vect/vect-8.f90: Likewise. Index: gcc/opts.c =================================================================== --- gcc/opts.c (revision 271513) +++ gcc/opts.c (working copy) @@ -550,7 +550,7 @@ static const struct default_options defa { OPT_LEVELS_3_PLUS, OPT_fpredictive_commoning, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_paths, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribution, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, Index: gcc/testsuite/g++.dg/tree-ssa/pr78847.C =================================================================== --- gcc/testsuite/g++.dg/tree-ssa/pr78847.C (revision 271513) +++ gcc/testsuite/g++.dg/tree-ssa/pr78847.C (working copy) @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target c++14 } */ -/* { dg-options "-O3 -fdump-tree-ldist" } */ +/* { dg-options "-O3 -fdump-tree-ldist-optimized" } */ #include <stddef.h> #include <cstring> @@ -23,4 +23,4 @@ void testWithLoopValue(const Foo foo, si buf_[ptr++] = c; } -/* { dg-final { scan-tree-dump "memcpy\[^\n\r\]*, 9\\);" "ldist" } } */ +/* { dg-final { scan-tree-dump "split to 0 loops and 1 library calls" "ldist" } } */ Index: gcc/testsuite/gcc.dg/autopar/pr39500-1.c =================================================================== --- gcc/testsuite/gcc.dg/autopar/pr39500-1.c (revision 271513) +++ gcc/testsuite/gcc.dg/autopar/pr39500-1.c (working 
copy) @@ -1,7 +1,7 @@ /* pr39500: autopar fails to parallel */ /* origin: nemoking...@gmail.com(LiFeng) */ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops2-details" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-parallelize-loops=4 -fdump-tree-parloops2-details" } */ void abort (void); Index: gcc/testsuite/gcc.dg/autopar/reduc-1char.c =================================================================== --- gcc/testsuite/gcc.dg/autopar/reduc-1char.c (revision 271513) +++ gcc/testsuite/gcc.dg/autopar/reduc-1char.c (working copy) @@ -61,5 +61,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops2" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops2" } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops2" } } */ Index: gcc/testsuite/gcc.dg/autopar/reduc-7.c =================================================================== --- gcc/testsuite/gcc.dg/autopar/reduc-7.c (revision 271513) +++ gcc/testsuite/gcc.dg/autopar/reduc-7.c (working copy) @@ -85,5 +85,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops2" } } */ -/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops2" } } */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops2" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 
-fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include <stdlib.h> Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include <stdlib.h> Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ +/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include <stdlib.h> Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c (revision 271513) +++ 
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fno-vect-cost-model" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fno-vect-cost-model" } */ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include <stdlib.h> Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-ivopts" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } */ /* { dg-skip-if "PR68644" { hppa*-*-* powerpc*-*-* } } */ void Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-ivopts" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } */ /* { dg-require-effective-target stdint_types } */ #include "stdint.h" Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ +/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */ int arr[105] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11}; Index: 
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ +/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */ int arr[105] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11}; Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ +/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */ int arr1[105] = {2, 3, 5, 7, 11, 13, 0}; int arr2[105] = {2, 3, 5, 7, 11, 13, 0}; Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ +/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */ int arr[105] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11}; Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile { 
target { i?86-*-* x86_64-*-* } } } */ -/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ #define K 1000000 int a[K]; Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ -/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ #define K 1000000 int a[K]; Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c (revision 271513) +++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ -/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ #define K 1000000 int a[K], b[K]; Index: gcc/testsuite/gcc.dg/tree-ssa/scev-11.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/scev-11.c (revision 271513) +++ 
gcc/testsuite/gcc.dg/tree-ssa/scev-11.c (working copy) @@ -15,7 +15,7 @@ foo (int n) { unsigned char uc = (unsigned char)i; a[i] = i; - b[uc] = 0; + b[uc] = 1; } bar (a); Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c =================================================================== --- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c (revision 271513) +++ gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c (working copy) @@ -1,4 +1,5 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */ #include <stdarg.h> #include "../../tree-vect.h" Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c =================================================================== --- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c (revision 271513) +++ gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c (working copy) @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */ #include <stdarg.h> #include "../../tree-vect.h" Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c =================================================================== --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c (revision 271513) +++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c (working copy) @@ -1,4 +1,5 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */ #include <stdarg.h> #include "../../tree-vect.h" Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c =================================================================== --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c (revision 271513) +++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c (working copy) @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { 
dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */ #include <stdarg.h> #include "../../tree-vect.h" Index: gcc/testsuite/gcc.dg/vect/vect.exp =================================================================== --- gcc/testsuite/gcc.dg/vect/vect.exp (revision 271513) +++ gcc/testsuite/gcc.dg/vect/vect.exp (working copy) @@ -45,7 +45,7 @@ if ![check_vect_support_and_set_flags] { } # These flags are used for all targets. -lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" "-fno-vect-cost-model" "-fno-common" +lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" "-fno-tree-loop-distribute-patterns" "-fno-vect-cost-model" "-fno-common" # Initialize `dg'. dg-init Index: gcc/testsuite/gcc.target/i386/pr30970.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr30970.c (revision 271513) +++ gcc/testsuite/gcc.target/i386/pr30970.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } -/* { dg-options "-msse2 -O2 -ftree-vectorize -mtune=generic" } */ +/* { dg-options "-msse2 -O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -mtune=generic" } */ #define N 256 int b[N]; Index: gcc/testsuite/gcc.target/i386/vect-double-1.c =================================================================== --- gcc/testsuite/gcc.target/i386/vect-double-1.c (revision 271513) +++ gcc/testsuite/gcc.target/i386/vect-double-1.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=core2" } } */ -/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */ /* { dg-add-options bind_pic_locally } */ extern void abort (void); Index: gcc/testsuite/gcc.target/i386/vect-double-2.c =================================================================== --- 
gcc/testsuite/gcc.target/i386/vect-double-2.c (revision 271513) +++ gcc/testsuite/gcc.target/i386/vect-double-2.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */ extern void abort (void); Index: gcc/testsuite/gfortran.dg/vect/vect-5.f90 =================================================================== --- gcc/testsuite/gfortran.dg/vect/vect-5.f90 (revision 271513) +++ gcc/testsuite/gfortran.dg/vect/vect-5.f90 (working copy) @@ -1,5 +1,5 @@ ! { dg-require-effective-target vect_int } -! { dg-additional-options "--param vect-max-peeling-for-alignment=0" } +! { dg-additional-options "-fno-tree-loop-distribute-patterns --param vect-max-peeling-for-alignment=0" } Subroutine foo (N, M) Integer N Index: gcc/testsuite/gfortran.dg/vect/vect-8.f90 =================================================================== --- gcc/testsuite/gfortran.dg/vect/vect-8.f90 (revision 271513) +++ gcc/testsuite/gfortran.dg/vect/vect-8.f90 (working copy) @@ -1,6 +1,6 @@ ! { dg-do compile } ! { dg-require-effective-target vect_double } -! { dg-additional-options "-finline-matmul-limit=0" } +! { dg-additional-options "-fno-tree-loop-distribute-patterns -finline-matmul-limit=0" } module lfk_prec integer, parameter :: dp=kind(1.d0) Index: gcc/tree-loop-distribution.c =================================================================== --- gcc/tree-loop-distribution.c (revision 271513) +++ gcc/tree-loop-distribution.c (working copy) @@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. 
#include "params.h" #include "tree-vectorizer.h" #include "tree-eh.h" +#include "gimple-fold.h" #define MAX_DATAREFS_NUM \ @@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *lo fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + fold_stmt (&gsi); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *lo fn = build_fold_addr_expr (builtin_decl_implicit (kind)); fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + fold_stmt (&gsi); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop, static int distribute_loop (struct loop *loop, vec<gimple *> stmts, - control_dependences *cd, int *nb_calls, bool *destroy_p) + control_dependences *cd, int *nb_calls, bool *destroy_p, + bool only_patterns_p) { ddrs_table = new hash_table<ddr_hasher> (389); struct graph *rdg; @@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec< /* If we are only distributing patterns but did not detect any, simply bail out. */ - if (!flag_tree_loop_distribution + if (only_patterns_p && !any_builtin) { nbp = 0; @@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec< a loop into pieces, separated by builtin calls. That is, we only want no or a single loop body remaining. 
*/ struct partition *into; - if (!flag_tree_loop_distribution) + if (only_patterns_p) { for (i = 0; partitions.iterate (i, &into); ++i) if (!partition_builtin_p (into)) @@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop * && loop_outer (outer) && outer->inner == loop && loop->next == NULL && single_exit (outer) - && optimize_loop_for_speed_p (outer) && !chrec_contains_symbols_defined_in_loop (niters, outer->num) && (niters = number_of_latch_executions (outer)) != NULL_TREE && niters != chrec_dont_know) @@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (functio walking to innermost loops. */ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) { - /* Don't distribute multiple exit edges loop, or cold loop. */ + /* Don't distribute multiple exit edges loop, or cold loop when + not doing pattern detection. */ if (!single_exit (loop) - || !optimize_loop_for_speed_p (loop)) + || (!flag_tree_loop_distribute_patterns + && !optimize_loop_for_speed_p (loop))) continue; /* Don't distribute loop if niters is unknown. */ @@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (functio bool destroy_p; int nb_generated_loops, nb_generated_calls; - nb_generated_loops = distribute_loop (loop, work_list, cd, - &nb_generated_calls, - &destroy_p); + nb_generated_loops + = distribute_loop (loop, work_list, cd, &nb_generated_calls, + &destroy_p, (!optimize_loop_for_speed_p (loop) + || !flag_tree_loop_distribution)); if (destroy_p) loops_to_be_destroyed.safe_push (loop);