On Wed, 22 May 2019, Richard Biener wrote:

> 
> This enables -ftree-loop-distribute-patterns at -O[2s] and also
> arranges for cold loops to still be processed, but only for pattern
> recognition, in order to save code size.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> 
> Martin has done extensive compile-time testing on SPEC,
> identifying only a single regression, which I'll have a look into.

The reason for the compile-time regression is that the complexity
heuristic in LRA no longer chooses the "simple" algorithms, and
that the LIVE problem in particular is awfully slow.

Unsurprisingly, testing has also revealed loads of testsuite
fallout, which I deal with in the patch as committed below.
Sorry for any further fallout on other targets (which I do
expect).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-05-23  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/88440
        * opts.c (default_options_table): Enable -ftree-loop-distribute-patterns
        at -O[2s]+.
        * tree-loop-distribution.c (generate_memset_builtin): Fold the
        generated call.
        (generate_memcpy_builtin): Likewise.
        (distribute_loop): Pass in whether to only distribute patterns.
        (prepare_perfect_loop_nest): Also allow size optimization.
        (pass_loop_distribution::execute): When optimizing a loop
        nest for size allow pattern replacement.

        * gcc.dg/tree-ssa/ldist-37.c: New testcase.
        * gcc.dg/tree-ssa/ldist-38.c: Likewise.
        * gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns.
        * gcc.dg/tree-ssa/ldist-37.c: Adjust.
        * gcc.dg/tree-ssa/ldist-38.c: Likewise.
        * g++.dg/tree-ssa/pr78847.C: Likewise.
        * gcc.dg/autopar/pr39500-1.c: Likewise.
        * gcc.dg/autopar/reduc-1char.c: Likewise.
        * gcc.dg/autopar/reduc-7.c: Likewise.
        * gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise.
        * gcc.dg/tree-ssa/ivopts-lt.c: Likewise.
        * gcc.dg/tree-ssa/predcom-dse-1.c: Likewise.
        * gcc.dg/tree-ssa/predcom-dse-2.c: Likewise.
        * gcc.dg/tree-ssa/predcom-dse-3.c: Likewise.
        * gcc.dg/tree-ssa/predcom-dse-4.c: Likewise.
        * gcc.dg/tree-ssa/prefetch-7.c: Likewise.
        * gcc.dg/tree-ssa/prefetch-8.c: Likewise.
        * gcc.dg/tree-ssa/prefetch-9.c: Likewise.
        * gcc.dg/tree-ssa/scev-11.c: Likewise.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
        * gcc.target/i386/pr30970.c: Likewise.
        * gcc.target/i386/vect-double-1.c: Likewise.
        * gcc.target/i386/vect-double-2.c: Likewise.
        * gcc.dg/tree-ssa/gen-vect-2.c: Likewise.
        * gcc.dg/tree-ssa/gen-vect-26.c: Likewise.
        * gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
        * gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
        * gfortran.dg/vect/vect-5.f90: Likewise.
        * gfortran.dg/vect/vect-8.f90: Likewise.

Index: gcc/opts.c
===================================================================
--- gcc/opts.c  (revision 271513)
+++ gcc/opts.c  (working copy)
@@ -550,7 +550,7 @@ static const struct default_options defa
     { OPT_LEVELS_3_PLUS, OPT_fpredictive_commoning, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fsplit_paths, NULL, 1 },
-    { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribution, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
Index: gcc/testsuite/g++.dg/tree-ssa/pr78847.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/pr78847.C     (revision 271513)
+++ gcc/testsuite/g++.dg/tree-ssa/pr78847.C     (working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target c++14 } */
-/* { dg-options "-O3 -fdump-tree-ldist" } */
+/* { dg-options "-O3 -fdump-tree-ldist-optimized" } */
 
 #include <stddef.h>
 #include <cstring>
@@ -23,4 +23,4 @@ void testWithLoopValue(const Foo foo, si
       buf_[ptr++] = c;
 }
 
-/* { dg-final { scan-tree-dump "memcpy\[^\n\r\]*, 9\\);" "ldist" } } */
+/* { dg-final { scan-tree-dump "split to 0 loops and 1 library calls" "ldist" 
} } */
Index: gcc/testsuite/gcc.dg/autopar/pr39500-1.c
===================================================================
--- gcc/testsuite/gcc.dg/autopar/pr39500-1.c    (revision 271513)
+++ gcc/testsuite/gcc.dg/autopar/pr39500-1.c    (working copy)
@@ -1,7 +1,7 @@
 /* pr39500: autopar fails to parallel */
 /* origin: nemoking...@gmail.com(LiFeng) */
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops2-details" 
} */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns 
-ftree-parallelize-loops=4 -fdump-tree-parloops2-details" } */
 
 void abort (void);
 
Index: gcc/testsuite/gcc.dg/autopar/reduc-1char.c
===================================================================
--- gcc/testsuite/gcc.dg/autopar/reduc-1char.c  (revision 271513)
+++ gcc/testsuite/gcc.dg/autopar/reduc-1char.c  (working copy)
@@ -61,5 +61,5 @@ int main (void)
 
 
 /* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops2" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 
"parloops2" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 
"parloops2" } } */
 
Index: gcc/testsuite/gcc.dg/autopar/reduc-7.c
===================================================================
--- gcc/testsuite/gcc.dg/autopar/reduc-7.c      (revision 271513)
+++ gcc/testsuite/gcc.dg/autopar/reduc-7.c      (working copy)
@@ -85,5 +85,5 @@ int main (void)
 
 
 /* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops2" } } */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 
"parloops2" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 
"parloops2" } } */
 
Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c  (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c  (working copy)
@@ -1,6 +1,6 @@
 /* { dg-do run { target vect_cmdline_needed } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic" } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize 
-fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
 
 #include <stdlib.h>
 
Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c (working copy)
@@ -1,6 +1,6 @@
 /* { dg-do run { target vect_cmdline_needed } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic" } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize 
-fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
 
 #include <stdlib.h>
 
Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c (working copy)
@@ -1,6 +1,6 @@
 /* { dg-do run { target vect_cmdline_needed } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic" } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize 
-fdump-tree-vect-details -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
 
 #include <stdlib.h>
 
Index: gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run { target vect_cmdline_needed } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details 
-fno-vect-cost-model" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize 
-fdump-tree-vect-details -fno-vect-cost-model" } */
 /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
 
 #include <stdlib.h>
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-2.c (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ivopts" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } 
*/
 /* { dg-skip-if "PR68644" { hppa*-*-* powerpc*-*-* } } */
 
 void
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c   (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c   (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ivopts" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } 
*/
 /* { dg-require-effective-target stdint_types } */
 
 #include "stdint.h"
Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c       (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning 
-fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns 
-fpredictive-commoning -fdump-tree-pcom-details" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c       (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning 
-fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns 
-fpredictive-commoning -fdump-tree-pcom-details" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c       (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning 
-fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns 
-fpredictive-commoning -fdump-tree-pcom-details" } */
 
 int arr1[105] = {2, 3, 5, 7, 11, 13, 0};
 int arr2[105] = {2, 3, 5, 7, 11, 13, 0};
Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c       (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning 
-fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns 
-fpredictive-commoning -fdump-tree-pcom-details" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c  (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-7.c  (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param 
simultaneous-prefetches=100 -fdump-tree-aprefetch-details 
-fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays 
-march=amdfam10 --param simultaneous-prefetches=100 
-fdump-tree-aprefetch-details -fdump-tree-optimized" } */
 
 #define K 1000000
 int a[K];
Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c  (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-8.c  (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param 
simultaneous-prefetches=100 -fdump-tree-aprefetch-details 
-fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays 
-march=amdfam10 --param simultaneous-prefetches=100 
-fdump-tree-aprefetch-details -fdump-tree-optimized" } */
 
 #define K 1000000
 int a[K];
Index: gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c  (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/prefetch-9.c  (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param 
simultaneous-prefetches=100 -fdump-tree-aprefetch-details 
-fdump-tree-optimized" } */
+/* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays 
-march=amdfam10 --param simultaneous-prefetches=100 
-fdump-tree-aprefetch-details -fdump-tree-optimized" } */
 
 #define K 1000000
 int a[K], b[K];
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-11.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-11.c     (revision 271513)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-11.c     (working copy)
@@ -15,7 +15,7 @@ foo (int n)
     {
       unsigned char uc = (unsigned char)i;
       a[i] = i;
-      b[uc] = 0;
+      b[uc] = 1;
     }
 
   bar (a);
Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c        
(revision 271513)
+++ gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c        
(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
 
 #include <stdarg.h>
 #include "../../tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c        
(revision 271513)
+++ gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c        
(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
 
 #include <stdarg.h>
 #include "../../tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c      
(revision 271513)
+++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c      
(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
 
 #include <stdarg.h>
 #include "../../tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c      
(revision 271513)
+++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c      
(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
 
 #include <stdarg.h>
 #include "../../tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/vect.exp
===================================================================
--- gcc/testsuite/gcc.dg/vect/vect.exp  (revision 271513)
+++ gcc/testsuite/gcc.dg/vect/vect.exp  (working copy)
@@ -45,7 +45,7 @@ if ![check_vect_support_and_set_flags] {
 }
 
 # These flags are used for all targets.
-lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" "-fno-vect-cost-model" 
"-fno-common"
+lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" 
"-fno-tree-loop-distribute-patterns" "-fno-vect-cost-model" "-fno-common"
 
 # Initialize `dg'.
 dg-init
Index: gcc/testsuite/gcc.target/i386/pr30970.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr30970.c     (revision 271513)
+++ gcc/testsuite/gcc.target/i386/pr30970.c     (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile }
-/* { dg-options "-msse2 -O2 -ftree-vectorize -mtune=generic" } */
+/* { dg-options "-msse2 -O2 -fno-tree-loop-distribute-patterns 
-ftree-vectorize -mtune=generic" } */
 
 #define N 256
 int b[N];
Index: gcc/testsuite/gcc.target/i386/vect-double-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/vect-double-1.c       (revision 271513)
+++ gcc/testsuite/gcc.target/i386/vect-double-1.c       (working copy)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=core2" } } */
-/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -march=core2 
-fdump-tree-vect-stats" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns 
-mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */
 /* { dg-add-options bind_pic_locally } */
 
 extern void abort (void);
Index: gcc/testsuite/gcc.target/i386/vect-double-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/vect-double-2.c       (revision 271513)
+++ gcc/testsuite/gcc.target/i386/vect-double-2.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=atom 
-fdump-tree-vect-stats" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns 
-mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */
 
 extern void abort (void);
 
Index: gcc/testsuite/gfortran.dg/vect/vect-5.f90
===================================================================
--- gcc/testsuite/gfortran.dg/vect/vect-5.f90   (revision 271513)
+++ gcc/testsuite/gfortran.dg/vect/vect-5.f90   (working copy)
@@ -1,5 +1,5 @@
 ! { dg-require-effective-target vect_int }
-! { dg-additional-options "--param vect-max-peeling-for-alignment=0" }
+! { dg-additional-options "-fno-tree-loop-distribute-patterns --param 
vect-max-peeling-for-alignment=0" }
 
         Subroutine foo (N, M)
         Integer N
Index: gcc/testsuite/gfortran.dg/vect/vect-8.f90
===================================================================
--- gcc/testsuite/gfortran.dg/vect/vect-8.f90   (revision 271513)
+++ gcc/testsuite/gfortran.dg/vect/vect-8.f90   (working copy)
@@ -1,6 +1,6 @@
 ! { dg-do compile }
 ! { dg-require-effective-target vect_double }
-! { dg-additional-options "-finline-matmul-limit=0" }
+! { dg-additional-options "-fno-tree-loop-distribute-patterns 
-finline-matmul-limit=0" }
 
 module lfk_prec
  integer, parameter :: dp=kind(1.d0)
Index: gcc/tree-loop-distribution.c
===================================================================
--- gcc/tree-loop-distribution.c        (revision 271513)
+++ gcc/tree-loop-distribution.c        (working copy)
@@ -115,6 +115,7 @@ along with GCC; see the file COPYING3.
 #include "params.h"
 #include "tree-vectorizer.h"
 #include "tree-eh.h"
+#include "gimple-fold.h"
 
 
 #define MAX_DATAREFS_NUM \
@@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *lo
   fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
   fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+  fold_stmt (&gsi);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *lo
   fn = build_fold_addr_expr (builtin_decl_implicit (kind));
   fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
   gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+  fold_stmt (&gsi);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop,
 
 static int
 distribute_loop (struct loop *loop, vec<gimple *> stmts,
-                control_dependences *cd, int *nb_calls, bool *destroy_p)
+                control_dependences *cd, int *nb_calls, bool *destroy_p,
+                bool only_patterns_p)
 {
   ddrs_table = new hash_table<ddr_hasher> (389);
   struct graph *rdg;
@@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<
 
   /* If we are only distributing patterns but did not detect any,
      simply bail out.  */
-  if (!flag_tree_loop_distribution
+  if (only_patterns_p
       && !any_builtin)
     {
       nbp = 0;
@@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<
      a loop into pieces, separated by builtin calls.  That is, we
      only want no or a single loop body remaining.  */
   struct partition *into;
-  if (!flag_tree_loop_distribution)
+  if (only_patterns_p)
     {
       for (i = 0; partitions.iterate (i, &into); ++i)
        if (!partition_builtin_p (into))
@@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *
         && loop_outer (outer)
         && outer->inner == loop && loop->next == NULL
         && single_exit (outer)
-        && optimize_loop_for_speed_p (outer)
         && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
         && (niters = number_of_latch_executions (outer)) != NULL_TREE
         && niters != chrec_dont_know)
@@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (functio
      walking to innermost loops.  */
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
     {
-      /* Don't distribute multiple exit edges loop, or cold loop.  */
+      /* Don't distribute multiple exit edges loop, or cold loop when
+         not doing pattern detection.  */
       if (!single_exit (loop)
-         || !optimize_loop_for_speed_p (loop))
+         || (!flag_tree_loop_distribute_patterns
+             && !optimize_loop_for_speed_p (loop)))
        continue;
 
       /* Don't distribute loop if niters is unknown.  */
@@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (functio
 
          bool destroy_p;
          int nb_generated_loops, nb_generated_calls;
-         nb_generated_loops = distribute_loop (loop, work_list, cd,
-                                               &nb_generated_calls,
-                                               &destroy_p);
+         nb_generated_loops
+           = distribute_loop (loop, work_list, cd, &nb_generated_calls,
+                              &destroy_p, (!optimize_loop_for_speed_p (loop)
+                                           || !flag_tree_loop_distribution));
          if (destroy_p)
            loops_to_be_destroyed.safe_push (loop);
 

Reply via email to