If the attached test case is compiled with current trunk, the resulting executable runs almost 4 times slower than the same code compiled with gcc 4.4 using identical options:
~/ujedi/splotchnew>gcc -O2 -v bugrep.c -W -Wall Using built-in specs. Target: x86_64-unknown-linux-gnu Configured with: /tmp/S/gcc-4.4.2/configure --prefix=/usr --sysconfdir=/etc --localstatedir=/var --mandir=/usr/share/man --infodir=/usr/share/info --enable-cpp --enable-nls --enable-shared --enable-multilib --enable-languages=c,c++,fortran Thread model: posix gcc version 4.4.2 (GCC) COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' /afs/mpa/@sys/system/MPA-5.13/usr/bin/../libexec/gcc/x86_64-unknown-linux-gnu/4.4.2/cc1 -quiet -v -iprefix /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/ bugrep.c -quiet -dumpbase bugrep.c -mtune=generic -auxbase bugrep -O2 -W -Wall -version -o /tmp/cc3cnddf.s ignoring nonexistent directory "/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/include" ignoring duplicate directory "/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/../../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/include" ignoring duplicate directory "/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/../../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/include-fixed" ignoring nonexistent directory "/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/../../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/include" #include "..." search starts here: #include <...> search starts here: /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/include /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/include-fixed /usr/local/include /usr/include End of search list. GNU C (GCC) version 4.4.2 (x86_64-unknown-linux-gnu) compiled by GNU C version 4.4.2, GMP version 4.3.1, MPFR version 2.4.1. warning: MPFR header version 2.4.1 differs from library version 2.4.2. 
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 Compiler executable checksum: 364bbcc3e471e0834234549d6d2cb3d0 COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/bin/as -V -Qy -o /tmp/ccKlmGkp.o /tmp/cc3cnddf.s GNU assembler version 2.19.1 (x86_64-unknown-linux-gnu) using BFD version (GNU Binutils) 2.19.1 COMPILER_PATH=/afs/mpa/@sys/system/MPA-5.13/usr/bin/../libexec/gcc/x86_64-unknown-linux-gnu/4.4.2/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../libexec/gcc/:/usr/libexec/gcc/x86_64-unknown-linux-gnu/4.4.2/:/usr/libexec/gcc/x86_64-unknown-linux-gnu/:/usr/lib/gcc/x86_64-unknown-linux-gnu/4.4.2/:/usr/lib/gcc/x86_64-unknown-linux-gnu/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/bin/ LIBRARY_PATH=/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/:/usr/lib/gcc/x86_64-unknown-linux-gnu/4.4.2/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../lib64/:/lib/../lib64/:/usr/lib/../lib64/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/lib/:/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../:/lib/:/usr/lib/ COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' /afs/mpa/@sys/system/MPA-5.13/usr/bin/../libexec/gcc/x86_64-unknown-linux-gnu/4.4.2/collect2 --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../lib64/crt1.o /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../lib64/crti.o /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/crtbegin.o 
-L/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2 -L/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc -L/usr/lib/gcc/x86_64-unknown-linux-gnu/4.4.2 -L/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../x86_64-unknown-linux-gnu/lib -L/afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../.. /tmp/ccKlmGkp.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/crtend.o /afs/mpa/@sys/system/MPA-5.13/usr/bin/../lib/gcc/x86_64-unknown-linux-gnu/4.4.2/../../../../lib64/crtn.o ~/ujedi/splotchnew>time ./a.out 0.440u 0.000s 0:00.44 100.0% 0+0k 0+0io 0pf+0w ~/ujedi/splotchnew>gcc -O2 -v bugrep.c -W -Wall Using built-in specs. COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/4.6.0/lto-wrapper Target: x86_64-unknown-linux-gnu Configured with: /scratch/martin/gcc/configure --disable-bootstrap --enable-gold --enable-plugins --prefix=/afs/mpa/data/martin/ugcc --with-ppl=/afs/mpa/data/martin/numlibs64 --with-cloog=/afs/mpa/data/martin/numlibs64 --with-libelf=/afs/mpa/data/martin/numlibs64 --enable-languages=c++,fortran --enable-target=all --enable-checking=release Thread model: posix gcc version 4.6.0 20100604 (experimental) [trunk revision 160258] (GCC) COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' '-march=x86-64' /afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/4.6.0/cc1 -quiet -v bugrep.c -quiet -dumpbase bugrep.c -mtune=generic -march=x86-64 -auxbase bugrep -O2 -W -Wall -version -o /tmp/ccm5w6IQ.s GNU C (GCC) version 4.6.0 20100604 (experimental) [trunk revision 160258] (x86_64-unknown-linux-gnu) compiled by GNU C version 4.4.2, GMP version 4.3.2, MPFR 
version 2.4.2, MPC version 0.8.1 warning: GMP header version 4.3.2 differs from library version 4.3.1. GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 ignoring nonexistent directory "/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/../../../../x86_64-unknown-linux-gnu/include" #include "..." search starts here: #include <...> search starts here: /usr/local/include /afs/mpa/data/martin/ugcc/include /afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/include /afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/include-fixed /usr/include End of search list. GNU C (GCC) version 4.6.0 20100604 (experimental) [trunk revision 160258] (x86_64-unknown-linux-gnu) compiled by GNU C version 4.4.2, GMP version 4.3.2, MPFR version 2.4.2, MPC version 0.8.1 warning: GMP header version 4.3.2 differs from library version 4.3.1. GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072 Compiler executable checksum: d4da26fe4a5a46c88c2087304495247d COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' '-march=x86-64' as -V -Qy --64 -o /tmp/ccmGCvZC.o /tmp/ccm5w6IQ.s GNU assembler version 2.19.1 (x86_64-unknown-linux-gnu) using BFD version (GNU Binutils) 2.19.1 COMPILER_PATH=/afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/4.6.0/:/afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/4.6.0/:/afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/:/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/:/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/ LIBRARY_PATH=/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/:/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/../../../../lib64/:/lib/../lib64/:/usr/lib/../lib64/:/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/../../../:/lib/:/usr/lib/ COLLECT_GCC_OPTIONS='-O2' '-v' '-W' '-Wall' '-mtune=generic' '-march=x86-64' 
/afs/mpa/data/martin/ugcc/libexec/gcc/x86_64-unknown-linux-gnu/4.6.0/collect2 --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/../lib64/crt1.o /usr/lib/../lib64/crti.o /afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/crtbegin.o -L/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0 -L/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/../../.. /tmp/ccmGCvZC.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /afs/mpa/data/martin/ugcc/lib/gcc/x86_64-unknown-linux-gnu/4.6.0/crtend.o /usr/lib/../lib64/crtn.o ~/ujedi/splotchnew>time ./a.out 1.580u 0.000s 0:01.58 100.0% 0+0k 0+0io 0pf+0w Both timings were measured on a Core2Duo. gcc 4.5 also exhibits this regression. Another observation: if I change ARRSZ in the test case from 1024 to 20, all executables run even more slowly (by about another factor of five); I have absolutely no explanation for this :-/ -- Summary: [4.5/4.6] Massive performance regression in SSE code Product: gcc Version: 4.6.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: regression AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: martin at mpa-garching dot mpg dot de GCC build triplet: x86_64-unknown-linux-gnu GCC host triplet: x86_64-unknown-linux-gnu GCC target triplet: x86_64-unknown-linux-gnu http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44423