http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47658
Summary: -Os generates bigger code than -O2/3 for many small inline functions (objects) Product: gcc Version: 4.5.2 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ AssignedTo: unassig...@gcc.gnu.org ReportedBy: kice...@gmail.com I've noticed that when I use a class which is a wrapper for a pointer to a volatile variable too often, -Os generates bigger code than -O2/-O3. Here's a big example: class A { volatile int * const ptr; public: inline A(int * a):ptr(a) {} inline int read() const { return *ptr; } inline void write(int v) const { *ptr=v; } }; class B { const A a1,a2,a3; public: inline B(int *a):a1(a),a2(a-2),a3(a-1) {} inline int operator()() const { return a2.read(); } inline const B &operator=(int v) const { a1.write(v); return *this; } inline void foo(int v) const { a3.write(v); } }; template<int *addr, int *addr2> class C { public: C() { B foo(addr),bar(addr2); foo=56; foo.foo(67); bar=58; bar.foo(345); }; void foo1() { B foo(addr),bar(addr2); foo=bar(); foo.foo(12); bar.foo(foo()); foo2(2); } void foo2(int v) { B foo(addr),bar(addr2); bar=foo(); bar.foo(34*v); foo.foo(bar()); } void foo3() { B foo(addr),bar(addr2); for (int i=0;i<128; i++) { foo.foo(i); foo=bar(); bar.foo(i+1); } } }; template<int *addr, int *addr2> class D { public: D() { B foo(addr),bar(addr2); foo=56; foo.foo(67); bar=58; bar.foo(345); }; void foo1() { B foo(addr),bar(addr2); foo=bar(); foo.foo(12); bar.foo(foo()); foo2(2); } void foo2(int v) { B foo(addr),bar(addr2); bar=foo(); bar.foo(34*v); foo.foo(bar()); } void foo3() { B foo(addr),bar(addr2); for (int i=0;i<128; i++) { foo.foo(i); foo=bar(); bar.foo(i+1); } } }; template<int *addr, int *addr2> class E { public: E() { B foo(addr),bar(addr2); foo=56; foo.foo(67); bar=58; bar.foo(345); }; void foo1() { B foo(addr),bar(addr2); foo=bar(); foo.foo(12); bar.foo(foo()); foo2(2); } void foo2(int v) { B foo(addr),bar(addr2); bar=foo(); bar.foo(34*v); foo.foo(bar()); } void foo3() { B foo(addr),bar(addr2); for (int 
i=0;i<128; i++) { foo.foo(i); foo=bar(); bar.foo(i+1); } } }; int a,b; int a2,b2; int a3,b3; void f() __attribute__((used)); void f() { C<&a,&b> c; D<&a2,&b2> d; E<&a3,&b3> e; c.foo3(); c.foo1(); d.foo3(); d.foo1(); e.foo3(); e.foo1(); } int main() { C<&a,&b> c; D<&a2,&b2> d; E<&a3,&b3> e; c.foo1(); c.foo3(); d.foo1(); d.foo3(); e.foo1(); e.foo3(); return 1; } Class A is a wrapper, and class B builds on it. Classes C, D and E then use class B. While the program is small (just comment out function void f()), all calls to the member functions of classes A and B are inlined and the code is small (as all those functions are simple movs). But when the code is big (like above), g++ emits all of those functions as standalone ones (when -Os is used) and the code gets bigger. -O2 keeps all functions inlined. So the problem is that g++ thinks that many inlined copies of the same functions are bigger than calls to non-inlined versions of them (which is wrong in this case). [michal@Kicer michal]$ g++ -v Using built-in specs. COLLECT_GCC=g++ COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-mandriva-linux-gnu/4.5.2/lto-wrapper Target: x86_64-mandriva-linux-gnu Configured with: ../gcc-4.5.2/configure --host=x86_64-mandriva-linux-gnu --build=x86_64-mandriva-linux-gnu --program-prefix= --prefix=/usr --exec-prefix=/usr --bindir=/usr/bin --sbindir=/usr/sbin --sysconfdir=/etc --datadir=/usr/share --includedir=/usr/include --libdir=/usr/lib64 --libexecdir=/usr/libexec --localstatedir=/usr/var --sharedstatedir=/usr/com --mandir=/usr/share/man --infodir=/usr/share/info --enable-checking=release --enable-languages=c,c++,objc,obj-c++,fortran --with-system-zlib --enable-threads=posix --enable-shared --enable-long-long --enable-__cxa_atexit --disable-libunwind-exceptions --enable-clocale=gnu --enable-gtk-cairo --disable-libjava-multilib --enable-ssp --disable-libssp --disable-werror --enable-lto --program-suffix=-4.5.2 CFLAGS= CXXFLAGS= Thread model: posix gcc version 4.5.2 (GCC) [michal@Kicer michal]$