------- Comment #42 from rguenth at gcc dot gnu dot org 2007-05-04 11:45 ------- I put it on the tester. A testcase that does regress (simplified from tramp3d):

template <class T, int D>
class Vector
{
public:
  Vector()
  {
    for (int i = 0; i < D; ++i)
      new (&x_m[i]) T();
  }
  T& operator[](int i) { return x_m[i]; }

private:
  T x_m[D];
};

void foo(Vector<double, 3> *m)
{
  Vector<double, 3> v;
  v[0] = 1.0;
  v[1] = 2.0;
  v[3] = 3.0;
  *m = v;
}

where we can no longer optimize away the redundant stores to v:

<bb 2>:
  v.x_m[0] = 0.0;
  __asm__ __volatile__("":"=m" v.x_m[0]:"m" v.x_m[0]);
  v.x_m[1] = 0.0;
  __asm__ __volatile__("":"=m" v.x_m[1]:"m" v.x_m[1]);
  v.x_m[2] = 0.0;
  __asm__ __volatile__("":"=m" v.x_m[2]:"m" v.x_m[2]);
  v.x_m[0] = 1.0e+0;
  v.x_m[1] = 2.0e+0;
  v.x_m[3] = 3.0e+0;
  *m = v;
  return;

vs.

<bb 2>:
  v.x_m[2] = 0.0;
  v.x_m[0] = 1.0e+0;
  v.x_m[1] = 2.0e+0;
  v.x_m[3] = 3.0e+0;
  *m = v;
  return;

and assembly:

_Z3fooP6VectorIdLi3EE:
.LFB17:
        xorl %r9d, %r9d
        movq %r9, -40(%rsp)
        movq %r9, -32(%rsp)
        movq %r9, -24(%rsp)
        movabsq $4607182418800017408, %r8
        movabsq $4611686018427387904, %rsi
        movq -24(%rsp), %rax
        movq %r8, -40(%rsp)
        movq %rsi, -32(%rsp)
        movq -40(%rsp), %rcx
        movq -32(%rsp), %rdx
        movq %rax, 16(%rdi)
        movq %rcx, (%rdi)
        movq %rdx, 8(%rdi)
        ret

vs.

_Z3fooP6VectorIdLi3EE:
.LFB17:
        movabsq $4607182418800017408, %r8
        movabsq $4611686018427387904, %rsi
        movq $0, -24(%rsp)
        movq %r8, -40(%rsp)
        movq %rsi, -32(%rsp)
        movq -40(%rsp), %rcx
        movq -32(%rsp), %rdx
        movq -24(%rsp), %rax
        movq %rcx, (%rdi)
        movq %rdx, 8(%rdi)
        movq %rax, 16(%rdi)
        ret

This happens in hot tramp3d loops, and it is quite a common idiom for initializing storage. To fix this, we need to avoid creating the asm if the type of the original storage is the same as the type being constructed by the placement new.

-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29286