http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50819
Bug #: 50819
Summary: missed SLP vectorization
Classification: Unclassified
Product: gcc
Version: 4.7.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: vincenzo.innoce...@cern.ch

In this example sum2 is vectorized but sum1 is not. As you may suspect, all current code looks more like sum1…

typedef float Value;

struct LorentzVector
{
  LorentzVector(Value x=0, Value y=0, Value z=0, Value t=0) : theX(x),theY(y),theZ(z),theT(t){}

  LorentzVector & operator+=(const LorentzVector & a) {
    theX += a.theX;
    theY += a.theY;
    theZ += a.theZ;
    theT += a.theT;
    return *this;
  }

  Value theX;
  Value theY;
  Value theZ;
  Value theT;
} __attribute__ ((aligned(16)));

inline LorentzVector
operator+(LorentzVector const & a, LorentzVector const & b) {
  return LorentzVector(a.theX+b.theX,a.theY+b.theY,a.theZ+b.theZ,a.theT+b.theT);
}

inline LorentzVector
operator*(LorentzVector const & a, Value s) {
  return LorentzVector(a.theX*s,a.theY*s,a.theZ*s,a.theT*s);
}

inline LorentzVector
operator*(Value s, LorentzVector const & a) {
  return a*s;
}

void sum1(LorentzVector & res, Value s, LorentzVector const & v1, LorentzVector const & v2) {
  res += s*(v1+v2);
}

void sum2(LorentzVector & res, Value s, LorentzVector const & v1, LorentzVector const & v2) {
  res = res + s*(v1+v2);
}

c++ -O3 -c FourVec.cc
Vincenzos-MacBook-Pro:ctest innocent$ otool -V -t -v -X FourVec.o | c++filt

sum1(LorentzVector&, float, LorentzVector const&, LorentzVector const&):
        movss   0x0c(%rsi),%xmm1
        movss   0x08(%rsi),%xmm2
        movss   0x04(%rsi),%xmm3
        movss   (%rsi),%xmm4
        addss   0x0c(%rdx),%xmm1
        addss   0x08(%rdx),%xmm2
        addss   0x04(%rdx),%xmm3
        addss   (%rdx),%xmm4
        mulss   %xmm0,%xmm1
        mulss   %xmm0,%xmm2
        mulss   %xmm0,%xmm3
        mulss   %xmm0,%xmm4
        addss   0x0c(%rdi),%xmm1
        addss   0x08(%rdi),%xmm2
        addss   0x04(%rdi),%xmm3
        addss   (%rdi),%xmm4
        movss   %xmm1,0x0c(%rdi)
        movss   %xmm2,0x08(%rdi)
        movss   %xmm3,0x04(%rdi)
        movss   %xmm4,(%rdi)
        ret
        nopl    (%rax)

sum2(LorentzVector&, float, LorentzVector const&, LorentzVector const&):
        movaps  (%rsi),%xmm1
        shufps  $0x0,%xmm0,%xmm0
        addps   (%rdx),%xmm1
        mulps   %xmm1,%xmm0
        addps   (%rdi),%xmm0
        movaps  %xmm0,(%rdi)
        ret