http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50819

             Bug #: 50819
           Summary: missed SLP vectorization
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: vincenzo.innoce...@cern.ch


In this example sum2 is vectorized but sum1 is not.
As you may suspect, all current code looks more like sum1…

// Scalar type used for every LorentzVector component and for the scale factor.
typedef float Value;

// Four-component vector of Value (theX, theY, theZ, theT).
// The type is declared with 16-byte alignment via the aligned attribute.
struct LorentzVector
{

  // Builds a vector from its four components; every argument defaults to 0.
  LorentzVector(Value x=0, Value  y=0, Value  z=0, Value  t=0) :
theX(x),theY(y),theZ(z),theT(t){} 
  // Adds a to this vector one component at a time and returns *this.
  LorentzVector & operator+=(const LorentzVector & a) {
    theX += a.theX;
    theY += a.theY;
    theZ += a.theZ;
    theT += a.theT;
    return *this;
  }

  // The four components.
  Value theX;
  Value theY;
  Value theZ;
  Value theT;
}  __attribute__ ((aligned(16)));

// Returns a new LorentzVector holding the component-wise sum of a and b.
inline LorentzVector
operator+(LorentzVector const & a, LorentzVector const & b) {
  return
LorentzVector(a.theX+b.theX,a.theY+b.theY,a.theZ+b.theZ,a.theT+b.theT);
}

// Returns a new LorentzVector with each component of a multiplied by s.
inline LorentzVector
operator*(LorentzVector const & a, Value s) {
    return LorentzVector(a.theX*s,a.theY*s,a.theZ*s,a.theT*s);
}

// Scalar-on-the-left overload; forwards to the (vector, scalar) form as a*s.
inline LorentzVector
operator*(Value s, LorentzVector const & a) {
  return a*s;
}


// Adds s*(v1+v2) into res using the compound operator+=.
// According to the accompanying report, this form is NOT vectorized
// (the listing shows scalar movss/addss/mulss instructions).
void sum1(LorentzVector & res, Value s, LorentzVector const & v1, LorentzVector
const & v2) {
  res += s*(v1+v2);
}

// Computes res + s*(v1+v2) with the binary operator+ and assigns the result
// back to res. According to the accompanying report, this form IS vectorized
// (the listing shows packed movaps/addps/mulps instructions).
void sum2(LorentzVector & res, Value s, LorentzVector const & v1, LorentzVector
const & v2) {
  res = res + s*(v1+v2);
}


c++ -O3 -c FourVec.cc
Vincenzos-MacBook-Pro:ctest innocent$ otool -V -t -v -X  FourVec.o | c++filt
sum1(LorentzVector&, float, LorentzVector const&, LorentzVector const&):
    movss    0x0c(%rsi),%xmm1
    movss    0x08(%rsi),%xmm2
    movss    0x04(%rsi),%xmm3
    movss    (%rsi),%xmm4
    addss    0x0c(%rdx),%xmm1
    addss    0x08(%rdx),%xmm2
    addss    0x04(%rdx),%xmm3
    addss    (%rdx),%xmm4
    mulss    %xmm0,%xmm1
    mulss    %xmm0,%xmm2
    mulss    %xmm0,%xmm3
    mulss    %xmm0,%xmm4
    addss    0x0c(%rdi),%xmm1
    addss    0x08(%rdi),%xmm2
    addss    0x04(%rdi),%xmm3
    addss    (%rdi),%xmm4
    movss    %xmm1,0x0c(%rdi)
    movss    %xmm2,0x08(%rdi)
    movss    %xmm3,0x04(%rdi)
    movss    %xmm4,(%rdi)
    ret
    nopl    (%rax)
sum2(LorentzVector&, float, LorentzVector const&, LorentzVector const&):
    movaps    (%rsi),%xmm1
    shufps    $0x0,%xmm0,%xmm0
    addps    (%rdx),%xmm1
    mulps    %xmm1,%xmm0
    addps    (%rdi),%xmm0
    movaps    %xmm0,(%rdi)
    ret

Reply via email to