http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58902

            Bug ID: 58902
           Summary: small matrix multiplication non vectorized
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

In the following example, matmul and matmul2 are not vectorized,
whereas the manually unrolled version (matmul3) is.
c++ -std=c++11 -Ofast -S m3x10.cc -march=corei7-avx -fopt-info-vec-all
gcc version 4.9.0 20131011 (experimental) [trunk revision 203426] (GCC) 

cat m3x10.cc
// Number of rows in the small matrix `tmp`; also the length of `param`.
const int nrow=3;
 // nrow x 10 input matrix, 32-byte aligned.
 // NOTE(review): the 32-byte alignment presumably targets 256-bit AVX vectors
 // (the report builds with -march=corei7-avx) -- confirm.
 alignas(32) double tmp[nrow][10];
 // Per-row results written/accumulated by the matmul* functions below.
 alignas(32) double param[nrow];
 // Input vector of length 10 that each row of `tmp` is multiplied against.
 alignas(32) double frame[10];

// Matrix-vector product: for each row j, adds the dot product of tmp[j][*]
// and frame[*] into param[j]. param[j] is accumulated into (+=), not reset,
// so any previous contents of param are kept as the starting value.
// Per the bug report, this loop nest is NOT vectorized.
void matmul() {
    for (int j=0; j<nrow; ++j)
    for (int i=0; i<10; ++i)
        param[j] += tmp[j][i]*frame[i];
}

// Same row-by-row dot products as matmul(), but each row is summed into a
// local accumulator `s` starting from 0 and then stored, so param[j] is
// overwritten rather than accumulated into.
// Per the bug report, this variant is NOT vectorized either.
void matmul2() {
    for (int j=0; j<nrow; ++j) {
      double s=0;
      for (int i=0; i<10; ++i)
        s += tmp[j][i]*frame[i];
      param[j] =s;
    }
}


// Manually unrolled form of matmul(): the row loop is written out by hand for
// rows 0, 1 and 2 (hard-coded, matching nrow == 3), leaving a single loop over
// the 10 columns. Like matmul(), it accumulates into param[] with +=.
// Per the bug report, this is the variant that DOES vectorize.
void matmul3() {
      for (int i=0; i<10; ++i) {
        param[0] += tmp[0][i]*frame[i];
        param[1] += tmp[1][i]*frame[i];
        param[2] += tmp[2][i]*frame[i];
    }
}



// Returns the dot product of row 0 of tmp with frame (sum over 10 elements),
// without touching param.
// NOTE(review): the report does not state whether this single-row reduction
// vectorizes; presumably it is included for comparison -- confirm.
double vmul0() {
  double s=0;
    for (int i=0; i<10; ++i)
      s += tmp[0][i]*frame[i];
  return s;
}

// Returns the dot product of row 1 of tmp with frame (sum over 10 elements),
// without touching param. Identical to vmul0() except for the row index.
// NOTE(review): the report does not state whether this single-row reduction
// vectorizes; presumably it is included for comparison -- confirm.
double vmul1() {
  double s=0;
    for (int i=0; i<10; ++i)
      s += tmp[1][i]*frame[i];
  return s;
}

Reply via email to