The code below runs significantly slower when compiled as 64-bit with gcc 3.4.3
than it does with gcc 3.3.4, and both 64-bit builds are significantly slower
than a 32-bit compile.

Can anyone tell me what's going on:
  1) between 32 and 64 bits
  2) between 3.3.4 and 3.4.3

Thanks.

amd64 3200, 1024k cache

with gcc 3.4.3
-O3 -march=k8 -m32 (runtime: 0.62)
-O3 -march=k8 -m64 (runtime: 3.01)

with gcc 3.3.4
-O3 -march=k8 -m32 (runtime: 0.65)
-O3 -march=k8 -m64 (runtime: 2.06) 
------------------------------------------------------------

// Run time is anywhere from 33 to 50% longer when compiled with gcc 3.4.3
// compared to 3.3.4.
// Compiled with g++ -O3 -Wall -march=k8 (the same performance lag is observed
// with -O2).
//
// Objects are created in a hierarchy of classes.  When referenced,
// it seems that the pointer lookups
//    must cause more cache misses in gcc 3.4.3 binaries.

#include <stdio.h>
#include <time.h>

#include <vector>

// Last link of the pointer chain: carries the integer that the benchmark
// loop in main() tests.
class mytype_A {
 public:
  int id;

  // Every instance starts out with id == 0.
  mytype_A() { id = 0; }
};

// Second-to-last link of the pointer chain: stores the mytype_A pointer it
// is constructed with and does nothing else with it.
class mytype_B {
 public:
  mytype_A* A;

  // Remember the given mytype_A; the pointer is stored as-is.
  mytype_B(mytype_A* p) { A = p; }
};

// Intermediate link of the pointer chain: stores the mytype_B pointer it is
// constructed with and does nothing else with it.
class mytype_C {
 public:
  mytype_B* B;

  // Remember the given mytype_B; the pointer is stored as-is.
  mytype_C(mytype_B* p) { B = p; }
};


class mytype_D {   
 public:
  // mytype_C* C[2];          // less performance difference if we use simple 
arrays
  std::vector<mytype_C*> C;   
  int junk[3];                // affects performance (must cause cache misses)

 public:
  mytype_D(mytype_A* a0, mytype_A* a1) {
    //    C[0] = new mytype_C(new mytype_B(a0));
    //    C[1] = new mytype_C(new mytype_B(a0));
    C.push_back(new mytype_C(new mytype_B(a0)));
    C.push_back(new mytype_C(new mytype_B(a0)));
  }
};



// Benchmark driver: builds k mytype_D objects, then times k sweeps over them.
// Each sweep dereferences every D -> C -> B -> A pointer chain and tests
// A::id.  Prints the last sweep's count of non-zero ids, then the elapsed
// processor time in seconds as reported by clock().  Nothing allocated here
// is freed before exit.
int main() {
  const int k = 5000;              // run-time not linear in k
  // A needs k+1 slots: the fill loop below runs i = 0..k inclusive, and the
  // mytype_D construction reads A[k-i], which is A[k] when i == 0.
  // k is const so that both arrays have a compile-time size (no VLA).
  mytype_A* A[k + 1];
  mytype_D* D[k];
  for (int i = 0; i <= k; i++)
    A[i] = new mytype_A();
  for (int i = 0; i < k; i++)
    D[i] = new mytype_D(A[i], A[k - i]);  // intentionally make some pointers farther apart

  clock_t before = clock();

  int k0 = 0;
  for (int i = 0; i < k; i++) {
    k0 = 0;                        // reset per sweep: only the last sweep's count is printed
    for (int j = 0; j < k; j++) {  // run through list of D's, and reference pointers
      mytype_D* d = D[j];
      if (d->C[0]->B->A->id)         k0++;
      if (d->C[1]->B->A->id)         k0++;
    }
  }
  printf("%d\n", k0);              // don't allow compiler to optimize away k0

  printf("time: %f\n", (double)(clock() - before) / CLOCKS_PER_SEC);

  return 0;
}

Reply via email to