The code below runs significantly slower when compiled as a 64-bit binary with gcc 3.4.3 than with gcc 3.3.4, and both 64-bit builds are significantly slower than a 32-bit build.
// Can anyone tell what's going on:
//   1) between 32 and 64 bits
//   2) between 3.3.4 and 3.4.3
// Thanks.
//
// amd64 3200, 1024k cache
//
// with gcc 3.4.3
//   -O3 -march=k8 -m32 (runtime: 0.62)
//   -O3 -march=k8 -m64 (runtime: 3.01)
// with gcc 3.3.4
//   -O3 -march=k8 -m32 (runtime: 0.65)
//   -O3 -march=k8 -m64 (runtime: 2.06)
// ------------------------------------------------------------
// run time is anywhere from 33 to 50 % longer when compiled with gcc 3.4.3 compared to 3.3.4
// compiled with g++ -O3 -Wall -march=k8 (same performance lag observed with -O2)
//
// Objects are created in a hierarchy of classes. When referenced,
// it seems that the pointer lookups
// must cause more cache misses in gcc 3.4.3 binaries.

#include <stdio.h>
#include <time.h>   // clock(), clock_t, CLOCKS_PER_SEC
#include <vector>

// Innermost object of the pointer chain D -> C -> B -> A.
// Holds a single integer id, initialised to 0 and never modified in this program.
class mytype_A
{
public:
    int id;
    mytype_A() : id(0) {}
};

// Second level of the chain: stores (does not own) a pointer to a mytype_A.
class mytype_B
{
public:
    mytype_A* A;
    mytype_B(mytype_A* p) : A(p) {}
};

// Third level of the chain: stores (does not own) a pointer to a mytype_B.
class mytype_C
{
public:
    mytype_B* B;
    mytype_C(mytype_B* p) : B(p) {}
};

// Outermost object: a vector of two heap-allocated mytype_C chains plus
// three ints of padding.
class mytype_D
{
public:
    // mytype_C* C[2]; // less performance difference if we use simple arrays
    std::vector<mytype_C*> C;
    int junk[3]; // affects performance (must cause cache misses)

public:
    // Builds two C -> B chains, both of which end at a0.
    //
    // NOTE(review): a1 is accepted but never used; the second chain also wraps
    // a0. Left as in the original so the timings quoted above stay comparable.
    // Confirm whether the second entry was meant to wrap a1.
    mytype_D(mytype_A* a0, mytype_A* a1)
    {
        // C[0] = new mytype_C(new mytype_B(a0));
        // C[1] = new mytype_C(new mytype_B(a0));
        C.push_back(new mytype_C(new mytype_B(a0)));
        C.push_back(new mytype_C(new mytype_B(a0)));
    }
};

// Allocates k+1 mytype_A objects and k mytype_D objects, then walks every
// D -> C -> B -> A chain k times and prints the number of non-zero ids seen in
// the last pass, followed by the CPU time reported by clock().
// None of the allocated objects are freed; the process exits right after the
// measurement.
int main()
{
    // const so that the arrays below are ordinary fixed-size arrays rather
    // than variable-length arrays (a compiler extension in C++).
    const int k = 5000; // run-time not linear in k

    // A needs k + 1 slots: indices 0..k are filled below, and A[k - i]
    // reaches A[k] when i == 0.
    mytype_A* A[k + 1];
    mytype_D* D[k];

    for (int i = 0; i <= k; i++)
        A[i] = new mytype_A();
    for (int i = 0; i < k; i++)
        D[i] = new mytype_D(A[i], A[k - i]); // intentionally make some pointers farther apart

    clock_t before = clock();

    int k0 = 0;
    for (int i = 0; i < k; i++)
    {
        k0 = 0; // reset each pass; only the last pass's count is printed
        for (int j = 0; j < k; j++)
        {
            // run through list of D's, and reference pointers
            mytype_D* d = D[j];
            if (d->C[0]->B->A->id) k0++;
            if (d->C[1]->B->A->id) k0++;
        }
    }

    printf("%d\n", k0); // don't allow compiler to optimize away k0
    printf("time: %f\n", static_cast<double>(clock() - before) / CLOCKS_PER_SEC);
    return 0;
}