I noticed a performance regression on the following code:

$ cat a.c
#include <stdint.h>
#include <stdio.h>

/* Add the four-element value y into x in place, propagating the carry
   between 64-bit elements by hand.  Element 0 is the least significant:
   the carry computed from x[0] feeds x[1], and so on up to x[3].  No
   carry is computed after x[3], so overflow out of the top element is
   silently dropped.

   NOTE(review): every carry test re-reads y[i] after x[i] has been
   stored.  If x and y refer to the same array, the comparison is made
   against the updated value -- confirm callers never alias them.  */
void
add256 (uint64_t x[4], const uint64_t y[4])
{
 /* Carry out of the previous element: 0 or 1.  */
 unsigned char carry;
 x[0] += y[0];
 /* Unsigned addition wrapped iff the sum is smaller than an addend.  */
 carry = (x[0] < y[0]);
 x[1] += y[1]+carry;
 /* With a carry-in, a sum equal to y[1] also means a wrap happened,
    hence <= rather than <.  */
 carry = carry ? (x[1] <= y[1]) : (x[1] < y[1]);
 x[2] += y[2]+carry;
 /* Same carry-out test as for element 1.  */
 carry = carry ? (x[2] <= y[2]) : (x[2] < y[2]);
 /* Top element: the carry-out is not computed.  */
 x[3] += y[3]+carry;
}

/* Benchmark driver: start with x = 0, add the constant y to it
   100000000 times through add256, then print x as 64 hexadecimal
   digits, most significant element first.  Always returns 0.  */
int
main (void)
{
 int i;
 uint64_t x[4], y[4];

 /* Accumulator starts at zero.  */
 x[0] = 0;  x[1] = 0;  x[2] = 0;  x[3] = 0;
 /* Fixed addend; element 0 is the least significant.  */
 y[0] = 0x0123456789abcdefULL;
 y[1] = 0xfedcba9876543210ULL;
 y[2] = 0xdeadbeeff001baadULL;
 y[3] = 0x001001001001ffffULL;
 /* The loop being timed.  */
 for ( i=0 ; i<100000000 ; i++ )
   add256 (x, y);
 /* %llx takes unsigned long long, which need not be the same type as
    uint64_t, hence the explicit casts.  */
 printf ("%016llx%016llx%016llx%016llx\n",
         (unsigned long long)x[3],
         (unsigned long long)x[2],
         (unsigned long long)x[1],
         (unsigned long long)x[0]);
 return 0;
}
$ gcc -march=pentium4 -O3 a.c && time ./a.out
064069fbc13963b920219c3e939225e38e38e38e3956d81c71c71c71c0ba0f00
./a.out  1.81s user 0.00s system 99% cpu 1.818 total
$ gcc-4.3 -march=pentium4 -O3 a.c && time ./a.out
064069fbc13963b920219c3e939225e38e38e38e3956d81c71c71c71c0ba0f00
./a.out  2.40s user 0.01s system 87% cpu 2.746 total

where gcc is gcc version 4.1.1 20070105 (Red Hat 4.1.1-51) and gcc-4.3
is gcc version 4.3.0 20070209 (experimental). I don't have an unpatched
4.1 or a 4.2 compiler at hand, so I don't know whether it's a 4.2 or 4.3
regression, or even whether there's a special patch in Red Hat's 4.1 that
makes it lightning fast. In any case, I wondered whether this is known,
and whether it is worth opening a PR.

Thanks,
FX

Reply via email to