On Tue, Jul 21, 2009 at 12:07 AM, Tom Lane<[email protected]> wrote:
> Greg Stark <[email protected]> writes:
>> I also wonder if this microbenchmark is actually ok because it's
>> testing the same value over and over so any branch-prediction will
>> shine unrealistically well.
>
> Yeah, that is a good point --- and it would benefit the unrolled loop
> more than the other versions. We should probably revise the test
> harness so it mixes the size requests a bit. I'm not sure of a suitably
> simple way to do that, though.
Well it was a bit of a pain but I filled an array with (1/1000 scaled
down) values and then permuted them. I also went ahead and set the
low-order bits to random values since the lookup table based algorithm
might be affected by it.
The results are a bit disappointing on my machine, only the CLZ and
lookup table come out significantly ahead:
$ ./testit 10
0: 620
1: 4949
2: 5378
3: 3560
4: 4426
5: 4218
6: 1098
7: 387
8: 599
9: 44
10: 52
clz 1.530s
lookup table 1.720s
float hack 4.424s
unrolled 5.280s
normal 5.369s
--
greg
http://mit.edu/~gsstark/resume.pdf
/*
* usage: time ./testit N
* N is a repeat count, set it large enough to get repeatable timings
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS 11
/* number of calls to AllocSetFreeIndex with each result category */
static const int numoccurs[ALLOCSET_NUM_FREELISTS] = {
#ifdef USE_64BIT_COUNTS
2139534,
5994692,
5711479,
3289034,
4550931,
2573389,
487566,
588470,
155148,
52750,
202597
#else
5190113,
5663980,
3573261,
4476398,
4246178,
1100634,
386501,
601654,
44884,
52372,
202801
#endif
};
extern int AllocSetFreeIndex_clz(size_t size);
extern int AllocSetFreeIndex_lt(size_t size);
extern int AllocSetFreeIndex_fh(size_t size);
extern int AllocSetFreeIndex_ur(size_t size);
extern int AllocSetFreeIndex(size_t size);
int
main(int argc, char **argv)
{
int repeat = atoi(argv[1]);
int k,l,m, min=1000, total=0;
size_t *arr;
struct timeval before, after;
double elapsed;
// for (k=0; k< ALLOCSET_NUM_FREELISTS; k++)
// if (numoccurs[k] < min)
// min = numoccurs[k];
for (k=0; k< ALLOCSET_NUM_FREELISTS; k++)
total += numoccurs[k]/min;
arr = malloc(sizeof(size_t)*total);
m = 0;
for (k=0; k < ALLOCSET_NUM_FREELISTS; k++)
for (l=0; l < numoccurs[k]/min; l++)
{
int p = 1 << (ALLOC_MINBITS + k);
p |= (p*2 - 1) & random();
arr[m++] = p;
}
for (m=0; m < total; m++)
{
int r = random()%total;
if (r != m)
{
int tmp = arr[r];
arr[r] = arr[m];
arr[m] = tmp;
}
}
repeat *= min;
static int occurs[ALLOCSET_NUM_FREELISTS];
for (m=0; m < total; m++)
{
int retval_clz = AllocSetFreeIndex_clz(arr[m]);
int retval_fh = AllocSetFreeIndex_fh(arr[m]);
int retval_lt = AllocSetFreeIndex_lt(arr[m]);
int retval_ur = AllocSetFreeIndex_ur(arr[m]);
int retval = AllocSetFreeIndex(arr[m]);
if (retval != retval_clz ||
retval != retval_fh ||
retval != retval_lt ||
retval != retval_ur)
printf("%08x: %4d %4d %4d %4d %4d\n", arr[m], retval, retval_clz, retval_fh, retval_lt, retval_ur);
else
occurs[retval]++;
}
for (k=0; k<ALLOCSET_NUM_FREELISTS; k++)
printf("%4d: %8d\n", k, occurs[k]);
gettimeofday(&before, NULL);
for(l=0; l<repeat; l++)
for (m=0; m < total; m++)
AllocSetFreeIndex_clz(arr[m]);
gettimeofday(&after, NULL);
elapsed = after.tv_sec - before.tv_sec;
elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
printf("%20s %.3fs\n", "clz", elapsed);
gettimeofday(&before, NULL);
for(l=0; l<repeat; l++)
for (m=0; m < total; m++)
AllocSetFreeIndex_lt(arr[m]);
gettimeofday(&after, NULL);
elapsed = after.tv_sec - before.tv_sec;
elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
printf("%20s %.3fs\n", "lookup table", elapsed);
gettimeofday(&before, NULL);
for(l=0; l<repeat; l++)
for (m=0; m < total; m++)
AllocSetFreeIndex_fh(arr[m]);
gettimeofday(&after, NULL);
elapsed = after.tv_sec - before.tv_sec;
elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
printf("%20s %.3fs\n", "float hack", elapsed);
gettimeofday(&before, NULL);
for(l=0; l<repeat; l++)
for (m=0; m < total; m++)
AllocSetFreeIndex_ur(arr[m]);
gettimeofday(&after, NULL);
elapsed = after.tv_sec - before.tv_sec;
elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
printf("%20s %.3fs\n", "unrolled", elapsed);
gettimeofday(&before, NULL);
for(l=0; l<repeat; l++)
for (m=0; m < total; m++)
AllocSetFreeIndex(arr[m]);
gettimeofday(&after, NULL);
elapsed = after.tv_sec - before.tv_sec;
elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
printf("%20s %.3fs\n", "normal", elapsed);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS 11
#define BITS_PER_BYTE 8
/* ----------
* AllocSetFreeIndex -
*
* Depending on the size of an allocation compute which freechunk
* list of the alloc set it belongs to. Caller must have verified
* that size <= ALLOC_CHUNK_LIMIT.
* ----------
*/
int
AllocSetFreeIndex_clz(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
idx = sizeof(unsigned int) * BITS_PER_BYTE -
__builtin_clz((unsigned int)size);
}
return idx;
}
int
AllocSetFreeIndex_fh(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
union { unsigned int u[2]; double d; } t; // temp
t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = size;
t.d -= 4503599627370496.0;
idx = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FE;
}
return idx;
}
int
AllocSetFreeIndex_lt(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
static const char LogTable256[256] =
{
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
LT(5), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7),
LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8)
};
unsigned int t, tt; // temporaries
if (tt = size >> 16)
{
idx = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else
{
idx = (t = size >> 8) ? 8 + LogTable256[t] : LogTable256[size];
}
}
return idx;
}
int
AllocSetFreeIndex_ur(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
while (size != 0)
{
idx++;
size >>= 1;
}
}
}
}
}
return idx;
}
int
AllocSetFreeIndex(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
do
{
idx++;
size >>= 1;
}
while (size != 0);
}
return idx;
}
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers