On Tue, Jul 21, 2009 at 12:07 AM, Tom Lane<t...@sss.pgh.pa.us> wrote: > Greg Stark <gsst...@mit.edu> writes: >> I also wonder if this microbenchmark is actually ok because it's >> testing the same value over and over so any branch-prediction will >> shine unrealistically well. > > Yeah, that is a good point --- and it would benefit the unrolled loop > more than the other versions. We should probably revise the test > harness so it mixes the size requests a bit. I'm not sure of a suitably > simple way to do that, though.
Well it was a bit of a pain but I filled an array with (1/1000 scaled down) values and then permuted them. I also went ahead and set the low-order bits to random values since the lookup table based algorithm might be affected by it. The results are a bit disappointing on my machine, only the CLZ and lookup table come out significantly ahead: $ ./testit 10 0: 620 1: 4949 2: 5378 3: 3560 4: 4426 5: 4218 6: 1098 7: 387 8: 599 9: 44 10: 52 clz 1.530s lookup table 1.720s float hack 4.424s unrolled 5.280s normal 5.369s -- greg http://mit.edu/~gsstark/resume.pdf
/* * usage: time ./testit N * N is a repeat count, set it large enough to get repeatable timings */ #include <stdio.h> #include <stdlib.h> #include <sys/time.h> #define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */ #define ALLOCSET_NUM_FREELISTS 11 /* number of calls to AllocSetFreeIndex with each result category */ static const int numoccurs[ALLOCSET_NUM_FREELISTS] = { #ifdef USE_64BIT_COUNTS 2139534, 5994692, 5711479, 3289034, 4550931, 2573389, 487566, 588470, 155148, 52750, 202597 #else 5190113, 5663980, 3573261, 4476398, 4246178, 1100634, 386501, 601654, 44884, 52372, 202801 #endif }; extern int AllocSetFreeIndex_clz(size_t size); extern int AllocSetFreeIndex_lt(size_t size); extern int AllocSetFreeIndex_fh(size_t size); extern int AllocSetFreeIndex_ur(size_t size); extern int AllocSetFreeIndex(size_t size); int main(int argc, char **argv) { int repeat = atoi(argv[1]); int k,l,m, min=1000, total=0; size_t *arr; struct timeval before, after; double elapsed; // for (k=0; k< ALLOCSET_NUM_FREELISTS; k++) // if (numoccurs[k] < min) // min = numoccurs[k]; for (k=0; k< ALLOCSET_NUM_FREELISTS; k++) total += numoccurs[k]/min; arr = malloc(sizeof(size_t)*total); m = 0; for (k=0; k < ALLOCSET_NUM_FREELISTS; k++) for (l=0; l < numoccurs[k]/min; l++) { int p = 1 << (ALLOC_MINBITS + k); p |= (p*2 - 1) & random(); arr[m++] = p; } for (m=0; m < total; m++) { int r = random()%total; if (r != m) { int tmp = arr[r]; arr[r] = arr[m]; arr[m] = tmp; } } repeat *= min; static int occurs[ALLOCSET_NUM_FREELISTS]; for (m=0; m < total; m++) { int retval_clz = AllocSetFreeIndex_clz(arr[m]); int retval_fh = AllocSetFreeIndex_fh(arr[m]); int retval_lt = AllocSetFreeIndex_lt(arr[m]); int retval_ur = AllocSetFreeIndex_ur(arr[m]); int retval = AllocSetFreeIndex(arr[m]); if (retval != retval_clz || retval != retval_fh || retval != retval_lt || retval != retval_ur) printf("%08x: %4d %4d %4d %4d %4d\n", arr[m], retval, retval_clz, retval_fh, retval_lt, retval_ur); else occurs[retval]++; } for (k=0; k<ALLOCSET_NUM_FREELISTS; k++) printf("%4d: %8d\n", k, occurs[k]); gettimeofday(&before, NULL); for(l=0; l<repeat; l++) for (m=0; m < total; m++) AllocSetFreeIndex_clz(arr[m]); gettimeofday(&after, NULL); elapsed = after.tv_sec - before.tv_sec; elapsed += (after.tv_usec - before.tv_usec)/1000000.0; printf("%20s %.3fs\n", "clz", elapsed); gettimeofday(&before, NULL); for(l=0; l<repeat; l++) for (m=0; m < total; m++) AllocSetFreeIndex_lt(arr[m]); gettimeofday(&after, NULL); elapsed = after.tv_sec - before.tv_sec; elapsed += (after.tv_usec - before.tv_usec)/1000000.0; printf("%20s %.3fs\n", "lookup table", elapsed); gettimeofday(&before, NULL); for(l=0; l<repeat; l++) for (m=0; m < total; m++) AllocSetFreeIndex_fh(arr[m]); gettimeofday(&after, NULL); elapsed = after.tv_sec - before.tv_sec; elapsed += (after.tv_usec - before.tv_usec)/1000000.0; printf("%20s %.3fs\n", "float hack", elapsed); gettimeofday(&before, NULL); for(l=0; l<repeat; l++) for (m=0; m < total; m++) AllocSetFreeIndex_ur(arr[m]); gettimeofday(&after, NULL); elapsed = after.tv_sec - before.tv_sec; elapsed += (after.tv_usec - before.tv_usec)/1000000.0; printf("%20s %.3fs\n", "unrolled", elapsed); gettimeofday(&before, NULL); for(l=0; l<repeat; l++) for (m=0; m < total; m++) AllocSetFreeIndex(arr[m]); gettimeofday(&after, NULL); elapsed = after.tv_sec - before.tv_sec; elapsed += (after.tv_usec - before.tv_usec)/1000000.0; printf("%20s %.3fs\n", "normal", elapsed); return 0; }
#include <stdio.h> #include <stdlib.h> #define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */ #define ALLOCSET_NUM_FREELISTS 11 #define BITS_PER_BYTE 8 /* ---------- * AllocSetFreeIndex - * * Depending on the size of an allocation compute which freechunk * list of the alloc set it belongs to. Caller must have verified * that size <= ALLOC_CHUNK_LIMIT. * ---------- */ int AllocSetFreeIndex_clz(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; idx = sizeof(unsigned int) * BITS_PER_BYTE - __builtin_clz((unsigned int)size); } return idx; } int AllocSetFreeIndex_fh(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; union { unsigned int u[2]; double d; } t; // temp t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000; t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = size; t.d -= 4503599627370496.0; idx = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FE; } return idx; } int AllocSetFreeIndex_lt(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; #define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n static const char LogTable256[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, LT(5), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8) }; unsigned int t, tt; // temporaries if (tt = size >> 16) { idx = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt]; } else { idx = (t = size >> 8) ? 8 + LogTable256[t] : LogTable256[size]; } } return idx; } int AllocSetFreeIndex_ur(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; while (size != 0) { idx++; size >>= 1; } } } } } return idx; } int AllocSetFreeIndex(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; do { idx++; size >>= 1; } while (size != 0); } return idx; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers