On Mon, Jul 20, 2009 at 8:37 PM, Tom Lane<t...@sss.pgh.pa.us> wrote: > Anyone want to see if they can beat that? Some testing on other > architectures would help too.
Hm, I took the three implementations so far (normal, unrolled, and clz) as well as the two from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious and got some very strange results: normal: 1.494s clz: 2.214s unrolled: 2.966s lookup table: 0.001s float hack: 11.930s I can't see why the unrolled implementation is slower than the non-unrolled so I'm suspecting something's wrong with my #ifdefs but I don't see it. I do think the code I grabbed from the stanford page might be off-by-one for our purposes but I haven't looked closely at that. I also wonder if this microbenchmark is actually ok because it's testing the same value over and over so any branch-prediction will shine unrealistically well. -- greg http://mit.edu/~gsstark/resume.pdf
/* * usage: time ./testit N * N is a repeat count, set it large enough to get repeatable timings */ #include <stdio.h> #include <stdlib.h> #define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */ #define ALLOCSET_NUM_FREELISTS 11 /* number of calls to AllocSetFreeIndex with each result category */ static const int numoccurs[ALLOCSET_NUM_FREELISTS] = { #ifdef USE_64BIT_COUNTS 2139534, 5994692, 5711479, 3289034, 4550931, 2573389, 487566, 588470, 155148, 52750, 202597 #else 5190113, 5663980, 3573261, 4476398, 4246178, 1100634, 386501, 601654, 44884, 52372, 202801 #endif }; extern int AllocSetFreeIndex(size_t size); int main(int argc, char **argv) { int repeat = atoi(argv[1]); while (repeat-- > 0) { int k; for (k = 0; k < ALLOCSET_NUM_FREELISTS; k++) { int n = numoccurs[k]; size_t siz = (1 << (ALLOC_MINBITS + k)); while (n-- > 0) { AllocSetFreeIndex(siz); } } } return 0; } #include <stdio.h> #include <stdlib.h> #define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */ #define ALLOCSET_NUM_FREELISTS 11 #define BITS_PER_BYTE 8 /* ---------- * AllocSetFreeIndex - * * Depending on the size of an allocation compute which freechunk * list of the alloc set it belongs to. Caller must have verified * that size <= ALLOC_CHUNK_LIMIT. * ---------- */ int AllocSetFreeIndex(size_t size) { int idx = 0; if (size > (1 << ALLOC_MINBITS)) { size = (size - 1) >> ALLOC_MINBITS; #if HAVE_BUILTIN_CLZ idx = sizeof(unsigned int) * BITS_PER_BYTE - __builtin_clz((unsigned int)size); #elif TEST_FLOAT_HACK union { unsigned int u[2]; double d; } t; // temp t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000; t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = size; t.d -= 4503599627370496.0; idx = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FF; #elif TEST_LOOKUP_TABLE #define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n static const char LogTable256[256] = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7) }; unsigned int t, tt; // temporaries if (tt = size >> 16) { idx = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt]; } else { idx = (t = size >> 8) ? 8 + LogTable256[t] : LogTable256[size]; } #elif TEST_UNROLL idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; if (size != 0) { idx++; size >>= 1; while (size != 0) { idx++; size >>= 1; } } } } #else do { idx++; size >>= 1; } while (size != 0); #endif } return idx; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers