On Tue, Jul 21, 2009 at 12:07 AM, Tom Lane<t...@sss.pgh.pa.us> wrote:
> Greg Stark <gsst...@mit.edu> writes:
>> I also wonder if this microbenchmark is actually ok because it's
>> testing the same value over and over so any branch-prediction will
>> shine unrealistically well.
>
> Yeah, that is a good point --- and it would benefit the unrolled loop
> more than the other versions.  We should probably revise the test
> harness so it mixes the size requests a bit.  I'm not sure of a suitably
> simple way to do that, though.

Well it was a bit of a pain but I filled an array with (1/1000 scaled
down) values and then permuted them. I also went ahead and set the
low-order bits to random values since the lookup table based algorithm
might be affected by it.

The results are a bit disappointing on my machine, only the CLZ and
lookup table come out significantly ahead:

$ ./testit 10
   0:      620
   1:     4949
   2:     5378
   3:     3560
   4:     4426
   5:     4218
   6:     1098
   7:      387
   8:      599
   9:       44
  10:       52
                 clz 1.530s
        lookup table 1.720s
          float hack 4.424s
            unrolled 5.280s
              normal 5.369s



-- 
greg
http://mit.edu/~gsstark/resume.pdf
/*
 * usage: time ./testit N
 *      N is a repeat count, set it large enough to get repeatable timings
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define ALLOC_MINBITS           3       /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS  11

/* number of calls to AllocSetFreeIndex with each result category */
static const int numoccurs[ALLOCSET_NUM_FREELISTS] = {
#ifdef USE_64BIT_COUNTS
	2139534,
	5994692,
	5711479,
	3289034,
	4550931,
	2573389,
	487566,
	588470,
	155148,
	52750,
	202597
#else
	5190113,
	5663980,
	3573261,
	4476398,
	4246178,
	1100634,
	386501,
	601654,
	44884,
	52372,
	202801
#endif
};

extern int AllocSetFreeIndex_clz(size_t size);
extern int AllocSetFreeIndex_lt(size_t size);
extern int AllocSetFreeIndex_fh(size_t size);
extern int AllocSetFreeIndex_ur(size_t size);
extern int AllocSetFreeIndex(size_t size);

int
main(int argc, char **argv)
{
	int repeat = atoi(argv[1]);
	int k,l,m, min=1000, total=0;
	size_t *arr;
	struct timeval before, after;
	double elapsed;

//	for (k=0; k< ALLOCSET_NUM_FREELISTS; k++) 
//		if (numoccurs[k] < min)
//			min = numoccurs[k];
	for (k=0; k< ALLOCSET_NUM_FREELISTS; k++) 
		total += numoccurs[k]/min;
	
	arr = malloc(sizeof(size_t)*total);

	m = 0;
	for (k=0; k < ALLOCSET_NUM_FREELISTS; k++) 
		for (l=0; l < numoccurs[k]/min; l++)
		{
			int p = 1 << (ALLOC_MINBITS + k);
			p |= (p*2 - 1) & random();
			arr[m++] = p;
		}

	for (m=0; m < total; m++)
	{
		int r = random()%total;
		if (r != m) 
		{
			int tmp = arr[r];
			arr[r] = arr[m];
			arr[m] = tmp;
		}
	}

	repeat *= min;

	static int occurs[ALLOCSET_NUM_FREELISTS];

	for (m=0; m < total; m++)
	{
		int retval_clz = AllocSetFreeIndex_clz(arr[m]);
		int retval_fh  = AllocSetFreeIndex_fh(arr[m]);
		int retval_lt  = AllocSetFreeIndex_lt(arr[m]);
		int retval_ur  = AllocSetFreeIndex_ur(arr[m]);
		int retval     = AllocSetFreeIndex(arr[m]);
		
		if (retval != retval_clz ||
			retval != retval_fh ||
			retval != retval_lt ||
			retval != retval_ur)
			printf("%08x: %4d %4d %4d %4d %4d\n", arr[m], retval, retval_clz, retval_fh, retval_lt, retval_ur);
		else
			occurs[retval]++;
	}
	for (k=0; k<ALLOCSET_NUM_FREELISTS; k++)
		printf("%4d: %8d\n", k, occurs[k]);
	
	gettimeofday(&before, NULL);
	for(l=0; l<repeat; l++)
		for (m=0; m < total; m++)
			AllocSetFreeIndex_clz(arr[m]);
	gettimeofday(&after, NULL);
	elapsed = after.tv_sec - before.tv_sec;
	elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
	printf("%20s %.3fs\n", "clz", elapsed);

	gettimeofday(&before, NULL);
	for(l=0; l<repeat; l++)
		for (m=0; m < total; m++)
			AllocSetFreeIndex_lt(arr[m]);
	gettimeofday(&after, NULL);
	elapsed = after.tv_sec - before.tv_sec;
	elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
	printf("%20s %.3fs\n", "lookup table", elapsed);

	gettimeofday(&before, NULL);
	for(l=0; l<repeat; l++)
		for (m=0; m < total; m++)
			AllocSetFreeIndex_fh(arr[m]);
	gettimeofday(&after, NULL);
	elapsed = after.tv_sec - before.tv_sec;
	elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
	printf("%20s %.3fs\n", "float hack", elapsed);

	gettimeofday(&before, NULL);
	for(l=0; l<repeat; l++)
		for (m=0; m < total; m++)
			AllocSetFreeIndex_ur(arr[m]);
	gettimeofday(&after, NULL);
	elapsed = after.tv_sec - before.tv_sec;
	elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
	printf("%20s %.3fs\n", "unrolled", elapsed);

	gettimeofday(&before, NULL);
	for(l=0; l<repeat; l++)
		for (m=0; m < total; m++)
			AllocSetFreeIndex(arr[m]);
	gettimeofday(&after, NULL);
	elapsed = after.tv_sec - before.tv_sec;
	elapsed += (after.tv_usec - before.tv_usec)/1000000.0;
	printf("%20s %.3fs\n", "normal", elapsed);

	return 0;
}

#include <stdio.h>
#include <stdlib.h>

#define ALLOC_MINBITS           3       /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS  11
#define BITS_PER_BYTE 8

/* ----------
 * AllocSetFreeIndex -
 *
 *              Depending on the size of an allocation compute which freechunk
 *              list of the alloc set it belongs to.  Caller must have verified
 *              that size <= ALLOC_CHUNK_LIMIT.
 * ----------
 */
int
AllocSetFreeIndex_clz(size_t size)
{
	int                     idx = 0;

	if (size > (1 << ALLOC_MINBITS))
	{
		size = (size - 1) >> ALLOC_MINBITS;

		idx = sizeof(unsigned int) * BITS_PER_BYTE -
			__builtin_clz((unsigned int)size);

	}

	return idx;
}

int
AllocSetFreeIndex_fh(size_t size)
{
	int                     idx = 0;

	if (size > (1 << ALLOC_MINBITS))
	{
		size = (size - 1) >> ALLOC_MINBITS;


		union { unsigned int u[2]; double d; } t; // temp

		t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
		t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = size;
		t.d -= 4503599627370496.0;
		idx = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FE;
	       
	}

	return idx;
}

int
AllocSetFreeIndex_lt(size_t size)
{
	int                     idx = 0;

	if (size > (1 << ALLOC_MINBITS))
	{
		size = (size - 1) >> ALLOC_MINBITS;


#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n

		static const char LogTable256[256] = 
			{
				0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
				LT(5), LT(6), LT(6), LT(7), LT(7), LT(7), LT(7),
				LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8), LT(8)
			};

		unsigned int t, tt; // temporaries

		if (tt = size >> 16)
		{
			idx = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
		}
		else 
		{
			idx = (t = size >> 8) ? 8 + LogTable256[t] : LogTable256[size];
		}

	}

	return idx;
}

int
AllocSetFreeIndex_ur(size_t size)
{
	int                     idx = 0;

	if (size > (1 << ALLOC_MINBITS))
	{
		size = (size - 1) >> ALLOC_MINBITS;


		idx++;
		size >>= 1;
		if (size != 0)
		{
			idx++;
			size >>= 1;
			if (size != 0)
			{
				idx++;
				size >>= 1;
				if (size != 0)
				{
					idx++;
					size >>= 1;
					while (size != 0)
					{
						idx++;
						size >>= 1;
					}
				}
			}
		}
	}

	return idx;
}

int
AllocSetFreeIndex(size_t size)
{
	int                     idx = 0;

	if (size > (1 << ALLOC_MINBITS))
	{
		size = (size - 1) >> ALLOC_MINBITS;

		do
		{
			idx++;
			size >>= 1;
		}
		while (size != 0);
	}

	return idx;
}
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to