https://bugs.llvm.org/show_bug.cgi?id=50026

I reported it to the llvm people. it is two slightly different quicksort
algorithms which perform radically differently. The one which you could
assume would take more time, performs MUCH better.

I made a custom quicksort algorithm which outperforms qsort by A LOT for
sorting an array of around 300 randomly created unsigned characters, which
is what I use it for.

if you read the report a guy said there's a 10% difference for sorting 3
million characters on freebsd, but there's about 40% performance difference
on OpenBSD. maybe it's also how the OpenBSD team modified clang to prevent
rop chain stuff or something? I'm using a westmere based intell server.

it's the same for clang 11.

What's the deal?

-Luke
/* 
clang sort_test2.c -O2 -o sort_test && ./sort_test
 */

/*
 * quicksort should be slightly faster than quicksort1 because it is
 * identical except quicksort1 does more work using global counters.
 * quicsort1 runs 41% faster. Using gcc, quicksort is instead slightly faster.
 */


/*
 * I'm using clang 10.0.1 on OpenBSD. these are my results:


quicksort
time = 0.004501771



quicksort1
time = 0.002655233
*/


#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>

size_t m;
size_t g;
size_t i;


/* 
 * quicksort sorts characters on a u_char array.
 * 'a' is the first character on the array.
 * 'b' is last character on the array
 */
static
void quicksort(u_char a[], u_char b[])
{
	u_char temp;
	
	if (b - a <= 1)
	{
		if (a == b)
			return;

		if (*a > *b)
		{
			temp = *a; *a = *b; *b = temp;
		}
		return;
	}
	u_char * j = a + 1;
	u_char * k = b;
	u_char u = *a;

	/* 
	 * This if() avoids repeated awkward special case branching 
	 * in the similar if() in the "while (++j < k)" loop
	 */
	if (*j > u)
	{
		do
		{
			if (*k < u)
			{
				temp = *j; *j = *k; *k = temp;
				goto skip0;
			}
		} while (j < --k);

		quicksort(j, b);
		return;

		skip0:
		if (j == --k)
		{
			*a = *j; *j = u;

			quicksort(j+1, b);
			return;
		}
	}


	while (++j < k)
	{
		if (*j > u)
		{
			do
			{
				if (*k < u)
				{
					temp = *j; *j = *k; *k = temp;
					goto skip;
				}
			} while (j < --k);

			quicksort(j, b);

			*a = *--j; *j = u;

			quicksort(a, j-1);
			return;

			skip:
			if (j == --k)
			{
				*a = *j; *j = u;

				quicksort(a, j-1);
				quicksort(j+1, b);
				return;
			}
		}
	}

	if (*j > u)
	{
		quicksort(j, b);

		*a = *--j; *j = u;

		quicksort(a, j-1);
	}
	else
	{
		*a = *j; *j = u;

		quicksort(a, j-1);

		if (j == b)
			return;

		quicksort(j+1, b);
	}
}

/* 
 * quicksort1 is identical to quicksort() but it alters some global counters
 * it should take slightly longer to run, but it runs in half the time
 */
static
void quicksort1(u_char a[], u_char b[], size_t level)
{
	++m;
	if (level > i)
		i = level;
		
	u_char temp;
	
	
	if (b - a <= 1)
	{
		++g;
		if (a == b)
			return;
		if (*a > *b)
		{
			temp = *a; *a = *b; *b = temp;
		}
		return;
	}
	u_char * j = a + 1;
	u_char * k = b;
	u_char u = *a;

	/* 
	 * This if() avoids repeated awkward special case branching 
	 * in the similar if() in the "while (++j < k)" loop
	 */
	if (*j > u)
	{
		do
		{
			if (*k < u)
			{
				temp = *j; *j = *k; *k = temp;
				goto skip0;
			}
		} while (j < --k);

		quicksort1(j, b, level + 1);
		return;

		skip0:
		if (j == --k)
		{
			*a = *j; *j = u;

			quicksort1(j+1, b, level + 1);
			return;
		}
	}


	while (++j < k)
	{
		if (*j > u)
		{
			do
			{
				if (*k < u)
				{
					temp = *j; *j = *k; *k = temp;
					goto skip;
				}
			} while (j < --k);

			quicksort1(j, b, level + 1);

			*a = *--j; *j = u;

			quicksort1(a, j-1, level + 1);
			return;

			skip:
			if (j == --k)
			{
				*a = *j; *j = u;

				quicksort1(a, j-1, level + 1);
				quicksort1(j+1, b, level + 1);
				return;
			}
		}
	}

	if (*j > u)
	{
		quicksort1(j, b, level + 1);

		*a = *--j; *j = u;

		quicksort1(a, j-1, level + 1);
	}
	else
	{
		*a = *j; *j = u;

		quicksort1(a, j-1, level + 1);

		if (j == b)
			return;

		quicksort1(j+1, b, level + 1);
	}
}


int main()
{

	long double cpu_time_used;
	struct timespec start, end;
	u_char *buffer, *buffer2;
	
	size_t y;
	size_t n = 30000;
	
	buffer = (u_char*)malloc(n);
	if (buffer == NULL) err(1, "malloc");
	buffer2 = (u_char*)malloc(n);
	if (buffer2 == NULL) err(1, "malloc");
	
	
	for (y = 0; y < n; ++y)
		buffer[y] = rand() % 256;

	// run it once to clear any wrappers
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);


	memcpy(buffer2, buffer, n);

	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	quicksort(buffer2, buffer2 + n - 1);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("\n\nquicksort\n");
	printf("time = %.9Lf\n\n", cpu_time_used);



	memcpy(buffer2, buffer, n);

	i = m = g = 0;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	quicksort1(buffer2, buffer2 + n - 1, 1);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("\n\nquicksort1\n");
	//~ printf("time = %.9Lf, max depth: %lu, calls: %lu, g: %lu\n\n\n", cpu_time_used, i, m, g);
	printf("time = %.9Lf\n", cpu_time_used);


	return 0;
}

Reply via email to