I appreciate your response, Damien.

I did do the bare minimum of correctness testing and it was the post right
after Guenther was congratulated on proving incorrectness:

https://marc.info/?l=openbsd-tech&m=165259528425835&w=2

The post includes software to reproduce the results.



I wrote a highly dynamically allocated program to test at intervals of
intervals to show at various stages to show the degree that the output
remains random

This is an example of some output:

testing arc4random_uniform(5) and arc4random_uniform_small_unlocked(5):

                     256X         2048X        16384X       131072X
 1048576X

 ....................................................................
   0 original        246          2053         16400        131115
1048306
         mine        263          2042         16312        131258
1046989

   1 original        234          2013         16337        130894
1047810
         mine        249          2022         16304        131161
1049511

   2 original        236          2061         16367        130802
1047430
         mine        257          2117         16597        130718
1046375

   3 original        284          2089         16444        131092
1050332
         mine        266          2058         16379        131190
1049877

   4 original        280          2024         16372        131457
1049002
         mine        245          2001         16328        131033
1050128



max-min values:

     original         50            76           107           655
 2902
         mine         21           116           293           540
 3753


The program is set up to test values 2 through 50. This will create 224KiB
of output.
I suspected that you'd prefer that I didn't attach it.

Some progress output has been relegated to stderr so that you can easily
pipe it to a file.

I added some getrlimit an setrlimit functions to maximize memory limits if
necessary
In the case that I created for you, this will not be needed.

It uses 1276K RAM in the current configuration.


> Just to bring this back to where we came in: even putting thread-safety
> aside (which you absolutely can't): it doesn't matter how much faster
> it is, your replacement function isn't useful until you do the work to
> demonstrate correctness.
>
> You have done literally zero so far, not even the bare minimum of
> testing the output. As a result your first version was demonstrated to
> be completely broken by literally the most basic of possible tests, a
> mere 10 lines of code.
>
> That you left this to others to do tells me that you fundamentally don't
> understand the environment in which you're trying to operate, and that
> you don't respect the time of the people you're trying to convince.
>
> Please stop wasting our time.
>
> -d

-- 
-Luke
#include <sys/resource.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

extern char *malloc_options;


/*
cc arc4random_uniform_small.c -O2 -march=native -mtune=native -flto=full \
-static -p -fno-inline && ./a.out && gprof a.out gmon.out

cc arc4random_uniform_small.c -O2 && ./a.out
 */


/* 
 * uint32_t
 * arc4random_uniform(uint32_t upper_bound);
 * 
 * for which this function is named, provides a cryptographically secure:
 * uint32_t arc4random() % upper_bound;
 * 
 * this function minimizes the waste of randomly generated bits,
 * for small upper bounds.
 * 
 * 'bits' is the requested number of random bits and it needs to be
 * within the following parameters:
 * 
 *   1 << bits     >=     upper_bound     >     (1 << bits) >> 1
 * 
 * I make a possibly incorrect presumption that size_t is
 * at the very least a uint32_t, but probably a uint64_t for speed
 */
static uint32_t
arc4random_uniform_small_unlocked(uint32_t upper_bound)
{	
	static uint64_t rand_holder = 0;
	static uint64_t rand_bits = 0;
	
	static uint64_t upper_bound0 = 2;
	static uint64_t bits0 = 1;

	uint64_t bits = 16, i = 1, n = 32, ret, ub = upper_bound;

	if (ub != upper_bound0) {
		
		if (ub < 2) {
			
			/*
			 * reset static cache for if a process needs to fork()
			 * to make it manually fork-safe
			 */
			if (ub == 0) { 
				rand_holder = 0;
				rand_bits = 0;
			}
			return 0;
		}
		
		/*
		 * binary search for ideal size random bitstream selection
		 */
		for (;;) {
			
			if ( ub > ((uint64_t)1 << bits) ) {
				
				if (n - i == 1) {
					bits = n;
					break;
				}
				
				i = bits;
				bits = (i + n) / 2;
				continue;
			}
			if ( ub <= ((uint64_t)1 << bits) >> 1 ) {
				
				if (n - i == 1) {
					bits = n;
					break;
				}
				
				n = bits;
				bits = (i + n) / 2;
				continue;
			}
			
			break;
		}
		upper_bound0 = ub;
		bits0 = bits;
	} else
		bits = bits0;
		
	const uint64_t bitmask = ((uint64_t)1 << bits) - 1;
		
	do {
		if (rand_bits < bits) {
			rand_holder |= ((uint64_t)arc4random()) << rand_bits;
			
			/* 
			 * rand_bits will be a number between 0 and 31 here
			 * so the 0x20 bit will be empty
			 * rand_bits += 32;
			 */ 
			rand_bits |= 32;
		}
		
		ret = rand_holder & bitmask;
		rand_holder >>= bits;
		rand_bits -= bits;

	} while (ret >= ub);
		
	
	return (uint32_t)ret;
}


//~ /* 
 //~ * uint32_t
 //~ * arc4random_uniform(uint32_t upper_bound);
 //~ * 
 //~ * for which this function is named, provides a cryptographically secure:
 //~ * uint32_t arc4random() % upper_bound;
 //~ * 
 //~ * this function minimizes the waste of randomly generated bits,
 //~ * for small upper bounds and is possibly more secure.
 //~ * 
 //~ *   1 << bits     >=     upper_bound     >     (1 << bits) >> 1
 //~ */
//~ /*
 //~ * this one doesn't waste any bits it doesn't need to.
 //~ * it doesn't take a bits value, because it generates it for you.
 //~ * This makes it less efficient than the other arc4random_uniform_fast
 //~ * functions
 //~ */
//~ static uint32_t
//~ arc4random_uniform_fast2(uint32_t upper_bound)
//~ {
	//~ static uint64_t rand_holder = 0;
	//~ static size_t rand_bits = 0;
	
	//~ static uint64_t upper_bound0 = 2;
	//~ static size_t bits0 = 1;

	//~ size_t ret;
	//~ size_t bits = 16, i = 1, n = 32, ub = upper_bound;

	//~ if (ub != upper_bound0) {
		
		//~ if (ub < 2)
			//~ return 0;
		
		//~ for (;;) {
			
			//~ if ( ub <= ((size_t)1 << bits) >> 1 ) {
				
				//~ if (n - i == 1) {
					//~ bits = n;
					//~ break;
				//~ }
				
				//~ n = bits;
				//~ bits = (i + n) / 2;
				//~ continue;
			//~ }
			//~ if ( ub > ((size_t)1 << bits) ) {
				
				//~ if (n - i == 1) {
					//~ bits = n;
					//~ break;
				//~ }
				
				//~ i = bits;
				//~ bits = (i + n) / 2;
				//~ continue;
			//~ }
			
			
			//~ break;
		//~ }
		//~ upper_bound0 = ub;
		//~ bits0 = bits;
		
	//~ } else
		//~ bits = bits0;

	//~ if (rand_bits < bits) {
		
		//~ rand_holder |= ((uint64_t)arc4random()) << rand_bits;
		
		//~ /* 
		 //~ * rand_bits will be a number between 0 and 31 here
		 //~ * so the 0x20 bit will be empty
		 //~ * rand_bits += 32;
		 //~ */ 
		//~ rand_bits |= 32;
	//~ }
	
	//~ ret = rand_holder & (  ((size_t)1 << bits) - 1  );
	//~ rand_holder >>= bits;
	//~ rand_bits -= bits;
	
	//~ while (ret >= ub) {
		
		//~ if (!rand_bits) {
			//~ rand_holder = arc4random();
			//~ rand_bits = 32;
		//~ }
		
		//~ /*
		 //~ * this shifts ret right and puts in
		 //~ * a new maximum bit from rand_holder
		 //~ * 
		 //~ *            bits >= 1
		 //~ */
		//~ ret = ((rand_holder & 1) << (bits - 1)) | ( ret >> 1 );

		//~ rand_holder >>= 1;
		//~ --rand_bits;
	//~ }
	
	//~ return ret;
//~ }

/*
 *  Random binary. Ideal conditions.
 */
static
void rand_ideal(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform(2);
}

/*
 *  Random binary. Ideal conditions.
 */
static
void rand_ideal_small(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform_small_unlocked(2);
}

/*
 *  Random short. Ideal conditions.
 */
static
void rand_short_ideal(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform(0x10000);
}

/*
 *  Random short. Ideal conditions.
 */
static
void rand_short_ideal_small(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform_small_unlocked(0x10000);
}

/*
 *  Random short plus. Reveals limitations of arc4random_uniform_small_unlocked()
 */
static
void rand_short_worst(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform(0x10001);
}

/*
 *  Random short plus. Reveals limitations of arc4random_uniform_small_unlocked()
 */
static
void rand_short_worst_small(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform_small_unlocked(0x10001);
}

/*
 * Random 3 byte value plus. Reveals limitations of arc4random_uniform_small_unlocked()
 * versus arc4random_uniform()
 */
static
void rand_worst(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform(0x400001);
}

/*
 * Random 3 byte value plus. Reveals limitations of arc4random_uniform_small_unlocked()
 * versus arc4random_uniform()
 */
static
void rand_worst_small(size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		arc4random_uniform_small_unlocked(0x400001);
}

/*
 *  Random 'a'-'z'
 * length doesn't include the '\0' character at the end
 */
static
void newdata(char a[], size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n)
		a[n] = 'a' + (char)arc4random_uniform(26);
	a[len] = '\0';
}

/*
 *  Random 'a'-'z'
 * length doesn't include the '\0' character at the end
 */
static
void newdata_small(char a[], size_t length)
{
	size_t len = length, n;
	for (n = 0; n != len; ++n) {
		a[n] = 'a' +
		    (char)arc4random_uniform_small_unlocked(26);
	}
	a[len] = '\0';
}


//	33-45		skip a bunch of whitespace characters
//	48-126		skip '.' and '/'
//	I don't include '.'(46), because it could cause issues I'd rather avoid
static const char filenameTypableArray[92] =
{
	 33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
	 43,  44,  45,  48,  49,  50,  51,  52,  53,  54,
	 55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
	 65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
	 75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
	 85,  86,  87,  88,  89,  90,  91,  92,  93,  94,
	 95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
	105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
	115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
	125, 126
};

//length doesn't include the '\0' character at the end
static
void newdataTypableFilename(char a[], const size_t length)
{
	size_t j;

	for (j = 0; j < length; ++j)
		a[j] = filenameTypableArray[ arc4random_uniform(92) ];
	a[j] = '\0';
}

//length doesn't include the '\0' character at the end
static
void newdataTypableFilename_small(char a[], const size_t length)
{
	size_t j;

	for (j = 0; j < length; ++j) {
		a[j] = 
		    filenameTypableArray[ arc4random_uniform_small_unlocked(92) ];
	}
	a[j] = '\0';
}

/* 
 * simulate the kind of decreasing upperbound
 * random generation needed to shuffle an array
 */
static
void shuffle(const size_t length)
{
	size_t i, g = length - 1;
	
	for (i = 0; i < g; ++i)
		arc4random_uniform(length - i);
}

/* 
 * simulate the kind of decreasing upperbound
 * random generation needed to shuffle an array
 */
static
void shuffle_small(const size_t length)
{
	size_t i, g = length - 1;
	
	for (i = 0; i < g; ++i)
		arc4random_uniform_small_unlocked(length - i);
}

	/*
	 * if you are going to calculate a large number 
	 * like 100+ million, you need these
	 * getrlimit()/setrlimit()
	 * calls or calloc() will fail.
	 * However, if you do so, it can venture into
	 * using swap instead of just RAM and if it
	 * exceeds swap, it can crash your system.
	 * 
	 * LOWER_LIMIT = 100000000;
	 * UPPER_LIMIT = 100000000;
	 * LEN_LIMIT = 2;
	 * LEN_INIT = 10000;
	 * LEN_MULTIPLE = 10;
	 * 
	 * populates 4588M of memory on my machine
	 */

int main()
{

	malloc_options = "SJJ";

	if (unveil(NULL, NULL) == -1)
		err(1, "unveil, line: %d", __LINE__);
	
	/*
	 * needs "proc" for the struct rlimit commands
	 * a pledge() follows them
	 */
	if (pledge("stdio proc", NULL) == -1)
		err(1, "pledge, line: %d", __LINE__);


	struct rlimit rlp;

	getrlimit(RLIMIT_CPU, &rlp);
	rlp.rlim_cur = rlp.rlim_max;
	setrlimit(RLIMIT_CPU, &rlp);


	getrlimit(RLIMIT_DATA, &rlp);
	rlp.rlim_cur = rlp.rlim_max;
	setrlimit(RLIMIT_DATA, &rlp);


	getrlimit(RLIMIT_MEMLOCK, &rlp);
	rlp.rlim_cur = rlp.rlim_max;
	setrlimit(RLIMIT_MEMLOCK, &rlp);


	getrlimit(RLIMIT_RSS, &rlp);
	rlp.rlim_cur = rlp.rlim_max;
	setrlimit(RLIMIT_RSS, &rlp);

	
	if (pledge("stdio", NULL) == -1)
		err(1, "pledge, line: %d", __LINE__);
	
	size_t n = 50000000;
	
	size_t i, j, k, p;

	struct timespec start, end;
	
	clock_gettime(CLOCK_REALTIME, &start);
	
	/*
	 * It will calculate up to and including UPPER_LIMIT
	 */

	//~ size_t UPPER_LIMIT = 100000000;
	//~ size_t UPPER_LIMIT = 1000000;
	
	//~ const size_t LOWER_LIMIT = 1000000;
	//~ const size_t UPPER_LIMIT = 1000000;
	
	//~ const size_t LOWER_LIMIT = 0x1000;
	//~ const size_t UPPER_LIMIT = 0x1001;
	
	//~ const size_t LEN_LIMIT = 5;
	//~ const size_t LEN_INIT = 10;
	//~ const size_t LEN_MULTIPLE = 8;


	//~ const size_t LOWER_LIMIT = 2;
	//~ const size_t UPPER_LIMIT = 49;
		
	//~ const size_t LEN_LIMIT = 13;
	//~ const size_t LEN_INIT = 256;
	//~ const size_t LEN_MULTIPLE = 2;



	const size_t LOWER_LIMIT = 2;
	const size_t UPPER_LIMIT = 50;
	
	const size_t LEN_LIMIT = 5;
	const size_t LEN_INIT = 256;
	const size_t LEN_MULTIPLE = 8;
	
	if (!LEN_INIT)
		errx(1, "LEN_INIT cannnot be 0");
	if (!LEN_LIMIT)
		errx(1, "LEN_LIMIT cannnot be 0");
	if (LEN_MULTIPLE < 2)
		errx(1, "LEN_MULTIPLE must be > 1");
	
	printf("\n");
	printf("        LOWER_LIMIT == %zu\n", LOWER_LIMIT);
	printf("        UPPER_LIMIT == %zu\n", UPPER_LIMIT);
	printf("            Columns == %zu\n", LEN_LIMIT);
	printf("   Initial Interval == %zu\n", LEN_INIT);
	printf("Interval Multiplier == %zu\n\n", LEN_MULTIPLE);
	
	/*
	 * will start with looking at LEN_INIT iterations then will
	 * look at LEN_MULTIPLE multiples of iterations after that
	 */
	size_t *len = calloc(LEN_LIMIT, sizeof(size_t));
	if (len == NULL)
		errx(1, "calloc");
	
	len[0] = LEN_INIT;
	for (k = 1; k < LEN_LIMIT; ++k)
		len[k] = len[k - 1] * LEN_MULTIPLE;

	size_t **counts1;
	size_t **counts2;
//	size_t **counts1[UPPER_LIMIT + 1][LEN_LIMIT];
//	size_t **counts2[UPPER_LIMIT + 1][LEN_LIMIT];
	
	counts1 = calloc(UPPER_LIMIT + 1, sizeof(size_t*));
	if (counts1 == NULL)
		errx(1, "calloc");
	counts2 = calloc(UPPER_LIMIT + 1, sizeof(size_t*));
	if (counts1 == NULL)
		errx(1, "calloc");
	
	
	/*
	 * for big numbers, having an individual calloc for every
	 * counts1[k] will SERIOUSLY add up in memory
	 */
	counts1[0] = calloc((UPPER_LIMIT + 1) * LEN_LIMIT, sizeof(size_t));
	if (counts1[0] == NULL)
		errx(1, "calloc");
	counts2[0] = calloc((UPPER_LIMIT + 1) * LEN_LIMIT, sizeof(size_t));
	if (counts2[0] == NULL)
		errx(1, "calloc");
		
	for (j = 1; j < UPPER_LIMIT; ++j) {
		counts1[j] = counts1[j - 1] + LEN_LIMIT;
		counts2[j] = counts2[j - 1] + LEN_LIMIT;
	}
	
	size_t *min1 = calloc(LEN_LIMIT, sizeof(size_t));
	if (min1 == NULL)
		errx(1, "calloc");
	size_t *max1 = calloc(LEN_LIMIT, sizeof(size_t));
	if (max1 == NULL)
		errx(1, "calloc");
	size_t *min2 = calloc(LEN_LIMIT, sizeof(size_t));
	if (min2 == NULL)
		errx(1, "calloc");
	size_t *max2 = calloc(LEN_LIMIT, sizeof(size_t));
	if (max2 == NULL)
		errx(1, "calloc");
		
	for (j = LOWER_LIMIT; j <= UPPER_LIMIT; ++j) {
		
		
		k = p = 0;
		size_t jj;
		size_t ll = len[LEN_LIMIT - 1];
		size_t g = len[p];
		size_t gg = (j * ll >= 10000000);
		int pp = 0;
		int ppp = 0;
		int q = fprintf(stderr, "%zu out of %zu: ", j, UPPER_LIMIT);
		for (i = 0; i < ll; ++i) {
			
			
			if (gg && !(i % 100000)) {
				for (jj = (size_t)pp; jj; --jj)
					fprintf(stderr, "\b");
				pp = fprintf(stderr, "%zu", (ll - i) / 100000);
				if (ppp > pp)
					fprintf(stderr, " \b");
				ppp = pp;
				fflush(stderr);
			}
			
			if (i == g) {
				++k;
				g = len[++p];
			}
			
			for (jj = j; jj; --jj) {
								
				++counts1[      arc4random_uniform(j)         ][k];
				++counts2[arc4random_uniform_small_unlocked(j)][k];
			}

		}
		
		
		/*
		 * clear stderr output
		 */
		g = (size_t)pp + (size_t)q;
		for (jj = g; jj; --jj)
			fprintf(stderr, "\b");
		for (jj = g; jj; --jj)
			fprintf(stderr, " ");
		for (jj = g; jj; --jj)
			fprintf(stderr, "\b");
		
		for (i = 0; i < j; ++i) {
			for (k = 1; k < LEN_LIMIT; ++k) {
				counts1[i][k] += counts1[i][k - 1];
				counts2[i][k] += counts2[i][k - 1];
			}
		}
			
		printf("testing arc4random_uniform(%zu) and arc4random_uniform_small_unlocked(%zu):\n\n", j, j);
		
		printf(        "              %10zuX", len[0]);
		for (k = 1; k < LEN_LIMIT; ++k)
			printf(" %12zuX", len[k]);
		printf("\n");
		
		printf(        "             ............");
		for (k = 1; k < LEN_LIMIT; ++k)
			printf("..............");
		printf("\n");
		for (i = 0; i < j; ++i) {
			printf("%4zu original %10zu", i, counts1[i][0]);
			for (k = 1; k < LEN_LIMIT; ++k)
				printf(" %13zu", counts1[i][k]);
			printf("\n");
			
			printf("         mine %10zu", counts2[i][0]);
			for (k = 1; k < LEN_LIMIT; ++k)
				printf(" %13zu", counts2[i][k]);
			printf("\n\n");
		}
		
		printf("\n\nmax-min values:\n\n");
		
		for (k = 0; k < LEN_LIMIT; ++k) {
			min1[k] = max1[k] = counts1[0][k];
			min2[k] = max2[k] = counts2[0][k];
			for (i = 1; i < j; ++i) {
				if (counts1[i][k] < min1[k])
					min1[k] = counts1[i][k];
				else if (counts1[i][k] > max1[k])
					max1[k] = counts1[i][k];
					
				if (counts2[i][k] < min2[k])
					min2[k] = counts2[i][k];
				else if (counts2[i][k] > max2[k])
					max2[k] = counts2[i][k];
			}
		}
		
		printf("     original %10zu", max1[0] - min1[0]);
		for (k = 1; k < LEN_LIMIT; ++k)
			printf(" %13zu", max1[k] - min1[k]);
		printf("\n");
		
		printf("         mine %10zu", max2[0] - min2[0]);
		for (k = 1; k < LEN_LIMIT; ++k)
			printf(" %13zu", max2[k] - min2[k]);
		printf("\n\n\n");
		    
		//~ printf("         mine %10zu %13zu %13zu %13zu %13zu\n\n", max2[0] - min2[0], max2[1] - min2[1], max2[2] - min2[2], max2[3] - min2[3], max2[4] - min2[4]);
	
		memset(counts1[0], 0, (UPPER_LIMIT + 1) * LEN_LIMIT * sizeof(size_t));
		memset(counts2[0], 0, (UPPER_LIMIT + 1) * LEN_LIMIT * sizeof(size_t));
	}
	clock_gettime(CLOCK_REALTIME, &end);
	
	free(counts1[0]);
	free(counts2[0]);
	
	free(counts1);
	free(counts2);
	
	free(len);
	free(min1);
	free(max1);
	free(min2);
	free(max2);

	long double cpu_time_used0;
	long double cpu_time_used1;
	
	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("time = %.9Lf seconds\n\n", cpu_time_used0);

	printf("\n\n\n\n\n");
	
	
		
	//~ size_t m;
		
	char *buf = malloc(n + 1);
	if (buf == NULL)
		errx(1, "malloc");
		
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	
	printf("\n\n\n");
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	newdata(buf, n);
	//~ printf("%s\n\n", buf);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("newdata\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	newdata_small(buf, n);
	//~ printf("%s\n\n", buf);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("newdata_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of newdata_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	
	printf("\n\n\n\n\n");
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	newdataTypableFilename(buf, n);
	//~ printf("%s\n\n", buf);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("newdataTypableFilename\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	newdataTypableFilename_small(buf, n);
	//~ printf("%s\n\n", buf);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("newdataTypableFilename_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
	printf("\nThe original takes %.3Lf%% ",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	printf("of the runtime of newdataTypableFilename_small\n\n");
	
	
	printf("\n\n\n\n\n");

	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_ideal(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_ideal\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_ideal_small(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_ideal_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of rand_ideal_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	
	printf("\n\n\n\n\n");

	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_short_ideal(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_short_ideal\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_short_ideal_small(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_short_ideal_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of rand_short_ideal_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	
	printf("\n\n\n\n\n");

	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_short_worst(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_short_worst\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_short_worst_small(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_short_worst_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of rand_short_worst_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	
	printf("\n\n\n\n\n");

	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_worst(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_worst\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	rand_worst_small(n);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("rand_worst_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of rand_worst_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	
	printf("\n\n\n\n\n");

	
	
	size_t shuffle_length = 65536;
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	//~ for (m = n / shuffle_length; m; --m)
		shuffle(shuffle_length);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used0 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("shuffle\n");
	printf("time = %.9Lf seconds\n\n", cpu_time_used0);
	
	
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	//~ for (m = n / shuffle_length; m; --m)
		shuffle_small(shuffle_length);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

	cpu_time_used1 =
	    (long double) (end.tv_sec - start.tv_sec) +
	    (long double) (end.tv_nsec - start.tv_nsec) /
	    (long double) 1000000000;

	printf("shuffle_small\n");
	printf("time = %.9Lf seconds\n", cpu_time_used1);
	
printf("\nThe original takes %.3Lf%% of the runtime of shuffle_small\n\n",
	    (cpu_time_used0 / cpu_time_used1) * (long double)100.0);
	
	free(buf);


	printf("If it crashed here, you are trying to profile.\n");
	printf("Turn off all pledge() at the beginning of main().\n\n");
	
	return 0;
}

Reply via email to