Hello Robert,

3. Similarly, I suggest that the use of gaussian or uniform be an
error when argc < 6 OR argc > 6.  I also suggest that the
parenthesized distribution type be dropped from the error message in
all cases.

I wish to agree, but my interpretation of the previous code is that they
were ignored before, so ISTM that we are stuck with keeping the same
unfortunate behavior.

I don't agree.

Attached B patch does turn incorrect setrandom syntax into errors instead of ignoring extra parameters.

First A patch is repeated to help commitfest references.

--
Fabien.
diff --git a/contrib/pgbench/README b/contrib/pgbench/README
new file mode 100644
index 0000000..6881256
--- /dev/null
+++ b/contrib/pgbench/README
@@ -0,0 +1,5 @@
+# gaussian and exponential tests
+# with XXX as "expo" or "gauss"
+psql test < test-init.sql
+./pgbench -M prepared -f test-XXX-run.sql -t 1000000 -P 1 -n test
+psql test < test-XXX-check.sql
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 4aa8a50..e07206a 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -98,6 +98,8 @@ static int	pthread_join(pthread_t th, void **thread_return);
 #define LOG_STEP_SECONDS	5	/* seconds between log messages */
 #define DEFAULT_NXACTS	10		/* default nxacts */
 
+#define MIN_GAUSSIAN_THRESHOLD		2.0	/* minimum threshold for gauss */
+
 int			nxacts = 0;			/* number of transactions per client */
 int			duration = 0;		/* duration in seconds */
 
@@ -471,6 +473,76 @@ getrand(TState *thread, int64 min, int64 max)
 	return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
 }
 
+/*
+ * random number generator: exponential distribution from min to max inclusive.
+ * the threshold is so that the density of probability for the last cut-off max
+ * value is exp(-threshold).
+ */
+static int64
+getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
+{
+	double cut, uniform, rand;
+	Assert(threshold > 0.0);
+	cut = exp(-threshold);
+	/* erand in [0, 1), uniform in (0, 1] */
+	uniform = 1.0 - pg_erand48(thread->random_state);
+	/*
+	 * inner expresion in (cut, 1] (if threshold > 0),
+	 * rand in [0, 1)
+	 */
+	Assert((1.0 - cut) != 0.0);
+	rand = - log(cut + (1.0 - cut) * uniform) / threshold;
+	/* return int64 random number within between min and max */
+	return min + (int64)((max - min + 1) * rand);
+}
+
+/* random number generator: gaussian distribution from min to max inclusive */
+static int64
+getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
+{
+	double		stdev;
+	double		rand;
+
+	/*
+	 * Get user specified random number from this loop, with
+	 * -threshold < stdev <= threshold
+	 *
+	 * This loop is executed until the number is in the expected range.
+	 *
+	 * As the minimum threshold is 2.0, the probability of looping is low:
+	 * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the average
+	 * sinus multiplier as 2/pi, we have a 8.6% looping probability in the
+	 * worst case. For a 5.0 threshold value, the looping probability
+	 * is about e^{-5} * 2 / pi ~ 0.43%.
+	 */
+	do
+	{
+		/*
+		 * pg_erand48 generates [0,1), but for the basic version of the
+		 * Box-Muller transform the two uniformly distributed random numbers
+		 * are expected in (0, 1] (see http://en.wikipedia.org/wiki/Box_muller)
+		 */
+		double rand1 = 1.0 - pg_erand48(thread->random_state);
+		double rand2 = 1.0 - pg_erand48(thread->random_state);
+
+		/* Box-Muller basic form transform */
+		double var_sqrt = sqrt(-2.0 * log(rand1));
+		stdev = var_sqrt * sin(2.0 * M_PI * rand2);
+
+		/*
+		 * we may try with cos, but there may be a bias induced if the previous
+		 * value fails the test. To be on the safe side, let us try over.
+		 */
+	}
+	while (stdev < -threshold || stdev >= threshold);
+
+	/* stdev is in [-threshold, threshold), normalization to [0,1) */
+	rand = (stdev + threshold) / (threshold * 2.0);
+
+	/* return int64 random number within between min and max */
+	return min + (int64)((max - min + 1) * rand);
+}
+
 /* call PQexec() and exit() on failure */
 static void
 executeStatement(PGconn *con, const char *sql)
@@ -1319,6 +1391,7 @@ top:
 			char	   *var;
 			int64		min,
 						max;
+			double		threshold = 0;
 			char		res[64];
 
 			if (*argv[2] == ':')
@@ -1364,11 +1437,11 @@ top:
 			}
 
 			/*
-			 * getrand() needs to be able to subtract max from min and add one
-			 * to the result without overflowing.  Since we know max > min, we
-			 * can detect overflow just by checking for a negative result. But
-			 * we must check both that the subtraction doesn't overflow, and
-			 * that adding one to the result doesn't overflow either.
+			 * Generate random number functions need to be able to subtract
+			 * max from min and add one to the result without overflowing.
+			 * Since we know max > min, we can detect overflow just by checking
+			 * for a negative result. But we must check both that the subtraction
+			 * doesn't overflow, and that adding one to the result doesn't overflow either.
 			 */
 			if (max - min < 0 || (max - min) + 1 < 0)
 			{
@@ -1377,10 +1450,63 @@ top:
 				return true;
 			}
 
+			if (argc == 4) /* uniform */
+			{
 #ifdef DEBUG
-			printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+				printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
 #endif
-			snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+				snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+			}
+			else if ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
+				 (pg_strcasecmp(argv[4], "exponential") == 0))
+			{
+				if (*argv[5] == ':')
+				{
+					if ((var = getVariable(st, argv[5] + 1)) == NULL)
+					{
+						fprintf(stderr, "%s: invalid threshold number %s\n", argv[0], argv[5]);
+						st->ecnt++;
+						return true;
+					}
+					threshold = strtod(var, NULL);
+				}
+				else
+					threshold = strtod(argv[5], NULL);
+
+				if (pg_strcasecmp(argv[4], "gaussian") == 0)
+				{
+					if (threshold < MIN_GAUSSIAN_THRESHOLD)
+					{
+						fprintf(stderr, "%s: gaussian threshold must be more than %f\n,", argv[5], MIN_GAUSSIAN_THRESHOLD);
+						st->ecnt++;
+						return true;
+					}
+#ifdef DEBUG
+					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold));
+#endif
+					snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold));
+				}
+				else if (pg_strcasecmp(argv[4], "exponential") == 0)
+				{
+					if (threshold <= 0.0)
+					{
+						fprintf(stderr, "%s: exponential threshold must be strictly positive\n,", argv[5]);
+						st->ecnt++;
+						return true;
+					}
+#ifdef DEBUG
+					printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold));
+#endif
+					snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
+				}
+			}
+			else /* uniform with extra arguments */
+			{
+#ifdef DEBUG
+				printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
+#endif
+				snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+			}
 
 			if (!putVariable(st, argv[0], argv[1], res))
 			{
@@ -1920,9 +2046,34 @@ process_commands(char *buf)
 				exit(1);
 			}
 
-			for (j = 4; j < my_commands->argc; j++)
-				fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
-						my_commands->argv[0], my_commands->argv[j]);
+			if (my_commands->argc == 4 ) /* uniform */
+			{
+				/* nothing to do */
+			}
+			else if ((pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
+				 (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
+			{
+				if (my_commands->argc < 6)
+				{
+					fprintf(stderr, "%s(%s): missing argument\n", my_commands->argv[0], my_commands->argv[4]);
+					exit(1);
+				}
+
+				for (j = 6; j < my_commands->argc; j++)
+					fprintf(stderr, "%s(%s): extra argument \"%s\" ignored\n",
+							my_commands->argv[0], my_commands->argv[4], my_commands->argv[j]);
+			}
+			else /* uniform with extra argument */
+			{
+				int arg_pos = 4;
+
+				if (pg_strcasecmp(my_commands->argv[4], "uniform") == 0)
+					arg_pos++;
+
+				for (j = arg_pos; j < my_commands->argc; j++)
+					fprintf(stderr, "%s(uniform): extra argument \"%s\" ignored\n",
+							my_commands->argv[0], my_commands->argv[j]);
+			}
 		}
 		else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
 		{
diff --git a/contrib/pgbench/test-expo-check.sql b/contrib/pgbench/test-expo-check.sql
new file mode 100644
index 0000000..fbf35fd
--- /dev/null
+++ b/contrib/pgbench/test-expo-check.sql
@@ -0,0 +1,14 @@
+-- val, min, max, threshold
+CREATE OR REPLACE FUNCTION
+expoProba(INTEGER, INTEGER, INTEGER, DOUBLE PRECISION)
+RETURNS DOUBLE PRECISION IMMUTABLE STRICT AS $$
+  SELECT (exp(-$4*($1-$2)/($3-$2+1)) - exp(-$4*($1-$2+1)/($3-$2+1))) /
+         (1.0 - exp(-$4));
+$$ LANGUAGE SQL;
+
+SELECT SUM(cnt) FROM pgbench_dist;
+
+SELECT id, 1.0*cnt/SUM(cnt) OVER(), expoProba(id, 0, 99, 10.0)
+FROM pgbench_dist
+ORDER BY id;
+
diff --git a/contrib/pgbench/test-expo-run.sql b/contrib/pgbench/test-expo-run.sql
new file mode 100644
index 0000000..1d476bc
--- /dev/null
+++ b/contrib/pgbench/test-expo-run.sql
@@ -0,0 +1,2 @@
+\setrandom id 0 99 exponential 10.0
+UPDATE pgbench_dist SET cnt=cnt+1 WHERE id = :id;
diff --git a/contrib/pgbench/test-gauss-check.sql b/contrib/pgbench/test-gauss-check.sql
new file mode 100644
index 0000000..7d56117
--- /dev/null
+++ b/contrib/pgbench/test-gauss-check.sql
@@ -0,0 +1,57 @@
+-- approximation with maximal error of 1.2 10E-07, as told from
+-- https://en.wikipedia.org/wiki/Error_function#Numerical_approximation
+CREATE OR REPLACE FUNCTION erf(x DOUBLE PRECISION)
+RETURNS DOUBLE PRECISION IMMUTABLE STRICT AS $$
+DECLARE
+  t DOUBLE PRECISION := 1.0 / ( 1.0 + 0.5 * ABS(x));
+  tau DOUBLE PRECISION;
+BEGIN
+  IF ABS(x) >= 6.0 THEN
+    -- avoid underflow error
+    tau := 0.0;
+  ELSE
+    -- use approximation
+    tau := t * exp(-x*x - 1.26551223
+         + t * (1.00002368
+         + t * (0.37409196
+         + t * (0.09678418
+         + t * (-0.18628806
+         + t * (0.27886807
+         + t * (-1.13520398
+         + t * (1.48851587
+         + t * (-0.82215223
+         + t *  0.17087277)))))))));
+  END IF;
+  IF x >= 0 THEN
+    RETURN 1.0 - tau;
+  ELSE
+    RETURN tau - 1.0;
+  END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION PHI(DOUBLE PRECISION)
+RETURNS DOUBLE PRECISION IMMUTABLE STRICT AS $$
+  SELECT 0.5 * ( 1.0 + erf( $1 / SQRT(2.0) ) );
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION
+gaussianProba(i INTEGER, mini INTEGER, maxi INTEGER, threshold DOUBLE PRECISION)
+RETURNS DOUBLE PRECISION IMMUTABLE STRICT AS $$
+DECLARE
+  extent DOUBLE PRECISION;
+  mu DOUBLE PRECISION;
+BEGIN
+  extent := maxi - mini + 1.0;
+  mu := 0.5 * (maxi + mini);
+  RETURN (PHI(2.0 * threshold * (i - mini - mu + 0.5) / extent) -
+          PHI(2.0 * threshold * (i - mini - mu - 0.5) / extent))
+         -- truncated gaussian
+	 / ( 2.0 * PHI(threshold) - 1.0 );
+END;
+$$ LANGUAGE plpgsql;
+
+SELECT SUM(cnt) FROM pgbench_dist;
+SELECT id, 1.0*cnt/SUM(cnt) OVER(), gaussianProba(id, 0, 99, 2.0)
+FROM pgbench_dist
+ORDER BY id;
diff --git a/contrib/pgbench/test-gauss-run.sql b/contrib/pgbench/test-gauss-run.sql
new file mode 100644
index 0000000..984a3b4
--- /dev/null
+++ b/contrib/pgbench/test-gauss-run.sql
@@ -0,0 +1,2 @@
+\setrandom id 0 99 gaussian 2.0
+UPDATE pgbench_dist SET cnt=cnt+1 WHERE id = :id;
diff --git a/contrib/pgbench/test-init.sql b/contrib/pgbench/test-init.sql
new file mode 100644
index 0000000..84f7cc9
--- /dev/null
+++ b/contrib/pgbench/test-init.sql
@@ -0,0 +1,4 @@
+DROP TABLE IF EXISTS pgbench_dist;
+CREATE UNLOGGED TABLE pgbench_dist(id SERIAL PRIMARY KEY, cnt INTEGER NOT NULL DEFAULT 0);
+INSERT INTO pgbench_dist(id, cnt) 
+  SELECT i, 0 FROM generate_series(0, 99) AS i;
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index f264c24..276476a 100644
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -748,8 +748,8 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
 
    <varlistentry>
     <term>
-     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</></literal>
-    </term>
+     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | [ { gaussian | exponential } <replaceable>threshold</> ] ]</literal>
+     </term>
 
     <listitem>
      <para>
@@ -761,9 +761,76 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </para>
 
      <para>
+      By default, all values in the range are drawn with equal probability,
+      that is the distribution is <literal>uniform</>.
+      The <literal>gaussian</> and <literal>exponential</> options modify
+      this behavior; each requires a mandatory threshold which determines
+      the precise shape of the distribution.
+     </para>
+
+     <para>
+      <!-- introduction -->
+      For a Gaussian distribution, the interval is mapped onto a standard
+      <ulink url="http://en.wikipedia.org/wiki/Normal_distribution";>normal distribution</ulink>
+      (the classical bell-shaped Gaussian curve) truncated at
+      <literal>-threshold</> on the left and <literal>+threshold</>
+      on the right.
+      <!-- formula -->
+      To be precise, if <literal>PHI(x)</> is the cumulative distribution
+      function of the standard normal distribution, with mean <literal>mu</>
+      defined as <literal>(max + min) / 2.0</>, then value <replaceable>i</>
+      between <replaceable>min</> and <replaceable>max</> inclusive is drawn
+      with probability:
+      <literal>
+        (PHI(2.0 * threshold * (i - min - mu + 0.5) / (max - min + 1)) -
+         PHI(2.0 * threshold * (i - min - mu - 0.5) / (max - min + 1))) /
+         (2.0 * PHI(threshold) - 1.0)
+      </>
+      <!-- intuition -->
+      The larger the <replaceable>threshold</>, the more frequently values
+      close to the middle of the interval are drawn, and the less frequently
+      values close to the <replaceable>min</> and <replaceable>max</> bounds.
+      <!-- rule of thumb -->
+      For a Gaussian distribution, about 67% of values are drawn from the middle
+      <literal>1.0 / threshold</> and 95% in the middle <literal>2.0 / threshold</>,
+      thus if <replaceable>threshold</> is 4.0, 67% of values are drawn from the middle
+      quarter and 95% from the middle half of the interval.
+      <!-- constraint -->
+      The minimum <replaceable>threshold</> is 2.0 for performance of
+      the Box-Muller transform.
+     </para>
+
+     <para>
+      <!-- introduction -->
+      For an exponential distribution, the <replaceable>threshold</>
+      parameter controls the distribution by truncating a quickly-decreasing
+      <ulink url="http://en.wikipedia.org/wiki/Exponential_distribution";>exponential distribution</ulink>
+      at <replaceable>threshold</>, and then projecting onto integers between
+      the bounds.
+      <!-- formula -->
+      To be precise, value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>(exp(-threshold*(i-min)/(max+1-min)) -
+       exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold))</>.
+      <!-- intuition -->
+      Intuitively, the larger the <replaceable>threshold</>, the more
+      frequently values close to <replaceable>min</> are accessed, and the
+      less frequently values close to <replaceable>max</> are accessed.
+      The closer to 0 the threshold, the flatter (more uniform) the access
+      distribution.
+      <!-- rule of thumb -->
+      A crude approximation of the distribution is that the most frequent 1%
+      values in the range, close to <replaceable>min</>, are drawn
+      <replaceable>threshold</>%  of the time.
+      <!-- constraint -->
+      The <replaceable>threshold</> value must be strictly positive with the
+      <literal>exponential</> option.
+     </para>
+
+     <para>
       Example:
 <programlisting>
-\setrandom aid 1 :naccounts
+\setrandom aid 1 :naccounts gaussian 5.0
 </programlisting></para>
     </listitem>
    </varlistentry>
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index e07206a..685ff03 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -41,6 +41,7 @@
 #include <math.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <assert.h>
 #ifdef HAVE_SYS_SELECT_H
 #include <sys/select.h>
 #endif
@@ -173,6 +174,11 @@ bool		is_connect;			/* establish connection for each transaction */
 bool		is_latencies;		/* report per-command latencies */
 int			main_pid;			/* main process id used in log filename */
 
+/* gaussian/exponential distribution tests */
+double		threshold;          /* threshold for gaussian or exponential */
+bool        use_gaussian = false;
+bool		use_exponential = false;
+
 char	   *pghost = "";
 char	   *pgport = "";
 char	   *login = NULL;
@@ -294,11 +300,11 @@ static int	num_commands = 0;	/* total number of Command structs */
 static int	debug = 0;			/* debug flag */
 
 /* default scenario */
-static char *tpc_b = {
+static char *tpc_b_fmt = {
 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"\\setrandom bid 1 :nbranches\n"
 	"\\setrandom tid 1 :ntellers\n"
 	"\\setrandom delta -5000 5000\n"
@@ -312,11 +318,11 @@ static char *tpc_b = {
 };
 
 /* -N case */
-static char *simple_update = {
+static char *simple_update_fmt = {
 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"\\setrandom bid 1 :nbranches\n"
 	"\\setrandom tid 1 :ntellers\n"
 	"\\setrandom delta -5000 5000\n"
@@ -328,9 +334,9 @@ static char *simple_update = {
 };
 
 /* -S case */
-static char *select_only = {
+static char *select_only_fmt = {
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
 };
 
@@ -377,6 +383,8 @@ usage(void)
 		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
 		   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g. 0.01 for 1%%)\n"
+		   "  --exponential=NUM        exponential distribution with NUM threshold parameter\n"
+		   "  --gaussian=NUM           gaussian distribution with NUM threshold parameter\n"
 		   "\nCommon options:\n"
 		   "  -d, --debug              print debugging output\n"
 	  "  -h, --host=HOSTNAME      database server host or socket directory\n"
@@ -1450,15 +1458,17 @@ top:
 				return true;
 			}
 
-			if (argc == 4) /* uniform */
+			if (argc == 4 || /* uniform without or with "uniform" keyword */
+				(argc == 5 && pg_strcasecmp(argv[4], "uniform") == 0))
 			{
 #ifdef DEBUG
 				printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
 #endif
 				snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
 			}
-			else if ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
-				 (pg_strcasecmp(argv[4], "exponential") == 0))
+			else if (argc == 6 &&
+					 ((pg_strcasecmp(argv[4], "gaussian") == 0) ||
+					  (pg_strcasecmp(argv[4], "exponential") == 0)))
 			{
 				if (*argv[5] == ':')
 				{
@@ -1500,12 +1510,11 @@ top:
 					snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
 				}
 			}
-			else /* uniform with extra arguments */
+			else /* this means an error somewhere in the parsing phase... */
 			{
-#ifdef DEBUG
-				printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getrand(thread, min, max));
-#endif
-				snprintf(res, sizeof(res), INT64_FORMAT, getrand(thread, min, max));
+				fprintf(stderr, "%s: unexpected arguments\n", argv[0]);
+				st->ecnt++;
+				return true;
 			}
 
 			if (!putVariable(st, argv[0], argv[1], res))
@@ -2040,39 +2049,50 @@ process_commands(char *buf)
 
 		if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
 		{
+			/* parsing:
+			 * \setrandom variable min max [uniform]
+			 * \setrandom variable min max (gaussian|exponential) threshold
+			 */
+
 			if (my_commands->argc < 4)
 			{
 				fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
 				exit(1);
 			}
+			/* argc >= 4 */
 
-			if (my_commands->argc == 4 ) /* uniform */
+			if (my_commands->argc == 4 || /* uniform without/with "uniform" keyword */
+				(my_commands->argc == 5 &&
+				 pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
 			{
 				/* nothing to do */
 			}
-			else if ((pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
-				 (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
+			else if (/* argc >= 5 */
+					 (pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
+					 (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
 			{
 				if (my_commands->argc < 6)
 				{
-					fprintf(stderr, "%s(%s): missing argument\n", my_commands->argv[0], my_commands->argv[4]);
+					fprintf(stderr, "%s(%s): missing threshold argument\n", my_commands->argv[0], my_commands->argv[4]);
+					exit(1);
+				}
+				else if (my_commands->argc > 6)
+				{
+					fprintf(stderr, "%s(%s): too many arguments (extra:",
+							my_commands->argv[0], my_commands->argv[4]);
+					for (j = 6; j < my_commands->argc; j++)
+						fprintf(stderr, " %s", my_commands->argv[j]);
+					fprintf(stderr, ")\n");
 					exit(1);
 				}
-
-				for (j = 6; j < my_commands->argc; j++)
-					fprintf(stderr, "%s(%s): extra argument \"%s\" ignored\n",
-							my_commands->argv[0], my_commands->argv[4], my_commands->argv[j]);
 			}
-			else /* uniform with extra argument */
+			else /* cannot parse, unexpected arguments */
 			{
-				int arg_pos = 4;
-
-				if (pg_strcasecmp(my_commands->argv[4], "uniform") == 0)
-					arg_pos++;
-
-				for (j = arg_pos; j < my_commands->argc; j++)
-					fprintf(stderr, "%s(uniform): extra argument \"%s\" ignored\n",
-							my_commands->argv[0], my_commands->argv[j]);
+				fprintf(stderr, "%s: unexpected arguments (bad:", my_commands->argv[0]);
+				for (j = 4; j < my_commands->argc; j++)
+					fprintf(stderr, " %s", my_commands->argv[j]);
+				fprintf(stderr, ")\n");
+				exit(1);
 			}
 		}
 		else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
@@ -2329,6 +2349,30 @@ process_builtin(char *tb)
 	return my_commands;
 }
 
+/*
+ * compute the probability of the truncated gaussian random generation
+ * to draw values in the i-th slot of the range.
+ */
+static double gaussianProbability(int i, int slots, double threshold)
+{
+	assert(1 <= i && i <= slots);
+	return (0.50 * (erf (threshold * (1.0 - 1.0 / slots * (2.0 * i - 2.0)) / sqrt(2.0)) -
+		erf (threshold * (1.0 - 1.0 / slots * 2.0 * i) / sqrt(2.0))) /
+		erf (threshold / sqrt(2.0)));
+}
+
+/*
+ * compute the probability of the truncated exponential random generation
+ * to draw values in the i-th slot of the range.
+ */
+static double exponentialProbability(int i, int slots, double threshold)
+{
+	assert(1 <= i && i <= slots);
+	return (exp(- threshold * (i - 1) / slots) - exp(- threshold * i / slots)) /
+		(1.0 - exp(- threshold));
+}
+
+
 /* print out results */
 static void
 printResults(int ttype, int64 normal_xacts, int nclients,
@@ -2340,7 +2384,7 @@ printResults(int ttype, int64 normal_xacts, int nclients,
 	double		time_include,
 				tps_include,
 				tps_exclude;
-	char	   *s;
+	char	   *s, *d;
 
 	time_include = INSTR_TIME_GET_DOUBLE(total_time);
 	tps_include = normal_xacts / time_include;
@@ -2356,8 +2400,46 @@ printResults(int ttype, int64 normal_xacts, int nclients,
 	else
 		s = "Custom query";
 
-	printf("transaction type: %s\n", s);
+	if (use_gaussian)
+		d = "Gaussian distribution ";
+	else if (use_exponential)
+		d = "Exponential distribution ";
+	else
+		d = ""; /* default uniform case */
+
+	printf("transaction type: %s%s\n", d, s);
 	printf("scaling factor: %d\n", scale);
+
+	/* output in gaussian distribution benchmark */
+	if (use_gaussian)
+	{
+		int i;
+		printf("pgbench_account's aid selected with a truncated gaussian distribution\n");
+		printf("standard deviation threshold: %.5f\n", threshold);
+		printf("decile percents:");
+		for (i = 1; i <= 10; i++)
+			printf(" %.1f%%", 100.0 * gaussianProbability(i, 10.0, threshold));
+		printf("\n");
+		printf("probability of max/min percent of the range: %.1f%% %.1f%%\n",
+			  100.0 * gaussianProbability(50, 100, threshold),
+			  100.0 * gaussianProbability(100, 100, threshold));
+	}
+	/* output in exponential distribution benchmark */
+	else if (use_exponential)
+	{
+		int i;
+		printf("pgbench_account's aid selected with a truncated exponential distribution\n");
+		printf("exponential threshold: %.5f\n", threshold);
+		printf("decile percents:");
+		for (i = 1; i <= 10; i++)
+			printf(" %.1f%%",
+				   100.0 * exponentialProbability(i, 10, threshold));
+		printf("\n");
+		printf("probability of fist/last percent of the range: %.1f%% %.1f%%\n",
+			   100.0 * exponentialProbability(1, 100, threshold),
+			   100.0 * exponentialProbability(100, 100, threshold));
+	}
+
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -2488,6 +2570,8 @@ main(int argc, char **argv)
 		{"unlogged-tables", no_argument, &unlogged_tables, 1},
 		{"sampling-rate", required_argument, NULL, 4},
 		{"aggregate-interval", required_argument, NULL, 5},
+		{"gaussian", required_argument, NULL, 6},
+		{"exponential", required_argument, NULL, 7},
 		{"rate", required_argument, NULL, 'R'},
 		{NULL, 0, NULL, 0}
 	};
@@ -2768,6 +2852,25 @@ main(int argc, char **argv)
 				}
 #endif
 				break;
+			case 6:
+				use_gaussian = true;
+				threshold = atof(optarg);
+				if(threshold < MIN_GAUSSIAN_THRESHOLD)
+				{
+					fprintf(stderr, "--gaussian=NUM must be more than %f: %f\n",
+							MIN_GAUSSIAN_THRESHOLD, threshold);
+					exit(1);
+				}
+				break;
+			case 7:
+				use_exponential = true;
+				threshold = atof(optarg);
+				if(threshold <= 0.0)
+				{
+					fprintf(stderr, "--exponential=NUM must be more than 0.0\n");
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -2965,6 +3068,17 @@ main(int argc, char **argv)
 		}
 	}
 
+	/* set :threshold variable */
+	if(getVariable(&state[0], "threshold") == NULL)
+	{
+		snprintf(val, sizeof(val), "%lf", threshold);
+		for (i = 0; i < nclients; i++)
+		{
+			if (!putVariable(&state[i], "startup", "threshold", val))
+				exit(1);
+		}
+	}
+
 	if (!is_no_vacuum)
 	{
 		fprintf(stderr, "starting vacuum...");
@@ -2987,25 +3101,24 @@ main(int argc, char **argv)
 	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
 
 	/* process builtin SQL scripts */
-	switch (ttype)
-	{
-		case 0:
-			sql_files[0] = process_builtin(tpc_b);
-			num_files = 1;
-			break;
-
-		case 1:
-			sql_files[0] = process_builtin(select_only);
-			num_files = 1;
-			break;
-
-		case 2:
-			sql_files[0] = process_builtin(simple_update);
-			num_files = 1;
-			break;
-
-		default:
-			break;
+	if (ttype < 3)
+	{
+		char *fmt, *distribution, *queries;
+		int ret;
+		fmt = (ttype == 0)? tpc_b_fmt:
+			  (ttype == 1)? select_only_fmt:
+			  (ttype == 2)? simple_update_fmt: NULL;
+		assert(fmt != NULL);
+		distribution =
+			use_gaussian? " gaussian :threshold":
+			use_exponential? " exponential :threshold":
+			"" /* default uniform case */ ;
+		queries = pg_malloc(strlen(fmt) + strlen(distribution) + 1);
+		ret = sprintf(queries, fmt, distribution);
+		assert(ret >= 0);
+		sql_files[0] = process_builtin(queries);
+		num_files = 1;
+		pg_free(queries);
 	}
 
 	/* set up thread data structures */
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index 276476a..b718dd3 100644
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -307,6 +307,49 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </varlistentry>
 
      <varlistentry>
+      <term><option>--exponential</option><replaceable>threshold</></term>
+      <listitem>
+       <para>
+         Run exponential distribution pgbench test using this threshold parameter.
+         The threshold controls the distribution of access frequency on the
+         <structname>pgbench_accounts</> table.
+         See the <literal>\setrandom</> documentation below for details about
+         the impact of the threshold value.
+         When set, this option applies to all test variants (<option>-N</> for
+         skipping updates, or <option>-S</> for selects).
+       </para>
+
+       <para>
+         When run, the output is expanded to show the distribution
+         depending on the <replaceable>threshold</> value:
+
+<screen>
+...
+pgbench_account's aid selected with a truncated exponential distribution
+exponential threshold: 5.00000
+decile percents: 39.6% 24.0% 14.6% 8.8% 5.4% 3.3% 2.0% 1.2% 0.7% 0.4%
+probability of fist/last percent of the range: 4.9% 0.0%
+...
+</screen>
+
+         The figures are to be interpreted as follows.
+         If the scaling factor is 10, there are 1,000,000 accounts in
+         <literal>pgbench_accounts</>.
+         The first decile, with <literal>aid</> from 1 to 100,000, is
+         drawn 39.6% of the time, that is about 4 times more than average.
+         The second decile, from 100,001 to 200,000 is drawn 24.0% of the time,
+         that is 2.4 times more than average.
+         Up to the last decile, from 900,001 to 1,000,000, which is drawn
+         0.4% of the time, well below average.
+         Moreover, the first percent of the range, that is <literal>aid</>
+         from 1 to 10,000, is drawn 4.9% of the time, this 4.9 times more
+         than average, and the last percent, with <literal>aid</>
+         from 990,001 to 1,000,000, is drawn less than 0.1% of the time.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-f</option> <replaceable>filename</></term>
       <term><option>--file=</option><replaceable>filename</></term>
       <listitem>
@@ -320,6 +363,49 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </varlistentry>
 
      <varlistentry>
+      <term><option>--gaussian</option><replaceable>threshold</></term>
+      <listitem>
+       <para>
+         Run gaussian distribution pgbench test using this threshold parameter.
+         The threshold controls the distribution of access frequency on the
+         <structname>pgbench_accounts</> table.
+         See the <literal>\setrandom</> documentation below for details about
+         the impact of the threshold value.
+         When set, this option applies to all test variants (<option>-N</> for
+         skipping updates, or <option>-S</> for selects).
+       </para>
+
+       <para>
+         When run, the output is expanded to show the distribution
+         depending on the <replaceable>threshold</> value:
+
+<screen>
+...
+pgbench_account's aid selected with a truncated gaussian distribution
+standard deviation threshold: 5.00000
+decile percents: 0.0% 0.1% 2.1% 13.6% 34.1% 34.1% 13.6% 2.1% 0.1% 0.0%
+probability of max/min percent of the range: 4.0% 0.0%
+...
+</screen>
+
+         The figures are to be interpreted as follows.
+         If the scaling factor is 10, there are 1,000,000 accounts in
+         <literal>pgbench_accounts</>.
+         The first decile, with <literal>aid</> from 1 to 100,000, is
+         drawn less than 0.1% of the time.
+         The second, from 100,001 to 200,000 is drawn about 0.1% of the time...
+         up to the fifth decile, from 400,001 to 500,000, which
+         is drawn 34.1% of the time, about 3.4 times more thn average,
+         and then the gaussian curve is symmetric for the last five deciles.
+         Moreover, the first percent of the range, that is <literal>aid</>
+         from 50,001 to 60,000, is drawn 4.0% of the time, 4.0 times more
+         than average, and the last percent, with <literal>aid</>
+         from 990,001 to 1,000,000, is drawn less than 0.1% of the time.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-j</option> <replaceable>threads</></term>
       <term><option>--jobs=</option><replaceable>threads</></term>
       <listitem>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to