Thank you for your grate documentation and fix working!!!
It becomes very helpful for understanding our feature.

Hopefully it will help make it, or part of it, pass through.

I add two feature in gauss_B_4.patch.

1) Add gaussianProbability() function
It is same as exponentialProbability(). And the feature is as same as
before.

Ok, that is better for readability and easy reuse.

2) Add result of "max/min percent of the range"
It is almost same as --exponential option's result. However, max percent of
the range is center of distribution
and min percent of the range is most side of distribution.
Here is the output example,

Ok, good that make it homogeneous with the exponential case.

+ pgbench_account's aid selected with a truncated gaussian distribution
+ standard deviation threshold: 5.00000
+ decile percents: 0.0% 0.1% 2.1% 13.6% 34.1% 34.1% 13.6% 2.1% 0.1% 0.0%
+ probability of max/min percent of the range: 4.0% 0.0%

And I add the explanation about this in the document.

This is a definite improvement. I tested these minor changes and everything seems ok.

Attached is a very small update. One word removed from the doc, and one redundant declaration removed from the code.

I also have a problem with assert & Assert. I finally figured out that Assert is not compiled in by default, thus it is generally ignored. So it is more for debugging purposes when activated than for guarding against some unexpected user errors.

--
Fabien.
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index e07206a..0247a05 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -41,6 +41,7 @@
 #include <math.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <assert.h>
 #ifdef HAVE_SYS_SELECT_H
 #include <sys/select.h>
 #endif
@@ -173,6 +174,11 @@ bool		is_connect;			/* establish connection for each transaction */
 bool		is_latencies;		/* report per-command latencies */
 int			main_pid;			/* main process id used in log filename */
 
+/* gaussian/exponential distribution tests */
+double		threshold;          /* threshold for gaussian or exponential */
+bool        use_gaussian = false;
+bool		use_exponential = false;
+
 char	   *pghost = "";
 char	   *pgport = "";
 char	   *login = NULL;
@@ -294,11 +300,11 @@ static int	num_commands = 0;	/* total number of Command structs */
 static int	debug = 0;			/* debug flag */
 
 /* default scenario */
-static char *tpc_b = {
+static char *tpc_b_fmt = {
 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"\\setrandom bid 1 :nbranches\n"
 	"\\setrandom tid 1 :ntellers\n"
 	"\\setrandom delta -5000 5000\n"
@@ -312,11 +318,11 @@ static char *tpc_b = {
 };
 
 /* -N case */
-static char *simple_update = {
+static char *simple_update_fmt = {
 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"\\setrandom bid 1 :nbranches\n"
 	"\\setrandom tid 1 :ntellers\n"
 	"\\setrandom delta -5000 5000\n"
@@ -328,9 +334,9 @@ static char *simple_update = {
 };
 
 /* -S case */
-static char *select_only = {
+static char *select_only_fmt = {
 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\setrandom aid 1 :naccounts%s\n"
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
 };
 
@@ -377,6 +383,8 @@ usage(void)
 		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
 		   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g. 0.01 for 1%%)\n"
+		   "  --exponential=NUM        exponential distribution with NUM threshold parameter\n"
+		   "  --gaussian=NUM           gaussian distribution with NUM threshold parameter\n"
 		   "\nCommon options:\n"
 		   "  -d, --debug              print debugging output\n"
 	  "  -h, --host=HOSTNAME      database server host or socket directory\n"
@@ -2329,6 +2337,30 @@ process_builtin(char *tb)
 	return my_commands;
 }
 
+/*
+ * compute the probability of the truncated gaussian random generation
+ * to draw values in the i-th slot of the range.
+ */
+static double gaussianProbability(int i, int slots, double threshold)
+{
+	assert(1 <= i && i <= slots);
+	return (0.50 * (erf (threshold * (1.0 - 1.0 / slots * (2.0 * i - 2.0)) / sqrt(2.0)) -
+		erf (threshold * (1.0 - 1.0 / slots * 2.0 * i) / sqrt(2.0))) /
+		erf (threshold / sqrt(2.0)));
+}
+
+/*
+ * compute the probability of the truncated exponential random generation
+ * to draw values in the i-th slot of the range.
+ */
+static double exponentialProbability(int i, int slots, double threshold)
+{
+	assert(1 <= i && i <= slots);
+	return (exp(- threshold * (i - 1) / slots) - exp(- threshold * i / slots)) /
+		(1.0 - exp(- threshold));
+}
+
+
 /* print out results */
 static void
 printResults(int ttype, int64 normal_xacts, int nclients,
@@ -2340,7 +2372,7 @@ printResults(int ttype, int64 normal_xacts, int nclients,
 	double		time_include,
 				tps_include,
 				tps_exclude;
-	char	   *s;
+	char	   *s, *d;
 
 	time_include = INSTR_TIME_GET_DOUBLE(total_time);
 	tps_include = normal_xacts / time_include;
@@ -2356,8 +2388,46 @@ printResults(int ttype, int64 normal_xacts, int nclients,
 	else
 		s = "Custom query";
 
-	printf("transaction type: %s\n", s);
+	if (use_gaussian)
+		d = "Gaussian distribution ";
+	else if (use_exponential)
+		d = "Exponential distribution ";
+	else
+		d = ""; /* default uniform case */
+
+	printf("transaction type: %s%s\n", d, s);
 	printf("scaling factor: %d\n", scale);
+
+	/* output in gaussian distribution benchmark */
+	if (use_gaussian)
+	{
+		int i;
+		printf("pgbench_account's aid selected with a truncated gaussian distribution\n");
+		printf("standard deviation threshold: %.5f\n", threshold);
+		printf("decile percents:");
+		for (i = 1; i <= 10; i++)
+			printf(" %.1f%%", 100.0 * gaussianProbability(i, 10.0, threshold));
+		printf("\n");
+		printf("probability of max/min percent of the range: %.1f%% %.1f%%\n",
+			  100.0 * gaussianProbability(50, 100, threshold),
+			  100.0 * gaussianProbability(100, 100, threshold));
+	}
+	/* output in exponential distribution benchmark */
+	else if (use_exponential)
+	{
+		int i;
+		printf("pgbench_account's aid selected with a truncated exponential distribution\n");
+		printf("exponential threshold: %.5f\n", threshold);
+		printf("decile percents:");
+		for (i = 1; i <= 10; i++)
+			printf(" %.1f%%",
+				   100.0 * exponentialProbability(i, 10, threshold));
+		printf("\n");
+		printf("probability of fist/last percent of the range: %.1f%% %.1f%%\n",
+			   100.0 * exponentialProbability(1, 100, threshold),
+			   100.0 * exponentialProbability(100, 100, threshold));
+	}
+
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -2488,6 +2558,8 @@ main(int argc, char **argv)
 		{"unlogged-tables", no_argument, &unlogged_tables, 1},
 		{"sampling-rate", required_argument, NULL, 4},
 		{"aggregate-interval", required_argument, NULL, 5},
+		{"gaussian", required_argument, NULL, 6},
+		{"exponential", required_argument, NULL, 7},
 		{"rate", required_argument, NULL, 'R'},
 		{NULL, 0, NULL, 0}
 	};
@@ -2768,6 +2840,25 @@ main(int argc, char **argv)
 				}
 #endif
 				break;
+			case 6:
+				use_gaussian = true;
+				threshold = atof(optarg);
+				if(threshold < MIN_GAUSSIAN_THRESHOLD)
+				{
+					fprintf(stderr, "--gaussian=NUM must be more than %f: %f\n",
+							MIN_GAUSSIAN_THRESHOLD, threshold);
+					exit(1);
+				}
+				break;
+			case 7:
+				use_exponential = true;
+				threshold = atof(optarg);
+				if(threshold <= 0.0)
+				{
+					fprintf(stderr, "--exponential=NUM must be more than 0.0\n");
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -2965,6 +3056,17 @@ main(int argc, char **argv)
 		}
 	}
 
+	/* set :threshold variable */
+	if(getVariable(&state[0], "threshold") == NULL)
+	{
+		snprintf(val, sizeof(val), "%lf", threshold);
+		for (i = 0; i < nclients; i++)
+		{
+			if (!putVariable(&state[i], "startup", "threshold", val))
+				exit(1);
+		}
+	}
+
 	if (!is_no_vacuum)
 	{
 		fprintf(stderr, "starting vacuum...");
@@ -2987,25 +3089,24 @@ main(int argc, char **argv)
 	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
 
 	/* process builtin SQL scripts */
-	switch (ttype)
-	{
-		case 0:
-			sql_files[0] = process_builtin(tpc_b);
-			num_files = 1;
-			break;
-
-		case 1:
-			sql_files[0] = process_builtin(select_only);
-			num_files = 1;
-			break;
-
-		case 2:
-			sql_files[0] = process_builtin(simple_update);
-			num_files = 1;
-			break;
-
-		default:
-			break;
+	if (ttype < 3)
+	{
+		char *fmt, *distribution, *queries;
+		int ret;
+		fmt = (ttype == 0)? tpc_b_fmt:
+			  (ttype == 1)? select_only_fmt:
+			  (ttype == 2)? simple_update_fmt: NULL;
+		assert(fmt != NULL);
+		distribution =
+			use_gaussian? " gaussian :threshold":
+			use_exponential? " exponential :threshold":
+			"" /* default uniform case */ ;
+		queries = pg_malloc(strlen(fmt) + strlen(distribution) + 1);
+		ret = sprintf(queries, fmt, distribution);
+		assert(ret >= 0);
+		sql_files[0] = process_builtin(queries);
+		num_files = 1;
+		pg_free(queries);
 	}
 
 	/* set up thread data structures */
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index 276476a..b718dd3 100644
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -307,6 +307,49 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </varlistentry>
 
      <varlistentry>
+      <term><option>--exponential</option><replaceable>threshold</></term>
+      <listitem>
+       <para>
+         Run exponential distribution pgbench test using this threshold parameter.
+         The threshold controls the distribution of access frequency on the
+         <structname>pgbench_accounts</> table.
+         See the <literal>\setrandom</> documentation below for details about
+         the impact of the threshold value.
+         When set, this option applies to all test variants (<option>-N</> for
+         skipping updates, or <option>-S</> for selects).
+       </para>
+
+       <para>
+         When run, the output is expanded to show the distribution
+         depending on the <replaceable>threshold</> value:
+
+<screen>
+...
+pgbench_account's aid selected with a truncated exponential distribution
+exponential threshold: 5.00000
+decile percents: 39.6% 24.0% 14.6% 8.8% 5.4% 3.3% 2.0% 1.2% 0.7% 0.4%
+probability of fist/last percent of the range: 4.9% 0.0%
+...
+</screen>
+
+         The figures are to be interpreted as follows.
+         If the scaling factor is 10, there are 1,000,000 accounts in
+         <literal>pgbench_accounts</>.
+         The first decile, with <literal>aid</> from 1 to 100,000, is
+         drawn 39.6% of the time, that is about 4 times more than average.
+         The second decile, from 100,001 to 200,000 is drawn 24.0% of the time,
+         that is 2.4 times more than average.
+         Up to the last decile, from 900,001 to 1,000,000, which is drawn
+         0.4% of the time, well below average.
+         Moreover, the first percent of the range, that is <literal>aid</>
+         from 1 to 10,000, is drawn 4.9% of the time, this 4.9 times more
+         than average, and the last percent, with <literal>aid</>
+         from 990,001 to 1,000,000, is drawn less than 0.1% of the time.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-f</option> <replaceable>filename</></term>
       <term><option>--file=</option><replaceable>filename</></term>
       <listitem>
@@ -320,6 +363,49 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </varlistentry>
 
      <varlistentry>
+      <term><option>--gaussian</option><replaceable>threshold</></term>
+      <listitem>
+       <para>
+         Run gaussian distribution pgbench test using this threshold parameter.
+         The threshold controls the distribution of access frequency on the
+         <structname>pgbench_accounts</> table.
+         See the <literal>\setrandom</> documentation below for details about
+         the impact of the threshold value.
+         When set, this option applies to all test variants (<option>-N</> for
+         skipping updates, or <option>-S</> for selects).
+       </para>
+
+       <para>
+         When run, the output is expanded to show the distribution
+         depending on the <replaceable>threshold</> value:
+
+<screen>
+...
+pgbench_account's aid selected with a truncated gaussian distribution
+standard deviation threshold: 5.00000
+decile percents: 0.0% 0.1% 2.1% 13.6% 34.1% 34.1% 13.6% 2.1% 0.1% 0.0%
+probability of max/min percent of the range: 4.0% 0.0%
+...
+</screen>
+
+         The figures are to be interpreted as follows.
+         If the scaling factor is 10, there are 1,000,000 accounts in
+         <literal>pgbench_accounts</>.
+         The first decile, with <literal>aid</> from 1 to 100,000, is
+         drawn less than 0.1% of the time.
+         The second, from 100,001 to 200,000 is drawn about 0.1% of the time...
+         up to the fifth decile, from 400,001 to 500,000, which
+         is drawn 34.1% of the time, about 3.4 times more thn average,
+         and then the gaussian curve is symmetric for the last five deciles.
+         Moreover, the first percent of the range, that is <literal>aid</>
+         from 50,001 to 60,000, is drawn 4.0% of the time, 4.0 times more
+         than average, and the last percent, with <literal>aid</>
+         from 990,001 to 1,000,000, is drawn less than 0.1% of the time.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>-j</option> <replaceable>threads</></term>
       <term><option>--jobs=</option><replaceable>threads</></term>
       <listitem>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to