Sorry, the previously attached patch has a small bug.
Please use the latest one.

> 134 - return min + (int64) (max - min + 1) * rand;
> 134 + return min + (int64)((max - min + 1) * rand);

Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
*** a/contrib/pgbench/pgbench.c
--- b/contrib/pgbench/pgbench.c
***************
*** 176,181 **** int			progress_nthreads = 0; /* number of threads for progress report */
--- 176,183 ----
  bool		is_connect;			/* establish connection for each transaction */
  bool		is_latencies;		/* report per-command latencies */
  int			main_pid;			/* main process id used in log filename */
+ double		stdev_threshold = 5;		/* --gaussian=NUM value; default for :stdev_threshold */
+ bool		gaussian_option = false;	/* true if --gaussian given: use gaussian built-in scripts */
  
  char	   *pghost = "";
  char	   *pgport = "";
***************
*** 338,346 **** static char *select_only = {
--- 340,390 ----
  	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
  };
  
+ /* --gaussian case: TPC-B-like script; only aid is drawn with \setgaussian */
+ static char *gaussian_tpc_b = {
+ 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ 	"\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ 	"\\setrandom bid 1 :nbranches\n"
+ 	"\\setrandom tid 1 :ntellers\n"
+ 	"\\setrandom delta -5000 5000\n"
+ 	"BEGIN;\n"
+ 	"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ 	"UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
+ 	"UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
+ 	"INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ 	"END;\n"
+ };
+ 
+ /* --gaussian with -N case: no teller/branch updates; aid uses \setgaussian */
+ static char *gaussian_simple_update = {
+ 	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ 	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ 	"\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ 	"\\setrandom bid 1 :nbranches\n"
+ 	"\\setrandom tid 1 :ntellers\n"
+ 	"\\setrandom delta -5000 5000\n"
+ 	"BEGIN;\n"
+ 	"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ 	"INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ 	"END;\n"
+ };
+ 
+ /* --gaussian with -S case: single SELECT; aid is drawn with \setgaussian */
+ static char *gaussian_select_only = {
+ 	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ 	"\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ };
+ 
  /* Function prototypes */
  static void setalarm(int seconds);
  static void *threadRun(void *arg);
+ static inline double sqrtd(const double x);
  
  static void
  usage(void)
***************
*** 381,386 **** usage(void)
--- 425,431 ----
  		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
  		   "  --aggregate-interval=NUM aggregate data over NUM seconds\n"
  		   "  --sampling-rate=NUM      fraction of transactions to log (e.g. 0.01 for 1%%)\n"
+ 		   "  --gaussian=NUM           gaussian distribution with NUM standard deviation threshold\n"
  		   "\nCommon options:\n"
  		   "  -d, --debug              print debugging output\n"
  		   "  -h, --host=HOSTNAME      database server host or socket directory\n"
***************
*** 477,482 **** getrand(TState *thread, int64 min, int64 max)
--- 522,597 ----
  	return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
  }
  
+ /*
+  * Random number generator: truncated gaussian distribution over [min, max].
+  *
+  * thread supplies the pg_erand48() state; min and max are inclusive bounds.
+  * Deviates outside [-stdev_threshold, stdev_threshold) are rejected, so a
+  * larger threshold concentrates results around the middle of the range.
+  *
+  * All working state is local: pgbench may run several threads, each with
+  * its own random_state, so nothing may be cached in static variables.
+  */
+ static int64
+ getGaussianrand(TState *thread, int64 min, int64 max, double stdev_threshold)
+ {
+ 	double		stdev;
+ 	double		normalized;
+ 	int64		result;
+ 
+ 	/*
+ 	 * Draw until a deviate falls within the accepted interval.  Callers
+ 	 * enforce stdev_threshold >= 2, so a single standard normal deviate is
+ 	 * rejected with probability below 5 percent and retries are rare.
+ 	 */
+ 	do
+ 	{
+ 		double		rand1;
+ 		double		rand2;
+ 		double		var_sqrt;
+ 
+ 		/*
+ 		 * pg_erand48 generates numbers in [0,1).  log(0) is undefined, so
+ 		 * rand1 is flipped into (0,1]; rand2 only feeds sin/cos and can
+ 		 * stay in [0,1).
+ 		 */
+ 		rand1 = 1.0 - pg_erand48(thread->random_state);
+ 		rand2 = pg_erand48(thread->random_state);
+ 
+ 		/* Box-Muller transform yields two independent deviates */
+ 		var_sqrt = sqrtd(-2.0 * log(rand1));
+ 		stdev = var_sqrt * sin(2.0 * M_PI * rand2);
+ 
+ 		/* fall back to the second deviate before drawing again */
+ 		if (stdev < -stdev_threshold || stdev >= stdev_threshold)
+ 			stdev = var_sqrt * cos(2.0 * M_PI * rand2);
+ 	} while (stdev < -stdev_threshold || stdev >= stdev_threshold);
+ 
+ 	/* normalization to [0,1) */
+ 	normalized = (stdev + stdev_threshold) / (stdev_threshold * 2.0);
+ 
+ 	/* rounding can push normalized to 1.0; never return more than max */
+ 	result = min + (int64) ((max - min + 1) * normalized);
+ 	return (result > max) ? max : result;
+ }
+ 
+ /*
+  * Square root helper used by getGaussianrand().
+  *
+  * The standard sqrt() is used rather than a bit-level "fast inverse square
+  * root" approximation: reinterpreting a double through an integer pointer
+  * violates C's aliasing rules, assumes a particular floating-point layout,
+  * and yields only an approximate result.
+  *
+  * x is expected to be non-negative; sqrt() of a negative value is a domain
+  * error.
+  */
+ static inline double
+ sqrtd(const double x)
+ {
+ 	return sqrt(x);
+ }
+ 
  /* call PQexec() and exit() on failure */
  static void
  executeStatement(PGconn *con, const char *sql)
***************
*** 1391,1396 **** top:
--- 1506,1601 ----
  
  			st->listen = 1;
  		}
+ 		else if (pg_strcasecmp(argv[0], "setgaussian") == 0)
+ 		{
+ 			/*
+ 			 * \setgaussian varname min max stdev_threshold
+ 			 *
+ 			 * min, max and stdev_threshold may each be a literal or a
+ 			 * :variable reference.  Every failure increments st->ecnt.
+ 			 */
+ 			char	*var;
+ 			char	*endptr;
+ 			int64	min;
+ 			int64	max;
+ 			double	stdev_threshold;
+ 			char	res[64];
+ 
+ 			if (*argv[2] == ':')
+ 			{
+ 				if((var = getVariable(st, argv[2] + 1)) == NULL)
+ 				{
+ 					fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]);
+ 					st->ecnt++;
+ 					return true;
+ 				}
+ 				min = strtoint64(var);
+ 			}
+ 			else
+ 				min = strtoint64(argv[2]);
+ 
+ 			if (*argv[3] == ':')
+ 			{
+ 				if((var = getVariable(st, argv[3] + 1)) == NULL)
+ 				{
+ 					fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[3]);
+ 					st->ecnt++;
+ 					return true;
+ 				}
+ 				max = strtoint64(var);
+ 			}
+ 			else
+ 				max = strtoint64(argv[3]);
+ 
+ 			/* check if min and max are appropriate value */
+ 			if(max < min)
+ 			{
+ 				fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]);
+ 				st->ecnt++;
+ 				return true;
+ 			}
+ 
+ 			/* for not overflowing when generating random number */
+ 			if(max - min < 0 || (max - min) + 1 < 0)
+ 			{
+ 				fprintf(stderr, "%s: range too large\n", argv[0]);
+ 				st->ecnt++;
+ 				return true;
+ 			}
+ 
+ 			if(*argv[4] == ':')
+ 			{
+ 				if((var = getVariable(st, argv[4] + 1)) == NULL)
+ 				{
+ 					fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[4]);
+ 					st->ecnt++;
+ 					return true;
+ 				}
+ 			}
+ 			else
+ 				var = argv[4];
+ 
+ 			/* require a complete number; the comparison form also rejects NaN */
+ 			stdev_threshold = strtod(var, &endptr);
+ 			if (endptr == var || *endptr != '\0' || !(stdev_threshold >= 2))
+ 			{
+ 				fprintf(stderr, "%s: gaussian threshold must be a number of at least 2\n", argv[0]);
+ 				st->ecnt++;
+ 				return true;
+ 			}
+ #ifdef DEBUG
+ 			printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianrand(thread, min, max, stdev_threshold));
+ #endif
+ 			snprintf(res, sizeof(res), INT64_FORMAT, getGaussianrand(thread, min, max, stdev_threshold));
+ 
+ 			if(!putVariable(st, argv[0], argv[1], res))
+ 			{
+ 				st->ecnt++;
+ 				return true;
+ 			}
+ 
+ 			st->listen = 1;
+ 		}
  		else if (pg_strcasecmp(argv[0], "set") == 0)
  		{
  			char	   *var;
***************
*** 1915,1920 **** process_commands(char *buf)
--- 2120,2137 ----
  				fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
  						my_commands->argv[0], my_commands->argv[j]);
  		}
+ 		else if (pg_strcasecmp(my_commands->argv[0], "setgaussian") == 0)
+ 		{
+ 			if (my_commands->argc < 5)
+ 			{
+ 				fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
+ 				exit(1);
+ 			}
+ 
+ 			for (j = 5; j < my_commands->argc; j++)
+ 				fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
+ 						my_commands->argv[0], my_commands->argv[j]);
+ 		}
  		else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
  		{
  			if (my_commands->argc < 3)
***************
*** 2188,2203 **** printResults(int ttype, int normal_xacts, int nclients,
  						(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
  
  	if (ttype == 0)
! 		s = "TPC-B (sort of)";
  	else if (ttype == 2)
! 		s = "Update only pgbench_accounts";
  	else if (ttype == 1)
! 		s = "SELECT only";
  	else
  		s = "Custom query";
  
  	printf("transaction type: %s\n", s);
  	printf("scaling factor: %d\n", scale);
  	printf("query mode: %s\n", QUERYMODE[querymode]);
  	printf("number of clients: %d\n", nclients);
  	printf("number of threads: %d\n", nthreads);
--- 2405,2447 ----
  						(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
  
  	if (ttype == 0)
! 	{
! 		if(gaussian_option)		/* label must match the script actually run */
! 			s = "Gaussian distributed TPC-B (sort of)";
! 		else
! 			s = "TPC-B (sort of)";
! 	}
  	else if (ttype == 2)
! 	{
! 		if(gaussian_option)
! 			s = "Gaussian distributed update only pgbench_accounts";
! 		else
! 			s = "Update only pgbench_accounts";
! 	}
  	else if (ttype == 1)
! 	{
! 		if(gaussian_option)
! 			s = "Gaussian distributed SELECT only";
! 		else
! 			s = "SELECT only";
! 	}
  	else
  		s = "Custom query";
  
  	printf("transaction type: %s\n", s);
  	printf("scaling factor: %d\n", scale);
+ 
+ 	/* output in only gaussian distributed benchmark */
+ 	if(gaussian_option)
+ 	{
+ 		printf("standard deviation threshold: %.5f\n", stdev_threshold);
+ 		printf("access probability of top 20%%, 10%% and 5%% records: %.5f %.5f %.5f\n",
+ 			(double) ((erf (stdev_threshold * 0.2 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ 			(double) ((erf (stdev_threshold * 0.1 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ 			(double) ((erf (stdev_threshold * 0.05 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0))))
+ 			);
+ 	}
+ 
  	printf("query mode: %s\n", QUERYMODE[querymode]);
  	printf("number of clients: %d\n", nclients);
  	printf("number of threads: %d\n", nthreads);
***************
*** 2327,2332 **** main(int argc, char **argv)
--- 2571,2577 ----
  		{"unlogged-tables", no_argument, &unlogged_tables, 1},
  		{"sampling-rate", required_argument, NULL, 4},
  		{"aggregate-interval", required_argument, NULL, 5},
+ 		{"gaussian", required_argument, NULL, 6},
  		{"rate", required_argument, NULL, 'R'},
  		{NULL, 0, NULL, 0}
  	};
***************
*** 2606,2611 **** main(int argc, char **argv)
--- 2851,2865 ----
  				}
  #endif
  				break;
+ 			case 6:		/* --gaussian=NUM */
+ 				gaussian_option = true;
+ 				stdev_threshold = atof(optarg);
+ 				if(!(stdev_threshold >= 2))	/* written this way so NaN is rejected too */
+ 				{
+ 					fprintf(stderr, "--gaussian=NUM must be at least 2: %f\n", stdev_threshold);
+ 					exit(1);
+ 				}
+ 				break;
  			default:
  				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
  				exit(1);
***************
*** 2803,2808 **** main(int argc, char **argv)
--- 3057,3073 ----
  		}
  	}
  
+ 	/* set :stdev_threshold variable */
+ 	if(getVariable(&state[0], "stdev_threshold") == NULL)
+ 	{
+ 		snprintf(val, sizeof(val), "%lf", stdev_threshold);
+ 		for (i = 0; i < nclients; i++)
+ 		{
+ 			if (!putVariable(&state[i], "startup", "stdev_threshold", val))
+ 				exit(1);
+ 		}
+ 	}
+ 
  	if (!is_no_vacuum)
  	{
  		fprintf(stderr, "starting vacuum...");
***************
*** 2828,2844 **** main(int argc, char **argv)
  	switch (ttype)
  	{
  		case 0:
! 			sql_files[0] = process_builtin(tpc_b);
  			num_files = 1;
  			break;
  
  		case 1:
! 			sql_files[0] = process_builtin(select_only);
  			num_files = 1;
  			break;
  
  		case 2:
! 			sql_files[0] = process_builtin(simple_update);
  			num_files = 1;
  			break;
  
--- 3093,3118 ----
  	switch (ttype)
  	{
  		case 0:
! 			if(gaussian_option)
! 				sql_files[0] = process_builtin(gaussian_tpc_b);
! 			else
! 				sql_files[0] = process_builtin(tpc_b);
  			num_files = 1;
  			break;
  
  		case 1:
! 			if(gaussian_option)
! 				sql_files[0] = process_builtin(gaussian_select_only);
! 			else
! 				sql_files[0] = process_builtin(select_only);
  			num_files = 1;
  			break;
  
  		case 2:
! 			if(gaussian_option)		/* -N together with --gaussian */
! 				sql_files[0] = process_builtin(gaussian_simple_update);
! 			else
! 				sql_files[0] = process_builtin(simple_update);
  			num_files = 1;
  			break;
  
*** a/doc/src/sgml/pgbench.sgml
--- b/doc/src/sgml/pgbench.sgml
***************
*** 320,325 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
--- 320,342 ----
       </varlistentry>
  
       <varlistentry>
+       <term><option>--gaussian=</option><replaceable>standard deviation threshold</></term>
+       <listitem>
+        <para>
+         Use a gaussian distribution for the pgbench access pattern. This option
+         requires a standard deviation threshold, which controls the distribution
+         of the <literal>aid</> values used to access the pgbench_accounts table.
+         A larger threshold concentrates accesses on a narrower set of records;
+         a smaller threshold spreads accesses more evenly across the table. The
+         threshold must be at least 2; this limit keeps the calculation cost of
+         each random value realistic. If you also specify
+         <option>-N</option> or <option>-S</option>, the gaussian distribution is
+         applied to those benchmarks.
+        </para>
+       </listitem>
+      </varlistentry>
+ 
+      <varlistentry>
        <term><option>-j</option> <replaceable>threads</></term>
        <term><option>--jobs=</option><replaceable>threads</></term>
        <listitem>
***************
*** 770,775 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
--- 787,818 ----
  
     <varlistentry>
      <term>
+      <literal>\setgaussian <replaceable>varname</> <replaceable>min</> <replaceable>max</>
+      <replaceable>standard deviation threshold</></literal>
+     </term>
+ 
+     <listitem>
+      <para>
+       Sets variable <replaceable>varname</> to a gaussian random integer value
+       between the limits <replaceable>min</> and <replaceable>max</> inclusive.
+       Each limit can be either an integer constant or a
+       <literal>:</><replaceable>variablename</> reference to a variable
+       having an integer value. The standard deviation threshold controls
+       the distribution of the generated values: a larger threshold
+       concentrates the values in a narrower part of the range. The
+       minimum standard deviation threshold is 2; this limit keeps the
+       calculation cost of each random value realistic.
+      </para>
+ 
+      <para>
+       Example:
+ <programlisting>
+ \setgaussian aid 1 :naccounts 5
+ </programlisting></para>
+     </listitem>
+    </varlistentry>
+  <varlistentry>
+     <term>
       <literal>\sleep <replaceable>number</> [ us | ms | s ]</literal>
      </term>
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to