Sorry, the previously attached patch has a small bug. Please use the latest one.
> 134 - return min + (int64) (max - min + 1) * rand; > 134 + return min + (int64)((max - min + 1) * rand); Regards, -- Mitsumasa KONDO NTT Open Source Software Center
*** a/contrib/pgbench/pgbench.c --- b/contrib/pgbench/pgbench.c *************** *** 176,181 **** int progress_nthreads = 0; /* number of threads for progress report */ --- 176,183 ---- bool is_connect; /* establish connection for each transaction */ bool is_latencies; /* report per-command latencies */ int main_pid; /* main process id used in log filename */ + double stdev_threshold = 5; /* standard deviation threshold */ + bool gaussian_option = false; /* use gaussian distribution random generator */ char *pghost = ""; char *pgport = ""; *************** *** 338,346 **** static char *select_only = { --- 340,390 ---- "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" }; + /* --gaussian case */ + static char *gaussian_tpc_b = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n" + "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --gaussian with -N case */ + static char *gaussian_simple_update = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = 
abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --gaussian with -S case */ + static char *gaussian_select_only = { + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setgaussian aid 1 :naccounts :stdev_threshold\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + }; + /* Function prototypes */ static void setalarm(int seconds); static void *threadRun(void *arg); + static inline double sqrtd(const double x); static void usage(void) *************** *** 381,386 **** usage(void) --- 425,431 ---- " -v, --vacuum-all vacuum all four standard tables before tests\n" " --aggregate-interval=NUM aggregate data over NUM seconds\n" " --sampling-rate=NUM fraction of transactions to log (e.g. 0.01 for 1%%)\n" + " --gaussian=NUM gaussian distribution with NUM standard deviation threshold\n" "\nCommon options:\n" " -d, --debug print debugging output\n" " -h, --host=HOSTNAME database server host or socket directory\n" *************** *** 477,482 **** getrand(TState *thread, int64 min, int64 max) --- 522,597 ---- return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state)); } + /* random number generator: gaussian distribution from min to max inclusive */ + static int64 + getGaussianrand(TState *thread, int64 min, int64 max, double stdev_threshold) + { + double stdev; + double rand; + static double rand1; + static double rand2; + static double var_sqrt; + static bool reuse = false; + + /* + * Get user specified random number(-stdev_threshold < stdev <= stdev_threshold) + * in this loop. This loop is executed until appeared ranged number we want. + * However, this loop could not almost go on, because min stdev_threshold is 2 + * then the possibility of retry-loop is under 4 percent. And possibility of + * re-retry-loop is under 1.6 percent. 
And it doesn't happen frequentry even if + * we also think about the cycle of the trigonometric function. + */ + do + { + /* reuse pre calculation result as possible */ + if(!reuse) + { + /* + * pg_erand48 generates [0,1) random number. However rand1 + * needs (0,1) random number because log(0) cannot calculate. + * And rand2 also needs (0,1) random number in strictly. But + * normalization cost is high and we can substitute (0,1] at + * rand1 and [0,1) at rand2, so we use approximate calculation. + */ + rand1 = 1.0 - pg_erand48(thread->random_state); + rand2 = pg_erand48(thread->random_state); + + /* Box-Muller transform */ + var_sqrt = sqrtd(-2.0 * log(rand1)); + stdev = var_sqrt * sin(2.0 * M_PI * rand2); + reuse = true; + } + else + { + stdev = var_sqrt * cos(2.0 * M_PI * rand2); + reuse = false; + } + } while (stdev < -stdev_threshold || stdev >= stdev_threshold); + + /* normalization to [0,1) */ + rand = (stdev + stdev_threshold) / (stdev_threshold * 2.0); + + /* return int64 random number within between min and max */ + return min + (int64)((max - min + 1) * rand); + } + + /* + * fast sqrt algorithm: reference from Fast inverse square root algorithms. 
+ */ + static inline double + sqrtd(const double x) + { + double x_half = 0.5 * x; + long long int tmp = 0x5FE6EB50C7B537AAl - ( *(long long int*)&x >> 1); + double x_result = *(double*)&tmp; + + x_result *= (1.5 - (x_half * x_result * x_result)); + /* retry this calculation, it becomes higher precision at sqrt */ + x_result *= (1.5 - (x_half * x_result * x_result)); + + return x_result * x; + } + /* call PQexec() and exit() on failure */ static void executeStatement(PGconn *con, const char *sql) *************** *** 1391,1396 **** top: --- 1506,1601 ---- st->listen = 1; } + else if (pg_strcasecmp(argv[0], "setgaussian") == 0) + { + char *var; + char *endptr; + int64 min; + int64 max; + double stdev_threshold; + char res[64]; + + if (*argv[2] == ':') + { + if((var = getVariable(st, argv[2] + 1)) == NULL) + { + fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]); + st->ecnt++; + return true; + } + min = strtoint64(var); + } + else + min = strtoint64(argv[2]); + #ifdef NOT_USED + if (min < 0) + { + fprintf(stderr, "%s: invalid minimum number %d\n", argv[0], min); + st->ecnt++; + return; + } + #endif + if (*argv[3] == ':') + { + if((var = getVariable(st, argv[3] + 1)) == NULL) + { + fprintf(stderr, "%s: invalid maximum number %s\n", argv[0], argv[3]); + st->ecnt++; + return true; + } + max = strtoint64(var); + } + else + max = strtoint64(argv[3]); + + /* check if min and max are appropriate value */ + if(max < min) + { + fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]); + st->ecnt++; + return true; + } + + /* for not overflowing when generating random number */ + if(max - min < 0 || (max - min) + 1 < 0) + { + fprintf(stderr, "%s: range too large\n", argv[0]); + st->ecnt++; + return true; + } + + if(*argv[4] == ':') + { + if((var = getVariable(st, argv[4] + 1)) == NULL) + { + fprintf(stderr, "%s: invalid gaussian threshold number %s\n", argv[0], argv[4]); + st->ecnt++; + return true; + } + stdev_threshold = strtod(var, NULL); + } + else + 
stdev_threshold = strtod(argv[4], &endptr); + + if ( stdev_threshold < 2) + { + fprintf(stderr, "%s: gaussian threshold must be more than 2\n,", argv[4]); + st->ecnt++; + return true; + } + #ifdef DEBUG + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianrand(thread, min, max, stdev_threshold)); + #endif + snprintf(res, sizeof(res), INT64_FORMAT, getGaussianrand(thread, min, max, stdev_threshold)); + + if(!putVariable(st, argv[0], argv[1], res)) + { + st->ecnt++; + return true; + } + + st->listen = 1; + } else if (pg_strcasecmp(argv[0], "set") == 0) { char *var; *************** *** 1915,1920 **** process_commands(char *buf) --- 2120,2137 ---- fprintf(stderr, "%s: extra argument \"%s\" ignored\n", my_commands->argv[0], my_commands->argv[j]); } + else if (pg_strcasecmp(my_commands->argv[0], "setgaussian") == 0) + { + if (my_commands->argc < 5) + { + fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]); + exit(1); + } + + for (j = 5; j < my_commands->argc; j++) + fprintf(stderr, "%s: extra argument \"%s\" ignored\n", + my_commands->argv[0], my_commands->argv[j]); + } else if (pg_strcasecmp(my_commands->argv[0], "set") == 0) { if (my_commands->argc < 3) *************** *** 2188,2203 **** printResults(int ttype, int normal_xacts, int nclients, (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads)); if (ttype == 0) ! s = "TPC-B (sort of)"; else if (ttype == 2) ! s = "Update only pgbench_accounts"; else if (ttype == 1) ! s = "SELECT only"; else s = "Custom query"; printf("transaction type: %s\n", s); printf("scaling factor: %d\n", scale); printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); --- 2405,2447 ---- (INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads)); if (ttype == 0) ! { ! if(gaussian_option) ! s = "TPC-B (sort of)"; ! else ! s = "Gaussian distributed TPC-B (sort of)"; ! } else if (ttype == 2) ! { ! 
if(gaussian_option) ! s = "Gaussian distributed update only pgbench_accounts"; ! else ! s = "Update only pgbench_accounts"; ! } else if (ttype == 1) ! { ! if(gaussian_option) ! s = "Gaussian distributed SELECT only"; ! else ! s = "SELECT only"; ! } else s = "Custom query"; printf("transaction type: %s\n", s); printf("scaling factor: %d\n", scale); + + /* output in only gaussian distributed benchmark */ + if(gaussian_option) + { + printf("standard deviation threshold: %.5f\n", stdev_threshold); + printf("access probability of top 20%%, 10%% and 5%% records: %.5f %.5f %.5f\n", + (double) ((erf (stdev_threshold * 0.2 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))), + (double) ((erf (stdev_threshold * 0.1 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))), + (double) ((erf (stdev_threshold * 0.05 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))) + ); + } + printf("query mode: %s\n", QUERYMODE[querymode]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); *************** *** 2327,2332 **** main(int argc, char **argv) --- 2571,2577 ---- {"unlogged-tables", no_argument, &unlogged_tables, 1}, {"sampling-rate", required_argument, NULL, 4}, {"aggregate-interval", required_argument, NULL, 5}, + {"gaussian", required_argument, NULL, 6}, {"rate", required_argument, NULL, 'R'}, {NULL, 0, NULL, 0} }; *************** *** 2606,2611 **** main(int argc, char **argv) --- 2851,2865 ---- } #endif break; + case 6: + gaussian_option = true; + stdev_threshold = atof(optarg); + if(stdev_threshold < 2) + { + fprintf(stderr, "--gaussian=NUM must be more than 2: %f\n", stdev_threshold); + exit(1); + } + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); *************** *** 2803,2808 **** main(int argc, char **argv) --- 3057,3073 ---- } } + /* set :stdev_threshold variable */ + if(getVariable(&state[0], "stdev_threshold") == NULL) + { + snprintf(val, sizeof(val), "%lf", stdev_threshold); + for (i 
= 0; i < nclients; i++) + { + if (!putVariable(&state[i], "startup", "stdev_threshold", val)) + exit(1); + } + } + if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); *************** *** 2828,2844 **** main(int argc, char **argv) switch (ttype) { case 0: ! sql_files[0] = process_builtin(tpc_b); num_files = 1; break; case 1: ! sql_files[0] = process_builtin(select_only); num_files = 1; break; case 2: ! sql_files[0] = process_builtin(simple_update); num_files = 1; break; --- 3093,3118 ---- switch (ttype) { case 0: ! if(gaussian_option) ! sql_files[0] = process_builtin(gaussian_tpc_b); ! else ! sql_files[0] = process_builtin(tpc_b); num_files = 1; break; case 1: ! if(gaussian_option) ! sql_files[0] = process_builtin(gaussian_select_only); ! else ! sql_files[0] = process_builtin(select_only); num_files = 1; break; case 2: ! if(gaussian_option) ! sql_files[0] = process_builtin(simple_update); ! else ! sql_files[0] = process_builtin(gaussian_simple_update); num_files = 1; break; *** a/doc/src/sgml/pgbench.sgml --- b/doc/src/sgml/pgbench.sgml *************** *** 320,325 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> --- 320,342 ---- </varlistentry> <varlistentry> + <term><option>--gaussian</option><replaceable>standard deviation</></term> + <listitem> + <para> + Gaussian distribution pgbench option. Need the standard deviation threshold. + Standard deviation threshold can control distribution of access patern that + is used by aid in pgbench_accounts table. If we set larger standard deviation + threshold, pgbench access patern limited more specific records. On the other + hands, if you set smaller standard deviation, pgbench access patern will be + more gently distribution. Standard deviation threshold must be higher than 2. + This rule is needed for realizing realistic calculation costs. If you add + '-N' or '-S' options, you can execute gaussian distribution pgbench in these + benchmarks. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> <term><option>-j</option> <replaceable>threads</></term> <term><option>--jobs=</option><replaceable>threads</></term> <listitem> *************** *** 770,775 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> --- 787,818 ---- <varlistentry> <term> + <literal>\setgaussian <replaceable>varname</> <replaceable>min</> <replaceable>max</> <replaceable> + standard deviation threshold</></literal> + </term> + + <listitem> + <para> + Sets variable <replaceable>varname</> to a gaussian random integer value + between the limits <replaceable>min</> and <replaceable>max</> inclusive. + Each limit can be either an integer constant or a + <literal>:</><replaceable>variablename</> reference to a variable + having an integer value. The standard deviation threshold controls + the distribution of the access pattern. If we set a larger standard + deviation threshold, the more frequently accessed values are confined to a + narrower range. The minimum standard deviation threshold is 2. This rule + is needed to keep calculation costs realistic. + </para> + + <para> + Example: + <programlisting> + \setgaussian aid 1 :naccounts 5 + </programlisting></para> + </listitem> + </varlistentry> + <varlistentry> + <term> <literal>\sleep <replaceable>number</> [ us | ms | s ]</literal> </term>
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers