After Kaarel Moppel's piece on pgbench scale/size conversion, it occurred to
me that having this as an option would be nice.
https://www.cybertec-postgresql.com/en/a-formula-to-calculate-pgbench-scaling-factor-for-target-db-size/
Here is an attempt at extending --scale so that it can be given a size.
pgbench -i --scale=124G ...
The approximated database size is also shown in the end-of-run summary.
--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..093e1d4 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -49,7 +49,7 @@
<screen>
transaction type: <builtin: TPC-B (sort of)>
-scaling factor: 10
+scaling factor: 10 (about 149 MiB)
query mode: simple
number of clients: 10
number of threads: 1
@@ -282,6 +282,16 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
in order to be big enough to hold the range of account
identifiers.
</para>
+
+ <para>
+ The scale can also be specified as an expected database size by
+ specifying a unit, assuming around 15 MiB size increments per scale unit.
+ For instance, <literal>-s 5G</literal> will approximate the scale required
+ for a 5 GiB database.
+ Allowed units are IEC 1024 powers (<literal>KiB MiB GiB TiB PiB</literal>)
+ and SI 1000 powers (<literal>kB MB GB TB PB</literal>). For convenience, the
+ simple size prefixes <literal>K M G T P</literal> are aliases for the IEC binary sizes.
+ </para>
</listitem>
</varlistentry>
@@ -1600,7 +1610,7 @@ END;
<screen>
starting vacuum...end.
transaction type: <builtin: TPC-B (sort of)>
-scaling factor: 1
+scaling factor: 1 (about 15 MiB)
query mode: simple
number of clients: 10
number of threads: 1
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d420942..38eb13d 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -524,7 +524,7 @@ usage(void)
" -F, --fillfactor=NUM set fill factor\n"
" -n, --no-vacuum do not run VACUUM during initialization\n"
" -q, --quiet quiet logging (one message each 5 seconds)\n"
- " -s, --scale=NUM scaling factor\n"
+ " -s, --scale=NUM|SIZE scaling factor or expected database size\n"
" --foreign-keys create foreign key constraints between tables\n"
" --index-tablespace=TABLESPACE\n"
" create indexes in the specified tablespace\n"
@@ -552,7 +552,7 @@ usage(void)
" -P, --progress=NUM show thread progress report every NUM seconds\n"
" -r, --report-latencies report average latency per command\n"
" -R, --rate=NUM target rate in transactions per second\n"
- " -s, --scale=NUM report this scale factor in output\n"
+ " -s, --scale=NUM|SIZE report this scale factor in output\n"
" -t, --transactions=NUM number of transactions each client runs (default: 10)\n"
" -T, --time=NUM duration of benchmark test in seconds\n"
" -v, --vacuum-all vacuum all four standard tables before tests\n"
@@ -668,6 +668,87 @@ gotdigits:
return ((sign < 0) ? -result : result);
}
+/*
+ * Parse a size written as an integer optionally followed by a unit suffix,
+ * and return it in bytes.
+ *
+ * Recognized suffixes are the IEC binary units (KiB .. PiB), the SI decimal
+ * units (kB .. PB), the binary shorthands KB, K, M, G, T and P, and B for
+ * bytes.  A string with no recognized suffix is taken as a byte count.
+ *
+ * "s" is truncated in place while its numeric part is checked and converted,
+ * and is restored before returning or reporting an error, so that the error
+ * message shows exactly what the user typed.
+ *
+ * If the numeric part is not accepted by is_an_int(), or the size is negative
+ * or does not fit in an int64, "invalid <error_message>" is printed on stderr
+ * and the program exits with status 1.
+ */
+static int64
+parse_size(char *s, const char *error_message)
+{
+	/*
+	 * The first matching suffix wins, so "B" must stay last: every
+	 * two- and three-letter unit also ends in "B".
+	 */
+	static const struct
+	{
+		const char *name;
+		int64		multiplier;
+	}			UNITS[] = {
+		/* IEC units */
+		{ "KiB", 1024 },
+		{ "MiB", 1024 * 1024 },
+		{ "GiB", 1024 * 1024 * 1024 },
+		{ "TiB", (int64) 1024 * 1024 * 1024 * 1024 },
+		{ "PiB", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+		/* SI units */
+		{ "kB", 1000 },
+		{ "MB", 1000 * 1000 },
+		{ "GB", 1000 * 1000 * 1000 },
+		{ "TB", (int64) 1000 * 1000 * 1000 * 1000 },
+		{ "PB", (int64) 1000 * 1000 * 1000 * 1000 * 1000 },
+		/* common/convenient JEDEC usage */
+		{ "KB", 1024 },
+		{ "K", 1024 },
+		{ "M", 1024 * 1024 },
+		{ "G", 1024 * 1024 * 1024 },
+		{ "T", (int64) 1024 * 1024 * 1024 * 1024 },
+		{ "P", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+		/* unit */
+		{ "B", 1 },
+	};
+
+	size_t		len = strlen(s);
+	size_t		numlen = len;		/* length of the numeric part */
+	int64		multiplier = 1;		/* no suffix: plain bytes */
+	int64		value = 0;
+	bool		valid;
+	size_t		i;
+
+	/* look for the unit */
+	for (i = 0; i < lengthof(UNITS); i++)
+	{
+		size_t		ulen = strlen(UNITS[i].name);
+
+		/* a suffix longer than the input cannot match; do not read before s */
+		if (ulen <= len && strcmp(s + len - ulen, UNITS[i].name) == 0)
+		{
+			numlen = len - ulen;
+			multiplier = UNITS[i].multiplier;
+			break;
+		}
+	}
+
+	if (numlen < len)
+	{
+		/* hide the suffix only for the duration of the conversion */
+		char		saved = s[numlen];
+
+		s[numlen] = '\0';
+		valid = is_an_int(s);
+		if (valid)
+			value = strtoint64(s);
+		s[numlen] = saved;
+	}
+	else
+	{
+		valid = is_an_int(s);
+		if (valid)
+			value = strtoint64(s);
+	}
+
+	/* the division guards the multiplication below against int64 overflow */
+	if (!valid || value < 0 || value > PG_INT64_MAX / multiplier)
+	{
+		fprintf(stderr, "invalid %s: \"%s\"\n", error_message, s);
+		exit(1);
+	}
+
+	return value * multiplier;
+}
+
+/*
+ * Parse the argument of --scale and return a scale factor of at least 1.
+ *
+ * A plain integer is the scale factor itself and must be strictly positive,
+ * as it was before sizes were accepted.  Anything else is handed to
+ * parse_size() and the resulting byte count is converted to the scale
+ * expected to produce a database of about that size.
+ *
+ * Invalid or out-of-range input prints "invalid scaling factor" on stderr
+ * and exits with status 1.  A size too small for scale 1 is rounded up to 1
+ * with a notice on stderr.
+ */
+static int
+parse_scale(char *s)
+{
+	int64		size;
+	double		estimate;
+
+	/* no unit: keep the historical meaning of "-s NUM" */
+	if (is_an_int(s))
+	{
+		int64		factor = strtoint64(s);
+
+		if (factor <= 0 || factor > INT_MAX)
+		{
+			fprintf(stderr, "invalid scaling factor: \"%s\"\n", s);
+			exit(1);
+		}
+		return (int) factor;
+	}
+
+	size = parse_size(s, "scaling factor");
+
+	/*
+	 * formula from Kaarel Moppel linear regression on pg 10.1,
+	 * which gives about 15 MiB per pgbench scale unit
+	 */
+	estimate = ceil(0.066888816 * size / (1024 * 1024) - 0.511799076);
+
+	/* converting an out-of-range double to int is undefined, so check first */
+	if (estimate > INT_MAX)
+	{
+		fprintf(stderr, "invalid scaling factor: \"%s\"\n", s);
+		exit(1);
+	}
+
+	if (estimate < 1)
+	{
+		fprintf(stderr, "scale %s too small, rounded to 1\n", s);
+		return 1;
+	}
+
+	return (int) estimate;
+}
+
/* random number generator: uniform distribution from min to max inclusive */
static int64
getrand(TState *thread, int64 min, int64 max)
@@ -4244,7 +4325,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
/* Report test parameters. */
printf("transaction type: %s\n",
num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
- printf("scaling factor: %d\n", scale);
+ /* scale to MiB evaluation must be consistent with parse_scale */
+ printf("scaling factor: %d (about %.0f MiB)\n", scale, 14.95 * scale);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
@@ -4560,12 +4642,7 @@ main(int argc, char **argv)
break;
case 's':
scale_given = true;
- scale = atoi(optarg);
- if (scale <= 0)
- {
- fprintf(stderr, "invalid scaling factor: \"%s\"\n", optarg);
- exit(1);
- }
+ scale = parse_scale(optarg);
break;
case 't':
benchmarking_option_set = true;
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..52d135c 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -44,6 +44,7 @@ my @options = (
[ 'bad #threads', '-j eleven', [qr{invalid number of threads: "eleven"}]
],
[ 'bad scale', '-i -s two', [qr{invalid scaling factor: "two"}] ],
+ [ 'bad scale size', '-i -s 2stuff', [qr{invalid scaling factor: "2stuff"}] ],
[ 'invalid #transactions',
'-t zil',
[qr{invalid number of transactions: "zil"}] ],