After Kaarel Moppel's piece on pgbench scale/size conversion, it occurred to me that having this as an option would be nice.

https://www.cybertec-postgresql.com/en/a-formula-to-calculate-pgbench-scaling-factor-for-target-db-size/

Here is an attempt at extending --scale so that it can be given a size.

  pgbench -i --scale=124G ...

The approximated database size is also shown in the end-of-run summary.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..093e1d4 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -49,7 +49,7 @@
 
 <screen>
 transaction type: &lt;builtin: TPC-B (sort of)&gt;
-scaling factor: 10
+scaling factor: 10 (about 149 MiB)
 query mode: simple
 number of clients: 10
 number of threads: 1
@@ -282,6 +282,16 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
         in order to be big enough to hold the range of account
         identifiers.
        </para>
+
+       <para>
+        The scale can also be specified as an expected database size by
+        specifying a unit, assuming around 15 MiB size increments per scale unit.
+        For instance, <literal>-s 5G</literal> will approximate the scale required
+        for a 5 GiB database.
+        Allowed units are IEC 1024 powers (<literal>KiB MiB GiB TiB PiB</literal>),
+        SI 1000 powers (<literal>kB MB GB TB PB</literal>) and for convenience
+        simple size prefixes <literal>K M G T P</literal> are aliases for the IEC binary sizes.
+       </para>
       </listitem>
      </varlistentry>
 
@@ -1600,7 +1610,7 @@ END;
 <screen>
 starting vacuum...end.
 transaction type: &lt;builtin: TPC-B (sort of)&gt;
-scaling factor: 1
+scaling factor: 1 (about 15 MiB)
 query mode: simple
 number of clients: 10
 number of threads: 1
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d420942..38eb13d 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -524,7 +524,7 @@ usage(void)
 		   "  -F, --fillfactor=NUM     set fill factor\n"
 		   "  -n, --no-vacuum          do not run VACUUM during initialization\n"
 		   "  -q, --quiet              quiet logging (one message each 5 seconds)\n"
-		   "  -s, --scale=NUM          scaling factor\n"
+		   "  -s, --scale=NUM|SIZE     scaling factor or expected database size\n"
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
@@ -552,7 +552,7 @@ usage(void)
 		   "  -P, --progress=NUM       show thread progress report every NUM seconds\n"
 		   "  -r, --report-latencies   report average latency per command\n"
 		   "  -R, --rate=NUM           target rate in transactions per second\n"
-		   "  -s, --scale=NUM          report this scale factor in output\n"
+		   "  -s, --scale=NUM|SIZE     report this scale factor in output\n"
 		   "  -t, --transactions=NUM   number of transactions each client runs (default: 10)\n"
 		   "  -T, --time=NUM           duration of benchmark test in seconds\n"
 		   "  -v, --vacuum-all         vacuum all four standard tables before tests\n"
@@ -668,6 +668,87 @@ gotdigits:
 	return ((sign < 0) ? -result : result);
 }
 
+/* return a size in bytes, or exit with an error message
+ */
+static int64
+parse_size(char * s, const char * error_message)
+{
+	static struct { char *name; int64 multiplier; }
+		UNITS[17] = {
+			/* IEC units */
+			{ "KiB", 1024 },
+			{ "MiB", 1024 * 1024 },
+			{ "GiB", 1024 * 1024 * 1024 },
+			{ "TiB", (int64) 1024 * 1024 * 1024 * 1024 },
+			{ "PiB", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+			/* SI units */
+			{ "kB", 1000 },
+			{ "MB", 1000 * 1000 },
+			{ "GB", 1000 * 1000 * 1000 },
+			{ "TB", (int64) 1000 * 1000 * 1000 * 1000 },
+			{ "PB", (int64) 1000 * 1000 * 1000 * 1000 * 1000 },
+			/* common/convenient JEDEC usage */
+			{ "KB", 1024 },
+			{ "K", 1024 },
+			{ "M", 1024 * 1024 },
+			{ "G", 1024 * 1024 * 1024 },
+			{ "T", (int64) 1024 * 1024 * 1024 * 1024 },
+			{ "P", (int64) 1024 * 1024 * 1024 * 1024 * 1024 },
+			/* unit */
+			{ "B", 1 },
+	};
+
+	int		len = strlen(s), last = -1, i;
+	int64	size;
+	char	clast;
+
+	/* look for the unit */
+	for (i = 0; i < lengthof(UNITS); i++)
+		if (strcmp(s + len - strlen(UNITS[i].name), UNITS[i].name) == 0)
+			break;
+
+	/* found, or not */
+	if (i < lengthof(UNITS))
+	{
+		last = len - strlen(UNITS[i].name);
+		clast = s[last];
+		s[last] = '\0';
+	}
+	else /* assume bytes */
+		i = lengthof(UNITS) - 1;
+
+	if (!is_an_int(s))
+	{
+		fprintf(stderr, "invalid %s: \"%s\"\n", error_message, s);
+		exit(1);
+	}
+
+	size = strtoint64(s) * UNITS[i].multiplier;
+
+	if (last != -1)
+		s[last] = clast;
+
+	return size;
+}
+
+/* parse scale, returning at least 1 */
+static int
+parse_scale(char * s)
+{
+	int64 size = parse_size(s, "scaling factor");
+	/*
+	 * formula from Kaarel Moppel linear regression on pg 10.1,
+	 * which gives about 15 MiB per pgbench scale unit
+	 */
+	int scale = (int) ceil(0.066888816 * size / (1024 * 1024) - 0.511799076);
+	if (scale <= 0)
+	{
+		fprintf(stderr, "scale %s too small, rounded to 1\n", s);
+		scale = 1;
+	}
+	return scale;
+}
+
 /* random number generator: uniform distribution from min to max inclusive */
 static int64
 getrand(TState *thread, int64 min, int64 max)
@@ -4244,7 +4325,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	/* Report test parameters. */
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
-	printf("scaling factor: %d\n", scale);
+	/* scale to MiB evaluation must be consistent with parse_scale */
+	printf("scaling factor: %d (about %.0f MiB)\n", scale, 14.95 * scale);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -4560,12 +4642,7 @@ main(int argc, char **argv)
 				break;
 			case 's':
 				scale_given = true;
-				scale = atoi(optarg);
-				if (scale <= 0)
-				{
-					fprintf(stderr, "invalid scaling factor: \"%s\"\n", optarg);
-					exit(1);
-				}
+				scale = parse_scale(optarg);
 				break;
 			case 't':
 				benchmarking_option_set = true;
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index 6ea55f8..52d135c 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -44,6 +44,7 @@ my @options = (
 	[   'bad #threads', '-j eleven', [qr{invalid number of threads: "eleven"}]
 	],
 	[ 'bad scale', '-i -s two', [qr{invalid scaling factor: "two"}] ],
+	[ 'bad scale size', '-i -s 2stuff', [qr{invalid scaling factor: "2stuff"}] ],
 	[   'invalid #transactions',
 		'-t zil',
 		[qr{invalid number of transactions: "zil"}] ],

Reply via email to