From 62e8fec09bd07184f351f386b81bfaef34e10bd9 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Tue, 1 Oct 2019 09:50:26 +0530
Subject: [PATCH] pgbench: add --partitions and --partition-method options.

These new options allow users to partition the pgbench_accounts table by
specifying the number of partitions and partitioning method.  The values
allowed for partitioning method are range and hash.

This feature allows users to measure the overhead of partitioning if any.

Author: Fabien COELHO
Reviewed-by: Amit Kapila, Amit Langote, Dilip Kumar, Asif Rehman, and
Alvaro Herrera
Discussion: https://postgr.es/m/alpine.DEB.2.21.1907230826190.7008@lancre
---
 doc/src/sgml/ref/pgbench.sgml                |  25 +++
 src/bin/pgbench/pgbench.c                    | 306 +++++++++++++++++++++++----
 src/bin/pgbench/t/001_pgbench_with_server.pl |  20 +-
 src/bin/pgbench/t/002_pgbench_no_server.pl   |   7 +
 4 files changed, 320 insertions(+), 38 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index c857aa3..e3a0abb 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -307,6 +307,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
      </varlistentry>
 
      <varlistentry>
+      <term><option>--partitions=<replaceable>NUM</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NUM</replaceable> partitions of nearly equal size for
+        the scaled number of accounts.
+        Default is <literal>0</literal>, meaning no partitioning.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
+      <listitem>
+       <para>
+        Create a partitioned <literal>pgbench_accounts</literal> table with
+        <replaceable>NAME</replaceable> method.
+        Expected values are <literal>range</literal> or <literal>hash</literal>.
+        This option requires that <option>--partitions</option> is set to non-zero.
+        If unspecified, default is <literal>range</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
       <listitem>
        <para>
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ed7652b..52ea50b 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -186,6 +186,23 @@ int64		latency_limit = 0;
 char	   *tablespace = NULL;
 char	   *index_tablespace = NULL;
 
+/*
+ * Number of "pgbench_accounts" partitions.  0 is the default and means no
+ * partitioning.
+ */
+static int	partitions = 0;
+
+/* partitioning strategy for "pgbench_accounts" */
+typedef enum
+{
+	PART_NONE,					/* no partitioning */
+	PART_RANGE,					/* range partitioning */
+	PART_HASH					/* hash partitioning */
+}			partition_method_t;
+
+static partition_method_t partition_method = PART_NONE;
+static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
+
 /* random seed used to initialize base_random_sequence */
 int64		random_seed = -1;
 
@@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st,
 				  StatsData *agg, bool skipped, double latency, double lag);
 static void processXactStats(TState *thread, CState *st, instr_time *now,
 							 bool skipped, StatsData *agg);
+static void append_fillfactor(char *opts, int len);
 static void addScript(ParsedScript script);
 static void *threadRun(void *arg);
 static void finishCon(CState *st);
@@ -617,6 +635,9 @@ usage(void)
 		   "  --foreign-keys           create foreign key constraints between tables\n"
 		   "  --index-tablespace=TABLESPACE\n"
 		   "                           create indexes in the specified tablespace\n"
+		   "  --partitions=NUM         partition pgbench_accounts in NUM parts (default: 0)\n"
+		   "  --partition-method=(range|hash)\n"
+		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -3602,6 +3623,77 @@ initDropTables(PGconn *con)
 }
 
 /*
+ * Create "pgbench_accounts" partitions if needed.
+ *
+ * This is the larger table of pgbench default tpc-b like schema
+ * with a known size, so we choose to partition it.
+ */
+static void
+createPartitions(PGconn *con)
+{
+	char		ff[64];
+
+	ff[0] = '\0';
+
+	/*
+	 * Per ddlinfo in initCreateTables, fillfactor is needed on table
+	 * pgbench_accounts.
+	 */
+	append_fillfactor(ff, sizeof(ff));
+
+	/* we must have to create some partitions */
+	Assert(partitions > 0);
+
+	fprintf(stderr, "creating %d partitions...\n", partitions);
+
+	for (int p = 1; p <= partitions; p++)
+	{
+		char		query[256];
+
+		if (partition_method == PART_RANGE)
+		{
+			int64		part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
+			char		minvalue[32],
+						maxvalue[32];
+
+			/*
+			 * For RANGE, we use open-ended partitions at the beginning and
+			 * end to allow any valid value for the primary key.  Although the
+			 * actual minimum and maximum values can be derived from the
+			 * scale, it is more generic and the performance is better.
+			 */
+			if (p == 1)
+				sprintf(minvalue, "minvalue");
+			else
+				sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
+
+			if (p < partitions)
+				sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
+			else
+				sprintf(maxvalue, "maxvalue");
+
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values from (%s) to (%s)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 minvalue, maxvalue, ff);
+		}
+		else if (partition_method == PART_HASH)
+			snprintf(query, sizeof(query),
+					 "create%s table pgbench_accounts_%d\n"
+					 "  partition of pgbench_accounts\n"
+					 "  for values with (modulus %d, remainder %d)%s\n",
+					 unlogged_tables ? " unlogged" : "", p,
+					 partitions, p - 1, ff);
+		else					/* cannot get there */
+			Assert(0);
+
+		executeStatement(con, query);
+	}
+}
+
+/*
  * Create pgbench's standard tables
  */
 static void
@@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con)
 
 		/* Construct new create table statement. */
 		opts[0] = '\0';
-		if (ddl->declare_fillfactor)
+
+		/* Partition pgbench_accounts table */
+		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
-					 " with (fillfactor=%d)", fillfactor);
+					 " partition by %s (aid)", PARTITION_METHOD[partition_method]);
+		else if (ddl->declare_fillfactor)
+			/* fillfactor is only expected on actual tables */
+			append_fillfactor(opts, sizeof(opts));
+
 		if (tablespace != NULL)
 		{
 			char	   *escape_tablespace;
@@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con)
 
 		executeStatement(con, buffer);
 	}
+
+	if (partition_method != PART_NONE)
+		createPartitions(con);
+}
+
+/*
+ * add fillfactor percent option.
+ *
+ * XXX - As default is 100, it could be removed in this case.
+ */
+static void
+append_fillfactor(char *opts, int len)
+{
+	snprintf(opts + strlen(opts), len - strlen(opts),
+			 " with (fillfactor=%d)", fillfactor);
 }
 
 /*
@@ -4011,6 +4124,121 @@ runInitSteps(const char *initialize_steps)
 }
 
 /*
+ * Extract pgbench table informations into global variables scale,
+ * partition_method and partitions.
+ */
+static void
+GetTableInfo(PGconn *con, bool scale_given)
+{
+	PGresult   *res;
+
+	/*
+	 * get the scaling factor that should be same as count(*) from
+	 * pgbench_branches if this is not a custom query
+	 */
+	res = PQexec(con, "select count(*) from pgbench_branches");
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
+
+		fprintf(stderr, "%s", PQerrorMessage(con));
+		if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
+		{
+			fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		}
+
+		exit(1);
+	}
+	scale = atoi(PQgetvalue(res, 0, 0));
+	if (scale < 0)
+	{
+		fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
+				PQgetvalue(res, 0, 0));
+		exit(1);
+	}
+	PQclear(res);
+
+	/* warn if we override user-given -s switch */
+	if (scale_given)
+		fprintf(stderr,
+				"scale option ignored, using count from pgbench_branches table (%d)\n",
+				scale);
+
+	/*
+	 * Get the partition information for the first "pgbench_accounts" table
+	 * found in search_path.
+	 *
+	 * The result is empty if no "pgbench_accounts" is found.
+	 *
+	 * Otherwise, it always returns one row even if the table is not
+	 * partitioned (in which case the partition strategy is NULL).
+	 *
+	 * The number of partitions can be 0 even for partitioned tables, if no
+	 * partition is attached.
+	 *
+	 * We assume no partitioning on any failure, so as to avoid failing on an
+	 * old version without "pg_partitioned_table".
+	 */
+	res = PQexec(con,
+				 "select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
+				 "from pg_catalog.pg_class as c "
+				 "join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
+				 "cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
+				 "left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
+				 "left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
+				 "where c.relname = 'pgbench_accounts' and o.n is not null "
+				 "group by 1, 2 "
+				 "order by 1 asc "
+				 "limit 1");
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		/* probably an older version, coldly assume no partitioning */
+		partition_method = PART_NONE;
+		partitions = 0;
+	}
+	else if (PQntuples(res) == 0)
+	{
+		/*
+		 * This case is unlikely as pgbench already found "pgbench_branches"
+		 * above to compute the scale.
+		 */
+		fprintf(stderr,
+				"no pgbench_accounts table found in search_path\n"
+				"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
+		exit(1);
+	}
+	else						/* PQntupes(res) == 1 */
+	{
+		/* normal case, extract partition information */
+		if (PQgetisnull(res, 0, 1))
+			partition_method = PART_NONE;
+		else
+		{
+			char	   *ps = PQgetvalue(res, 0, 1);
+
+			/* column must be there */
+			Assert(ps != NULL);
+
+			if (strcmp(ps, "r") == 0)
+				partition_method = PART_RANGE;
+			else if (strcmp(ps, "h") == 0)
+				partition_method = PART_HASH;
+			else
+			{
+				/* possibly a newer version with new partition method */
+				fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
+				exit(1);
+			}
+		}
+
+		partitions = atoi(PQgetvalue(res, 0, 2));
+	}
+
+	PQclear(res);
+}
+
+/*
  * Replace :param with $n throughout the command's SQL text, which
  * is a modifiable string in cmd->lines.
  */
@@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time,
 	printf("transaction type: %s\n",
 		   num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
 	printf("scaling factor: %d\n", scale);
+	/* only print partitioning information if some partitioning was detected */
+	if (partition_method != PART_NONE)
+		printf("partition method: %s\npartitions: %d\n",
+			   PARTITION_METHOD[partition_method], partitions);
 	printf("query mode: %s\n", QUERYMODE[querymode]);
 	printf("number of clients: %d\n", nclients);
 	printf("number of threads: %d\n", nthreads);
@@ -5126,6 +5358,8 @@ main(int argc, char **argv)
 		{"foreign-keys", no_argument, NULL, 8},
 		{"random-seed", required_argument, NULL, 9},
 		{"show-script", required_argument, NULL, 10},
+		{"partitions", required_argument, NULL, 11},
+		{"partition-method", required_argument, NULL, 12},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -5160,7 +5394,6 @@ main(int argc, char **argv)
 #endif
 
 	PGconn	   *con;
-	PGresult   *res;
 	char	   *env;
 
 	int			exit_code = 0;
@@ -5486,6 +5719,29 @@ main(int argc, char **argv)
 					exit(0);
 				}
 				break;
+			case 11:			/* partitions */
+				initialization_option_set = true;
+				partitions = atoi(optarg);
+				if (partitions < 0)
+				{
+					fprintf(stderr, "invalid number of partitions: \"%s\"\n",
+							optarg);
+					exit(1);
+				}
+				break;
+			case 12:			/* partition-method */
+				initialization_option_set = true;
+				if (pg_strcasecmp(optarg, "range") == 0)
+					partition_method = PART_RANGE;
+				else if (pg_strcasecmp(optarg, "hash") == 0)
+					partition_method = PART_HASH;
+				else
+				{
+					fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
+							" got: \"%s\"\n", optarg);
+					exit(1);
+				}
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -5567,6 +5823,16 @@ main(int argc, char **argv)
 			exit(1);
 		}
 
+		if (partitions == 0 && partition_method != PART_NONE)
+		{
+			fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
+			exit(1);
+		}
+
+		/* set default method */
+		if (partitions > 0 && partition_method == PART_NONE)
+			partition_method = PART_RANGE;
+
 		if (initialize_steps == NULL)
 			initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
 
@@ -5724,39 +5990,7 @@ main(int argc, char **argv)
 	}
 
 	if (internal_script_used)
-	{
-		/*
-		 * get the scaling factor that should be same as count(*) from
-		 * pgbench_branches if this is not a custom query
-		 */
-		res = PQexec(con, "select count(*) from pgbench_branches");
-		if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		{
-			char	   *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-
-			fprintf(stderr, "%s", PQerrorMessage(con));
-			if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
-			{
-				fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
-			}
-
-			exit(1);
-		}
-		scale = atoi(PQgetvalue(res, 0, 0));
-		if (scale < 0)
-		{
-			fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
-					PQgetvalue(res, 0, 0));
-			exit(1);
-		}
-		PQclear(res);
-
-		/* warn if we override user-given -s switch */
-		if (scale_given)
-			fprintf(stderr,
-					"scale option ignored, using count from pgbench_branches table (%d)\n",
-					scale);
-	}
+		GetTableInfo(con, scale_given);
 
 	/*
 	 * :scale variables normally get -s or database scale, but don't override
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f6..c441626 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -58,6 +58,19 @@ sub pgbench
 	return;
 }
 
+# tablespace for testing, because partitioned tables cannot use pg_default
+# explicitly and we want to test that table creation with tablespace works
+# for partitioned tables.
+my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
+mkdir $ts or die "cannot create directory $ts";
+# this takes care of WIN-specific path issues
+my $ets = TestLib::perl2host($ts);
+
+# the next commands will issue a syntax error if the path contains a "'"
+$node->safe_psql('postgres',
+       "CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
+);
+
 # Test concurrent OID generation via pg_enum_oid_index.  This indirectly
 # exercises LWLock and spinlock concurrency.
 my $labels = join ',', map { "'l$_'" } 1 .. 1000;
@@ -100,12 +113,13 @@ pgbench(
 
 # Again, with all possible options
 pgbench(
-	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
+	'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
 	0,
 	[qr{^$}i],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 2 partitions},
 		qr{vacuuming},
 		qr{creating primary keys},
 		qr{creating foreign keys},
@@ -116,12 +130,13 @@ pgbench(
 
 # Test interaction of --init-steps with legacy step-selection options
 pgbench(
-	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
+	'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
 	0,
 	[qr{^$}],
 	[
 		qr{dropping old tables},
 		qr{creating tables},
+		qr{creating 3 partitions},
 		qr{creating primary keys},
 		qr{.* of .* tuples \(.*\) done},
 		qr{creating foreign keys},
@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
 	qr{^\d \d{1,2} \d+ \d \d+ \d+$});
 
 # done
+$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
 done_testing();
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f7fa184..1e9542a 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -157,6 +157,13 @@ my @options = (
 			qr{error while setting random seed from --random-seed option}
 		]
 	],
+	[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
+	[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
+	[
+		'partition method without partitioning',
+		'-i --partition-method=hash',
+		[ qr{partition-method requires greater than zero --partitions} ]
+	],
 
 	# logging sub-options
 	[
-- 
1.8.3.1

