Similar to 'pg_dump --binary-upgrade' [0], we can speed up pg_dump with
many sequences by gathering the required information in a single query
instead of two queries per sequence.  The attached patches are
works-in-progress, but here are the results I see on my machine for
'pg_dump --schema-only --binary-upgrade' with a million sequences:

          HEAD : 6m22.809s
           [0] : 1m54.701s
[0] + attached : 0m38.233s

I'm not sure I have all the details correct in 0003, and we might want to
separate the table into two tables which are only populated when the
relevant section is dumped.  Furthermore, the query in 0003 is a bit goofy
because it needs to dance around a bug reported elsewhere [1].

[0] https://postgr.es/m/20240418041712.GA3441570%40nathanxps13
[1] https://postgr.es/m/20240501005730.GA594666%40nathanxps13

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 32d1e994f31be002d4490f9239f819d6ee55cb36 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Tue, 30 Apr 2024 14:41:36 -0500
Subject: [PATCH v1 1/3] parse sequence information

---
 src/bin/pg_dump/pg_dump.c | 64 ++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 38 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 379debac24..b53c17aace 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -17566,18 +17566,16 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 {
 	DumpOptions *dopt = fout->dopt;
 	PGresult   *res;
-	char	   *startv,
-			   *incby,
-			   *maxv,
-			   *minv,
-			   *cache,
-			   *seqtype;
+	char		seqtype[10];
 	bool		cycled;
 	bool		is_ascending;
 	int64		default_minv,
-				default_maxv;
-	char		bufm[32],
-				bufx[32];
+				default_maxv,
+				minv,
+				maxv,
+				startv,
+				incby,
+				cache;
 	PQExpBuffer query = createPQExpBuffer();
 	PQExpBuffer delqry = createPQExpBuffer();
 	char	   *qseqname;
@@ -17619,16 +17617,21 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 						  PQntuples(res)),
 				 tbinfo->dobj.name, PQntuples(res));
 
-	seqtype = PQgetvalue(res, 0, 0);
-	startv = PQgetvalue(res, 0, 1);
-	incby = PQgetvalue(res, 0, 2);
-	maxv = PQgetvalue(res, 0, 3);
-	minv = PQgetvalue(res, 0, 4);
-	cache = PQgetvalue(res, 0, 5);
+	Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype));
+	strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype));
+	seqtype[sizeof(seqtype) - 1] = '\0';
+
+	startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10);
+	incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10);
+	maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10);
+	minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10);
+	cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10);
 	cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0);
 
+	PQclear(res);
+
 	/* Calculate default limits for a sequence of this type */
-	is_ascending = (incby[0] != '-');
+	is_ascending = (incby >= 0);
 	if (strcmp(seqtype, "smallint") == 0)
 	{
 		default_minv = is_ascending ? 1 : PG_INT16_MIN;
@@ -17650,19 +17653,6 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 		default_minv = default_maxv = 0;	/* keep compiler quiet */
 	}
 
-	/*
-	 * 64-bit strtol() isn't very portable, so convert the limits to strings
-	 * and compare that way.
-	 */
-	snprintf(bufm, sizeof(bufm), INT64_FORMAT, default_minv);
-	snprintf(bufx, sizeof(bufx), INT64_FORMAT, default_maxv);
-
-	/* Don't print minv/maxv if they match the respective default limit */
-	if (strcmp(minv, bufm) == 0)
-		minv = NULL;
-	if (strcmp(maxv, bufx) == 0)
-		maxv = NULL;
-
 	/*
 	 * Identity sequences are not to be dropped separately.
 	 */
@@ -17714,22 +17704,22 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 			appendPQExpBuffer(query, "    AS %s\n", seqtype);
 	}
 
-	appendPQExpBuffer(query, "    START WITH %s\n", startv);
+	appendPQExpBuffer(query, "    START WITH " INT64_FORMAT "\n", startv);
 
-	appendPQExpBuffer(query, "    INCREMENT BY %s\n", incby);
+	appendPQExpBuffer(query, "    INCREMENT BY " INT64_FORMAT "\n", incby);
 
-	if (minv)
-		appendPQExpBuffer(query, "    MINVALUE %s\n", minv);
+	if (minv != default_minv)
+		appendPQExpBuffer(query, "    MINVALUE " INT64_FORMAT "\n", minv);
 	else
 		appendPQExpBufferStr(query, "    NO MINVALUE\n");
 
-	if (maxv)
-		appendPQExpBuffer(query, "    MAXVALUE %s\n", maxv);
+	if (maxv != default_maxv)
+		appendPQExpBuffer(query, "    MAXVALUE " INT64_FORMAT "\n", maxv);
 	else
 		appendPQExpBufferStr(query, "    NO MAXVALUE\n");
 
 	appendPQExpBuffer(query,
-					  "    CACHE %s%s",
+					  "    CACHE " INT64_FORMAT "%s",
 					  cache, (cycled ? "\n    CYCLE" : ""));
 
 	if (tbinfo->is_identity_sequence)
@@ -17816,8 +17806,6 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 					 tbinfo->dobj.namespace->dobj.name, tbinfo->rolname,
 					 tbinfo->dobj.catId, 0, tbinfo->dobj.dumpId);
 
-	PQclear(res);
-
 	destroyPQExpBuffer(query);
 	destroyPQExpBuffer(delqry);
 	free(qseqname);
-- 
2.25.1

>From a58f6c33029659aef0a2139293da5ba9fd7f178f Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Tue, 30 Apr 2024 15:18:52 -0500
Subject: [PATCH v1 2/3] cache sequence information

---
 src/bin/pg_dump/pg_dump.c        | 143 +++++++++++++++++++++++++------
 src/tools/pgindent/typedefs.list |   1 +
 2 files changed, 116 insertions(+), 28 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index b53c17aace..98cc2698ac 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -55,6 +55,7 @@
 #include "catalog/pg_trigger_d.h"
 #include "catalog/pg_type_d.h"
 #include "common/connect.h"
+#include "common/int.h"
 #include "common/relpath.h"
 #include "compress_io.h"
 #include "dumputils.h"
@@ -92,6 +93,18 @@ typedef struct
 	int			objsubid;		/* subobject (table column #) */
 } SecLabelItem;
 
+typedef struct
+{
+	Oid			oid;
+	char		seqtype[10];
+	bool		cycled;
+	int64		minv;
+	int64		maxv;
+	int64		startv;
+	int64		incby;
+	int64		cache;
+} SequenceItem;
+
 typedef enum OidOptions
 {
 	zeroIsError = 1,
@@ -157,6 +170,10 @@ static int	ncomments = 0;
 static SecLabelItem *seclabels = NULL;
 static int	nseclabels = 0;
 
+/* sorted table of sequences */
+static SequenceItem *sequences = NULL;
+static int	nsequences = 0;
+
 /*
  * The default number of rows per INSERT when
  * --inserts is specified without --rows-per-insert
@@ -254,6 +271,7 @@ static void dumpTable(Archive *fout, const TableInfo *tbinfo);
 static void dumpTableSchema(Archive *fout, const TableInfo *tbinfo);
 static void dumpTableAttach(Archive *fout, const TableAttachInfo *attachinfo);
 static void dumpAttrDef(Archive *fout, const AttrDefInfo *adinfo);
+static void collectSequences(Archive *fout);
 static void dumpSequence(Archive *fout, const TableInfo *tbinfo);
 static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
@@ -971,6 +989,9 @@ main(int argc, char **argv)
 	if (!dopt.no_security_labels)
 		collectSecLabels(fout);
 
+	/* Collect sequence information. */
+	collectSequences(fout);
+
 	/* Lastly, create dummy objects to represent the section boundaries */
 	boundaryObjs = createBoundaryObjects();
 
@@ -17557,6 +17578,63 @@ dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo)
 	free(qtabname);
 }
 
+/*
+ * bsearch() comparator for SequenceItem
+ */
+static int
+SequenceItemCmp(const void *p1, const void *p2)
+{
+	SequenceItem v1 = *((const SequenceItem *) p1);
+	SequenceItem v2 = *((const SequenceItem *) p2);
+
+	return pg_cmp_u32(v1.oid, v2.oid);
+}
+
+/*
+ * collectSequences
+ *
+ * Construct a table of sequence information.  This table is sorted by OID for
+ * speed in lookup.
+ */
+static void
+collectSequences(Archive *fout)
+{
+	PGresult   *res;
+	const char *query;
+
+	query = "SELECT seqrelid, format_type(seqtypid, NULL), "
+		"seqstart, seqincrement, "
+		"seqmax, seqmin, "
+		"seqcache, seqcycle "
+		"FROM pg_catalog.pg_sequence "
+		"ORDER BY seqrelid";
+
+	res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
+
+	nsequences = PQntuples(res);
+	sequences = (SequenceItem *) pg_malloc(nsequences * sizeof(SequenceItem));
+
+	for (int i = 0; i < nsequences; i++)
+	{
+		size_t		seqtype_sz = sizeof(((SequenceItem *) 0)->seqtype);
+
+		sequences[i].oid = atooid(PQgetvalue(res, i, 0));
+
+		Assert(strlen(PQgetvalue(res, i, 1)) < seqtype_sz);
+		strncpy(sequences[i].seqtype, PQgetvalue(res, i, 1), seqtype_sz);
+		sequences[i].seqtype[seqtype_sz - 1] = '\0';
+
+		sequences[i].startv = strtoi64(PQgetvalue(res, i, 2), NULL, 10);
+		sequences[i].incby = strtoi64(PQgetvalue(res, i, 3), NULL, 10);
+		sequences[i].maxv = strtoi64(PQgetvalue(res, i, 4), NULL, 10);
+		sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
+		sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
+		sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
+	}
+
+	PQclear(res);
+}
+
 /*
  * dumpSequence
  *	  write the declaration (not data) of one user-defined sequence
@@ -17565,7 +17643,6 @@ static void
 dumpSequence(Archive *fout, const TableInfo *tbinfo)
 {
 	DumpOptions *dopt = fout->dopt;
-	PGresult   *res;
 	char		seqtype[10];
 	bool		cycled;
 	bool		is_ascending;
@@ -17585,17 +17662,27 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 
 	if (fout->remoteVersion >= 100000)
 	{
-		appendPQExpBuffer(query,
-						  "SELECT format_type(seqtypid, NULL), "
-						  "seqstart, seqincrement, "
-						  "seqmax, seqmin, "
-						  "seqcache, seqcycle "
-						  "FROM pg_catalog.pg_sequence "
-						  "WHERE seqrelid = '%u'::oid",
-						  tbinfo->dobj.catId.oid);
+		SequenceItem key = {0};
+		SequenceItem *entry;
+
+		Assert(sequences);
+
+		key.oid = tbinfo->dobj.catId.oid;
+		entry = bsearch(&key, sequences, nsequences,
+						sizeof(SequenceItem), SequenceItemCmp);
+
+		strncpy(seqtype, entry->seqtype, sizeof(seqtype));
+		startv = entry->startv;
+		incby = entry->incby;
+		maxv = entry->maxv;
+		minv = entry->minv;
+		cache = entry->cache;
+		cycled = entry->cycled;
 	}
 	else
 	{
+		PGresult   *res;
+
 		/*
 		 * Before PostgreSQL 10, sequence metadata is in the sequence itself.
 		 *
@@ -17607,28 +17694,28 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 						  "start_value, increment_by, max_value, min_value, "
 						  "cache_value, is_cycled FROM %s",
 						  fmtQualifiedDumpable(tbinfo));
-	}
-
-	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
-
-	if (PQntuples(res) != 1)
-		pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
-						  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
-						  PQntuples(res)),
-				 tbinfo->dobj.name, PQntuples(res));
 
-	Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype));
-	strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype));
-	seqtype[sizeof(seqtype) - 1] = '\0';
+		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 
-	startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10);
-	incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10);
-	maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10);
-	minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10);
-	cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10);
-	cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0);
+		if (PQntuples(res) != 1)
+			pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
+							  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
+							  PQntuples(res)),
+					 tbinfo->dobj.name, PQntuples(res));
+
+		Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype));
+		strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype));
+		seqtype[sizeof(seqtype) - 1] = '\0';
+
+		startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10);
+		incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10);
+		maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10);
+		minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10);
+		cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10);
+		cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0);
 
-	PQclear(res);
+		PQclear(res);
+	}
 
 	/* Calculate default limits for a sequence of this type */
 	is_ascending = (incby >= 0);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e10ff28ee5..ff7a4ca7e1 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2555,6 +2555,7 @@ SeqScan
 SeqScanState
 SeqTable
 SeqTableData
+SequenceItem
 SerCommitSeqNo
 SerialControl
 SerialIOData
-- 
2.25.1

>From b590247938434d4f0fa2360069ad797fd30b7181 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Tue, 30 Apr 2024 19:33:24 -0500
Subject: [PATCH v1 3/3] cache more sequence data

---
 src/bin/pg_dump/pg_dump.c | 72 +++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 18 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 98cc2698ac..02bf4f65e6 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -103,6 +103,8 @@ typedef struct
 	int64		startv;
 	int64		incby;
 	int64		cache;
+	int64		last_value;
+	bool		is_called;
 } SequenceItem;
 
 typedef enum OidOptions
@@ -17602,11 +17604,24 @@ collectSequences(Archive *fout)
 	PGresult   *res;
 	const char *query;
 
+	if (fout->remoteVersion < 100000)
+		return;
+
 	query = "SELECT seqrelid, format_type(seqtypid, NULL), "
 		"seqstart, seqincrement, "
 		"seqmax, seqmin, "
-		"seqcache, seqcycle "
-		"FROM pg_catalog.pg_sequence "
+		"seqcache, seqcycle, "
+		"CASE WHEN has_sequence_privilege(seqrelid, 'SELECT,USAGE'::text) "
+		"AND (pg_is_in_recovery() = 'f' OR c.relpersistence = 'p') "
+		"THEN pg_sequence_last_value(seqrelid) IS NOT NULL "
+		"ELSE 'f' END AS is_called, "
+		"CASE WHEN has_sequence_privilege(seqrelid, 'SELECT,USAGE'::text) "
+		"AND (pg_is_in_recovery() = 'f' OR c.relpersistence = 'p') "
+		"AND pg_sequence_last_value(seqrelid) IS NOT NULL "
+		"THEN pg_sequence_last_value(seqrelid) "
+		"ELSE seqstart END AS last_value "
+		"FROM pg_catalog.pg_sequence s "
+		"JOIN pg_class c ON s.seqrelid = c.oid "
 		"ORDER BY seqrelid";
 
 	res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
@@ -17630,6 +17645,8 @@ collectSequences(Archive *fout)
 		sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
 		sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
 		sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
+		sequences[i].is_called = (strcmp(PQgetvalue(res, i, 8), "t") == 0);
+		sequences[i].last_value = strtoi64(PQgetvalue(res, i, 9), NULL, 10);
 	}
 
 	PQclear(res);
@@ -17906,30 +17923,51 @@ static void
 dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
 {
 	TableInfo  *tbinfo = tdinfo->tdtable;
-	PGresult   *res;
-	char	   *last;
+	int64		last;
 	bool		called;
 	PQExpBuffer query = createPQExpBuffer();
 
-	appendPQExpBuffer(query,
-					  "SELECT last_value, is_called FROM %s",
-					  fmtQualifiedDumpable(tbinfo));
+	if (fout->remoteVersion < 100000)
+	{
+		PGresult   *res;
 
-	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+		appendPQExpBuffer(query,
+						  "SELECT last_value, is_called FROM %s",
+						  fmtQualifiedDumpable(tbinfo));
 
-	if (PQntuples(res) != 1)
-		pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
-						  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
-						  PQntuples(res)),
-				 tbinfo->dobj.name, PQntuples(res));
+		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 
-	last = PQgetvalue(res, 0, 0);
-	called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+		if (PQntuples(res) != 1)
+			pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
+							  "query to get data of sequence \"%s\" returned %d rows (expected 1)",
+							  PQntuples(res)),
+					 tbinfo->dobj.name, PQntuples(res));
+
+		last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10);
+		called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+
+		PQclear(res);
+	}
+	else
+	{
+		SequenceItem key = {0};
+		SequenceItem *entry;
+
+		Assert(sequences);
+		Assert(tbinfo->dobj.catId.oid);
+
+		key.oid = tbinfo->dobj.catId.oid;
+		entry = bsearch(&key, sequences, nsequences,
+						sizeof(SequenceItem), SequenceItemCmp);
+
+		last = entry->last_value;
+		called = entry->is_called;
+	}
 
 	resetPQExpBuffer(query);
 	appendPQExpBufferStr(query, "SELECT pg_catalog.setval(");
 	appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout);
-	appendPQExpBuffer(query, ", %s, %s);\n",
+	appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n",
 					  last, (called ? "true" : "false"));
 
 	if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA)
@@ -17943,8 +17981,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
 								  .deps = &(tbinfo->dobj.dumpId),
 								  .nDeps = 1));
 
-	PQclear(res);
-
 	destroyPQExpBuffer(query);
 }
 
-- 
2.25.1

Reply via email to