Similar to 'pg_dump --binary-upgrade' [0], we can speed up pg_dump with many sequences by gathering the required information in a single query instead of two queries per sequence. The attached patches are works-in-progress, but here are the results I see on my machine for 'pg_dump --schema-only --binary-upgrade' with a million sequences:
HEAD : 6m22.809s [0] : 1m54.701s [0] + attached : 0m38.233s I'm not sure I have all the details correct in 0003, and we might want to separate the table into two tables which are only populated when the relevant section is dumped. Furthermore, the query in 0003 is a bit goofy because it needs to dance around a bug reported elsewhere [1]. [0] https://postgr.es/m/20240418041712.GA3441570%40nathanxps13 [1] https://postgr.es/m/20240501005730.GA594666%40nathanxps13 -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 32d1e994f31be002d4490f9239f819d6ee55cb36 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Tue, 30 Apr 2024 14:41:36 -0500 Subject: [PATCH v1 1/3] parse sequence information --- src/bin/pg_dump/pg_dump.c | 64 ++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 379debac24..b53c17aace 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -17566,18 +17566,16 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) { DumpOptions *dopt = fout->dopt; PGresult *res; - char *startv, - *incby, - *maxv, - *minv, - *cache, - *seqtype; + char seqtype[10]; bool cycled; bool is_ascending; int64 default_minv, - default_maxv; - char bufm[32], - bufx[32]; + default_maxv, + minv, + maxv, + startv, + incby, + cache; PQExpBuffer query = createPQExpBuffer(); PQExpBuffer delqry = createPQExpBuffer(); char *qseqname; @@ -17619,16 +17617,21 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) PQntuples(res)), tbinfo->dobj.name, PQntuples(res)); - seqtype = PQgetvalue(res, 0, 0); - startv = PQgetvalue(res, 0, 1); - incby = PQgetvalue(res, 0, 2); - maxv = PQgetvalue(res, 0, 3); - minv = PQgetvalue(res, 0, 4); - cache = PQgetvalue(res, 0, 5); + Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype)); + strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype)); + seqtype[sizeof(seqtype) - 1] = '\0'; + + startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10); + incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10); + maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10); + minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10); + cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10); cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); + PQclear(res); + /* Calculate default limits for a sequence of this type */ - is_ascending = (incby[0] != '-'); + is_ascending = (incby >= 0); if (strcmp(seqtype, "smallint") == 0) { default_minv = is_ascending ? 1 : PG_INT16_MIN; @@ -17650,19 +17653,6 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) default_minv = default_maxv = 0; /* keep compiler quiet */ } - /* - * 64-bit strtol() isn't very portable, so convert the limits to strings - * and compare that way. - */ - snprintf(bufm, sizeof(bufm), INT64_FORMAT, default_minv); - snprintf(bufx, sizeof(bufx), INT64_FORMAT, default_maxv); - - /* Don't print minv/maxv if they match the respective default limit */ - if (strcmp(minv, bufm) == 0) - minv = NULL; - if (strcmp(maxv, bufx) == 0) - maxv = NULL; - /* * Identity sequences are not to be dropped separately. */ @@ -17714,22 +17704,22 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) appendPQExpBuffer(query, " AS %s\n", seqtype); } - appendPQExpBuffer(query, " START WITH %s\n", startv); + appendPQExpBuffer(query, " START WITH " INT64_FORMAT "\n", startv); - appendPQExpBuffer(query, " INCREMENT BY %s\n", incby); + appendPQExpBuffer(query, " INCREMENT BY " INT64_FORMAT "\n", incby); - if (minv) - appendPQExpBuffer(query, " MINVALUE %s\n", minv); + if (minv != default_minv) + appendPQExpBuffer(query, " MINVALUE " INT64_FORMAT "\n", minv); else appendPQExpBufferStr(query, " NO MINVALUE\n"); - if (maxv) - appendPQExpBuffer(query, " MAXVALUE %s\n", maxv); + if (maxv != default_maxv) + appendPQExpBuffer(query, " MAXVALUE " INT64_FORMAT "\n", maxv); else appendPQExpBufferStr(query, " NO MAXVALUE\n"); appendPQExpBuffer(query, - " CACHE %s%s", + " CACHE " INT64_FORMAT "%s", cache, (cycled ? "\n CYCLE" : "")); if (tbinfo->is_identity_sequence) @@ -17816,8 +17806,6 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) tbinfo->dobj.namespace->dobj.name, tbinfo->rolname, tbinfo->dobj.catId, 0, tbinfo->dobj.dumpId); - PQclear(res); - destroyPQExpBuffer(query); destroyPQExpBuffer(delqry); free(qseqname); -- 2.25.1
>From a58f6c33029659aef0a2139293da5ba9fd7f178f Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Tue, 30 Apr 2024 15:18:52 -0500 Subject: [PATCH v1 2/3] cache sequence information --- src/bin/pg_dump/pg_dump.c | 143 +++++++++++++++++++++++++------ src/tools/pgindent/typedefs.list | 1 + 2 files changed, 116 insertions(+), 28 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b53c17aace..98cc2698ac 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -55,6 +55,7 @@ #include "catalog/pg_trigger_d.h" #include "catalog/pg_type_d.h" #include "common/connect.h" +#include "common/int.h" #include "common/relpath.h" #include "compress_io.h" #include "dumputils.h" @@ -92,6 +93,18 @@ typedef struct int objsubid; /* subobject (table column #) */ } SecLabelItem; +typedef struct +{ + Oid oid; + char seqtype[10]; + bool cycled; + int64 minv; + int64 maxv; + int64 startv; + int64 incby; + int64 cache; +} SequenceItem; + typedef enum OidOptions { zeroIsError = 1, @@ -157,6 +170,10 @@ static int ncomments = 0; static SecLabelItem *seclabels = NULL; static int nseclabels = 0; +/* sorted table of sequences */ +static SequenceItem *sequences = NULL; +static int nsequences = 0; + /* * The default number of rows per INSERT when * --inserts is specified without --rows-per-insert @@ -254,6 +271,7 @@ static void dumpTable(Archive *fout, const TableInfo *tbinfo); static void dumpTableSchema(Archive *fout, const TableInfo *tbinfo); static void dumpTableAttach(Archive *fout, const TableAttachInfo *attachinfo); static void dumpAttrDef(Archive *fout, const AttrDefInfo *adinfo); +static void collectSequences(Archive *fout); static void dumpSequence(Archive *fout, const TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, const IndxInfo *indxinfo); @@ -971,6 +989,9 @@ main(int argc, char **argv) if (!dopt.no_security_labels) collectSecLabels(fout); + /* Collect sequence information. */ + collectSequences(fout); + /* Lastly, create dummy objects to represent the section boundaries */ boundaryObjs = createBoundaryObjects(); @@ -17557,6 +17578,63 @@ dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo) free(qtabname); } +/* + * bsearch() comparator for SequenceItem + */ +static int +SequenceItemCmp(const void *p1, const void *p2) +{ + SequenceItem v1 = *((const SequenceItem *) p1); + SequenceItem v2 = *((const SequenceItem *) p2); + + return pg_cmp_u32(v1.oid, v2.oid); +} + +/* + * collectSequences + * + * Construct a table of sequence information. This table is sorted by OID for + * speed in lookup. + */ +static void +collectSequences(Archive *fout) +{ + PGresult *res; + const char *query; + + query = "SELECT seqrelid, format_type(seqtypid, NULL), " + "seqstart, seqincrement, " + "seqmax, seqmin, " + "seqcache, seqcycle " + "FROM pg_catalog.pg_sequence " + "ORDER BY seqrelid"; + + res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK); + + nsequences = PQntuples(res); + sequences = (SequenceItem *) pg_malloc(nsequences * sizeof(SequenceItem)); + + for (int i = 0; i < nsequences; i++) + { + size_t seqtype_sz = sizeof(((SequenceItem *) 0)->seqtype); + + sequences[i].oid = atooid(PQgetvalue(res, i, 0)); + + Assert(strlen(PQgetvalue(res, i, 1)) < seqtype_sz); + strncpy(sequences[i].seqtype, PQgetvalue(res, i, 1), seqtype_sz); + sequences[i].seqtype[seqtype_sz - 1] = '\0'; + + sequences[i].startv = strtoi64(PQgetvalue(res, i, 2), NULL, 10); + sequences[i].incby = strtoi64(PQgetvalue(res, i, 3), NULL, 10); + sequences[i].maxv = strtoi64(PQgetvalue(res, i, 4), NULL, 10); + sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10); + sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10); + sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0); + } + + PQclear(res); +} + /* * dumpSequence * write the declaration (not data) of one user-defined sequence @@ -17565,7 +17643,6 @@ static void dumpSequence(Archive *fout, const TableInfo *tbinfo) { DumpOptions *dopt = fout->dopt; - PGresult *res; char seqtype[10]; bool cycled; bool is_ascending; @@ -17585,17 +17662,27 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) if (fout->remoteVersion >= 100000) { - appendPQExpBuffer(query, - "SELECT format_type(seqtypid, NULL), " - "seqstart, seqincrement, " - "seqmax, seqmin, " - "seqcache, seqcycle " - "FROM pg_catalog.pg_sequence " - "WHERE seqrelid = '%u'::oid", - tbinfo->dobj.catId.oid); + SequenceItem key = {0}; + SequenceItem *entry; + + Assert(sequences); + + key.oid = tbinfo->dobj.catId.oid; + entry = bsearch(&key, sequences, nsequences, + sizeof(SequenceItem), SequenceItemCmp); + + strncpy(seqtype, entry->seqtype, sizeof(seqtype)); + startv = entry->startv; + incby = entry->incby; + maxv = entry->maxv; + minv = entry->minv; + cache = entry->cache; + cycled = entry->cycled; } else { + PGresult *res; + /* * Before PostgreSQL 10, sequence metadata is in the sequence itself. * @@ -17607,28 +17694,28 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo) "start_value, increment_by, max_value, min_value, " "cache_value, is_cycled FROM %s", fmtQualifiedDumpable(tbinfo)); - } - - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - - if (PQntuples(res) != 1) - pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)", - "query to get data of sequence \"%s\" returned %d rows (expected 1)", - PQntuples(res)), - tbinfo->dobj.name, PQntuples(res)); - Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype)); - strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype)); - seqtype[sizeof(seqtype) - 1] = '\0'; + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10); - incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10); - maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10); - minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10); - cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10); - cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); + if (PQntuples(res) != 1) + pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)", + "query to get data of sequence \"%s\" returned %d rows (expected 1)", + PQntuples(res)), + tbinfo->dobj.name, PQntuples(res)); + + Assert(strlen(PQgetvalue(res, 0, 0)) < sizeof(seqtype)); + strncpy(seqtype, PQgetvalue(res, 0, 0), sizeof(seqtype)); + seqtype[sizeof(seqtype) - 1] = '\0'; + + startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10); + incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10); + maxv = strtoi64(PQgetvalue(res, 0, 3), NULL, 10); + minv = strtoi64(PQgetvalue(res, 0, 4), NULL, 10); + cache = strtoi64(PQgetvalue(res, 0, 5), NULL, 10); + cycled = (strcmp(PQgetvalue(res, 0, 6), "t") == 0); - PQclear(res); + PQclear(res); + } /* Calculate default limits for a sequence of this type */ is_ascending = (incby >= 0); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e10ff28ee5..ff7a4ca7e1 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2555,6 +2555,7 @@ SeqScan SeqScanState SeqTable SeqTableData +SequenceItem SerCommitSeqNo SerialControl SerialIOData -- 2.25.1
>From b590247938434d4f0fa2360069ad797fd30b7181 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Tue, 30 Apr 2024 19:33:24 -0500 Subject: [PATCH v1 3/3] cache more sequence data --- src/bin/pg_dump/pg_dump.c | 72 +++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 98cc2698ac..02bf4f65e6 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -103,6 +103,8 @@ typedef struct int64 startv; int64 incby; int64 cache; + int64 last_value; + bool is_called; } SequenceItem; typedef enum OidOptions @@ -17602,11 +17604,24 @@ collectSequences(Archive *fout) PGresult *res; const char *query; + if (fout->remoteVersion < 100000) + return; + query = "SELECT seqrelid, format_type(seqtypid, NULL), " "seqstart, seqincrement, " "seqmax, seqmin, " - "seqcache, seqcycle " - "FROM pg_catalog.pg_sequence " + "seqcache, seqcycle, " + "CASE WHEN has_sequence_privilege(seqrelid, 'SELECT,USAGE'::text) " + "AND (pg_is_in_recovery() = 'f' OR c.relpersistence = 'p') " + "THEN pg_sequence_last_value(seqrelid) IS NOT NULL " + "ELSE 'f' END AS is_called, " + "CASE WHEN has_sequence_privilege(seqrelid, 'SELECT,USAGE'::text) " + "AND (pg_is_in_recovery() = 'f' OR c.relpersistence = 'p') " + "AND pg_sequence_last_value(seqrelid) IS NOT NULL " + "THEN pg_sequence_last_value(seqrelid) " + "ELSE seqstart END AS last_value " + "FROM pg_catalog.pg_sequence s " + "JOIN pg_class c ON s.seqrelid = c.oid " "ORDER BY seqrelid"; res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK); @@ -17630,6 +17645,8 @@ collectSequences(Archive *fout) sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10); sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10); sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0); + sequences[i].is_called = (strcmp(PQgetvalue(res, i, 8), "t") == 0); + sequences[i].last_value = strtoi64(PQgetvalue(res, i, 9), NULL, 10); } PQclear(res); @@ -17906,30 +17923,51 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo) { TableInfo *tbinfo = tdinfo->tdtable; - PGresult *res; - char *last; + int64 last; bool called; PQExpBuffer query = createPQExpBuffer(); - appendPQExpBuffer(query, - "SELECT last_value, is_called FROM %s", - fmtQualifiedDumpable(tbinfo)); + if (fout->remoteVersion < 100000) + { + PGresult *res; - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + appendPQExpBuffer(query, + "SELECT last_value, is_called FROM %s", + fmtQualifiedDumpable(tbinfo)); - if (PQntuples(res) != 1) - pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)", - "query to get data of sequence \"%s\" returned %d rows (expected 1)", - PQntuples(res)), - tbinfo->dobj.name, PQntuples(res)); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - last = PQgetvalue(res, 0, 0); - called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0); + if (PQntuples(res) != 1) + pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)", + "query to get data of sequence \"%s\" returned %d rows (expected 1)", + PQntuples(res)), + tbinfo->dobj.name, PQntuples(res)); + + last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10); + called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0); + + PQclear(res); + } + else + { + SequenceItem key = {0}; + SequenceItem *entry; + + Assert(sequences); + Assert(tbinfo->dobj.catId.oid); + + key.oid = tbinfo->dobj.catId.oid; + entry = bsearch(&key, sequences, nsequences, + sizeof(SequenceItem), SequenceItemCmp); + + last = entry->last_value; + called = entry->is_called; + } resetPQExpBuffer(query); appendPQExpBufferStr(query, "SELECT pg_catalog.setval("); appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout); - appendPQExpBuffer(query, ", %s, %s);\n", + appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n", last, (called ? "true" : "false")); if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA) @@ -17943,8 +17981,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo) .deps = &(tbinfo->dobj.dumpId), .nDeps = 1)); - PQclear(res); - destroyPQExpBuffer(query); } -- 2.25.1