While examining pg_upgrade on a cluster with many tables (created with the command in [0]), I noticed that a huge amount of pg_dump time goes towards the binary_upgrade_set_pg_class_oids() function. This function executes a rather expensive query for a single row, and this function appears to be called for most of the rows in pg_class.
The attached work-in-progress patch speeds up 'pg_dump --binary-upgrade' for this case. Instead of executing the query in every call to the function, we can execute it once during the first call and store all the required information in a sorted array that we can bsearch() in future calls. For the aformentioned test, pg_dump on my machine goes from ~2 minutes to ~18 seconds, which is much closer to the ~14 seconds it takes without --binary-upgrade. One downside of this approach is the memory usage. This was more-or-less the first approach that crossed my mind, so I wouldn't be surprised if there's a better way. I tried to keep the pg_dump output the same, but if that isn't important, maybe we could dump all the pg_class OIDs at once instead of calling binary_upgrade_set_pg_class_oids() for each one. [0] https://postgr.es/m/3612876.1689443232%40sss.pgh.pa.us -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 27b4a3249dd97376f13a7c99505330ab7cd78e3f Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Wed, 17 Apr 2024 22:55:27 -0500 Subject: [PATCH v1 1/1] Improve performance of pg_dump --binary-upgrade. --- src/bin/pg_dump/pg_dump.c | 113 +++++++++++++++++++------------ src/tools/pgindent/typedefs.list | 1 + 2 files changed, 70 insertions(+), 44 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index c52e961b30..d93d974108 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -55,6 +55,7 @@ #include "catalog/pg_trigger_d.h" #include "catalog/pg_type_d.h" #include "common/connect.h" +#include "common/int.h" #include "common/relpath.h" #include "compress_io.h" #include "dumputils.h" @@ -99,6 +100,17 @@ typedef enum OidOptions zeroAsNone = 4, } OidOptions; +typedef struct +{ + Oid oid; + char relkind; + RelFileNumber relfilenode; + Oid reltoastrelid; + RelFileNumber toast_relfilenode; + Oid indexrelid; + RelFileNumber toast_index_relfilenode; +} BinaryUpgradeClassOids; + /* global decls */ static bool dosync = true; /* Issue fsync() to make dump durable on disk. */ @@ -5392,19 +5404,56 @@ binary_upgrade_set_type_oids_by_rel(Archive *fout, pg_type_oid, false, false); } +static int +BinaryUpgradeClassOidsCmp(const void *p1, const void *p2) +{ + BinaryUpgradeClassOids v1 = *((const BinaryUpgradeClassOids *) p1); + BinaryUpgradeClassOids v2 = *((const BinaryUpgradeClassOids *) p2); + + return pg_cmp_u32(v1.oid, v2.oid); +} + static void binary_upgrade_set_pg_class_oids(Archive *fout, PQExpBuffer upgrade_buffer, Oid pg_class_oid, bool is_index) { - PQExpBuffer upgrade_query = createPQExpBuffer(); - PGresult *upgrade_res; - RelFileNumber relfilenumber; - Oid toast_oid; - RelFileNumber toast_relfilenumber; - char relkind; - Oid toast_index_oid; - RelFileNumber toast_index_relfilenumber; + static BinaryUpgradeClassOids *oids = NULL; + static int oids_len = 0; + BinaryUpgradeClassOids key = {0, '?', 0, 0, 0, 0, 0}; + BinaryUpgradeClassOids *entry; + + if (oids == NULL) + { + PGresult *res; + + res = ExecuteSqlQuery(fout, + "SELECT c.oid, c.relkind, c.relfilenode, c.reltoastrelid, ct.relfilenode AS toast_relfilenode, " + "i.indexrelid, cti.relfilenode AS toast_index_relfilenode " + "FROM pg_catalog.pg_class c LEFT JOIN " + "pg_catalog.pg_index i ON (c.reltoastrelid = i.indrelid AND i.indisvalid) " + "LEFT JOIN pg_catalog.pg_class ct ON (c.reltoastrelid = ct.oid) " + "LEFT JOIN pg_catalog.pg_class AS cti ON (i.indexrelid = cti.oid) " + "ORDER BY c.oid;", + PGRES_TUPLES_OK); + + oids_len = PQntuples(res); + oids = (BinaryUpgradeClassOids *) + pg_malloc(oids_len * sizeof(BinaryUpgradeClassOids)); + + for (int i = 0; i < oids_len; i++) + { + oids[i].oid = atooid(PQgetvalue(res, i, 0)); + oids[i].relkind = *PQgetvalue(res, i, 1); + oids[i].relfilenode = atooid(PQgetvalue(res, i, 2)); + oids[i].reltoastrelid = atooid(PQgetvalue(res, i, 3)); + oids[i].toast_relfilenode = atooid(PQgetvalue(res, i, 4)); + oids[i].indexrelid = atooid(PQgetvalue(res, i, 5)); + oids[i].toast_index_relfilenode = atooid(PQgetvalue(res, i, 6)); + } + + PQclear(res); + } /* * Preserve the OID and relfilenumber of the table, table's index, table's @@ -5417,29 +5466,9 @@ binary_upgrade_set_pg_class_oids(Archive *fout, * by the new backend, so we can copy the files during binary upgrade * without worrying about this case. */ - appendPQExpBuffer(upgrade_query, - "SELECT c.relkind, c.relfilenode, c.reltoastrelid, ct.relfilenode AS toast_relfilenode, i.indexrelid, cti.relfilenode AS toast_index_relfilenode " - "FROM pg_catalog.pg_class c LEFT JOIN " - "pg_catalog.pg_index i ON (c.reltoastrelid = i.indrelid AND i.indisvalid) " - "LEFT JOIN pg_catalog.pg_class ct ON (c.reltoastrelid = ct.oid) " - "LEFT JOIN pg_catalog.pg_class AS cti ON (i.indexrelid = cti.oid) " - "WHERE c.oid = '%u'::pg_catalog.oid;", - pg_class_oid); - - upgrade_res = ExecuteSqlQueryForSingleRow(fout, upgrade_query->data); - - relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind")); - - relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "relfilenode"))); - toast_oid = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "reltoastrelid"))); - toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_relfilenode"))); - toast_index_oid = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "indexrelid"))); - toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_index_relfilenode"))); + key.oid = pg_class_oid; + entry = bsearch(&key, oids, oids_len, sizeof(BinaryUpgradeClassOids), + BinaryUpgradeClassOidsCmp); appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); @@ -5455,35 +5484,33 @@ binary_upgrade_set_pg_class_oids(Archive *fout, * partitioned tables have a relfilenumber, which should not be * preserved when upgrading. */ - if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE) + if (RelFileNumberIsValid(entry->relfilenode) && entry->relkind != RELKIND_PARTITIONED_TABLE) appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", - relfilenumber); + entry->relfilenode); /* * In a pre-v12 database, partitioned tables might be marked as having * toast tables, but we should ignore them if so. */ - if (OidIsValid(toast_oid) && - relkind != RELKIND_PARTITIONED_TABLE) + if (OidIsValid(entry->reltoastrelid) && + entry->relkind != RELKIND_PARTITIONED_TABLE) { appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", - toast_oid); + entry->reltoastrelid); appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n", - toast_relfilenumber); + entry->toast_relfilenode); /* every toast table has an index */ appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", - toast_index_oid); + entry->indexrelid); appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", - toast_index_relfilenumber); + entry->toast_index_relfilenode); } - - PQclear(upgrade_res); } else { @@ -5493,12 +5520,10 @@ binary_upgrade_set_pg_class_oids(Archive *fout, pg_class_oid); appendPQExpBuffer(upgrade_buffer, "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", - relfilenumber); + entry->relfilenode); } appendPQExpBufferChar(upgrade_buffer, '\n'); - - destroyPQExpBuffer(upgrade_query); } /* diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d551ada325..c8062a1a20 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -255,6 +255,7 @@ BernoulliSamplerData BgWorkerStartTime BgwHandleStatus BinaryArithmFunc +BinaryUpgradeClassOids BindParamCbData BipartiteMatchState BitString -- 2.25.1