I've recently committed some optimizations for dumping sequences and
pg_class information (commits 68e9629, bd15b7d, and 2329cad), and I noticed
that we are also executing a query per function in pg_dump. Commit be85727
optimized this by preparing the query ahead of time, but I found that we
can improve performance further by gathering all the relevant data in a
single query. Here are the results I see for a database with 10k simple
functions with and without the attached patch:
with patch:
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.04s user 0.01s system 40% cpu 0.118
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.04s user 0.01s system 41% cpu 0.107
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.04s user 0.01s system 42% cpu 0.103
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.04s user 0.01s system 44% cpu 0.105
total
without patch:
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.05s user 0.03s system 32% cpu 0.253
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.05s user 0.03s system 32% cpu 0.252
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.06s user 0.03s system 32% cpu 0.251
total
$ time pg_dump postgres >/dev/null
pg_dump postgres > /dev/null 0.06s user 0.03s system 33% cpu 0.254
total
This one looks a little different than the sequence/pg_class commits. Much
of the function information isn't terribly conducive to parsing into
fixed-size variables in an array, so instead I've opted to just leave the
PGresult around for reference by dumpFunc(). This patch also creates an
ordered array of function OIDs to speed up locating the relevant index in
the PGresult for use in calls to PQgetvalue().
I may be running out of opportunities where this style of optimization
makes much difference. I'll likely start focusing on the restore side
soon.
--
nathan
>From da388f00c57ebc743e9229f0f306b074d35b0be5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Wed, 31 Jul 2024 16:47:44 -0500
Subject: [PATCH v1 1/1] optimize dumpFunc()
---
src/bin/pg_dump/pg_backup.h | 1 -
src/bin/pg_dump/pg_dump.c | 232 ++++++++++++++++++++----------------
2 files changed, 132 insertions(+), 101 deletions(-)
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index fbf5f1c515..c1e496ee71 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -68,7 +68,6 @@ enum _dumpPreparedQueries
PREPQUERY_DUMPCOMPOSITETYPE,
PREPQUERY_DUMPDOMAIN,
PREPQUERY_DUMPENUMTYPE,
- PREPQUERY_DUMPFUNC,
PREPQUERY_DUMPOPR,
PREPQUERY_DUMPRANGETYPE,
PREPQUERY_DUMPTABLEATTACH,
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 79190470f7..6297ac0599 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -209,6 +209,10 @@ static int nbinaryUpgradeClassOids = 0;
static SequenceItem *sequences = NULL;
static int nsequences = 0;
+/* functions */
+static Oid *funcoids = NULL;
+static PGresult *funcs = NULL;
+
/*
* The default number of rows per INSERT when
* --inserts is specified without --rows-per-insert
@@ -289,6 +293,7 @@ static void dumpCompositeTypeColComments(Archive *fout,
const TypeInfo *tyinfo,
PGresult *res);
static void dumpShellType(Archive *fout, const ShellTypeInfo *stinfo);
static void dumpProcLang(Archive *fout, const ProcLangInfo *plang);
+static void collectFuncs(Archive *fout);
static void dumpFunc(Archive *fout, const FuncInfo *finfo);
static void dumpCast(Archive *fout, const CastInfo *cast);
static void dumpTransform(Archive *fout, const TransformInfo *transform);
@@ -1032,6 +1037,10 @@ main(int argc, char **argv)
/* Collect sequence information. */
collectSequences(fout);
+ /* Collect function information. */
+ if (!dopt.dataOnly)
+ collectFuncs(fout);
+
/* Lastly, create dummy objects to represent the section boundaries */
boundaryObjs = createBoundaryObjects();
@@ -12065,6 +12074,101 @@ format_function_signature(Archive *fout, const
FuncInfo *finfo, bool honor_quote
return fn.data;
}
+/*
+ * bsearch() comparator for Oid
+ */
+static int
+OidCmp(const void *p1, const void *p2)
+{
+ Oid o1 = *((const Oid *) p1);
+ Oid o2 = *((const Oid *) p2);
+
+ return pg_cmp_u32(o1, o2);
+}
+
+/*
+ * collectFuncs
+ *
+ * Obtain function metadata and construct an ordered array of function OIDs for
+ * use by dumpFunc() to quickly find the index of a function entry.
+ */
+static void
+collectFuncs(Archive *fout)
+{
+ PQExpBuffer query = createPQExpBuffer();
+
+ Assert(!funcs);
+
+ appendPQExpBufferStr(query,
+ "SELECT\n"
+ "p.oid\n"
+ "proretset,\n"
+ "prosrc,\n"
+ "probin,\n"
+ "provolatile,\n"
+ "proisstrict,\n"
+ "prosecdef,\n"
+ "lanname,\n"
+ "proconfig,\n"
+ "procost,\n"
+ "prorows,\n"
+
"pg_catalog.pg_get_function_arguments(p.oid) AS funcargs,\n"
+
"pg_catalog.pg_get_function_identity_arguments(p.oid) AS funciargs,\n"
+
"pg_catalog.pg_get_function_result(p.oid) AS funcresult,\n"
+ "proleakproof,\n");
+
+ if (fout->remoteVersion >= 90500)
+ appendPQExpBufferStr(query,
+
"array_to_string(protrftypes, ' ') AS protrftypes,\n");
+ else
+ appendPQExpBufferStr(query,
+ "NULL AS
protrftypes,\n");
+
+ if (fout->remoteVersion >= 90600)
+ appendPQExpBufferStr(query,
+ "proparallel,\n");
+ else
+ appendPQExpBufferStr(query,
+ "'u' AS
proparallel,\n");
+
+ if (fout->remoteVersion >= 110000)
+ appendPQExpBufferStr(query,
+ "prokind,\n");
+ else
+ appendPQExpBufferStr(query,
+ "CASE WHEN proiswindow
THEN 'w' ELSE 'f' END AS prokind,\n");
+
+ if (fout->remoteVersion >= 120000)
+ appendPQExpBufferStr(query,
+ "prosupport,\n");
+ else
+ appendPQExpBufferStr(query,
+ "'-' AS
prosupport,\n");
+
+ if (fout->remoteVersion >= 140000)
+ appendPQExpBufferStr(query,
+
"pg_get_function_sqlbody(p.oid) AS prosqlbody\n");
+ else
+ appendPQExpBufferStr(query,
+ "NULL AS
prosqlbody\n");
+
+ appendPQExpBufferStr(query,
+ "FROM pg_catalog.pg_proc p,
pg_catalog.pg_language l\n"
+ "WHERE l.oid = p.prolang\n"
+ "ORDER BY p.oid");
+
+ funcs = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+ funcoids = (Oid *) pg_malloc(PQntuples(funcs) * sizeof(Oid));
+
+ for (int i = 0; i < PQntuples(funcs); i++)
+ funcoids[i] = atooid(PQgetvalue(funcs, i, 0));
+
+ /*
+ * NB: we intentionally do not free the PGresult storage because
+ * dumpFunc() will reference it.
+ */
+ destroyPQExpBuffer(query);
+}
/*
* dumpFunc:
@@ -12074,11 +12178,9 @@ static void
dumpFunc(Archive *fout, const FuncInfo *finfo)
{
DumpOptions *dopt = fout->dopt;
- PQExpBuffer query;
PQExpBuffer q;
PQExpBuffer delqry;
PQExpBuffer asPart;
- PGresult *res;
char *funcsig; /* identity signature */
char *funcfullsig = NULL; /* full signature */
char *funcsig_tag;
@@ -12105,118 +12207,51 @@ dumpFunc(Archive *fout, const FuncInfo *finfo)
char **configitems = NULL;
int nconfigitems = 0;
const char *keyword;
+ Oid *funcoid;
+ int funcidx;
/* Do nothing in data-only dump */
if (dopt->dataOnly)
return;
- query = createPQExpBuffer();
+ Assert(funcs);
+
q = createPQExpBuffer();
delqry = createPQExpBuffer();
asPart = createPQExpBuffer();
- if (!fout->is_prepared[PREPQUERY_DUMPFUNC])
- {
- /* Set up query for function-specific details */
- appendPQExpBufferStr(query,
- "PREPARE
dumpFunc(pg_catalog.oid) AS\n");
-
- appendPQExpBufferStr(query,
- "SELECT\n"
- "proretset,\n"
- "prosrc,\n"
- "probin,\n"
- "provolatile,\n"
- "proisstrict,\n"
- "prosecdef,\n"
- "lanname,\n"
- "proconfig,\n"
- "procost,\n"
- "prorows,\n"
-
"pg_catalog.pg_get_function_arguments(p.oid) AS funcargs,\n"
-
"pg_catalog.pg_get_function_identity_arguments(p.oid) AS funciargs,\n"
-
"pg_catalog.pg_get_function_result(p.oid) AS funcresult,\n"
- "proleakproof,\n");
-
- if (fout->remoteVersion >= 90500)
- appendPQExpBufferStr(query,
-
"array_to_string(protrftypes, ' ') AS protrftypes,\n");
- else
- appendPQExpBufferStr(query,
- "NULL AS
protrftypes,\n");
-
- if (fout->remoteVersion >= 90600)
- appendPQExpBufferStr(query,
-
"proparallel,\n");
- else
- appendPQExpBufferStr(query,
- "'u' AS
proparallel,\n");
+ funcoid = bsearch(&finfo->dobj.catId.oid, funcoids,
+ PQntuples(funcs), sizeof(Oid),
OidCmp);
+ funcidx = funcoid - funcoids;
- if (fout->remoteVersion >= 110000)
- appendPQExpBufferStr(query,
- "prokind,\n");
- else
- appendPQExpBufferStr(query,
- "CASE WHEN
proiswindow THEN 'w' ELSE 'f' END AS prokind,\n");
-
- if (fout->remoteVersion >= 120000)
- appendPQExpBufferStr(query,
-
"prosupport,\n");
- else
- appendPQExpBufferStr(query,
- "'-' AS
prosupport,\n");
-
- if (fout->remoteVersion >= 140000)
- appendPQExpBufferStr(query,
-
"pg_get_function_sqlbody(p.oid) AS prosqlbody\n");
- else
- appendPQExpBufferStr(query,
- "NULL AS
prosqlbody\n");
-
- appendPQExpBufferStr(query,
- "FROM
pg_catalog.pg_proc p, pg_catalog.pg_language l\n"
- "WHERE p.oid = $1 "
- "AND l.oid =
p.prolang");
-
- ExecuteSqlStatement(fout, query->data);
-
- fout->is_prepared[PREPQUERY_DUMPFUNC] = true;
- }
-
- printfPQExpBuffer(query,
- "EXECUTE dumpFunc('%u')",
- finfo->dobj.catId.oid);
-
- res = ExecuteSqlQueryForSingleRow(fout, query->data);
-
- proretset = PQgetvalue(res, 0, PQfnumber(res, "proretset"));
- if (PQgetisnull(res, 0, PQfnumber(res, "prosqlbody")))
+ proretset = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "proretset"));
+ if (PQgetisnull(funcs, funcidx, PQfnumber(funcs, "prosqlbody")))
{
- prosrc = PQgetvalue(res, 0, PQfnumber(res, "prosrc"));
- probin = PQgetvalue(res, 0, PQfnumber(res, "probin"));
+ prosrc = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "prosrc"));
+ probin = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "probin"));
prosqlbody = NULL;
}
else
{
prosrc = NULL;
probin = NULL;
- prosqlbody = PQgetvalue(res, 0, PQfnumber(res, "prosqlbody"));
- }
- funcargs = PQgetvalue(res, 0, PQfnumber(res, "funcargs"));
- funciargs = PQgetvalue(res, 0, PQfnumber(res, "funciargs"));
- funcresult = PQgetvalue(res, 0, PQfnumber(res, "funcresult"));
- protrftypes = PQgetvalue(res, 0, PQfnumber(res, "protrftypes"));
- prokind = PQgetvalue(res, 0, PQfnumber(res, "prokind"));
- provolatile = PQgetvalue(res, 0, PQfnumber(res, "provolatile"));
- proisstrict = PQgetvalue(res, 0, PQfnumber(res, "proisstrict"));
- prosecdef = PQgetvalue(res, 0, PQfnumber(res, "prosecdef"));
- proleakproof = PQgetvalue(res, 0, PQfnumber(res, "proleakproof"));
- proconfig = PQgetvalue(res, 0, PQfnumber(res, "proconfig"));
- procost = PQgetvalue(res, 0, PQfnumber(res, "procost"));
- prorows = PQgetvalue(res, 0, PQfnumber(res, "prorows"));
- prosupport = PQgetvalue(res, 0, PQfnumber(res, "prosupport"));
- proparallel = PQgetvalue(res, 0, PQfnumber(res, "proparallel"));
- lanname = PQgetvalue(res, 0, PQfnumber(res, "lanname"));
+ prosqlbody = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"prosqlbody"));
+ }
+ funcargs = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "funcargs"));
+ funciargs = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "funciargs"));
+ funcresult = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "funcresult"));
+ protrftypes = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"protrftypes"));
+ prokind = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "prokind"));
+ provolatile = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"provolatile"));
+ proisstrict = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"proisstrict"));
+ prosecdef = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "prosecdef"));
+ proleakproof = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"proleakproof"));
+ proconfig = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "proconfig"));
+ procost = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "procost"));
+ prorows = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "prorows"));
+ prosupport = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "prosupport"));
+ proparallel = PQgetvalue(funcs, funcidx, PQfnumber(funcs,
"proparallel"));
+ lanname = PQgetvalue(funcs, funcidx, PQfnumber(funcs, "lanname"));
/*
* See backend/commands/functioncmds.c for details of how the 'AS'
clause
@@ -12469,9 +12504,6 @@ dumpFunc(Archive *fout, const FuncInfo *finfo)
finfo->dobj.namespace->dobj.name,
NULL, finfo->rolname, &finfo->dacl);
- PQclear(res);
-
- destroyPQExpBuffer(query);
destroyPQExpBuffer(q);
destroyPQExpBuffer(delqry);
destroyPQExpBuffer(asPart);
--
2.39.3 (Apple Git-146)