Hi Daniel,
Thank you for your answer.
On 9/28/23 14:02, Daniel Gustafsson wrote:
On 28 Sep 2023, at 10:14, Frédéric Yhuel <frederic.yh...@dalibo.com> wrote:
After some time, we understood that the 20 million large objects were
responsible for the huge memory usage (more than 10 GB) by pg_dump.
This sounds like a known issue [0] which has been reported several times, and
one we should get around to fixing sometime.
Indeed, I saw some of these reports afterwards :)
I think a more useful error message would help for such cases.
Knowing that this is a case that pops up, I agree that we could do better around
the messaging here.
I haven't tried to get the patch ready for review — I know that the format of the
messages isn't right; I'd like to know what you think of the idea first.
I don't think adding more details is a bad idea, but it shouldn't require any
knowledge about internals, so I think messages like the one below need to be
reworded to be more helpful.
+ if (loinfo == NULL)
+ {
+ pg_fatal("getLOs: out of memory");
+ }
OK, here is a second version of the patch.
I didn't try to fix the path getLOs -> AssignDumpId -> catalogid_insert
-> [...] -> catalogid_allocate, but that's annoying because it amounts
to 11% of the memory allocations from valgrind's output.
Best regards,
Frédéric
From 575903c1fd4ccbe49c762d1d6424e2264ba6cfad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Yhuel?= <frederic.yh...@dalibo.com>
Date: Mon, 18 Sep 2023 08:18:19 +0200
Subject: [PATCH] pg_dump: fix OOM handling
Exit with better error messages when there's not enough memory
to process large objects.
---
src/bin/pg_dump/pg_backup_archiver.c | 45 +++++++++++++++++++++-------
src/bin/pg_dump/pg_backup_archiver.h | 4 +++
src/bin/pg_dump/pg_dump.c | 33 ++++++++++++++++----
src/common/fe_memutils.c | 14 +++++++--
src/include/common/fe_memutils.h | 1 +
5 files changed, 80 insertions(+), 17 deletions(-)
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index ab351e457e..fedbe67a07 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -1101,6 +1101,22 @@ WriteData(Archive *AHX, const void *data, size_t dLen)
AH->WriteDataPtr(AH, data, dLen);
}
+#define fill_in_new_toc(a,b) \
+do { \
+ if (opts->b) \
+ { \
+ newToc->a = pg_strdup_extended(opts->b, MCXT_ALLOC_NO_OOM); \
+ if (newToc->a == NULL) \
+ { \
+ goto error; \
+ } \
+ } \
+ else \
+ { \
+ newToc->a = NULL; \
+ } \
+} while(0)
+
/*
* Create a new TOC entry. The TOC was designed as a TOC, but is now the
* repository for all metadata. But the name has stuck.
@@ -1118,7 +1134,11 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
ArchiveHandle *AH = (ArchiveHandle *) AHX;
TocEntry *newToc;
- newToc = (TocEntry *) pg_malloc0(sizeof(TocEntry));
+ newToc = (TocEntry *) pg_malloc_extended(sizeof(TocEntry), MCXT_ALLOC_NO_OOM|MCXT_ALLOC_ZERO);
+ if (newToc == NULL)
+ {
+ goto error;
+ }
AH->tocCount++;
if (dumpId > AH->maxDumpId)
@@ -1133,15 +1153,15 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
newToc->dumpId = dumpId;
newToc->section = opts->section;
- newToc->tag = pg_strdup(opts->tag);
- newToc->namespace = opts->namespace ? pg_strdup(opts->namespace) : NULL;
- newToc->tablespace = opts->tablespace ? pg_strdup(opts->tablespace) : NULL;
- newToc->tableam = opts->tableam ? pg_strdup(opts->tableam) : NULL;
- newToc->owner = opts->owner ? pg_strdup(opts->owner) : NULL;
- newToc->desc = pg_strdup(opts->description);
- newToc->defn = opts->createStmt ? pg_strdup(opts->createStmt) : NULL;
- newToc->dropStmt = opts->dropStmt ? pg_strdup(opts->dropStmt) : NULL;
- newToc->copyStmt = opts->copyStmt ? pg_strdup(opts->copyStmt) : NULL;
+ fill_in_new_toc(tag, tag);
+ fill_in_new_toc(namespace, namespace);
+ fill_in_new_toc(tablespace, tablespace);
+ fill_in_new_toc(tableam, tableam);
+ fill_in_new_toc(owner, owner);
+ fill_in_new_toc(desc, description);
+ fill_in_new_toc(defn, createStmt);
+ fill_in_new_toc(dropStmt, dropStmt);
+ fill_in_new_toc(copyStmt, copyStmt);
if (opts->nDeps > 0)
{
@@ -1166,6 +1186,11 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
AH->ArchiveEntryPtr(AH, newToc);
return newToc;
+
+error:
+ pg_log_error("Could not add a new archive entry: %s", strerror(errno));
+ pg_log_error_hint(LO_OOM_HINT);
+ exit(1);
}
/* Public */
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17ab..626fe2cb11 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -97,6 +97,10 @@ typedef struct _archiveHandle ArchiveHandle;
typedef struct _tocEntry TocEntry;
struct ParallelState;
+#define LO_OOM_HINT "If the database contains a large number of large objects, "\
+ "consider using the \"-B\" option to exclude them from the dump "\
+ "if it makes sense. Otherwise, add more memory to your machine."
+
#define READ_ERROR_EXIT(fd) \
do { \
if (feof(fd)) \
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 7977d6a9c0..bc76224b5d 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3566,7 +3566,11 @@ getLOs(Archive *fout)
/*
* Each large object has its own "BLOB" archive entry.
*/
- loinfo = (LoInfo *) pg_malloc(ntups * sizeof(LoInfo));
+ loinfo = (LoInfo *) pg_malloc_extended(ntups * sizeof(LoInfo), MCXT_ALLOC_NO_OOM);
+ if (loinfo == NULL)
+ {
+ goto error;
+ }
for (i = 0; i < ntups; i++)
{
@@ -3575,9 +3579,18 @@ getLOs(Archive *fout)
loinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
AssignDumpId(&loinfo[i].dobj);
- loinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_oid));
- loinfo[i].dacl.acl = pg_strdup(PQgetvalue(res, i, i_lomacl));
- loinfo[i].dacl.acldefault = pg_strdup(PQgetvalue(res, i, i_acldefault));
+ loinfo[i].dobj.name = pg_strdup_extended(PQgetvalue(res, i, i_oid), MCXT_ALLOC_NO_OOM);
+ if (loinfo[i].dobj.name == NULL)
+ goto error;
+
+ loinfo[i].dacl.acl = pg_strdup_extended(PQgetvalue(res, i, i_lomacl), MCXT_ALLOC_NO_OOM);
+ if (loinfo[i].dacl.acl == NULL)
+ goto error;
+
+ loinfo[i].dacl.acldefault = pg_strdup_extended(PQgetvalue(res, i, i_acldefault), MCXT_ALLOC_NO_OOM);
+ if (loinfo[i].dacl.acldefault == NULL)
+ goto error;
+
loinfo[i].dacl.privtype = 0;
loinfo[i].dacl.initprivs = NULL;
loinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_lomowner));
@@ -3606,7 +3619,12 @@ getLOs(Archive *fout)
*/
if (ntups > 0)
{
- lodata = (DumpableObject *) pg_malloc(sizeof(DumpableObject));
+ lodata = (DumpableObject *) pg_malloc_extended(sizeof(DumpableObject), MCXT_ALLOC_NO_OOM);
+ if (lodata == NULL)
+ {
+ goto error;
+ }
+
lodata->objType = DO_LARGE_OBJECT_DATA;
lodata->catId = nilCatalogId;
AssignDumpId(lodata);
@@ -3616,6 +3634,11 @@ getLOs(Archive *fout)
PQclear(res);
destroyPQExpBuffer(loQry);
+ return;
+error:
+ pg_log_error("Could not get schema-level data about large objects: %s", strerror(errno));
+ pg_log_error_hint(LO_OOM_HINT);
+ exit(1);
}
/*
diff --git a/src/common/fe_memutils.c b/src/common/fe_memutils.c
index 3bad81eafc..4d2077e03b 100644
--- a/src/common/fe_memutils.c
+++ b/src/common/fe_memutils.c
@@ -83,6 +83,12 @@ pg_realloc(void *ptr, size_t size)
*/
char *
pg_strdup(const char *in)
+{
+ return pg_strdup_extended(in, 0);
+}
+
+char *
+pg_strdup_extended(const char *in, int flags)
{
char *tmp;
@@ -95,8 +101,12 @@ pg_strdup(const char *in)
tmp = strdup(in);
if (!tmp)
{
- fprintf(stderr, _("out of memory\n"));
- exit(EXIT_FAILURE);
+ if ((flags & MCXT_ALLOC_NO_OOM) == 0)
+ {
+ fprintf(stderr, _("out of memory\n"));
+ exit(EXIT_FAILURE);
+ }
+ return NULL;
}
return tmp;
}
diff --git a/src/include/common/fe_memutils.h b/src/include/common/fe_memutils.h
index 89601cc778..9bbb618f40 100644
--- a/src/include/common/fe_memutils.h
+++ b/src/include/common/fe_memutils.h
@@ -23,6 +23,7 @@
* (except pg_malloc_extended with MCXT_ALLOC_NO_OOM)
*/
extern char *pg_strdup(const char *in);
+extern char *pg_strdup_extended(const char *in, int flags);
extern void *pg_malloc(size_t size);
extern void *pg_malloc0(size_t size);
extern void *pg_malloc_extended(size_t size, int flags);
--
2.39.2