Hello.
Added -hackers.
At Wed, 15 Apr 2020 12:14:25 +0200, "Peter J. Holzer" <[email protected]> wrote
in
> On 2020-04-15 12:01:46 +0200, Peter J. Holzer wrote:
> > I'm trying to restore a backup on a different machine and it terminates
> > with the not really helpful messages:
> >
> > pg_restore: [directory archiver] could not close data file: Success
> > pg_restore: [parallel archiver] a worker process died unexpectedly
> [...]
> > My guess is that maybe one of the data files is damaged
>
> As is often the case the matter became obvious a few minutes after
> writing the mail.
>
> There were indeed two file with length 0 in the dump. That happened
> because the backup failed because it couldn't obtain a lock on a table.
>
> I nicer error message (something like "cannot decompress '13503.dat.gz':
> Empty file") would have helped.
Unfortunately, just emptying .dat.gz file doesn't worked for me.
Anyway the message is emitted the following way.
pg_backup_directoy.c:
> if (cfclose(cfp) !=0)
> fatal("could not close data file: %m");
%m doesn't work for some kinds of errors about compressed files but
cfclose conseals the true cause.
I'm surprised to find an old thread about the same issue.
https://www.postgresql.org/message-id/20160307.174354.251049100.horiguchi.kyotaro%40lab.ntt.co.jp
But I don't think it's not acceptable that use fake errno for gzclose,
but cfclose properly passes-through the error code from gzclose, so it
is enought that the caller should recognize the difference.
Please find the attached.
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c
index 1417401086..3a8394d7f2 100644
--- a/src/bin/pg_dump/compress_io.c
+++ b/src/bin/pg_dump/compress_io.c
@@ -645,6 +645,13 @@ cfgets(cfp *fp, char *buf, int len)
return fgets(buf, len, fp->uncompressedfp);
}
+/*
+ * cfclose close the stream
+ *
+ * Returns 0 if successfully closed the cfp. Most of errors are reported as -1
+ * and errno is set. Otherwise the return value is the return value from
+ * gzclose and errno doesn't hold a meangful value.
+ */
int
cfclose(cfp *fp)
{
@@ -665,6 +672,11 @@ cfclose(cfp *fp)
#endif
{
result = fclose(fp->uncompressedfp);
+
+ /* normalize error return, just in case EOF is not -1 */
+ if (result != 0)
+ result = -1;
+
fp->uncompressedfp = NULL;
}
free_keep_errno(fp);
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index c9cce5ed8a..ecc6aa5fbb 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -108,6 +108,7 @@ void
InitArchiveFmt_Directory(ArchiveHandle *AH)
{
lclContext *ctx;
+ int ret;
/* Assuming static functions, this can be copied for each format. */
AH->ArchiveEntryPtr = _ArchiveEntry;
@@ -218,8 +219,14 @@ InitArchiveFmt_Directory(ArchiveHandle *AH)
ReadToc(AH);
/* Nothing else in the file, so close it again... */
- if (cfclose(tocFH) != 0)
- fatal("could not close TOC file: %m");
+ ret = cfclose(tocFH);
+ if (ret < 0)
+ {
+ if (ret == -1)
+ fatal("could not close TOC file: %m");
+ else
+ fatal("could not close TOC file: zlib error (%d)", ret);
+ }
ctx->dataFH = NULL;
}
}
@@ -378,6 +385,7 @@ _PrintFileData(ArchiveHandle *AH, char *filename)
char *buf;
size_t buflen;
cfp *cfp;
+ int ret;
if (!filename)
return;
@@ -396,8 +404,15 @@ _PrintFileData(ArchiveHandle *AH, char *filename)
}
free(buf);
- if (cfclose(cfp) !=0)
- fatal("could not close data file: %m");
+
+ ret = cfclose(cfp);
+ if (ret < 0)
+ {
+ if (ret == -1)
+ fatal("could not close data file: %m");
+ else
+ fatal("could not close data file: zlib error (%d)", ret);
+ }
}
/*
@@ -429,6 +444,7 @@ _LoadBlobs(ArchiveHandle *AH)
lclContext *ctx = (lclContext *) AH->formatData;
char fname[MAXPGPATH];
char line[MAXPGPATH];
+ int ret;
StartRestoreBlobs(AH);
@@ -460,9 +476,16 @@ _LoadBlobs(ArchiveHandle *AH)
fatal("error reading large object TOC file \"%s\"",
fname);
- if (cfclose(ctx->blobsTocFH) != 0)
- fatal("could not close large object TOC file \"%s\": %m",
- fname);
+ ret = cfclose(ctx->blobsTocFH);
+ if (ret < 0)
+ {
+ if (ret == -1)
+ fatal("could not close large object TOC file \"%s\": %m",
+ fname);
+ else
+ fatal("could not close large object TOC file \"%s\": zlib error (%d)",
+ fname, ret);
+ }
ctx->blobsTocFH = NULL;
@@ -555,6 +578,7 @@ _CloseArchive(ArchiveHandle *AH)
{
cfp *tocFH;
char fname[MAXPGPATH];
+ int ret;
setFilePath(AH, fname, "toc.dat");
@@ -576,8 +600,14 @@ _CloseArchive(ArchiveHandle *AH)
WriteHead(AH);
AH->format = archDirectory;
WriteToc(AH);
- if (cfclose(tocFH) != 0)
- fatal("could not close TOC file: %m");
+ ret = cfclose(tocFH);
+ if (ret < 0)
+ {
+ if (ret == -1)
+ fatal("could not close TOC file: %m");
+ else
+ fatal("could not close TOC file: zlib error (%d)", ret);
+ }
WriteDataChunks(AH, ctx->pstate);
ParallelBackupEnd(AH, ctx->pstate);