At Thu, 03 Jun 2021 21:52:08 +0900 (JST), Kyotaro Horiguchi
<[email protected]> wrote in
>
> https://www.postgresql.org/docs/14/continuous-archiving.html
> > The archive command should generally be designed to refuse to
> > overwrite any pre-existing archive file. This is an important safety
> > feature to preserve the integrity of your archive in case of
> > administrator error (such as sending the output of two different
> > servers to the same archive directory).
>
> I'm not sure how we should treat this. Since the archive must store
> the files actually applied to the server data, the fact that a file
> has already been archived cannot by itself be a reason to omit
> archiving. We need to make sure the new file is byte-identical to the
> already-archived version. We could compare just the *restored* file
> to the same file in pg_wal, but that might be too much of a penalty
> for the benefit. (Attached second file.)
(To recap: in a replication setup that uses WAL archiving, startup tries
to restore WAL files from the archive before checking the pg_wal
directory for the desired file. That behavior is intentional and
reasonable. However, the restore code issues an archive notification
for a restored file regardless of whether it has already been archived.
If archive_command is written to return an error on overwrite, as the
documentation suggests, this leads to archive failure.)
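For reference, an overwrite-refusing archive_command along the lines of
the documentation's example looks like this (the archive directory is
purely illustrative):

  archive_command = 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'

With such a command, re-archiving an already-archived segment makes
archive_command return nonzero, so the archiver retries that file
indefinitely and never moves on.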
After playing with this, I can reproduce the problem just by restarting
a standby in a simple archive-replication setup, with no special
prerequisites. So I think this is worth fixing.
With this patch, KeepFileRestoredFromArchive compares the contents of
the just-restored file and the existing file for the same segment only
when:
- archive_mode = always, and
- the file to restore already exists in pg_wal, and
- it has a .done and/or .ready status file,
which is not the usual case. The function then skips the archive
notification if the contents are identical. The included TAP test
passes on both Linux and Windows.
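For reference, the new test can be run on its own with something like
the following from a configured source tree (PROVE_TESTS restricts the
recovery suite to the one script):

  make -C src/test/recovery check PROVE_TESTS=t/020_archive_status.pl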
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl
index 777bf05274..ed5e80327b 100644
--- a/src/test/recovery/t/020_archive_status.pl
+++ b/src/test/recovery/t/020_archive_status.pl
@@ -8,7 +8,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 16;
+use Test::More tests => 17;
use Config;
my $primary = get_new_node('primary');
@@ -176,6 +176,7 @@ ok( -f "$standby1_data/$segment_path_2_done",
# will cause archiving failures.
my $standby2 = get_new_node('standby2');
$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby2->enable_archiving;
$standby2->append_conf('postgresql.conf', 'archive_mode = always');
my $standby2_data = $standby2->data_dir;
$standby2->start;
@@ -234,3 +235,49 @@ ok( -f "$standby2_data/$segment_path_1_done"
&& -f "$standby2_data/$segment_path_2_done",
".done files created after archive success with archive_mode=always on standby"
);
+
+
+# Check that restarting an archive-replicated standby doesn't archive
+# the same file twice.
+
+# Use a duplicate-resistant archive_command
+my $archpath = TestLib::perl2host($standby2->archive_dir);
+$archpath =~ s{\\}{\\\\}g if ($TestLib::windows_os);
+my $nodup_command =
+ $TestLib::windows_os
+ ? qq{if not exist "$archpath\\\\%f" (copy "%p" "$archpath\\\\%f") else (exit 1)}
+ : qq{test ! -f "$archpath/%f" && cp "%p" "$archpath/%f"};
+$standby2->safe_psql(
+ 'postgres', qq{
+ ALTER SYSTEM SET archive_command TO '$nodup_command';
+ SELECT pg_reload_conf();
+});
+
+# Remember the current segment file. The +1 below is an adjustment to
+# avoid segment borders.
+my $cursegfile = $primary->safe_psql(
+ 'postgres',
+ 'SELECT pg_walfile_name(pg_current_wal_lsn() + 1)');
+$primary->psql('postgres', 'CHECKPOINT; SELECT pg_switch_wal();');
+$standby2->psql('postgres', 'CHECKPOINT');
+$standby2->poll_query_until(
+ 'postgres',
+ "SELECT last_archived_wal = '$cursegfile' FROM pg_stat_archiver")
+ or die "Timed out waiting for the segment to be archived";
+
+$standby2->restart;
+
+$primary->psql('postgres', 'CHECKPOINT; SELECT pg_switch_wal();');
+
+$standby2->poll_query_until(
+ 'postgres', qq[
+ SELECT last_archived_wal IS NOT NULL OR
+ last_failed_wal IS NOT NULL
+ FROM pg_stat_archiver]);
+
+my $result =
+ $standby2->safe_psql(
+ 'postgres',
+ "SELECT last_archived_wal, last_failed_wal FROM pg_stat_archiver");
+
+ok($result =~ /^[^|]+\|$/, 'archiving proceeds without failure after restart');
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 26b023e754..7d94e7f963 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -382,6 +382,7 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
{
char xlogfpath[MAXPGPATH];
bool reload = false;
+ bool already_archived = false;
struct stat statbuf;
snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
@@ -416,6 +417,68 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
/* same-size buffers, so this never truncates */
strlcpy(oldpath, xlogfpath, MAXPGPATH);
#endif
+ /*
+ * On a standby with archive_mode=always, the same file can be restored
+ * and archived more than once. If archive_command rejects overwriting
+ * an existing file, WAL archiving will never get past that file. Avoid
+ * a duplicate archiving attempt when a file with identical contents is
+ * known to have already been archived or notified.
+ */
+ if (XLogArchiveMode == ARCHIVE_MODE_ALWAYS &&
+ XLogArchiveIsReadyOrDone(xlogfname))
+ {
+ int fd1;
+ int fd2 = -1;
+
+ fd1 = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+ if (fd1 >= 0)
+ fd2 = BasicOpenFile(oldpath, O_RDONLY | PG_BINARY);
+
+ if (fd1 < 0 || fd2 < 0)
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\", skip duplicate check: %m",
+ fd1 < 0 ? path : oldpath)));
+ if (fd1 >= 0)
+ close(fd1);
+ }
+ else
+ {
+ unsigned char srcbuf[XLOG_BLCKSZ];
+ unsigned char dstbuf[XLOG_BLCKSZ];
+ uint32 i;
+
+ /*
+ * Compare the two files' contents. We don't bother completing
+ * the comparison if anything goes wrong along the way.
+ */
+ for (i = 0; i < wal_segment_size / XLOG_BLCKSZ; i++)
+ {
+ if (read(fd1, srcbuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ break;
+
+ if (read(fd2, dstbuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ break;
+
+ if (memcmp(srcbuf, dstbuf, XLOG_BLCKSZ) != 0)
+ break;
+ }
+
+ close(fd1);
+ close(fd2);
+
+ if (i == wal_segment_size / XLOG_BLCKSZ)
+ {
+ already_archived = true;
+
+ ereport(LOG,
+ (errmsg("log file \"%s\" have been already archived, skip archiving",
+ xlogfname)));
+ }
+ }
+ }
+
if (unlink(oldpath) != 0)
ereport(FATAL,
(errcode_for_file_access(),
@@ -432,7 +495,7 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
*/
if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
XLogArchiveForceDone(xlogfname);
- else
+ else if (!already_archived)
XLogArchiveNotify(xlogfname);
/*