At Mon, 22 Apr 2019 16:40:27 +0900 (Tokyo Standard Time), Kyotaro HORIGUCHI <horiguchi.kyot...@lab.ntt.co.jp> wrote in <20190422.164027.33866403.horiguchi.kyot...@lab.ntt.co.jp> > At Mon, 22 Apr 2019 16:15:13 +0900 (Tokyo Standard Time), Kyotaro HORIGUCHI > <horiguchi.kyot...@lab.ntt.co.jp> wrote in > <20190422.161513.258021727.horiguchi.kyot...@lab.ntt.co.jp> > > At Mon, 22 Apr 2019 12:36:43 +0800, Paul Guo <p...@pivotal.io> wrote in > > <CAEET0ZGpUrMGUzfyzVF9FuSq+zb=qovya2cvyrndotvz5vx...@mail.gmail.com> > > > Please see my replies inline. Thanks. > > > > > > On Fri, Apr 19, 2019 at 12:38 PM Asim R P <aprav...@pivotal.io> wrote: > > > > > > > On Wed, Apr 17, 2019 at 1:27 PM Paul Guo <p...@pivotal.io> wrote: > > > > > > > > > > create db with tablespace > > > > > drop database > > > > > drop tablespace. > > > > > > > > Essentially, that sequence of operations causes crash recovery to fail > > > > if the "drop tablespace" transaction was committed before crashing. > > > > This is a bug in crash recovery in general and should be reproducible > > > > without configuring a standby. Is that right? > > > > > > > > > > No. In general, checkpoint is done for drop_db/create_db/drop_tablespace > > > on > > > master. > > > That makes the file/directory update-to-date if I understand the related > > > code correctly. > > > For standby, checkpoint redo does not ensure that.
The attached exercises this sequence, needing some changes in PostgresNode.pm and RecursiveCopy.pm to allow tablespaces. regards. -- Kyotaro Horiguchi NTT Open Source Software Center
>From dbe6306a730f94a5bd8beaf0e534c28ebdd815d4 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> Date: Mon, 22 Apr 2019 20:10:20 +0900 Subject: [PATCH 1/2] Allow TAP test to excecise tablespace. To perform tablespace related checks, this patch lets PostgresNode::backup have a new parameter "tablespace_mapping", and make init_from_backup handle capable to handle a backup created using tablespace_mapping. --- src/test/perl/PostgresNode.pm | 10 ++++++++-- src/test/perl/RecursiveCopy.pm | 33 +++++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 76874141c5..59a939821d 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -540,13 +540,19 @@ target server since it isn't done by default. sub backup { - my ($self, $backup_name) = @_; + my ($self, $backup_name, %params) = @_; my $backup_path = $self->backup_dir . '/' . $backup_name; my $name = $self->name; + my @rest = (); + + if (defined $params{tablespace_mapping}) + { + push(@rest, "--tablespace-mapping=$params{tablespace_mapping}"); + } print "# Taking pg_basebackup $backup_name from node \"$name\"\n"; TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h', - $self->host, '-p', $self->port, '--no-sync'); + $self->host, '-p', $self->port, '--no-sync', @rest); print "# Backup finished\n"; return; } diff --git a/src/test/perl/RecursiveCopy.pm b/src/test/perl/RecursiveCopy.pm index baf5d0ac63..c912ce412d 100644 --- a/src/test/perl/RecursiveCopy.pm +++ b/src/test/perl/RecursiveCopy.pm @@ -22,6 +22,7 @@ use warnings; use Carp; use File::Basename; use File::Copy; +use TestLib; =pod @@ -97,14 +98,38 @@ sub _copypath_recurse # invoke the filter and skip all further operation if it returns false return 1 unless &$filterfn($curr_path); - # Check for symlink -- needed only on source dir - # (note: this will fall through quietly if file is already gone) - croak "Cannot operate on symlink \"$srcpath\"" if -l $srcpath; - # Abort if destination path already exists. Should we allow directories # to exist already? croak "Destination path \"$destpath\" already exists" if -e $destpath; + # Check for symlink -- needed only on source dir + # (note: this will fall through quietly if file is already gone) + if (-l $srcpath) + { + croak "Cannot operate on symlink \"$srcpath\"" + if ($srcpath !~ /\/(pg_tblspc\/[0-9]+)$/); + + # We have mapped tablespaces. Copy them individually + my $linkname = $1; + my $tmpdir = TestLib::tempdir; + my $dstrealdir = TestLib::real_dir($tmpdir); + my $srcrealdir = readlink($srcpath); + + opendir(my $dh, $srcrealdir); + while (readdir $dh) + { + next if (/^\.\.?$/); + my $spath = "$srcrealdir/$_"; + my $dpath = "$dstrealdir/$_"; + + copypath($spath, $dpath); + } + closedir $dh; + + symlink $dstrealdir, $destpath; + return 1; + } + # If this source path is a file, simply copy it to destination with the # same name and we're done. if (-f $srcpath) -- 2.16.3
>From 382910fbe3738c9098c0568cdc992928f471c7c5 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> Date: Mon, 22 Apr 2019 20:10:25 +0900 Subject: [PATCH 2/2] Add check for recovery failure caused by tablespace. Removal of a tablespace on master can cause recovery failure on standby. This patch adds the check for the case. --- src/test/recovery/t/011_crash_recovery.pl | 52 ++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl index 5dc52412ca..d1eb9edccf 100644 --- a/src/test/recovery/t/011_crash_recovery.pl +++ b/src/test/recovery/t/011_crash_recovery.pl @@ -15,7 +15,7 @@ if ($Config{osname} eq 'MSWin32') } else { - plan tests => 3; + plan tests => 4; } my $node = get_new_node('master'); @@ -66,3 +66,53 @@ is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]), 'aborted', 'xid is aborted after crash'); $tx->kill_kill; + + +# Ensure that tablespace removal doesn't cause error while recoverying +# the preceding create datbase or objects. + +my $node_master = get_new_node('master2'); +$node_master->init(allows_streaming => 1); +$node_master->start; + +# Create tablespace +my $tspDir_master = TestLib::tempdir; +my $realTSDir_master = TestLib::real_dir($tspDir_master); +$node_master->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$realTSDir_master'"); + +my $tspDir_standby = TestLib::tempdir; +my $realTSDir_standby = TestLib::real_dir($tspDir_standby); + +# Take backup +my $backup_name = 'my_backup'; +$node_master->backup($backup_name, + tablespace_mapping => + "$realTSDir_master=$realTSDir_standby"); +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1); +$node_standby->start; + +# Make sure connection is made +$node_master->poll_query_until( + 'postgres', 'SELECT count(*) = 1 FROM pg_stat_replication'); + +# Make sure to perform restartpoint after tablespace creation +$node_master->wait_for_catchup($node_standby, 'replay', + $node_master->lsn('replay')); +$node_standby->safe_psql('postgres', 'CHECKPOINT'); + +# Do immediate shutdown just after a sequence of CREAT DATABASE / DROP +# DATABASE / DROP TABLESPACE. This leaves a CREATE DATBASE WAL record +# that is to be applied to already-removed tablespace. +$node_master->safe_psql('postgres', + q[CREATE DATABASE db1 WITH TABLESPACE ts1; + DROP DATABASE db1; + DROP TABLESPACE ts1;]); +$node_master->wait_for_catchup($node_standby, 'replay', + $node_master->lsn('replay')); +$node_standby->stop('immediate'); + +# Should restart ignoring directory creation error. +is($node_standby->start(fail_ok => 1), 1); + + -- 2.16.3
>From 5e3a9b682e6ec2b6cb4e019409112687bd598ebc Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> Date: Mon, 22 Apr 2019 20:59:15 +0900 Subject: [PATCH 3/3] Fix failure of standby startup caused by tablespace removal When standby restarts after a crash after drop of a tablespace, there's a possibility that recovery fails trying an object-creation in already removed tablespace directory. Allow recovery to continue by ignoring the error if not reaching consistency point. --- src/backend/access/transam/xlogutils.c | 34 ++++++++++++++++++++++++++++++++++ src/backend/storage/file/copydir.c | 12 +++++++----- src/include/access/xlogutils.h | 1 + 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 10a663bae6..75cdb882cd 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -522,6 +522,40 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, return buffer; } +/* + * XLogMakePGDirectory + * + * There is a possibility that WAL replay causes an error by creation of a + * directory under a directory removed before the previous crash. Issuing + * ERROR prevents the caller from continuing recovery. + * + * To prevent that case, this function issues WARNING instead of ERROR on + * error if consistency is not reached yet. + */ +int +XLogMakePGDirectory(const char *directoryName) +{ + int ret; + + ret = MakePGDirectory(directoryName); + + if (ret != 0) + { + int elevel = ERROR; + + /* Don't issue ERROR for this failure before reaching consistency. */ + if (InRecovery && !reachedConsistency) + elevel = WARNING; + + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", directoryName))); + return ret; + } + + return 0; +} + /* * Struct actually returned by XLogFakeRelcacheEntry, though the declared * return type is Relation. diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index 30f6200a86..0216270dd3 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -22,11 +22,11 @@ #include <unistd.h> #include <sys/stat.h> +#include "access/xlogutils.h" #include "storage/copydir.h" #include "storage/fd.h" #include "miscadmin.h" #include "pgstat.h" - /* * copydir: copy a directory * @@ -41,10 +41,12 @@ copydir(char *fromdir, char *todir, bool recurse) char fromfile[MAXPGPATH * 2]; char tofile[MAXPGPATH * 2]; - if (MakePGDirectory(todir) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", todir))); + /* + * We might have to skip copydir to continue recovery. See the function + * for details. + */ + if (XLogMakePGDirectory(todir) != 0) + return; xldir = AllocateDir(fromdir); diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 0ab5ba62f5..46a7596315 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -43,6 +43,7 @@ extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record, extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode); +extern int XLogMakePGDirectory(const char *directoryName); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern void FreeFakeRelcacheEntry(Relation fakerel); -- 2.16.3