On Mon, Jan 10, 2022 at 8:25 AM Andres Freund <and...@anarazel.de> wrote:
>
> Hi,
>
> On 2021-11-23 11:43:21 +0530, Amul Sul wrote:
> > Attached patch covers a case where TLI in the filename for a
> > record being read is different from where it belongs to. In other
> > words, it covers following case noted in StartupXLOG():
>
> > Thoughts? Suggestions?
>
> It seems the test isn't quite reliable. It occasionally fails on freebsd,
> macos, linux and always on windows (starting with the new CI stuff, before the
> test wasn't run).
>
> See 
> https://cirrus-ci.com/github/postgresql-cfbot/postgresql/commitfest/36/3427
>

Thanks for the note. I can see the same test failing on my CentOS VM as
well with the latest master head (376ce3e404b).  The failure is caused by
the "recovery_target_inclusive = off" setting, which is unnecessary for
this test; the attached patch removes it.
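For context, here is a minimal sketch of the kind of configuration that was
presumably causing the flakiness (illustrative only, not the exact v1 hunk;
the variable names follow the test):

# Illustrative sketch: the target LSN is captured right after the inserts,
# and recovery_target_inclusive = off tells recovery to stop just *before*
# the target rather than just after it, which this test does not need.
my $target_lsn = $node_primary->safe_psql('postgres',
	"SELECT pg_current_wal_lsn()");
$node_standby->append_conf(
	'postgresql.conf', qq(
recovery_target_lsn = '$target_lsn'
recovery_target_inclusive = off
));

Dropping recovery_target_inclusive falls back to the default (on), i.e.
recovery stops just after the target, which is what the count check expects.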

Regards,
Amul
From 88ae9ea5a33c1ecc5b5493dae9c016ef19fbf88f Mon Sep 17 00:00:00 2001
From: Amul Sul <amul.sul@enterprisedb.com>
Date: Sun, 9 Jan 2022 23:10:07 -0500
Subject: [PATCH v2] TAP test for EndOfLogTLI

---
 src/test/recovery/t/003_recovery_targets.pl | 52 ++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index 24da78c0bcd..cf72b5d9343 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -6,7 +6,7 @@ use strict;
 use warnings;
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 9;
+use Test::More tests => 10;
 use Time::HiRes qw(usleep);
 
 # Create and test a standby from given backup, with a certain recovery target.
@@ -182,3 +182,53 @@ $logfile = slurp_file($node_standby->logfile());
 ok( $logfile =~
 	  qr/FATAL: .* recovery ended before configured recovery target was reached/,
 	'recovery end before target reached is a fatal error');
+
+# Test to cover the case where the WAL record we are looking for ought to be
+# in, e.g., 000000010000000000000001 but is not found there; instead we find
+# it in 000000020000000000000003.  This can happen for various reasons, such
+# as a timeline switch within that segment (so we read the old WAL from a
+# segment belonging to a higher timeline), or because our recovery target
+# timeline is 2, or something that has 2 in its history.
+
+# Insert some more rows into the primary
+$node_primary->safe_psql('postgres',
+	"INSERT INTO tab_int VALUES (generate_series(6001,7000))");
+my $lsn6 = $node_primary->safe_psql('postgres',
+	"SELECT pg_current_wal_lsn()");
+
+# Set up a new standby and enable WAL archiving to archive WAL files to the
+# same location as the primary.
+my $archive_cmd = $node_primary->safe_psql('postgres',
+	"SELECT current_setting('archive_command')");
+$node_standby = PostgreSQL::Test::Cluster->new('standby_9');
+$node_standby->init_from_backup(
+	$node_primary, 'my_backup',
+	has_streaming => 1);
+$node_standby->append_conf(
+        'postgresql.conf', qq(
+archive_mode = on
+archive_command = '$archive_cmd'
+));
+$node_standby->start;
+# Wait until necessary replay has been done on standby
+$node_primary->wait_for_catchup($node_standby, 'replay',
+	$node_primary->lsn('write'));
+$node_standby->promote;
+$node_standby->safe_psql('postgres',
+	"INSERT INTO tab_int VALUES (generate_series(7001,8000))");
+# Force archiving of WAL file
+$node_standby->safe_psql('postgres', "SELECT pg_switch_wal()");
+$node_standby->stop;
+
+# Another standby, whose recovery target LSN will be in a WAL file whose
+# TLI differs from the timeline the target LSN belongs to.
+$node_standby = PostgreSQL::Test::Cluster->new('standby_10');
+$node_standby->init_from_backup(
+	$node_primary, 'my_backup',
+	has_restoring => 1);
+$node_standby->append_conf(
+        'postgresql.conf', qq(recovery_target_lsn = '$lsn6'));
+$node_standby->start;
+my $result = $node_standby->safe_psql('postgres',
+	"SELECT count(*) FROM tab_int");
+is($result, '7000', "check standby content before timeline switch $lsn6");
-- 
2.18.0
