Re: pg13: xlogreader API adjust

2020-05-21 Thread Kyotaro Horiguchi
At Fri, 15 May 2020 19:24:28 -0400, Alvaro Herrera  
wrote in 
> On 2020-May-15, Michael Paquier wrote:
> 
> > On Thu, May 14, 2020 at 02:12:25PM +0900, Kyotaro Horiguchi wrote:
> > > Good catch!  That's not only for CreateDecodingContet. That happens
> > > everywhere in the query loop in PostgresMain() until logreader is
> > > initialized.  So that also happens, for example, by starting logical
> > > replication using invalidated slot. Checking xlogreader != NULL in
> > > WalSndErrorCleanup is sufficient.  It doesn't make actual difference,
> > > but the attached explicitly initialize the pointer with NULL.
> > 
> > Alvaro, are you planning to look at that?  Should we have an open item
> > for this matter?
> 
> On it now.  I'm trying to add a test for this (needs a small change to
> PostgresNode->psql), but I'm probably doing something stupid in the Perl
> side, because it doesn't detect things as well as I'd like.  Still
> trying, but I may be asked to evict the office soon ...

FWIW, and I'm not sure which of the mail and the commit 1d3743023e was
earlier, but I confirmed that the committed test in
006_logical_decoding.pl causes a crash, and the crash is fixed by the
change of code.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: pg13: xlogreader API adjust

2020-05-15 Thread Alvaro Herrera
On 2020-May-15, Michael Paquier wrote:

> On Thu, May 14, 2020 at 02:12:25PM +0900, Kyotaro Horiguchi wrote:
> > Good catch!  That's not only for CreateDecodingContet. That happens
> > everywhere in the query loop in PostgresMain() until logreader is
> > initialized.  So that also happens, for example, by starting logical
> > replication using invalidated slot. Checking xlogreader != NULL in
> > WalSndErrorCleanup is sufficient.  It doesn't make actual difference,
> > but the attached explicitly initialize the pointer with NULL.
> 
> Alvaro, are you planning to look at that?  Should we have an open item
> for this matter?

On it now.  I'm trying to add a test for this (needs a small change to
PostgresNode->psql), but I'm probably doing something stupid in the Perl
side, because it doesn't detect things as well as I'd like.  Still
trying, but I may be asked to evict the office soon ...

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 3f3a1d81f6..0645e07789 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1386,6 +1386,12 @@ the B parameter is also given.
 If B is set and this parameter is given, the scalar it references
 is set to true if the psql call times out.
 
+=item replication => B
+
+If set, pass the value as B in the conninfo string.
+Passing literal B results in a logical replication connection.
+Passing literal B<1> results in a physical replication connection.
+
 =item extra_params => ['--single-transaction']
 
 If given, it must be an array reference containing additional parameters to B.
@@ -1414,10 +1420,14 @@ sub psql
 
 	my $stdout= $params{stdout};
 	my $stderr= $params{stderr};
+	my $replication   = $params{replication};
 	my $timeout   = undef;
 	my $timeout_exception = 'psql timed out';
 	my @psql_params =
-	  ('psql', '-XAtq', '-d', $self->connstr($dbname), '-f', '-');
+	  ('psql', '-XAtq', '-d',
+	   $self->connstr($dbname) .
+	   (defined $replication ? " replication=$replication" : ""),
+	   '-f', '-');
 
 	# If the caller wants an array and hasn't passed stdout/stderr
 	# references, allocate temporary ones to capture them so we
diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl
index d40a500ed4..5c7e137e97 100644
--- a/src/test/recovery/t/006_logical_decoding.pl
+++ b/src/test/recovery/t/006_logical_decoding.pl
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 12;
+use Test::More tests => 13;
 use Config;
 
 # Initialize master node
@@ -27,12 +27,20 @@ $node_master->safe_psql('postgres',
 	qq[SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');]
 );
 
+# Cover walsender error shutdown code
+my ($result, $stdout, $stderr) = $node_master->psql('template1',
+	qq[START_REPLICATION SLOT test_slot LOGICAL 0/0],
+	replication => 'database');
+ok(($result == 3) and
+   ($stderr =~ 'replication slot "test_slot" was not created in this database'),
+   "Logical decoding correctly fails to start");
+
 $node_master->safe_psql('postgres',
 	qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,10) s;]
 );
 
 # Basic decoding works
-my ($result) = $node_master->safe_psql('postgres',
+$result = $node_master->safe_psql('postgres',
 	qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]);
 is(scalar(my @foobar = split /^/m, $result),
 	12, 'Decoding produced 12 rows inc BEGIN/COMMIT');


Re: pg13: xlogreader API adjust

2020-05-15 Thread Michael Paquier
On Thu, May 14, 2020 at 02:12:25PM +0900, Kyotaro Horiguchi wrote:
> Good catch!  That's not only for CreateDecodingContet. That happens
> everywhere in the query loop in PostgresMain() until logreader is
> initialized.  So that also happens, for example, by starting logical
> replication using invalidated slot. Checking xlogreader != NULL in
> WalSndErrorCleanup is sufficient.  It doesn't make actual difference,
> but the attached explicitly initialize the pointer with NULL.

Alvaro, are you planning to look at that?  Should we have an open item
for this matter?
--
Michael


signature.asc
Description: PGP signature


Re: pg13: xlogreader API adjust

2020-05-13 Thread Kyotaro Horiguchi
At Thu, 14 May 2020 01:03:48 +, "Bossart, Nathan"  
wrote in 
> I think I've discovered a problem with 850196b6.  The following steps
> can be used to trigger a segfault:
> 
> # wal_level = logical
> psql postgres -c "create database testdb;"
> psql testdb -c "select pg_create_logical_replication_slot('slot', 
> 'test_decoding');"
> psql "dbname=postgres replication=database" -c "START_REPLICATION 
> SLOT slot LOGICAL 0/0;"
> 
> From a quick glance, I think the problem starts in
> StartLogicalReplication() in walsender.c.  The call to
> CreateDecodingContext() may ERROR before xlogreader is initialized in
> the next line, so the subsequent call to WalSndErrorCleanup()
> segfaults when it attempts to access xlogreader.

Good catch!  That's not only for CreateDecodingContet. That happens
everywhere in the query loop in PostgresMain() until logreader is
initialized.  So that also happens, for example, by starting logical
replication using invalidated slot. Checking xlogreader != NULL in
WalSndErrorCleanup is sufficient.  It doesn't make actual difference,
but the attached explicitly initialize the pointer with NULL.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
>From 5421f5b0e63abfe18b6f09d75c44697cdac699f3 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi 
Date: Thu, 14 May 2020 13:37:50 +0900
Subject: [PATCH] Don't check uninitialized xlog reader state

WanSndErrorCleanup may be called before initialization of the variable
xlogreader and crashes in that case.  Let the function not to examine
the xlogreader state if not yet initialized.
---
 src/backend/replication/walsender.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 3367aa98f8..661fa12a94 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -136,7 +136,7 @@ bool		wake_wal_senders = false;
  * routines.
  */
 static XLogReaderState fake_xlogreader;
-static XLogReaderState *xlogreader;
+static XLogReaderState *xlogreader = NULL;
 
 /*
  * These variables keep track of the state of the timeline we're currently
@@ -315,7 +315,7 @@ WalSndErrorCleanup(void)
 	ConditionVariableCancelSleep();
 	pgstat_report_wait_end();
 
-	if (xlogreader->seg.ws_file >= 0)
+	if (xlogreader != NULL && xlogreader->seg.ws_file >= 0)
 		wal_segment_close(xlogreader);
 
 	if (MyReplicationSlot != NULL)
-- 
2.18.2



Re: pg13: xlogreader API adjust

2020-05-13 Thread Bossart, Nathan
I think I've discovered a problem with 850196b6.  The following steps
can be used to trigger a segfault:

# wal_level = logical
psql postgres -c "create database testdb;"
psql testdb -c "select pg_create_logical_replication_slot('slot', 
'test_decoding');"
psql "dbname=postgres replication=database" -c "START_REPLICATION SLOT 
slot LOGICAL 0/0;"

From a quick glance, I think the problem starts in
StartLogicalReplication() in walsender.c.  The call to
CreateDecodingContext() may ERROR before xlogreader is initialized in
the next line, so the subsequent call to WalSndErrorCleanup()
segfaults when it attempts to access xlogreader.

Nathan



Re: pg13: xlogreader API adjust

2020-05-13 Thread Alvaro Herrera
On 2020-May-13, Tom Lane wrote:

> Alvaro Herrera  writes:
> > Pushed.  Thanks for the help!
> 
> This seems to have fixed bowerbird.  Were you expecting that?

Hm, not really.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: pg13: xlogreader API adjust

2020-05-13 Thread Tom Lane
Alvaro Herrera  writes:
> Pushed.  Thanks for the help!

This seems to have fixed bowerbird.  Were you expecting that?

regards, tom lane




Re: pg13: xlogreader API adjust

2020-05-13 Thread Alvaro Herrera
Pushed.  Thanks for the help!

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: pg13: xlogreader API adjust

2020-05-12 Thread Alvaro Herrera
On 2020-May-12, Kyotaro Horiguchi wrote:

> I'm not sure the reason for wal_segment_open and WalSndSegmentOpen
> being modified different way about error handling of BasicOpenFile, I
> prefer the WalSndSegmentOpen way.  However, that difference doesn't
> harm anything so I'm fine with the current patch.

Yeah, I couldn't decide which style I liked the most.  I used the one
you suggested.

> + fake_xlogreader.seg = *sendSeg;
> + fake_xlogreader.segcxt = *sendCxt;
> 
> fake_xlogreader.seg is a different instance from *sendSeg.  WALRead
> modifies fake_xlogreader.seg but does not modify *sendSeg. Thus the
> change doesn't persist.  On the other hand WalSndSegmentOpen reads
> *sendSeg, which is not under control of WALRead.
> 
> Maybe we had better to make fake_xlogreader be a global variable of
> walsender.c that covers the current sendSeg and sendCxt.

I tried that.  I was about to leave it at just modifying physical
walsender (simple enough, and it passed tests), but I noticed that
WalSndErrorCleanup() would be a problem because we don't know if it's
physical or logical walsender.  So in the end I added a global
'xlogreader' pointer in walsender.c -- logical walsender sets it to the
true xlogreader it has inside the logical decoding context, and physical
walsender sets it to its fake xlogreader.  That seems to work nicely.
sendSeg/sendCxt are gone entirely.  Logical walsender was doing
WALOpenSegmentInit() uselessly during InitWalSender(), since it was
using the separate sendSeg/sendCxt structs instead of the ones in its
xlogreader.  (Some mysteries become clearer!)  

It's slightly disquieting that the segment_close call in
WalSndErrorCleanup is not covered, but in any case this should work well
AFAICS.  I think this is simpler to understand than formerly.

Now the only silliness remaining is the fact that different users of the
xlogreader interface are doing different things about the TLI.
Hopefully we can unify everything to something sensible one day .. but
that's not going to happen in pg13.

I'll get this pushed tomorrow, unless there are further objections.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 63160d69f734bdb2ed45aa839ec2903f12194d50 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera 
Date: Tue, 12 May 2020 19:39:57 -0400
Subject: [PATCH] Adjust walsender usage of xlogreader, simplify APIs

Per comments from Kyotaro Horiguchi
---
 src/backend/access/transam/xlogreader.c | 35 +
 src/backend/access/transam/xlogutils.c  | 16 ++--
 src/backend/replication/walsender.c | 98 -
 src/bin/pg_waldump/pg_waldump.c | 16 ++--
 src/include/access/xlogreader.h | 20 ++---
 src/include/access/xlogutils.h  |  3 +-
 6 files changed, 83 insertions(+), 105 deletions(-)

diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 7cee8b92c9..aae3fee24c 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1044,14 +1044,12 @@ err:
 
 /*
  * Helper function to ease writing of XLogRoutine->page_read callbacks.
- * If this function is used, caller must supply an open_segment callback in
+ * If this function is used, caller must supply a segment_open callback in
  * 'state', as that is used here.
  *
  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
  * fetched from timeline 'tli'.
  *
- * 'seg/segcxt' identify the last segment used.
- *
  * Returns true if succeeded, false if an error occurs, in which case
  * 'errinfo' receives error details.
  *
@@ -1061,7 +1059,6 @@ err:
 bool
 WALRead(XLogReaderState *state,
 		char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
-		WALOpenSegment *seg, WALSegmentContext *segcxt,
 		WALReadError *errinfo)
 {
 	char	   *p;
@@ -1078,34 +1075,36 @@ WALRead(XLogReaderState *state,
 		int			segbytes;
 		int			readbytes;
 
-		startoff = XLogSegmentOffset(recptr, segcxt->ws_segsize);
+		startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
 
 		/*
 		 * If the data we want is not in a segment we have open, close what we
 		 * have (if anything) and open the next one, using the caller's
 		 * provided openSegment callback.
 		 */
-		if (seg->ws_file < 0 ||
-			!XLByteInSeg(recptr, seg->ws_segno, segcxt->ws_segsize) ||
-			tli != seg->ws_tli)
+		if (state->seg.ws_file < 0 ||
+			!XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
+			tli != state->seg.ws_tli)
 		{
 			XLogSegNo	nextSegNo;
 
-			if (seg->ws_file >= 0)
+			if (state->seg.ws_file >= 0)
 state->routine.segment_close(state);
 
-			XLByteToSeg(recptr, nextSegNo, segcxt->ws_segsize);
-			seg->ws_file = state->routine.segment_open(state, nextSegNo,
-	   segcxt, );
+			XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
+			state->routine.segment_open(state, nextSegNo, );
+
+			/* This shouldn't happen -- 

Re: pg13: xlogreader API adjust

2020-05-11 Thread Kyotaro Horiguchi
At Mon, 11 May 2020 16:33:36 -0400, Alvaro Herrera  
wrote in 
> Hello
> 
> Per discussion in thread [1], I propose the following patch to give
> another adjustment to the xlogreader API.  This results in a small but
> not insignificat net reduction of lines of code.  What this patch does
> is adjust the signature of these new xlogreader callbacks, making the
> API simpler.  The changes are:
> 
> * the segment_open callback installs the FD in xlogreader state itself,
>   instead of passing the FD back.  This was suggested by Kyotaro
>   Horiguchi in that thread[2].
> 
> * We no longer pass segcxt to segment_open; it's in XLogReaderState, 
>   which is already an argument.
> 
> * We no longer pass seg/segcxt to WALRead; instead, that function takes
>   them from XLogReaderState, which is already an argument.
>   (This means XLogSendPhysical has to drink more of the fake_xlogreader
>   kool-aid.)
> 
> I claim the reason to do it now instead of pg14 is to make it simpler
> for third-party xlogreader callers to adjust.
> 
> (Some might be thinking that I do this to avoid an API change later, but
> my guts tell me that we'll adjust xlogreader again in pg14 for the
> encryption stuff and other reasons, so.)
> 
> 
> [1] https://postgr.es/m/20200406025651.fpzdb5yyb7qyh...@alap3.anarazel.de
> [2] 
> https://postgr.es/m/20200508.114228.963995144765118400.horikyota@gmail.com

The simplified interface of WALRead looks far better to me since it no
longer has unreasonable duplicates of parameters.  I agree to the
discussion about third-party xlogreader callers but not sure about
back-patching burden.

I'm not sure the reason for wal_segment_open and WalSndSegmentOpen
being modified different way about error handling of BasicOpenFile, I
prefer the WalSndSegmentOpen way.  However, that difference doesn't
harm anything so I'm fine with the current patch.


+   fake_xlogreader.seg = *sendSeg;
+   fake_xlogreader.segcxt = *sendCxt;

fake_xlogreader.seg is a different instance from *sendSeg.  WALRead
modifies fake_xlogreader.seg but does not modify *sendSeg. Thus the
change doesn't persist.  On the other hand WalSndSegmentOpen reads
*sendSeg, which is not under control of WALRead.

Maybe we had better to make fake_xlogreader be a global variable of
walsender.c that covers the current sendSeg and sendCxt.

regards.


-- 
Kyotaro Horiguchi
NTT Open Source Software Center




pg13: xlogreader API adjust

2020-05-11 Thread Alvaro Herrera
Hello

Per discussion in thread [1], I propose the following patch to give
another adjustment to the xlogreader API.  This results in a small but
not insignificat net reduction of lines of code.  What this patch does
is adjust the signature of these new xlogreader callbacks, making the
API simpler.  The changes are:

* the segment_open callback installs the FD in xlogreader state itself,
  instead of passing the FD back.  This was suggested by Kyotaro
  Horiguchi in that thread[2].

* We no longer pass segcxt to segment_open; it's in XLogReaderState, 
  which is already an argument.

* We no longer pass seg/segcxt to WALRead; instead, that function takes
  them from XLogReaderState, which is already an argument.
  (This means XLogSendPhysical has to drink more of the fake_xlogreader
  kool-aid.)

I claim the reason to do it now instead of pg14 is to make it simpler
for third-party xlogreader callers to adjust.

(Some might be thinking that I do this to avoid an API change later, but
my guts tell me that we'll adjust xlogreader again in pg14 for the
encryption stuff and other reasons, so.)


[1] https://postgr.es/m/20200406025651.fpzdb5yyb7qyh...@alap3.anarazel.de
[2] 
https://postgr.es/m/20200508.114228.963995144765118400.horikyota@gmail.com

-- 
Álvaro Herrera Developer, https://www.PostgreSQL.org/
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 7cee8b92c9..4b6f4ada6e 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1050,8 +1050,6 @@ err:
  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
  * fetched from timeline 'tli'.
  *
- * 'seg/segcxt' identify the last segment used.
- *
  * Returns true if succeeded, false if an error occurs, in which case
  * 'errinfo' receives error details.
  *
@@ -1061,7 +1059,6 @@ err:
 bool
 WALRead(XLogReaderState *state,
 		char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
-		WALOpenSegment *seg, WALSegmentContext *segcxt,
 		WALReadError *errinfo)
 {
 	char	   *p;
@@ -1078,34 +1075,34 @@ WALRead(XLogReaderState *state,
 		int			segbytes;
 		int			readbytes;
 
-		startoff = XLogSegmentOffset(recptr, segcxt->ws_segsize);
+		startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
 
 		/*
 		 * If the data we want is not in a segment we have open, close what we
 		 * have (if anything) and open the next one, using the caller's
 		 * provided openSegment callback.
 		 */
-		if (seg->ws_file < 0 ||
-			!XLByteInSeg(recptr, seg->ws_segno, segcxt->ws_segsize) ||
-			tli != seg->ws_tli)
+		if (state->seg.ws_file < 0 ||
+			!XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
+			tli != state->seg.ws_tli)
 		{
 			XLogSegNo	nextSegNo;
 
-			if (seg->ws_file >= 0)
+			if (state->seg.ws_file >= 0)
 state->routine.segment_close(state);
 
-			XLByteToSeg(recptr, nextSegNo, segcxt->ws_segsize);
-			seg->ws_file = state->routine.segment_open(state, nextSegNo,
-	   segcxt, );
+			XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
+			state->routine.segment_open(state, nextSegNo, );
+			Assert(state->seg.ws_file >= 0);	/* shouldn't happen */
 
 			/* Update the current segment info. */
-			seg->ws_tli = tli;
-			seg->ws_segno = nextSegNo;
+			state->seg.ws_tli = tli;
+			state->seg.ws_segno = nextSegNo;
 		}
 
 		/* How many bytes are within this segment? */
-		if (nbytes > (segcxt->ws_segsize - startoff))
-			segbytes = segcxt->ws_segsize - startoff;
+		if (nbytes > (state->segcxt.ws_segsize - startoff))
+			segbytes = state->segcxt.ws_segsize - startoff;
 		else
 			segbytes = nbytes;
 
@@ -1115,7 +1112,7 @@ WALRead(XLogReaderState *state,
 
 		/* Reset errno first; eases reporting non-errno-affecting errors */
 		errno = 0;
-		readbytes = pg_pread(seg->ws_file, p, segbytes, (off_t) startoff);
+		readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
 
 #ifndef FRONTEND
 		pgstat_report_wait_end();
@@ -1127,7 +1124,7 @@ WALRead(XLogReaderState *state,
 			errinfo->wre_req = segbytes;
 			errinfo->wre_read = readbytes;
 			errinfo->wre_off = startoff;
-			errinfo->wre_seg = *seg;
+			errinfo->wre_seg = state->seg;
 			return false;
 		}
 
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index bbd801513a..0fc27215df 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -784,31 +784,28 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 }
 
 /* XLogReaderRoutine->segment_open callback for local pg_wal files */
-int
+void
 wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
- WALSegmentContext *segcxt, TimeLineID *tli_p)
+ TimeLineID *tli_p)
 {
 	TimeLineID	tli = *tli_p;
 	char		path[MAXPGPATH];
-	int			fd;
 
-	XLogFilePath(path, tli, nextSegNo, segcxt->ws_segsize);
-	fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
-	if