Fujii Masao wrote:
> Hi,
> 
> When the replication connection is closed unexpectedly, walsender might emit
> the following unfit messages. IOW, the loss of the connection might be wrongly
> regarded as an arrival of invalid message by the walsender. This looks messy.
> We should get rid of that unfit FATAL message, emit a COMMERROR message and
> just call proc_exit() when the loss of the connection is found?
> 
>> [2460]: LOG:  could not receive data from client: No connection could be 
>> made because the target machine actively refused it.
>> [2460]: FATAL:  invalid standby closing message type 4
>> [2460]: LOG:  could not send data to client: No connection could be made 
>> because the target machine actively refused it.
> 
> Also the walsender wrongly tries to send the FATAL message to the standby even
> though the connection has already been closed, and then gets the following LOG
> message after the FATAL one. This FATAL message is suitable, but output of the
> LOG message looks messy, too. We should use COMMERROR instead of FATAL and 
> then
> just call proc_exit() in order to prevent a message from being sent?
> 
>> [12586] FATAL:  unexpected EOF on standby connection
>> [12586] LOG:  could not send data to client: Broken pipe
> 
> The attached patch fixes those unfit messages.

Actually the pg_getbyte_if_available() function is a bit confused. The
comment above it claims that it returns 0 if no data is available
without blocking, but it actually returns -1 with errno==EWOULDBLOCK.
That stems from confusion in secure_read(); with SSL it returns 0 if it
would block, but without SSL it returns -1 and EWOULDBLOCK. We should
fix that so that secure_read() behaves consistently and so that
pq_getbyte_if_available() behaves like e.g pq_getbytes().

Patch attached for that. pq_getbyte_if_available() now reports any
errors with COMMERROR level, and returns EOF if the connection is closed
cleanly. If no data is available without blocking it now really returns
0 as the comment said. Walsender reports any unexpected EOF to the log
at COMMERROR level, similar to what normal backends do.

-- 
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
? aaa.patch
? postgres
? access/common/.deps
? access/gin/.deps
? access/gist/.deps
? access/hash/.deps
? access/heap/.deps
? access/index/.deps
? access/nbtree/.deps
? access/transam/.deps
? bootstrap/.deps
? catalog/.deps
? commands/.deps
? executor/.deps
? foreign/.deps
? foreign/dummy/.deps
? foreign/postgresql/.deps
? lib/.deps
? libpq/.deps
? main/.deps
? nodes/.deps
? optimizer/geqo/.deps
? optimizer/path/.deps
? optimizer/plan/.deps
? optimizer/prep/.deps
? optimizer/util/.deps
? parser/.deps
? po/af.mo
? po/cs.mo
? po/hr.mo
? po/hu.mo
? po/it.mo
? po/ko.mo
? po/nb.mo
? po/nl.mo
? po/pl.mo
? po/ro.mo
? po/ru.mo
? po/sk.mo
? po/sl.mo
? po/sv.mo
? po/zh_CN.mo
? po/zh_TW.mo
? port/.deps
? postmaster/.deps
? regex/.deps
? replication/.deps
? replication/libpqwalreceiver/.deps
? rewrite/.deps
? snowball/.deps
? snowball/snowball_create.sql
? storage/buffer/.deps
? storage/file/.deps
? storage/freespace/.deps
? storage/ipc/.deps
? storage/large_object/.deps
? storage/lmgr/.deps
? storage/page/.deps
? storage/smgr/.deps
? tcop/.deps
? tsearch/.deps
? utils/.deps
? utils/probes.h
? utils/adt/.deps
? utils/cache/.deps
? utils/error/.deps
? utils/fmgr/.deps
? utils/hash/.deps
? utils/init/.deps
? utils/mb/.deps
? utils/mb/Unicode/BIG5.TXT
? utils/mb/Unicode/CP950.TXT
? utils/mb/conversion_procs/conversion_create.sql
? utils/mb/conversion_procs/ascii_and_mic/.deps
? utils/mb/conversion_procs/cyrillic_and_mic/.deps
? utils/mb/conversion_procs/euc2004_sjis2004/.deps
? utils/mb/conversion_procs/euc_cn_and_mic/.deps
? utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/.deps
? utils/mb/conversion_procs/euc_jp_and_sjis/.deps
? utils/mb/conversion_procs/euc_kr_and_mic/.deps
? utils/mb/conversion_procs/euc_tw_and_big5/.deps
? utils/mb/conversion_procs/latin2_and_win1250/.deps
? utils/mb/conversion_procs/latin_and_mic/.deps
? utils/mb/conversion_procs/utf8_and_ascii/.deps
? utils/mb/conversion_procs/utf8_and_big5/.deps
? utils/mb/conversion_procs/utf8_and_cyrillic/.deps
? utils/mb/conversion_procs/utf8_and_euc2004/.deps
? utils/mb/conversion_procs/utf8_and_euc_cn/.deps
? utils/mb/conversion_procs/utf8_and_euc_jis_2004/.deps
? utils/mb/conversion_procs/utf8_and_euc_jp/.deps
? utils/mb/conversion_procs/utf8_and_euc_kr/.deps
? utils/mb/conversion_procs/utf8_and_euc_tw/.deps
? utils/mb/conversion_procs/utf8_and_gb18030/.deps
? utils/mb/conversion_procs/utf8_and_gbk/.deps
? utils/mb/conversion_procs/utf8_and_iso8859/.deps
? utils/mb/conversion_procs/utf8_and_iso8859_1/.deps
? utils/mb/conversion_procs/utf8_and_johab/.deps
? utils/mb/conversion_procs/utf8_and_shift_jis_2004/.deps
? utils/mb/conversion_procs/utf8_and_sjis/.deps
? utils/mb/conversion_procs/utf8_and_sjis2004/.deps
? utils/mb/conversion_procs/utf8_and_uhc/.deps
? utils/mb/conversion_procs/utf8_and_win/.deps
? utils/misc/.deps
? utils/mmgr/.deps
? utils/resowner/.deps
? utils/sort/.deps
? utils/time/.deps
Index: libpq/be-secure.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/libpq/be-secure.c,v
retrieving revision 1.96
diff -c -r1.96 be-secure.c
*** libpq/be-secure.c	15 Jan 2010 09:19:02 -0000	1.96
--- libpq/be-secure.c	18 Feb 2010 09:42:30 -0000
***************
*** 256,262 ****
  			case SSL_ERROR_WANT_READ:
  			case SSL_ERROR_WANT_WRITE:
  				if (port->noblock)
! 					return 0;
  #ifdef WIN32
  				pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
  											(err == SSL_ERROR_WANT_READ) ?
--- 256,266 ----
  			case SSL_ERROR_WANT_READ:
  			case SSL_ERROR_WANT_WRITE:
  				if (port->noblock)
! 				{
! 					errno = EWOULDBLOCK;
! 					n = -1;
! 					break;
! 				}
  #ifdef WIN32
  				pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl),
  											(err == SSL_ERROR_WANT_READ) ?
Index: libpq/pqcomm.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/libpq/pqcomm.c,v
retrieving revision 1.203
diff -c -r1.203 pqcomm.c
*** libpq/pqcomm.c	16 Feb 2010 19:26:02 -0000	1.203
--- libpq/pqcomm.c	18 Feb 2010 09:42:30 -0000
***************
*** 822,828 ****
   *			if available
   *
   * The received byte is stored in *c. Returns 1 if a byte was read, 0 if
!  * if no data was available, or EOF.
   * --------------------------------
   */
  int
--- 822,828 ----
   *			if available
   *
   * The received byte is stored in *c. Returns 1 if a byte was read, 0 if
!  * if no data was available, or EOF if trouble.
   * --------------------------------
   */
  int
***************
*** 848,853 ****
--- 848,879 ----
  	PG_TRY();
  	{
  		r = secure_read(MyProcPort, c, 1);
+ 		if (r < 0)
+ 		{
+ 			/*
+ 			 * Ok if would block or interrupted (though it really shouldn't
+ 			 * happen with a non-blocking operation).
+ 			 */
+ 			if (errno == EAGAIN || EWOULDBLOCK || errno == EINTR)
+ 				r = 0;
+ 			else
+ 			{
+ 				/*
+ 				 * Careful: an ereport() that tries to write to the client would
+ 				 * cause recursion to here, leading to stack overflow and core
+ 				 * dump!  This message must go *only* to the postmaster log.
+ 				 */
+ 				ereport(COMMERROR,
+ 						(errcode_for_socket_access(),
+ 						 errmsg("could not receive data from client: %m")));
+ 				return EOF;
+ 			}
+ 		}
+ 		else if (r == 0)
+ 		{
+ 			/* EOF detected */
+ 			r = EOF;
+ 		}
  	}
  	PG_CATCH();
  	{
Index: replication/walsender.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/replication/walsender.c,v
retrieving revision 1.6
diff -c -r1.6 walsender.c
*** replication/walsender.c	17 Feb 2010 04:19:39 -0000	1.6
--- replication/walsender.c	18 Feb 2010 09:42:30 -0000
***************
*** 286,300 ****
  				break;
  			}
  
! 			/* 'X' means that the standby is closing the connection */
  			case 'X':
  				proc_exit(0);
  
- 			case EOF:
- 				ereport(ERROR,
- 						(errcode(ERRCODE_PROTOCOL_VIOLATION),
- 						 errmsg("unexpected EOF on standby connection")));
- 
  			default:
  				ereport(FATAL,
  						(errcode(ERRCODE_PROTOCOL_VIOLATION),
--- 286,304 ----
  				break;
  			}
  
! 			/*
! 			 * 'X' means that the standby is closing the connection. EOF
! 			 * means unexpected loss of standby connection. Either way,
! 			 * perform normal shutdown.
! 			 */
! 			case EOF:
!  				ereport(COMMERROR,
!  						(errcode(ERRCODE_PROTOCOL_VIOLATION),
!  						 errmsg("unexpected EOF on standby connection")));
! 				/* fall through */
  			case 'X':
  				proc_exit(0);
  
  			default:
  				ereport(FATAL,
  						(errcode(ERRCODE_PROTOCOL_VIOLATION),
***************
*** 315,350 ****
  	r = pq_getbyte_if_available(&firstchar);
  	if (r < 0)
  	{
! 		/* no data available */
! 		if (errno == EAGAIN || errno == EWOULDBLOCK)
! 			return;
! 
! 		/*
! 		 * Ok if interrupted, though it shouldn't really happen with
! 		 * a non-blocking operation.
! 		 */
! 		if (errno == EINTR)
! 			return;
! 
  		ereport(COMMERROR,
! 				(errcode_for_socket_access(),
! 				 errmsg("could not receive data from client: %m")));
  	}
  	if (r == 0)
  	{
! 		/* standby disconnected unexpectedly */
! 		ereport(ERROR,
! 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
! 				 errmsg("unexpected EOF on standby connection")));
  	}
  
  	/* Handle the very limited subset of commands expected in this phase */
  	switch (firstchar)
  	{
  		/*
! 		 * 'X' means that the standby is closing down the socket. EOF means
! 		 * unexpected loss of standby connection. Either way, perform normal
! 		 * shutdown.
  		 */
  		case 'X':
  			proc_exit(0);
--- 319,341 ----
  	r = pq_getbyte_if_available(&firstchar);
  	if (r < 0)
  	{
! 		/* unexpected error */
  		ereport(COMMERROR,
! 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
! 				 errmsg("unexpected EOF on standby connection")));
! 		proc_exit(0);
  	}
  	if (r == 0)
  	{
! 		/* no data available without blocking */
! 		return;
  	}
  
  	/* Handle the very limited subset of commands expected in this phase */
  	switch (firstchar)
  	{
  		/*
! 		 * 'X' means that the standby is closing down the socket.
  		 */
  		case 'X':
  			proc_exit(0);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to