Petr Jelinek wrote:
> On 2015-09-02 16:14, Fujii Masao wrote:
> >On Wed, Aug 5, 2015 at 2:16 AM, Robert Haas <robertmh...@gmail.com> wrote:
> >>On Mon, Aug 3, 2015 at 10:31 AM, Fujii Masao <masao.fu...@gmail.com> wrote:
> >>>track_commit_timestamp tracks COMMIT PREPARED as expected in standby 
> >>>server,
> >>>but not in master server. Is this intentional? It should track COMMIT 
> >>>PREPARED
> >>>even in master? Otherwise, we cannot use commit_timestamp feature to check
> >>>the replication lag properly while we use 2PC.
> >>
> >>That sounds like it must be a bug.  I think you should add it to the
> >>open items list.
> 
> Attached fixes this. It includes advancement of replication origin as well.
> I didn't feel like doing refactor of commit code this late in 9.5 cycle
> though, so I went with the code duplication + note in xact.c.

Thanks, your proposed behavior looks reasonable.  I didn't like the
existing coding nor the fact that with your patch we'd have two copies
of it, so I changed a bit instead to be more understandable.  Hopefully I
didn't break too many things.  This patch includes the patch for the
other commitTS open item too.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
*** a/src/backend/access/transam/commit_ts.c
--- b/src/backend/access/transam/commit_ts.c
***************
*** 122,150 **** static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
   * subtrans implementation changes in the future, we might want to revisit the
   * decision of storing timestamp info for each subxid.
   *
!  * The do_xlog parameter tells us whether to include an XLog record of this
!  * or not.  Normal path through RecordTransactionCommit() will be related
!  * to a transaction commit XLog record, and so should pass "false" here.
!  * Other callers probably want to pass true, so that the given values persist
!  * in case of crashes.
   */
  void
  TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
  							   TransactionId *subxids, TimestampTz timestamp,
! 							   RepOriginId nodeid, bool do_xlog)
  {
  	int			i;
  	TransactionId headxid;
  	TransactionId newestXact;
  
! 	if (!track_commit_timestamp)
  		return;
  
  	/*
  	 * Comply with the WAL-before-data rule: if caller specified it wants this
  	 * value to be recorded in WAL, do so before touching the data.
  	 */
! 	if (do_xlog)
  		WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
  
  	/*
--- 122,160 ----
   * subtrans implementation changes in the future, we might want to revisit the
   * decision of storing timestamp info for each subxid.
   *
!  * The replaying_xlog parameter indicates whether the module should execute
!  * its write even if the feature is nominally disabled, because we're replaying
!  * a record generated from a master where the feature is enabled.
!  *
!  * The write_xlog parameter tells us whether to include an XLog record of this
!  * or not.  Normally, this is called from transaction commit routines (both
!  * normal and prepared) and the information will be stored in the transaction
!  * commit XLog record, and so they should pass "false" for this.  The XLog redo
!  * code should use "false" here as well.  Other callers probably want to pass
!  * true, so that the given values persist in case of crashes.
   */
  void
  TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
  							   TransactionId *subxids, TimestampTz timestamp,
! 							   RepOriginId nodeid,
! 							   bool replaying_xlog, bool write_xlog)
  {
  	int			i;
  	TransactionId headxid;
  	TransactionId newestXact;
  
! 	/* We'd better not try to write xlog during replay */
! 	Assert(!(write_xlog && replaying_xlog));
! 
! 	/* No-op if feature not enabled, unless replaying WAL */
! 	if (!track_commit_timestamp && !replaying_xlog)
  		return;
  
  	/*
  	 * Comply with the WAL-before-data rule: if caller specified it wants this
  	 * value to be recorded in WAL, do so before touching the data.
  	 */
! 	if (write_xlog)
  		WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
  
  	/*
***************
*** 906,912 **** commit_ts_redo(XLogReaderState *record)
  			subxids = NULL;
  
  		TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
! 									 setts->timestamp, setts->nodeid, false);
  		if (subxids)
  			pfree(subxids);
  	}
--- 916,923 ----
  			subxids = NULL;
  
  		TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
! 									 setts->timestamp, setts->nodeid, false,
! 									 true);
  		if (subxids)
  			pfree(subxids);
  	}
*** a/src/backend/access/transam/twophase.c
--- b/src/backend/access/transam/twophase.c
***************
*** 41,46 ****
--- 41,47 ----
  #include <time.h>
  #include <unistd.h>
  
+ #include "access/commit_ts.h"
  #include "access/htup_details.h"
  #include "access/subtrans.h"
  #include "access/transam.h"
***************
*** 56,63 ****
  #include "miscadmin.h"
  #include "pg_trace.h"
  #include "pgstat.h"
! #include "replication/walsender.h"
  #include "replication/syncrep.h"
  #include "storage/fd.h"
  #include "storage/ipc.h"
  #include "storage/predicate.h"
--- 57,65 ----
  #include "miscadmin.h"
  #include "pg_trace.h"
  #include "pgstat.h"
! #include "replication/origin.h"
  #include "replication/syncrep.h"
+ #include "replication/walsender.h"
  #include "storage/fd.h"
  #include "storage/ipc.h"
  #include "storage/predicate.h"
***************
*** 2070,2077 **** RecoverPreparedTransactions(void)
  /*
   *	RecordTransactionCommitPrepared
   *
!  * This is basically the same as RecordTransactionCommit: in particular,
!  * we must set the delayChkpt flag to avoid a race condition.
   *
   * We know the transaction made at least one XLOG entry (its PREPARE),
   * so it is never possible to optimize out the commit record.
--- 2072,2080 ----
  /*
   *	RecordTransactionCommitPrepared
   *
!  * This is basically the same as RecordTransactionCommit (q.v. if you change
!  * this function): in particular, we must set the delayChkpt flag to avoid a
!  * race condition.
   *
   * We know the transaction made at least one XLOG entry (its PREPARE),
   * so it is never possible to optimize out the commit record.
***************
*** 2087,2092 **** RecordTransactionCommitPrepared(TransactionId xid,
--- 2090,2101 ----
  								bool initfileinval)
  {
  	XLogRecPtr	recptr;
+ 	TimestampTz	committs = GetCurrentTimestamp();
+ 	bool		replorigin;
+ 
+ 	/* are we using the replication origins feature? */
+ 	replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+ 				  replorigin_session_origin != DoNotReplicateId);
  
  	START_CRIT_SECTION();
  
***************
*** 2094,2105 **** RecordTransactionCommitPrepared(TransactionId xid,
  	MyPgXact->delayChkpt = true;
  
  	/* Emit the XLOG commit record */
! 	recptr = XactLogCommitRecord(GetCurrentTimestamp(),
  								 nchildren, children, nrels, rels,
  								 ninvalmsgs, invalmsgs,
  								 initfileinval, false,
  								 xid);
  
  	/*
  	 * We don't currently try to sleep before flush here ... nor is there any
  	 * support for async commit of a prepared xact (the very idea is probably
--- 2103,2135 ----
  	MyPgXact->delayChkpt = true;
  
  	/* Emit the XLOG commit record */
! 	recptr = XactLogCommitRecord(committs,
  								 nchildren, children, nrels, rels,
  								 ninvalmsgs, invalmsgs,
  								 initfileinval, false,
  								 xid);
  
+ 
+ 	if (replorigin)
+ 		/* Tell replorigin that this session is moving forward */
+ 		replorigin_session_advance(replorigin_session_origin_lsn,
+ 								   XactLastRecEnd);
+ 
+ 	/*
+ 	 * Record commit timestamp.  The value comes from plain commit timestamp if
+ 	 * replorigin is not enabled, or replorigin already set a value for us in
+ 	 * replorigin_session_origin_timestamp otherwise.
+ 	 *
+ 	 * We don't need to WAL-log anything here, as the commit record written
+ 	 * above already contains the data.
+ 	 */
+ 	if (!replorigin || replorigin_session_origin_timestamp == 0)
+ 		replorigin_session_origin_timestamp = committs;
+ 
+ 	TransactionTreeSetCommitTsData(xid, nchildren, children,
+ 								   replorigin_session_origin_timestamp,
+ 								   replorigin_session_origin, false, false);
+ 
  	/*
  	 * We don't currently try to sleep before flush here ... nor is there any
  	 * support for async commit of a prepared xact (the very idea is probably
*** a/src/backend/access/transam/xact.c
--- b/src/backend/access/transam/xact.c
***************
*** 42,50 ****
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "replication/logical.h"
- #include "replication/walsender.h"
- #include "replication/syncrep.h"
  #include "replication/origin.h"
  #include "storage/fd.h"
  #include "storage/lmgr.h"
  #include "storage/predicate.h"
--- 42,50 ----
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "replication/logical.h"
  #include "replication/origin.h"
+ #include "replication/syncrep.h"
+ #include "replication/walsender.h"
  #include "storage/fd.h"
  #include "storage/lmgr.h"
  #include "storage/predicate.h"
***************
*** 1119,1124 **** AtSubStart_ResourceOwner(void)
--- 1119,1126 ----
   *
   * Returns latest XID among xact and its children, or InvalidTransactionId
   * if the xact has no XID.  (We compute that here just because it's easier.)
+  *
+  * If you change this function, see RecordTransactionCommitPrepared also.
   */
  static TransactionId
  RecordTransactionCommit(void)
***************
*** 1172,1177 **** RecordTransactionCommit(void)
--- 1174,1188 ----
  	}
  	else
  	{
+ 		bool	replorigin;
+ 
+ 		/*
+ 		 * are we using the replication origins feature?  Or, in other words,
+ 		 * are we replaying remote actions?
+ 		 */
+ 		replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+ 					  replorigin_session_origin != DoNotReplicateId);
+ 
  		/*
  		 * Begin commit critical section and insert the commit XLOG record.
  		 */
***************
*** 1206,1231 **** RecordTransactionCommit(void)
  							RelcacheInitFileInval, forceSyncCommit,
  							InvalidTransactionId /* plain commit */ );
  
! 		/*
! 		 * Record plain commit ts if not replaying remote actions, or if no
! 		 * timestamp is configured.
! 		 */
! 		if (replorigin_session_origin == InvalidRepOriginId ||
! 			replorigin_session_origin == DoNotReplicateId ||
! 			replorigin_session_origin_timestamp == 0)
! 			replorigin_session_origin_timestamp = xactStopTimestamp;
! 		else
  			replorigin_session_advance(replorigin_session_origin_lsn,
  									   XactLastRecEnd);
  
  		/*
! 		 * We don't need to WAL log origin or timestamp here, the commit
! 		 * record contains all the necessary information and will redo the SET
! 		 * action during replay.
  		 */
  		TransactionTreeSetCommitTsData(xid, nchildren, children,
  									   replorigin_session_origin_timestamp,
! 									   replorigin_session_origin, false);
  	}
  
  	/*
--- 1217,1243 ----
  							RelcacheInitFileInval, forceSyncCommit,
  							InvalidTransactionId /* plain commit */ );
  
! 		if (replorigin)
! 			/* Tell replorigin that this session is moving forward */
  			replorigin_session_advance(replorigin_session_origin_lsn,
  									   XactLastRecEnd);
  
  		/*
! 		 * Record commit timestamp.  The value comes from plain commit
! 		 * timestamp if replorigin is not enabled, or replorigin already set a
! 		 * value for us in replorigin_session_origin_timestamp otherwise.
! 		 *
! 		 * We don't need to WAL-log anything here, as the commit record written
! 		 * above already contains the data.
  		 */
+ 
+ 		if (!replorigin || replorigin_session_origin_timestamp == 0)
+ 			replorigin_session_origin_timestamp = xactStopTimestamp;
+ 
  		TransactionTreeSetCommitTsData(xid, nchildren, children,
  									   replorigin_session_origin_timestamp,
! 									   replorigin_session_origin,
! 									   false, false);
  	}
  
  	/*
***************
*** 5321,5327 **** xact_redo_commit(xl_xact_parsed_commit *parsed,
  	/* Set the transaction commit timestamp and metadata */
  	TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
  								   commit_time, origin_id,
! 								   false);
  
  	if (standbyState == STANDBY_DISABLED)
  	{
--- 5333,5339 ----
  	/* Set the transaction commit timestamp and metadata */
  	TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
  								   commit_time, origin_id,
! 								   false, true);
  
  	if (standbyState == STANDBY_DISABLED)
  	{
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 5826,5844 **** do { \
  						minValue))); \
  } while(0)
  
- #define RecoveryRequiresBoolParameter(param_name, currValue, masterValue) \
- do { \
- 	bool _currValue = (currValue); \
- 	bool _masterValue = (masterValue); \
- 	if (_currValue != _masterValue) \
- 		ereport(ERROR, \
- 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
- 				 errmsg("hot standby is not possible because it requires \"%s\" to be same on master and standby (master has \"%s\", standby has \"%s\")", \
- 						param_name, \
- 						_masterValue ? "true" : "false", \
- 						_currValue ? "true" : "false"))); \
- } while(0)
- 
  /*
   * Check to see if required parameters are set high enough on this server
   * for various aspects of recovery operation.
--- 5826,5831 ----
*** a/src/include/access/commit_ts.h
--- b/src/include/access/commit_ts.h
***************
*** 24,30 **** extern bool check_track_commit_timestamp(bool *newval, void **extra,
  
  extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
  							   TransactionId *subxids, TimestampTz timestamp,
! 							   RepOriginId nodeid, bool do_xlog);
  extern bool TransactionIdGetCommitTsData(TransactionId xid,
  							 TimestampTz *ts, RepOriginId *nodeid);
  extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
--- 24,31 ----
  
  extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
  							   TransactionId *subxids, TimestampTz timestamp,
! 							   RepOriginId nodeid,
! 							   bool replaying_xlog, bool write_xlog);
  extern bool TransactionIdGetCommitTsData(TransactionId xid,
  							 TimestampTz *ts, RepOriginId *nodeid);
  extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to