On Wed, 2008-10-22 at 18:53 -0400, Tom Lane wrote:
> Simon Riggs <[EMAIL PROTECTED]> writes:
> > My interest was really in maintaining ultra-correctness, while removing
> > the need to WAL log subcommits for Hot Standby. I think I achieved that,
> > almost, but if you see further optimizations that is good too.
> 
> I'm not focusing on performance here --- I'm focusing on whether I trust
> the patch at all.  

> I dislike blowing a hole that size in the sanity
> checks in TransactionIdSetStatus.  I note that the hole still isn't
> big enough, since presumably you'd have to allow ABORTED=>SUB_COMMITTED
> too.  

No, that is never required. We only set subcommitted state for a commit.

> That means that out of the four state transitions that are
> disallowed by the original coding of that Assert, you are now having to
> consider two as legal.  I don't like that, and I like even less that
> it's not even trying to determine whether this is a replay-driven
> change.

Presumably you would like to see an additional parameter to allow that
test to be more strictly determined? 

Bug fix v2 patch enclosed, mostly API changes.

-- 
 Simon Riggs           www.2ndQuadrant.com
 PostgreSQL Training, Services and Support
Index: src/backend/access/transam/clog.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/clog.c,v
retrieving revision 1.48
diff -c -r1.48 clog.c
*** src/backend/access/transam/clog.c	20 Oct 2008 19:18:18 -0000	1.48
--- src/backend/access/transam/clog.c	23 Oct 2008 03:10:54 -0000
***************
*** 82,92 ****
  static void WriteTruncateXlogRec(int pageno);
  static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  					   	   TransactionId *subxids, XidStatus status,
! 						   XLogRecPtr lsn, int pageno);
  static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
! 						  XLogRecPtr lsn, int slotno);
  static void set_status_by_pages(int nsubxids, TransactionId *subxids,
! 					XidStatus status, XLogRecPtr lsn);
  
  
  /*
--- 82,92 ----
  static void WriteTruncateXlogRec(int pageno);
  static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  					   	   TransactionId *subxids, XidStatus status,
! 						   XLogRecPtr lsn, int pageno, bool isRedo);
  static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
! 						  XLogRecPtr lsn, int slotno, bool isRedo);
  static void set_status_by_pages(int nsubxids, TransactionId *subxids,
! 					XidStatus status, XLogRecPtr lsn, bool isRedo);
  
  
  /*
***************
*** 142,148 ****
   */
  void
  TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! 				TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
  {
  	int		pageno = TransactionIdToPage(xid); /* get page of parent */
  	int 	i;
--- 142,148 ----
   */
  void
  TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! 			TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo)
  {
  	int		pageno = TransactionIdToPage(xid); /* get page of parent */
  	int 	i;
***************
*** 168,174 ****
  		 * Set the parent and all subtransactions in a single call
  		 */
  		TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
! 								   pageno);
  	}
  	else
  	{
--- 168,174 ----
  		 * Set the parent and all subtransactions in a single call
  		 */
  		TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
! 								   pageno, isRedo);
  	}
  	else
  	{
***************
*** 187,193 ****
  		if (status == TRANSACTION_STATUS_COMMITTED)
  			set_status_by_pages(nsubxids - nsubxids_on_first_page,
  								subxids + nsubxids_on_first_page,
! 								TRANSACTION_STATUS_SUB_COMMITTED, lsn);
  
  		/*
  		 * Now set the parent and subtransactions on same page as the parent,
--- 187,193 ----
  		if (status == TRANSACTION_STATUS_COMMITTED)
  			set_status_by_pages(nsubxids - nsubxids_on_first_page,
  								subxids + nsubxids_on_first_page,
! 								TRANSACTION_STATUS_SUB_COMMITTED, lsn, isRedo);
  
  		/*
  		 * Now set the parent and subtransactions on same page as the parent,
***************
*** 195,201 ****
  		 */
  		pageno = TransactionIdToPage(xid);
  		TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
! 								   lsn, pageno);
  
  		/*
  		 * Now work through the rest of the subxids one clog page at a time,
--- 195,201 ----
  		 */
  		pageno = TransactionIdToPage(xid);
  		TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
! 								   lsn, pageno, isRedo);
  
  		/*
  		 * Now work through the rest of the subxids one clog page at a time,
***************
*** 203,209 ****
  		 */
  		set_status_by_pages(nsubxids - nsubxids_on_first_page,
  							subxids + nsubxids_on_first_page,
! 							status, lsn);
  	}
  }
  
--- 203,209 ----
  		 */
  		set_status_by_pages(nsubxids - nsubxids_on_first_page,
  							subxids + nsubxids_on_first_page,
! 							status, lsn, isRedo);
  	}
  }
  
***************
*** 215,221 ****
   */
  static void
  set_status_by_pages(int nsubxids, TransactionId *subxids,
! 					XidStatus status, XLogRecPtr lsn)
  {
  	int		pageno = TransactionIdToPage(subxids[0]);
  	int		offset = 0;
--- 215,221 ----
   */
  static void
  set_status_by_pages(int nsubxids, TransactionId *subxids,
! 					XidStatus status, XLogRecPtr lsn, bool isRedo)
  {
  	int		pageno = TransactionIdToPage(subxids[0]);
  	int		offset = 0;
***************
*** 233,239 ****
  
  		TransactionIdSetPageStatus(InvalidTransactionId,
  								   num_on_page, subxids + offset,
! 								   status, lsn, pageno);
  		offset = i;
  		pageno = TransactionIdToPage(subxids[offset]);
  	}
--- 233,239 ----
  
  		TransactionIdSetPageStatus(InvalidTransactionId,
  								   num_on_page, subxids + offset,
! 								   status, lsn, pageno, isRedo);
  		offset = i;
  		pageno = TransactionIdToPage(subxids[offset]);
  	}
***************
*** 248,254 ****
  static void
  TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
! 						   XLogRecPtr lsn, int pageno)
  {
  	int			slotno;
  	int 		i;
--- 248,254 ----
  static void
  TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
! 						   XLogRecPtr lsn, int pageno, bool isRedo)
  {
  	int			slotno;
  	int 		i;
***************
*** 289,307 ****
  				Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
  				TransactionIdSetStatusBit(subxids[i],
  										  TRANSACTION_STATUS_SUB_COMMITTED,
! 										  lsn, slotno);
  			}
  		}
  
  		/* ... then the main transaction */
! 		TransactionIdSetStatusBit(xid, status, lsn, slotno);
  	}
  
  	/* Set the subtransactions */
  	for (i = 0; i < nsubxids; i++)
  	{
  		Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 		TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
  	}
  
  	ClogCtl->shared->page_dirty[slotno] = true;
--- 289,307 ----
  				Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
  				TransactionIdSetStatusBit(subxids[i],
  										  TRANSACTION_STATUS_SUB_COMMITTED,
! 										  lsn, slotno, isRedo);
  			}
  		}
  
  		/* ... then the main transaction */
! 		TransactionIdSetStatusBit(xid, status, lsn, slotno, isRedo);
  	}
  
  	/* Set the subtransactions */
  	for (i = 0; i < nsubxids; i++)
  	{
  		Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 		TransactionIdSetStatusBit(subxids[i], status, lsn, slotno, isRedo);
  	}
  
  	ClogCtl->shared->page_dirty[slotno] = true;
***************
*** 313,321 ****
   * Sets the commit status of a single transaction.
   *
   * Must be called with CLogControlLock held
   */
  static void
! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
  {
  	int			byteno = TransactionIdToByte(xid);
  	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
--- 313,326 ----
   * Sets the commit status of a single transaction.
   *
   * Must be called with CLogControlLock held
+  *
+  * We pass the isRedo flag all the way down to here to ensure that the
+  * setting of transaction status is valid for the original call and for
+  * recovery.
   */
  static void
! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, 
! 							XLogRecPtr lsn, int slotno, bool isRedo)
  {
  	int			byteno = TransactionIdToByte(xid);
  	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
***************
*** 327,333 ****
  	/* Current state should be 0, subcommitted or target state */
  	Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 ||
  		   ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED ||
! 		   ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status);
  
  	/* note this assumes exclusive access to the clog page */
  	byteval = *byteptr;
--- 332,340 ----
  	/* Current state should be 0, subcommitted or target state */
  	Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 ||
  		   ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED ||
! 		   ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status ||
! 		   (((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_COMMITTED &&
! 			status == TRANSACTION_STATUS_SUB_COMMITTED && isRedo));
  
  	/* note this assumes exclusive access to the clog page */
  	byteval = *byteptr;
Index: src/backend/access/transam/transam.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/transam.c,v
retrieving revision 1.78
diff -c -r1.78 transam.c
*** src/backend/access/transam/transam.c	20 Oct 2008 20:38:24 -0000	1.78
--- src/backend/access/transam/transam.c	23 Oct 2008 03:11:24 -0000
***************
*** 261,283 ****
   * are correctly marked subcommit first.
   */
  void
! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
  							   TRANSACTION_STATUS_COMMITTED,
! 							   InvalidXLogRecPtr);
  }
  
  /*
   * TransactionIdAsyncCommitTree
   *		Same as above, but for async commits.  The commit record LSN is needed.
   */
  void
  TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids,
  							 XLogRecPtr lsn)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
! 							   TRANSACTION_STATUS_COMMITTED, lsn);
  }
  
  /*
--- 261,286 ----
   * are correctly marked subcommit first.
   */
  void
! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
  							   TRANSACTION_STATUS_COMMITTED,
! 							   InvalidXLogRecPtr, isRedo);
  }
  
  /*
   * TransactionIdAsyncCommitTree
   *		Same as above, but for async commits.  The commit record LSN is needed.
+  *
+  * 		Never called during recovery, so doesn't have an isRedo parameter.
   */
  void
  TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids,
  							 XLogRecPtr lsn)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
! 							   TRANSACTION_STATUS_COMMITTED, 
! 							   lsn, false);
  }
  
  /*
***************
*** 291,300 ****
   * will consider all the xacts as not-yet-committed anyway.
   */
  void
! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
! 							   TRANSACTION_STATUS_ABORTED, InvalidXLogRecPtr);
  }
  
  /*
--- 294,304 ----
   * will consider all the xacts as not-yet-committed anyway.
   */
  void
! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo)
  {
  	TransactionIdSetTreeStatus(xid, nxids, xids,
! 							   TRANSACTION_STATUS_ABORTED, 
! 							   InvalidXLogRecPtr, isRedo);
  }
  
  /*
Index: src/backend/access/transam/twophase.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/twophase.c,v
retrieving revision 1.46
diff -c -r1.46 twophase.c
*** src/backend/access/transam/twophase.c	20 Oct 2008 19:18:18 -0000	1.46
--- src/backend/access/transam/twophase.c	23 Oct 2008 03:11:49 -0000
***************
*** 1745,1751 ****
  	XLogFlush(recptr);
  
  	/* Mark the transaction committed in pg_clog */
! 	TransactionIdCommitTree(xid, nchildren, children);
  
  	/* Checkpoint can proceed now */
  	MyProc->inCommit = false;
--- 1745,1751 ----
  	XLogFlush(recptr);
  
  	/* Mark the transaction committed in pg_clog */
! 	TransactionIdCommitTree(xid, nchildren, children, false);
  
  	/* Checkpoint can proceed now */
  	MyProc->inCommit = false;
***************
*** 1820,1826 ****
  	 * Mark the transaction aborted in clog.  This is not absolutely necessary
  	 * but we may as well do it while we are here.
  	 */
! 	TransactionIdAbortTree(xid, nchildren, children);
  
  	END_CRIT_SECTION();
  }
--- 1820,1826 ----
  	 * Mark the transaction aborted in clog.  This is not absolutely necessary
  	 * but we may as well do it while we are here.
  	 */
! 	TransactionIdAbortTree(xid, nchildren, children, false);
  
  	END_CRIT_SECTION();
  }
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.266
diff -c -r1.266 xact.c
*** src/backend/access/transam/xact.c	20 Oct 2008 19:18:18 -0000	1.266
--- src/backend/access/transam/xact.c	23 Oct 2008 03:02:04 -0000
***************
*** 951,957 ****
  		 * Now we may update the CLOG, if we wrote a COMMIT record above
  		 */
  		if (markXidCommitted)
! 			TransactionIdCommitTree(xid, nchildren, children);
  	}
  	else
  	{
--- 951,957 ----
  		 * Now we may update the CLOG, if we wrote a COMMIT record above
  		 */
  		if (markXidCommitted)
! 			TransactionIdCommitTree(xid, nchildren, children, false);
  	}
  	else
  	{
***************
*** 1250,1256 ****
  	 * having flushed the ABORT record to disk, because in event of a crash
  	 * we'd be assumed to have aborted anyway.
  	 */
! 	TransactionIdAbortTree(xid, nchildren, children);
  
  	END_CRIT_SECTION();
  
--- 1250,1256 ----
  	 * having flushed the ABORT record to disk, because in event of a crash
  	 * we'd be assumed to have aborted anyway.
  	 */
! 	TransactionIdAbortTree(xid, nchildren, children, false);
  
  	END_CRIT_SECTION();
  
***************
*** 4219,4225 ****
  
  	/* Mark the transaction committed in pg_clog */
  	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! 	TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids);
  
  	/* Make sure nextXid is beyond any XID mentioned in the record */
  	max_xid = xid;
--- 4219,4225 ----
  
  	/* Mark the transaction committed in pg_clog */
  	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! 	TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids, true);
  
  	/* Make sure nextXid is beyond any XID mentioned in the record */
  	max_xid = xid;
***************
*** 4257,4263 ****
  
  	/* Mark the transaction aborted in pg_clog */
  	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! 	TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
  
  	/* Make sure nextXid is beyond any XID mentioned in the record */
  	max_xid = xid;
--- 4257,4263 ----
  
  	/* Mark the transaction aborted in pg_clog */
  	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! 	TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids, true);
  
  	/* Make sure nextXid is beyond any XID mentioned in the record */
  	max_xid = xid;
Index: src/include/access/clog.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/clog.h,v
retrieving revision 1.22
diff -c -r1.22 clog.h
*** src/include/access/clog.h	20 Oct 2008 19:18:18 -0000	1.22
--- src/include/access/clog.h	23 Oct 2008 03:06:48 -0000
***************
*** 33,39 ****
  
  
  extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, 
! 					TransactionId *subxids, XidStatus status, XLogRecPtr lsn);
  extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
  
  extern Size CLOGShmemSize(void);
--- 33,39 ----
  
  
  extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, 
! 				TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo);
  extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
  
  extern Size CLOGShmemSize(void);
Index: src/include/access/transam.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/transam.h,v
retrieving revision 1.66
diff -c -r1.66 transam.h
*** src/include/access/transam.h	20 Oct 2008 19:18:18 -0000	1.66
--- src/include/access/transam.h	23 Oct 2008 03:06:27 -0000
***************
*** 140,148 ****
  extern bool TransactionIdDidAbort(TransactionId transactionId);
  extern bool TransactionIdIsKnownCompleted(TransactionId transactionId);
  extern void TransactionIdAbort(TransactionId transactionId);
! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids);
  extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids);
  extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
  extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
  extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
--- 140,148 ----
  extern bool TransactionIdDidAbort(TransactionId transactionId);
  extern bool TransactionIdIsKnownCompleted(TransactionId transactionId);
  extern void TransactionIdAbort(TransactionId transactionId);
! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo);
  extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo);
  extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
  extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
  extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to