On Wed, 2008-10-22 at 18:53 -0400, Tom Lane wrote: > Simon Riggs <[EMAIL PROTECTED]> writes: > > My interest was really in maintaining ultra-correctness, while removing > > the need to WAL log subcommits for Hot Standby. I think I achieved that, > > almost, but if you see further optimizations that is good too. > > I'm not focusing on performance here --- I'm focusing on whether I trust > the patch at all.
> I dislike blowing a hole that size in the sanity > checks in TransactionIdSetStatus. I note that the hole still isn't > big enough, since presumably you'd have to allow ABORTED=>SUB_COMMITTED > too. No, that is never required. We only set subcommitted state for a commit. > That means that out of the four state transitions that are > disallowed by the original coding of that Assert, you are now having to > consider two as legal. I don't like that, and I like even less that > it's not even trying to determine whether this is a replay-driven > change. Presumably you would like to see an additional parameter to allow that test to be more strictly determined? Bug fix v2 patch enclosed, mostly API changes. -- Simon Riggs www.2ndQuadrant.com PostgreSQL Training, Services and Support
Index: src/backend/access/transam/clog.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/clog.c,v retrieving revision 1.48 diff -c -r1.48 clog.c *** src/backend/access/transam/clog.c 20 Oct 2008 19:18:18 -0000 1.48 --- src/backend/access/transam/clog.c 23 Oct 2008 03:10:54 -0000 *************** *** 82,92 **** static void WriteTruncateXlogRec(int pageno); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, ! XLogRecPtr lsn, int slotno); static void set_status_by_pages(int nsubxids, TransactionId *subxids, ! XidStatus status, XLogRecPtr lsn); /* --- 82,92 ---- static void WriteTruncateXlogRec(int pageno); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno, bool isRedo); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, ! XLogRecPtr lsn, int slotno, bool isRedo); static void set_status_by_pages(int nsubxids, TransactionId *subxids, ! XidStatus status, XLogRecPtr lsn, bool isRedo); /* *************** *** 142,148 **** */ void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, ! TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { int pageno = TransactionIdToPage(xid); /* get page of parent */ int i; --- 142,148 ---- */ void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, ! TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo) { int pageno = TransactionIdToPage(xid); /* get page of parent */ int i; *************** *** 168,174 **** * Set the parent and all subtransactions in a single call */ TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn, ! pageno); } else { --- 168,174 ---- * Set the parent and all subtransactions in a single call */ TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn, ! pageno, isRedo); } else { *************** *** 187,193 **** if (status == TRANSACTION_STATUS_COMMITTED) set_status_by_pages(nsubxids - nsubxids_on_first_page, subxids + nsubxids_on_first_page, ! TRANSACTION_STATUS_SUB_COMMITTED, lsn); /* * Now set the parent and subtransactions on same page as the parent, --- 187,193 ---- if (status == TRANSACTION_STATUS_COMMITTED) set_status_by_pages(nsubxids - nsubxids_on_first_page, subxids + nsubxids_on_first_page, ! TRANSACTION_STATUS_SUB_COMMITTED, lsn, isRedo); /* * Now set the parent and subtransactions on same page as the parent, *************** *** 195,201 **** */ pageno = TransactionIdToPage(xid); TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status, ! lsn, pageno); /* * Now work through the rest of the subxids one clog page at a time, --- 195,201 ---- */ pageno = TransactionIdToPage(xid); TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status, ! lsn, pageno, isRedo); /* * Now work through the rest of the subxids one clog page at a time, *************** *** 203,209 **** */ set_status_by_pages(nsubxids - nsubxids_on_first_page, subxids + nsubxids_on_first_page, ! status, lsn); } } --- 203,209 ---- */ set_status_by_pages(nsubxids - nsubxids_on_first_page, subxids + nsubxids_on_first_page, ! status, lsn, isRedo); } } *************** *** 215,221 **** */ static void set_status_by_pages(int nsubxids, TransactionId *subxids, ! XidStatus status, XLogRecPtr lsn) { int pageno = TransactionIdToPage(subxids[0]); int offset = 0; --- 215,221 ---- */ static void set_status_by_pages(int nsubxids, TransactionId *subxids, ! XidStatus status, XLogRecPtr lsn, bool isRedo) { int pageno = TransactionIdToPage(subxids[0]); int offset = 0; *************** *** 233,239 **** TransactionIdSetPageStatus(InvalidTransactionId, num_on_page, subxids + offset, ! status, lsn, pageno); offset = i; pageno = TransactionIdToPage(subxids[offset]); } --- 233,239 ---- TransactionIdSetPageStatus(InvalidTransactionId, num_on_page, subxids + offset, ! status, lsn, pageno, isRedo); offset = i; pageno = TransactionIdToPage(subxids[offset]); } *************** *** 248,254 **** static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno) { int slotno; int i; --- 248,254 ---- static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno, bool isRedo) { int slotno; int i; *************** *** 289,307 **** Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); TransactionIdSetStatusBit(subxids[i], TRANSACTION_STATUS_SUB_COMMITTED, ! lsn, slotno); } } /* ... then the main transaction */ ! TransactionIdSetStatusBit(xid, status, lsn, slotno); } /* Set the subtransactions */ for (i = 0; i < nsubxids; i++) { Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); ! TransactionIdSetStatusBit(subxids[i], status, lsn, slotno); } ClogCtl->shared->page_dirty[slotno] = true; --- 289,307 ---- Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); TransactionIdSetStatusBit(subxids[i], TRANSACTION_STATUS_SUB_COMMITTED, ! lsn, slotno, isRedo); } } /* ... then the main transaction */ ! TransactionIdSetStatusBit(xid, status, lsn, slotno, isRedo); } /* Set the subtransactions */ for (i = 0; i < nsubxids; i++) { Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i])); ! TransactionIdSetStatusBit(subxids[i], status, lsn, slotno, isRedo); } ClogCtl->shared->page_dirty[slotno] = true; *************** *** 313,321 **** * Sets the commit status of a single transaction. * * Must be called with CLogControlLock held */ static void ! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno) { int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; --- 313,326 ---- * Sets the commit status of a single transaction. * * Must be called with CLogControlLock held + * + * We pass the isRedo flag all the way down to here to ensure that the + * setting of transaction status is valid for the original call and for + * recovery. */ static void ! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, ! XLogRecPtr lsn, int slotno, bool isRedo) { int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; *************** *** 327,333 **** /* Current state should be 0, subcommitted or target state */ Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 || ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED || ! ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status); /* note this assumes exclusive access to the clog page */ byteval = *byteptr; --- 332,340 ---- /* Current state should be 0, subcommitted or target state */ Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 || ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED || ! ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status || ! (((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_COMMITTED && ! status == TRANSACTION_STATUS_SUB_COMMITTED && isRedo)); /* note this assumes exclusive access to the clog page */ byteval = *byteptr; Index: src/backend/access/transam/transam.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/transam.c,v retrieving revision 1.78 diff -c -r1.78 transam.c *** src/backend/access/transam/transam.c 20 Oct 2008 20:38:24 -0000 1.78 --- src/backend/access/transam/transam.c 23 Oct 2008 03:11:24 -0000 *************** *** 261,283 **** * are correctly marked subcommit first. */ void ! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids) { TransactionIdSetTreeStatus(xid, nxids, xids, TRANSACTION_STATUS_COMMITTED, ! InvalidXLogRecPtr); } /* * TransactionIdAsyncCommitTree * Same as above, but for async commits. The commit record LSN is needed. */ void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn) { TransactionIdSetTreeStatus(xid, nxids, xids, ! TRANSACTION_STATUS_COMMITTED, lsn); } /* --- 261,286 ---- * are correctly marked subcommit first. */ void ! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo) { TransactionIdSetTreeStatus(xid, nxids, xids, TRANSACTION_STATUS_COMMITTED, ! InvalidXLogRecPtr, isRedo); } /* * TransactionIdAsyncCommitTree * Same as above, but for async commits. The commit record LSN is needed. + * + * Never called during recovery, so doesn't have an isRedo parameter. */ void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn) { TransactionIdSetTreeStatus(xid, nxids, xids, ! TRANSACTION_STATUS_COMMITTED, ! lsn, false); } /* *************** *** 291,300 **** * will consider all the xacts as not-yet-committed anyway. */ void ! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids) { TransactionIdSetTreeStatus(xid, nxids, xids, ! TRANSACTION_STATUS_ABORTED, InvalidXLogRecPtr); } /* --- 294,304 ---- * will consider all the xacts as not-yet-committed anyway. */ void ! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo) { TransactionIdSetTreeStatus(xid, nxids, xids, ! TRANSACTION_STATUS_ABORTED, ! InvalidXLogRecPtr, isRedo); } /* Index: src/backend/access/transam/twophase.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/twophase.c,v retrieving revision 1.46 diff -c -r1.46 twophase.c *** src/backend/access/transam/twophase.c 20 Oct 2008 19:18:18 -0000 1.46 --- src/backend/access/transam/twophase.c 23 Oct 2008 03:11:49 -0000 *************** *** 1745,1751 **** XLogFlush(recptr); /* Mark the transaction committed in pg_clog */ ! TransactionIdCommitTree(xid, nchildren, children); /* Checkpoint can proceed now */ MyProc->inCommit = false; --- 1745,1751 ---- XLogFlush(recptr); /* Mark the transaction committed in pg_clog */ ! TransactionIdCommitTree(xid, nchildren, children, false); /* Checkpoint can proceed now */ MyProc->inCommit = false; *************** *** 1820,1826 **** * Mark the transaction aborted in clog. This is not absolutely necessary * but we may as well do it while we are here. */ ! TransactionIdAbortTree(xid, nchildren, children); END_CRIT_SECTION(); } --- 1820,1826 ---- * Mark the transaction aborted in clog. This is not absolutely necessary * but we may as well do it while we are here. */ ! TransactionIdAbortTree(xid, nchildren, children, false); END_CRIT_SECTION(); } Index: src/backend/access/transam/xact.c =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/xact.c,v retrieving revision 1.266 diff -c -r1.266 xact.c *** src/backend/access/transam/xact.c 20 Oct 2008 19:18:18 -0000 1.266 --- src/backend/access/transam/xact.c 23 Oct 2008 03:02:04 -0000 *************** *** 951,957 **** * Now we may update the CLOG, if we wrote a COMMIT record above */ if (markXidCommitted) ! TransactionIdCommitTree(xid, nchildren, children); } else { --- 951,957 ---- * Now we may update the CLOG, if we wrote a COMMIT record above */ if (markXidCommitted) ! TransactionIdCommitTree(xid, nchildren, children, false); } else { *************** *** 1250,1256 **** * having flushed the ABORT record to disk, because in event of a crash * we'd be assumed to have aborted anyway. */ ! TransactionIdAbortTree(xid, nchildren, children); END_CRIT_SECTION(); --- 1250,1256 ---- * having flushed the ABORT record to disk, because in event of a crash * we'd be assumed to have aborted anyway. */ ! TransactionIdAbortTree(xid, nchildren, children, false); END_CRIT_SECTION(); *************** *** 4219,4225 **** /* Mark the transaction committed in pg_clog */ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); ! TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids); /* Make sure nextXid is beyond any XID mentioned in the record */ max_xid = xid; --- 4219,4225 ---- /* Mark the transaction committed in pg_clog */ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); ! TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids, true); /* Make sure nextXid is beyond any XID mentioned in the record */ max_xid = xid; *************** *** 4257,4263 **** /* Mark the transaction aborted in pg_clog */ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); ! TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids); /* Make sure nextXid is beyond any XID mentioned in the record */ max_xid = xid; --- 4257,4263 ---- /* Mark the transaction aborted in pg_clog */ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); ! TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids, true); /* Make sure nextXid is beyond any XID mentioned in the record */ max_xid = xid; Index: src/include/access/clog.h =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/clog.h,v retrieving revision 1.22 diff -c -r1.22 clog.h *** src/include/access/clog.h 20 Oct 2008 19:18:18 -0000 1.22 --- src/include/access/clog.h 23 Oct 2008 03:06:48 -0000 *************** *** 33,39 **** extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, ! TransactionId *subxids, XidStatus status, XLogRecPtr lsn); extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn); extern Size CLOGShmemSize(void); --- 33,39 ---- extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, ! TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo); extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn); extern Size CLOGShmemSize(void); Index: src/include/access/transam.h =================================================================== RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/transam.h,v retrieving revision 1.66 diff -c -r1.66 transam.h *** src/include/access/transam.h 20 Oct 2008 19:18:18 -0000 1.66 --- src/include/access/transam.h 23 Oct 2008 03:06:27 -0000 *************** *** 140,148 **** extern bool TransactionIdDidAbort(TransactionId transactionId); extern bool TransactionIdIsKnownCompleted(TransactionId transactionId); extern void TransactionIdAbort(TransactionId transactionId); ! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); ! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); --- 140,148 ---- extern bool TransactionIdDidAbort(TransactionId transactionId); extern bool TransactionIdIsKnownCompleted(TransactionId transactionId); extern void TransactionIdAbort(TransactionId transactionId); ! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); ! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo); extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers