On Wed, 2008-10-22 at 18:53 -0400, Tom Lane wrote:
> Simon Riggs <[EMAIL PROTECTED]> writes:
> > My interest was really in maintaining ultra-correctness, while removing
> > the need to WAL log subcommits for Hot Standby. I think I achieved that,
> > almost, but if you see further optimizations that is good too.
>
> I'm not focusing on performance here --- I'm focusing on whether I trust
> the patch at all.
> I dislike blowing a hole that size in the sanity
> checks in TransactionIdSetStatus. I note that the hole still isn't
> big enough, since presumably you'd have to allow ABORTED=>SUB_COMMITTED
> too.
No, that is never required. We only set subcommitted state for a commit.
> That means that out of the four state transitions that are
> disallowed by the original coding of that Assert, you are now having to
> consider two as legal. I don't like that, and I like even less that
> it's not even trying to determine whether this is a replay-driven
> change.
Presumably you would like to see an additional parameter to allow that
test to be more strictly determined?
Bug fix v2 patch enclosed, mostly API changes.
--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support
Index: src/backend/access/transam/clog.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/clog.c,v
retrieving revision 1.48
diff -c -r1.48 clog.c
*** src/backend/access/transam/clog.c 20 Oct 2008 19:18:18 -0000 1.48
--- src/backend/access/transam/clog.c 23 Oct 2008 03:10:54 -0000
***************
*** 82,92 ****
static void WriteTruncateXlogRec(int pageno);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
! XLogRecPtr lsn, int pageno);
static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
! XLogRecPtr lsn, int slotno);
static void set_status_by_pages(int nsubxids, TransactionId *subxids,
! XidStatus status, XLogRecPtr lsn);
/*
--- 82,92 ----
static void WriteTruncateXlogRec(int pageno);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
! XLogRecPtr lsn, int pageno, bool isRedo);
static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
! XLogRecPtr lsn, int slotno, bool isRedo);
static void set_status_by_pages(int nsubxids, TransactionId *subxids,
! XidStatus status, XLogRecPtr lsn, bool isRedo);
/*
***************
*** 142,148 ****
*/
void
TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(xid); /* get page of parent */
int i;
--- 142,148 ----
*/
void
TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo)
{
int pageno = TransactionIdToPage(xid); /* get page of parent */
int i;
***************
*** 168,174 ****
* Set the parent and all subtransactions in a single call
*/
TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
! pageno);
}
else
{
--- 168,174 ----
* Set the parent and all subtransactions in a single call
*/
TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
! pageno, isRedo);
}
else
{
***************
*** 187,193 ****
if (status == TRANSACTION_STATUS_COMMITTED)
set_status_by_pages(nsubxids - nsubxids_on_first_page,
subxids + nsubxids_on_first_page,
! TRANSACTION_STATUS_SUB_COMMITTED, lsn);
/*
* Now set the parent and subtransactions on same page as the parent,
--- 187,193 ----
if (status == TRANSACTION_STATUS_COMMITTED)
set_status_by_pages(nsubxids - nsubxids_on_first_page,
subxids + nsubxids_on_first_page,
! TRANSACTION_STATUS_SUB_COMMITTED, lsn, isRedo);
/*
* Now set the parent and subtransactions on same page as the parent,
***************
*** 195,201 ****
*/
pageno = TransactionIdToPage(xid);
TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
! lsn, pageno);
/*
* Now work through the rest of the subxids one clog page at a time,
--- 195,201 ----
*/
pageno = TransactionIdToPage(xid);
TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
! lsn, pageno, isRedo);
/*
* Now work through the rest of the subxids one clog page at a time,
***************
*** 203,209 ****
*/
set_status_by_pages(nsubxids - nsubxids_on_first_page,
subxids + nsubxids_on_first_page,
! status, lsn);
}
}
--- 203,209 ----
*/
set_status_by_pages(nsubxids - nsubxids_on_first_page,
subxids + nsubxids_on_first_page,
! status, lsn, isRedo);
}
}
***************
*** 215,221 ****
*/
static void
set_status_by_pages(int nsubxids, TransactionId *subxids,
! XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(subxids[0]);
int offset = 0;
--- 215,221 ----
*/
static void
set_status_by_pages(int nsubxids, TransactionId *subxids,
! XidStatus status, XLogRecPtr lsn, bool isRedo)
{
int pageno = TransactionIdToPage(subxids[0]);
int offset = 0;
***************
*** 233,239 ****
TransactionIdSetPageStatus(InvalidTransactionId,
num_on_page, subxids + offset,
! status, lsn, pageno);
offset = i;
pageno = TransactionIdToPage(subxids[offset]);
}
--- 233,239 ----
TransactionIdSetPageStatus(InvalidTransactionId,
num_on_page, subxids + offset,
! status, lsn, pageno, isRedo);
offset = i;
pageno = TransactionIdToPage(subxids[offset]);
}
***************
*** 248,254 ****
static void
TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
! XLogRecPtr lsn, int pageno)
{
int slotno;
int i;
--- 248,254 ----
static void
TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
! XLogRecPtr lsn, int pageno, bool isRedo)
{
int slotno;
int i;
***************
*** 289,307 ****
Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
TransactionIdSetStatusBit(subxids[i],
TRANSACTION_STATUS_SUB_COMMITTED,
! lsn, slotno);
}
}
/* ... then the main transaction */
! TransactionIdSetStatusBit(xid, status, lsn, slotno);
}
/* Set the subtransactions */
for (i = 0; i < nsubxids; i++)
{
Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
}
ClogCtl->shared->page_dirty[slotno] = true;
--- 289,307 ----
Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
TransactionIdSetStatusBit(subxids[i],
TRANSACTION_STATUS_SUB_COMMITTED,
! lsn, slotno, isRedo);
}
}
/* ... then the main transaction */
! TransactionIdSetStatusBit(xid, status, lsn, slotno, isRedo);
}
/* Set the subtransactions */
for (i = 0; i < nsubxids; i++)
{
Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! TransactionIdSetStatusBit(subxids[i], status, lsn, slotno, isRedo);
}
ClogCtl->shared->page_dirty[slotno] = true;
***************
*** 313,321 ****
* Sets the commit status of a single transaction.
*
* Must be called with CLogControlLock held
*/
static void
! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
{
int byteno = TransactionIdToByte(xid);
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
--- 313,326 ----
* Sets the commit status of a single transaction.
*
* Must be called with CLogControlLock held
+ *
+ * We pass the isRedo flag all the way down to here to ensure that the
+ * setting of transaction status is valid for the original call and for
+ * recovery.
*/
static void
! TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
! XLogRecPtr lsn, int slotno, bool isRedo)
{
int byteno = TransactionIdToByte(xid);
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
***************
*** 327,333 ****
/* Current state should be 0, subcommitted or target state */
Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 ||
((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED ||
! ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status);
/* note this assumes exclusive access to the clog page */
byteval = *byteptr;
--- 332,340 ----
/* Current state should be 0, subcommitted or target state */
Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 ||
((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED ||
! ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status ||
! (((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_COMMITTED &&
! status == TRANSACTION_STATUS_SUB_COMMITTED && isRedo));
/* note this assumes exclusive access to the clog page */
byteval = *byteptr;
Index: src/backend/access/transam/transam.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/transam.c,v
retrieving revision 1.78
diff -c -r1.78 transam.c
*** src/backend/access/transam/transam.c 20 Oct 2008 20:38:24 -0000 1.78
--- src/backend/access/transam/transam.c 23 Oct 2008 03:11:24 -0000
***************
*** 261,283 ****
* are correctly marked subcommit first.
*/
void
! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
TRANSACTION_STATUS_COMMITTED,
! InvalidXLogRecPtr);
}
/*
* TransactionIdAsyncCommitTree
* Same as above, but for async commits. The commit record LSN is needed.
*/
void
TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids,
XLogRecPtr lsn)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
! TRANSACTION_STATUS_COMMITTED, lsn);
}
/*
--- 261,286 ----
* are correctly marked subcommit first.
*/
void
! TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
TRANSACTION_STATUS_COMMITTED,
! InvalidXLogRecPtr, isRedo);
}
/*
* TransactionIdAsyncCommitTree
* Same as above, but for async commits. The commit record LSN is needed.
+ *
+ * Never called during recovery, so doesn't have an isRedo parameter.
*/
void
TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids,
XLogRecPtr lsn)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
! TRANSACTION_STATUS_COMMITTED,
! lsn, false);
}
/*
***************
*** 291,300 ****
* will consider all the xacts as not-yet-committed anyway.
*/
void
! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
! TRANSACTION_STATUS_ABORTED, InvalidXLogRecPtr);
}
/*
--- 294,304 ----
* will consider all the xacts as not-yet-committed anyway.
*/
void
! TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo)
{
TransactionIdSetTreeStatus(xid, nxids, xids,
! TRANSACTION_STATUS_ABORTED,
! InvalidXLogRecPtr, isRedo);
}
/*
Index: src/backend/access/transam/twophase.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/twophase.c,v
retrieving revision 1.46
diff -c -r1.46 twophase.c
*** src/backend/access/transam/twophase.c 20 Oct 2008 19:18:18 -0000 1.46
--- src/backend/access/transam/twophase.c 23 Oct 2008 03:11:49 -0000
***************
*** 1745,1751 ****
XLogFlush(recptr);
/* Mark the transaction committed in pg_clog */
! TransactionIdCommitTree(xid, nchildren, children);
/* Checkpoint can proceed now */
MyProc->inCommit = false;
--- 1745,1751 ----
XLogFlush(recptr);
/* Mark the transaction committed in pg_clog */
! TransactionIdCommitTree(xid, nchildren, children, false);
/* Checkpoint can proceed now */
MyProc->inCommit = false;
***************
*** 1820,1826 ****
* Mark the transaction aborted in clog. This is not absolutely necessary
* but we may as well do it while we are here.
*/
! TransactionIdAbortTree(xid, nchildren, children);
END_CRIT_SECTION();
}
--- 1820,1826 ----
* Mark the transaction aborted in clog. This is not absolutely necessary
* but we may as well do it while we are here.
*/
! TransactionIdAbortTree(xid, nchildren, children, false);
END_CRIT_SECTION();
}
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.266
diff -c -r1.266 xact.c
*** src/backend/access/transam/xact.c 20 Oct 2008 19:18:18 -0000 1.266
--- src/backend/access/transam/xact.c 23 Oct 2008 03:02:04 -0000
***************
*** 951,957 ****
* Now we may update the CLOG, if we wrote a COMMIT record above
*/
if (markXidCommitted)
! TransactionIdCommitTree(xid, nchildren, children);
}
else
{
--- 951,957 ----
* Now we may update the CLOG, if we wrote a COMMIT record above
*/
if (markXidCommitted)
! TransactionIdCommitTree(xid, nchildren, children, false);
}
else
{
***************
*** 1250,1256 ****
* having flushed the ABORT record to disk, because in event of a crash
* we'd be assumed to have aborted anyway.
*/
! TransactionIdAbortTree(xid, nchildren, children);
END_CRIT_SECTION();
--- 1250,1256 ----
* having flushed the ABORT record to disk, because in event of a crash
* we'd be assumed to have aborted anyway.
*/
! TransactionIdAbortTree(xid, nchildren, children, false);
END_CRIT_SECTION();
***************
*** 4219,4225 ****
/* Mark the transaction committed in pg_clog */
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids);
/* Make sure nextXid is beyond any XID mentioned in the record */
max_xid = xid;
--- 4219,4225 ----
/* Mark the transaction committed in pg_clog */
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids, true);
/* Make sure nextXid is beyond any XID mentioned in the record */
max_xid = xid;
***************
*** 4257,4263 ****
/* Mark the transaction aborted in pg_clog */
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
/* Make sure nextXid is beyond any XID mentioned in the record */
max_xid = xid;
--- 4257,4263 ----
/* Mark the transaction aborted in pg_clog */
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
! TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids, true);
/* Make sure nextXid is beyond any XID mentioned in the record */
max_xid = xid;
Index: src/include/access/clog.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/clog.h,v
retrieving revision 1.22
diff -c -r1.22 clog.h
*** src/include/access/clog.h 20 Oct 2008 19:18:18 -0000 1.22
--- src/include/access/clog.h 23 Oct 2008 03:06:48 -0000
***************
*** 33,39 ****
extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! TransactionId *subxids, XidStatus status, XLogRecPtr lsn);
extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
extern Size CLOGShmemSize(void);
--- 33,39 ----
extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
! TransactionId *subxids, XidStatus status, XLogRecPtr lsn, bool isRedo);
extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
extern Size CLOGShmemSize(void);
Index: src/include/access/transam.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/transam.h,v
retrieving revision 1.66
diff -c -r1.66 transam.h
*** src/include/access/transam.h 20 Oct 2008 19:18:18 -0000 1.66
--- src/include/access/transam.h 23 Oct 2008 03:06:27 -0000
***************
*** 140,148 ****
extern bool TransactionIdDidAbort(TransactionId transactionId);
extern bool TransactionIdIsKnownCompleted(TransactionId transactionId);
extern void TransactionIdAbort(TransactionId transactionId);
! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids);
extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids);
extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
--- 140,148 ----
extern bool TransactionIdDidAbort(TransactionId transactionId);
extern bool TransactionIdIsKnownCompleted(TransactionId transactionId);
extern void TransactionIdAbort(TransactionId transactionId);
! extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo);
extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
! extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids, bool isRedo);
extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers