On 2013-11-19 15:20:01 +0100, Andres Freund wrote:
> Imo something the attached patch should be done. The description I came
> up with is:
> 
>     Fix Hot-Standby initialization of clog and subtrans.

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From db5fc631102e2bf3be1034981bedb0b99871a2bb Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 19 Nov 2013 13:12:31 +0100
Subject: [PATCH] Fix Hot-Standby initialization of clog and subtrans.

These bugs can cause data loss on standbys started with hot_standby=on
at the moment they start to accept read only queries by marking
committed transactions as uncommited. The likelihood of such
corruptions is small unless the primary has a high transaction rate.

5a031a5556ff83b8a9646892715d7fef415b83c3 fixed bugs in HS's startup
logic by maintaining less state until at least
STANDBY_SNAPSHOT_PENDING state was reached, missing the fact that both
clog and subtrans are written to before that. This only failed to fail
in common cases because the usage of ExtendCLOG in procarray.c was
superflous since clog extensions are actually WAL logged.

f44eedc3f0f347a856eea8590730769125964597/I then tried to fix the
missing extensions of pg_subtrans due to the former commit's changes -
which are not WAL logged - by performing the extensions when switching
to a state > STANDBY_INITIALIZED and not performing xid assignments
before that - again missing the fact that ExtendCLOG is unneccessary -
but screwed up twice: Once because latestObservedXid wasn't updated
anymore in that state due to the earlier commit and once by having an
off-by-one error in the loop performing extensions.
This means that whenever a CLOG_XACTS_PER_PAGE (32768 with default
settings) boundary was crossed between the start of the checkpoint
recovery started from and the first xl_running_xact record old
transactions commit bits in pg_clog could be overwritten if they
started and committed in that window.

Fix this mess by not performing ExtendCLOG() in HS at all anymore
since it's unneeded and evidently dangerous and by performing subtrans
extensions even before reaching STANDBY_SNAPSHOT_PENDING.
---
 src/backend/access/transam/clog.c   |  2 +-
 src/backend/storage/ipc/procarray.c | 64 ++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index cb95aa3..6a963b6 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -622,7 +622,7 @@ ExtendCLOG(TransactionId newestXact)
 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
 
 	/* Zero the page and make an XLOG entry about it */
-	ZeroCLOGPage(pageno, !InRecovery);
+	ZeroCLOGPage(pageno, true);
 
 	LWLockRelease(CLogControlLock);
 }
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index c2f86ff..4d0bfb8 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -484,8 +484,9 @@ ProcArrayInitRecovery(TransactionId initializedUptoXID)
 
 	/*
 	 * we set latestObservedXid to the xid SUBTRANS has been initialized upto
-	 * so we can extend it from that point onwards when we reach a consistent
-	 * state in ProcArrayApplyRecoveryInfo().
+	 * so we can extend it from that point onwards in
+	 * RecordKnownAssignedTransactionIds and when we get consistent in
+	 * ProcArrayApplyRecoveryInfo().
 	 */
 	latestObservedXid = initializedUptoXID;
 	TransactionIdRetreat(latestObservedXid);
@@ -661,17 +662,23 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
 	pfree(xids);
 
 	/*
-	 * latestObservedXid is set to the the point where SUBTRANS was started up
-	 * to, initialize subtrans from thereon, up to nextXid - 1.
+	 * latestObservedXid is at least set to the the point where SUBTRANS was
+	 * started up to (c.f. ProcArrayInitRecovery()) or to the biggest xid
+	 * RecordKnownAssignedTransactionIds() was called for.  Initialize
+	 * subtrans from thereon, up to nextXid - 1.
+	 *
+	 * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
+	 * because we've just added xids to the known assigned xids machinery that
+	 * haven't gone through RecordKnownAssignedTransactionId().
 	 */
 	Assert(TransactionIdIsNormal(latestObservedXid));
+	TransactionIdAdvance(latestObservedXid);
 	while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
 	{
-		ExtendCLOG(latestObservedXid);
 		ExtendSUBTRANS(latestObservedXid);
-
 		TransactionIdAdvance(latestObservedXid);
 	}
+	TransactionIdRetreat(latestObservedXid);  /* = running->nextXid - 1 */
 
 	/* ----------
 	 * Now we've got the running xids we need to set the global values that
@@ -756,10 +763,6 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
 
 	Assert(standbyState >= STANDBY_INITIALIZED);
 
-	/* can't do anything useful unless we have more state setup */
-	if (standbyState == STANDBY_INITIALIZED)
-		return;
-
 	max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
 
 	/*
@@ -786,6 +789,10 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
 	for (i = 0; i < nsubxids; i++)
 		SubTransSetParent(subxids[i], topxid, false);
 
+	/* KnownAssignedXids isn't maintained yet, so we're done for now */
+	if (standbyState == STANDBY_INITIALIZED)
+		return;
+
 	/*
 	 * Uses same locking as transaction commit
 	 */
@@ -2661,19 +2668,12 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
 {
 	Assert(standbyState >= STANDBY_INITIALIZED);
 	Assert(TransactionIdIsValid(xid));
+	Assert(TransactionIdIsValid(latestObservedXid));
 
 	elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
 		 xid, latestObservedXid);
 
 	/*
-	 * If the KnownAssignedXids machinery isn't up yet, do nothing.
-	 */
-	if (standbyState <= STANDBY_INITIALIZED)
-		return;
-
-	Assert(TransactionIdIsValid(latestObservedXid));
-
-	/*
 	 * When a newly observed xid arrives, it is frequently the case that it is
 	 * *not* the next xid in sequence. When this occurs, we must treat the
 	 * intervening xids as running also.
@@ -2683,22 +2683,34 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
 		TransactionId next_expected_xid;
 
 		/*
-		 * Extend clog and subtrans like we do in GetNewTransactionId() during
-		 * normal operation using individual extend steps. Typical case
-		 * requires almost no activity.
+		 * Extend subtrans like we do in GetNewTransactionId() during normal
+		 * operation using individual extend steps. Note that we do not need
+		 * to extend clog since its extensions are WAL logged.
+		 *
+		 * This part has to be done regardless of standbyState since we
+		 * immediately start assigning subtransactions to their toplevel
+		 * transactions.
 		 */
 		next_expected_xid = latestObservedXid;
-		TransactionIdAdvance(next_expected_xid);
-		while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
+		while (TransactionIdPrecedes(next_expected_xid, xid))
 		{
-			ExtendCLOG(next_expected_xid);
+			TransactionIdAdvance(next_expected_xid);
 			ExtendSUBTRANS(next_expected_xid);
+		}
+		Assert(next_expected_xid == xid);
 
-			TransactionIdAdvance(next_expected_xid);
+		/*
+		 * If the KnownAssignedXids machinery isn't up yet, there's nothing
+		 * more to do since we don't track assigned xids yet.
+		 */
+		if (standbyState <= STANDBY_INITIALIZED)
+		{
+			latestObservedXid = xid;
+			return;
 		}
 
 		/*
-		 * Add the new xids onto the KnownAssignedXids array.
+		 * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
 		 */
 		next_expected_xid = latestObservedXid;
 		TransactionIdAdvance(next_expected_xid);
-- 
1.8.3.251.g1462b67

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to