Initial patch. I will be testing over the next day. No commit before at
least midday on Wed 7 Apr.

The existing call to PrescanPreparedTransactions() looks correct to me
but the comment is wrong. I will change that also, if we agree.

-- 
 Simon Riggs           www.2ndQuadrant.com
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index e2566a4..365cd17 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1719,6 +1719,88 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
 }
 
 /*
+ * StandbyRecoverPreparedTransactions
+ *
+ * Scan the pg_twophase directory and set up all the required information to
+ * allow standby queries to treat prepared transactions as still active.
+ * This is never called at the end of recovery - we use
+ * RecoverPreparedTransactions() at that point.
+ *
+ * Currently we simply call SubTransSetParent() for any subxids of prepared
+ * transactions; can_overwrite is passed through to it unchanged.
+ */
+void
+StandbyRecoverPreparedTransactions(bool can_overwrite)
+{
+	DIR		   *cldir;
+	struct dirent *clde;
+
+	cldir = AllocateDir(TWOPHASE_DIR);
+	while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
+	{
+		if (strlen(clde->d_name) == 8 &&
+			strspn(clde->d_name, "0123456789ABCDEF") == 8)
+		{
+			TransactionId xid;
+			char	   *buf;
+			TwoPhaseFileHeader *hdr;
+			TransactionId *subxids;
+			int			i;
+
+			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+
+			/* Already processed? */
+			if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+			{
+				ereport(WARNING,
+						(errmsg("removing stale two-phase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				continue;
+			}
+
+			/* Read and validate file */
+			buf = ReadTwoPhaseFile(xid, true);
+			if (buf == NULL)
+			{
+				ereport(WARNING,
+					  (errmsg("removing corrupt two-phase state file \"%s\"",
+							  clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				continue;
+			}
+
+			/* Deconstruct header */
+			hdr = (TwoPhaseFileHeader *) buf;
+			if (!TransactionIdEquals(hdr->xid, xid))
+			{
+				ereport(WARNING,
+					  (errmsg("removing corrupt two-phase state file \"%s\"",
+							  clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				pfree(buf);
+				continue;
+			}
+
+			/* Subxids must follow the main XID; record xid as their parent */
+			subxids = (TransactionId *)
+				(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
+			for (i = 0; i < hdr->nsubxacts; i++)
+			{
+				TransactionId subxid = subxids[i];
+
+				Assert(TransactionIdFollows(subxid, xid));
+				SubTransSetParent(subxid, xid, can_overwrite);
+			}
+
+			/* Release the file image, as the corrupt-header path does */
+			pfree(buf);
+		}
+	}
+	FreeDir(cldir);
+}
+
+/*
  * RecoverPreparedTransactions
  *
  * Scan the pg_twophase directory and reload shared-memory state for each
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index abdf4d8..08b4cf8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5808,6 +5808,33 @@ StartupXLOG(void)
 			StartupMultiXact();
 
 			ProcArrayInitRecoveryInfo(oldestActiveXID);
+
+			/*
+			 * If we're beginning at a shutdown checkpoint, we know that
+			 * nothing was running on the master at this point. So fake-up
+			 * an empty running-xacts record and use that here and now.
+			 * Recover additional standby state for prepared transactions.
+			 */
+			if (wasShutdown)
+			{
+				RunningTransactionsData running;
+
+				/*
+				 * Construct a RunningTransactions snapshot representing a shut
+				 * down server, with only prepared transactions still alive.
+				 * We're never overflowed at this point because all subxids
+				 * are listed with their parent prepared transactions.
+				 */
+				running.xcnt = nxids;
+				running.subxid_overflow = false;
+				running.nextXid = checkPoint.nextXid;
+				running.oldestRunningXid = oldestActiveXID;
+				running.xids = xids;
+
+				ProcArrayApplyRecoveryInfo(&running);
+
+				StandbyRecoverPreparedTransactions(false);
+			}
 		}
 
 		/* Initialize resource managers */
@@ -7520,13 +7547,34 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 		if (standbyState != STANDBY_DISABLED)
 			CheckRequiredParameterValues(checkPoint);
 
+		/*
+		 * If we're beginning at a shutdown checkpoint, we know that
+		 * nothing was running on the master at this point. So fake-up
+		 * an empty running-xacts record and use that here and now.
+		 * Recover additional standby state for prepared transactions.
+		 */
 		if (standbyState >= STANDBY_INITIALIZED)
 		{
+			TransactionId *xids;
+			int			nxids;
+			TransactionId oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
+			RunningTransactionsData running;
+
 			/*
-			 * Remove stale transactions, if any.
+			 * Construct a RunningTransactions snapshot representing a shut
+			 * down server, with only prepared transactions still alive.
+			 * We're never overflowed at this point because all subxids
+			 * are listed with their parent prepared transactions.
 			 */
-			ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);
-			StandbyReleaseOldLocks(checkPoint.nextXid);
+			running.xcnt = nxids;
+			running.subxid_overflow = false;
+			running.nextXid = checkPoint.nextXid;
+			running.oldestRunningXid = oldestActiveXID;
+			running.xids = xids;
+
+			ProcArrayApplyRecoveryInfo(&running);
+
+			StandbyRecoverPreparedTransactions(true);
 		}
 
 		/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h
index 5d9d6cf..46f5fca 100644
--- a/src/include/access/twophase.h
+++ b/src/include/access/twophase.h
@@ -44,6 +44,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
 
 extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
 							int *nxids_p);
+extern void StandbyRecoverPreparedTransactions(bool can_overwrite);
 extern void RecoverPreparedTransactions(void);
 
 extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to