From f63c57eda8f8f0555e8fd8ed2ddff17b8480eb74 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Thu, 5 Sep 2019 12:54:00 -0400
Subject: [PATCH v2] Move AtEOXact_Snapshot() back to AbortTransaction().

As a general rule, it seems preferable to do as much work as possible
in AbortTransaction(), leaving as little work as possible to be done
in CleanupTransaction(), because it's a good idea to hold on to
resources for the shortest possible amount of time.

Way back in 2011, commit 57eb009092684e6e1788dd0dae641ccee1668b10
moved AbortTransaction's AtEOXact_Snapshot call to
CleanupTransaction to fix a problem when a ROLLBACK statement was
prepared at the protocol level and executed in a transaction
with REPEATABLE READ or higher isolation. RevalidateCachedQuery
would attempt to obtain a snapshot and fail because the transaction
was already gone.  Commit ac63dca607e8e22247defbc8fe03b6baa3628c42
subsequently taught RevalidateCachedQuery not to obtain a snapshot
for such commands after all while fixing an unrelated bug, and
there now seems to be no case in which we obtain a snapshot in
an aborted transaction.

That's good, because it can't possibly be safe to use a snapshot in
an aborted transaction, for reasons explained in the comments
added by this patch. So, move the call back to AbortTransaction().
To help catch future cases of unsafe snapshot use, add a bunch of
Assert() statements to snapmgr.c to flag dangerous coding patterns.
Flesh out the comments in RevalidateCachedQuery a bit, too.
---
 src/backend/access/transam/xact.c   | 13 ++++++++++++-
 src/backend/utils/cache/plancache.c |  6 ++++++
 src/backend/utils/time/snapmgr.c    | 24 ++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index f594d33e7a..9993251607 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2732,6 +2732,18 @@ AbortTransaction(void)
 		pgstat_report_xact_timestamp(0);
 	}
 
+	/*
+	 * Any snapshots taken by this transaction were unsafe to use even at the
+	 * time when we entered AbortTransaction(), since we might have, for
+	 * example, inserted a heap tuple and failed while inserting index tuples.
+	 * They were even more unsafe after ProcArrayEndTransaction(), since after
+	 * that point tuples we inserted could be pruned by other backends.
+	 * However, we postpone the cleanup until this point in the sequence
+	 * because it has to be done after ResourceOwnerRelease() has finishing
+	 * unregistering snapshots.
+	 */
+	AtEOXact_Snapshot(false, true);
+
 	/*
 	 * State remains TRANS_ABORT until CleanupTransaction().
 	 */
@@ -2757,7 +2769,6 @@ CleanupTransaction(void)
 	 * do abort cleanup processing
 	 */
 	AtCleanup_Portals();		/* now safe to release portal memory */
-	AtEOXact_Snapshot(false, true); /* and release the transaction's snapshots */
 
 	CurrentResourceOwner = NULL;	/* and resource owner */
 	if (TopTransactionResourceOwner)
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index abc3062892..732c421690 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -663,6 +663,12 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 	 * checking whether parse analysis requires a snapshot; utility commands
 	 * don't have invalidatable plans, so we'd not get here for such a
 	 * command.
+	 *
+	 * Note that this code shouldn't be reached from within a failed
+	 * transaction, because only transaction control commands are legal in
+	 * that context, and we've already checked for those. That's important,
+	 * because we can't safely call GetTransactionSnapshot() from a failed
+	 * transaction.
 	 */
 	snapshot_set = false;
 	if (!ActiveSnapshotSet())
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 47b0517596..a9a1b3cbff 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -305,6 +305,9 @@ SnapMgrInit(void)
 Snapshot
 GetTransactionSnapshot(void)
 {
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	/*
 	 * Return historic snapshot if doing logical decoding. We'll never need a
 	 * non-historic transaction snapshot in this (sub-)transaction, so there's
@@ -380,6 +383,9 @@ GetTransactionSnapshot(void)
 Snapshot
 GetLatestSnapshot(void)
 {
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	/*
 	 * We might be able to relax this, but nothing that could otherwise work
 	 * needs it.
@@ -415,6 +421,9 @@ GetOldestSnapshot(void)
 	Snapshot	OldestRegisteredSnapshot = NULL;
 	XLogRecPtr	RegisteredLSN = InvalidXLogRecPtr;
 
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	if (!pairingheap_is_empty(&RegisteredSnapshots))
 	{
 		OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
@@ -441,6 +450,9 @@ GetOldestSnapshot(void)
 Snapshot
 GetCatalogSnapshot(Oid relid)
 {
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	/*
 	 * Return historic snapshot while we're doing logical decoding, so we can
 	 * see the appropriate state of the catalog.
@@ -463,6 +475,9 @@ GetCatalogSnapshot(Oid relid)
 Snapshot
 GetNonHistoricCatalogSnapshot(Oid relid)
 {
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	/*
 	 * If the caller is trying to scan a relation that has no syscache, no
 	 * catcache invalidations will be sent when it is updated.  For a few key
@@ -789,6 +804,9 @@ UpdateActiveSnapshotCommandId(void)
 	Assert(ActiveSnapshot->as_snap->active_count == 1);
 	Assert(ActiveSnapshot->as_snap->regd_count == 0);
 
+	/* This is not sensible except within a valid transaction. */
+	Assert(IsTransactionState());
+
 	/*
 	 * Don't allow modification of the active snapshot during parallel
 	 * operation.  We share the snapshot to worker backends at the beginning
@@ -842,6 +860,9 @@ GetActiveSnapshot(void)
 {
 	Assert(ActiveSnapshot != NULL);
 
+	/* Unsafe if transaction is aborted, or if we're not in a transaction. */
+	Assert(IsTransactionState());
+
 	return ActiveSnapshot->as_snap;
 }
 
@@ -882,6 +903,9 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
 	if (snapshot == InvalidSnapshot)
 		return InvalidSnapshot;
 
+	/* It's unsafe to use snapshots in an aborted transaction. */
+	Assert(!IsAbortedTransactionBlockState());
+
 	/* Static snapshot?  Create a persistent copy */
 	snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
 
-- 
2.17.2 (Apple Git-113)

