From 444ac0890030a7bf6804104dcb3663b661fe696a Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 6 Nov 2023 11:07:35 +0200
Subject: [PATCH] Fix false reports in pg_visibility

Currently pg_visibility computes its xid horizon using the
GetOldestNonRemovableTransactionId().  The problem is that this horizon can
sometimes go backwards.  That can lead to reporting false errors.

In order to fix that, this commit implements new funciton
GetStrictOldestNonRemovableTransactionId().  This function computes the xid
horizon, which would be guaranteed to be newer or equal to any xid horizon
computed before.

We have to do the following to achieve this.

1. Ignore processes xmin's, because they take into account connection to
   other databases which were ignored before.
2. Ignore KnownAssignedXids, because they are not database-aware.  While
   primary could compute its horizons database-aware.
3. Ignore walsender xmin, because it could go backward if some replication
   connections don't use replication slots.

Surely these would significantly sacrifice accuracy.  But we have to do so
in order to avoid reporting false errors.
---
 contrib/pg_visibility/pg_visibility.c | 11 +++----
 src/backend/storage/ipc/procarray.c   | 42 +++++++++++++++++++++++++++
 src/include/storage/procarray.h       |  1 +
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 17c50a44722..42c654261f7 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -19,6 +19,7 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "storage/proc.h"
 #include "storage/procarray.h"
 #include "storage/smgr.h"
 #include "utils/rel.h"
@@ -563,7 +564,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 	check_relation_relkind(rel);
 
 	if (all_visible)
-		OldestXmin = GetOldestNonRemovableTransactionId(rel);
+		OldestXmin = GetStrictOldestNonRemovableTransactionId();
 
 	nblocks = RelationGetNumberOfBlocks(rel);
 
@@ -671,11 +672,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 				 * retake ProcArrayLock here while we're holding the buffer
 				 * exclusively locked, but it should be safe against
 				 * deadlocks, because surely
-				 * GetOldestNonRemovableTransactionId() should never take a
-				 * buffer lock. And this shouldn't happen often, so it's worth
-				 * being careful so as to avoid false positives.
+				 * GetStrictOldestNonRemovableTransactionId() should never
+				 * take a buffer lock. And this shouldn't happen often, so
+				 * it's worth being careful so as to avoid false positives.
 				 */
-				RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
+				RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId();
 
 				if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
 					record_corrupt_item(items, &tuple.t_self);
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e2f71da4a01..50716c5ef99 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -237,6 +237,12 @@ typedef struct ComputeXidHorizonsResult
 	 */
 	TransactionId data_oldest_nonremovable;
 
+	/*
+	 * The "strict" oldest xid, which is guarantteed to be newer or equal to
+	 * any oldest xid computed before.
+	 */
+	TransactionId data_strict_oldest_nonremovable;
+
 	/*
 	 * Oldest xid for which deleted tuples need to be retained in this
 	 * session's temporary tables.
@@ -1743,6 +1749,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 		h->oldest_considered_running = initial;
 		h->shared_oldest_nonremovable = initial;
 		h->data_oldest_nonremovable = initial;
+		h->data_strict_oldest_nonremovable = initial;
 
 		/*
 		 * Only modifications made by this backend affect the horizon for
@@ -1842,6 +1849,10 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 		{
 			h->data_oldest_nonremovable =
 				TransactionIdOlder(h->data_oldest_nonremovable, xmin);
+
+			if (TransactionIdIsValid(xid))
+				h->data_strict_oldest_nonremovable =
+					TransactionIdOlder(h->data_strict_oldest_nonremovable, xid);
 		}
 	}
 
@@ -2005,6 +2016,37 @@ GetOldestNonRemovableTransactionId(Relation rel)
 	return InvalidTransactionId;
 }
 
+/*
+ * The "strict" version of GetOldestNonRemovableTransactionId().  The
+ * pg_visiblity check can tolerale false positives (don't report some of the
+ * errors), but can't toletate false negatives (report false errors).
+ * Normally, horozons moves forwards, but there are cases when it could move
+ * backwards (see comment for ComputeXidHorizons()).
+ *
+ * This is why we have to implement our own function for xid horizon, which
+ * would be guaranteed to be newer or equal to any xid horizon computed before.
+ * We have to do the following to achieve this.
+ *
+ * 1. Ignore processes xmin's, because they take into account connection to
+ *    other databases which were ignored before.
+ * 2. Ignore KnownAssignedXids, because they are not database-aware.  While
+ *    primary could compute its horizons database-aware.
+ * 3. Ignore walsender xmin, because it could go backward if some replication
+ *    connections don't use replication slots.
+ *
+ * Surely these would significantly sacrifice accuracy.  But we have to do so
+ * in order to avoid reporting false errors.
+ */
+TransactionId
+GetStrictOldestNonRemovableTransactionId(void)
+{
+	ComputeXidHorizonsResult horizons;
+
+	ComputeXidHorizons(&horizons);
+
+	return horizons.data_strict_oldest_nonremovable;
+}
+
 /*
  * Return the oldest transaction id any currently running backend might still
  * consider running. This should not be used for visibility / pruning
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index f3eba9b7640..c1f09c5d812 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -55,6 +55,7 @@ extern RunningTransactions GetRunningTransactionData(void);
 extern bool TransactionIdIsInProgress(TransactionId xid);
 extern bool TransactionIdIsActive(TransactionId xid);
 extern TransactionId GetOldestNonRemovableTransactionId(Relation rel);
+extern TransactionId GetStrictOldestNonRemovableTransactionId(void);
 extern TransactionId GetOldestTransactionIdConsideredRunning(void);
 extern TransactionId GetOldestActiveTransactionId(void);
 extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly);
-- 
2.39.3 (Apple Git-145)

