From 7723ba395dbf15a0badd6cdafda2752c19356baa Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 6 Nov 2023 11:07:35 +0200
Subject: [PATCH] Fix false reports in pg_visibility

---
 contrib/pg_visibility/pg_visibility.c | 75 ++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 6 deletions(-)

diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 2a4acfd1eee..036a9fd7681 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -19,6 +19,7 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "storage/proc.h"
 #include "storage/procarray.h"
 #include "storage/smgr.h"
 #include "utils/rel.h"
@@ -532,6 +533,58 @@ collect_visibility_data(Oid relid, bool include_pd)
 	return info;
 }
 
+/*
+ * The "strict" version of GetOldestNonRemovableTransactionId().  The
+ * pg_visiblity check can tolerale false positives (don't report some of the
+ * errors), but can't toletate false negatives (report false errors).
+ * This makes us fundamentally unhappy with ComputeXidHorizons().  Normally,
+ * horozons moves forwards, but there are cases when it could move backwards
+ * (see comment for ComputeXidHorizons()).
+ *
+ * This is why we have to implement our own function for xid horizon, which
+ * would be guaranteed to be newer or equal to any xid horizon computed before.
+ * We have to do the following to achieve this.
+ *
+ * 1. Ignore processes xmin's, because they take into account connection to
+ *    other databases which were ignored before.
+ * 2. Ignore KnownAssignedXids, because they are not database-aware.  While
+ *    primary could compute its horizons database-aware.
+ * 3. Ignore walsender xmin, because it could go backward if some replication
+ *    connections don't use replication slots.
+ *
+ * Surely these would significantly sacrifice accuracy.  But we have to do so
+ * in order to avoid reporting false errors.
+ */
+static TransactionId
+get_strict_xid_horizon(void)
+{
+	TransactionId xmin = InvalidTransactionId;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+	for (int index = 0; index < ProcGlobal->allProcCount; index++)
+	{
+		TransactionId xid = (uint32) (*((volatile uint32 *) &ProcGlobal->xids[index]));
+		PGPROC	   *proc = &ProcGlobal->allProcs[index];
+		uint8		statusFlags;
+
+		if (likely(xid == InvalidTransactionId))
+			continue;
+
+		statusFlags = ProcGlobal->statusFlags[index];
+		if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
+			continue;
+
+		if (proc->databaseId == MyDatabaseId ||
+			(statusFlags & PROC_AFFECTS_ALL_HORIZONS))
+		{
+			xmin = TransactionIdOlder(xmin, xid);
+		}
+	}
+	LWLockRelease(ProcArrayLock);
+
+	return xmin;
+}
+
 /*
  * Returns a list of items whose visibility map information does not match
  * the status of the tuples on the page.
@@ -562,8 +615,14 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 	/* Only some relkinds have a visibility map */
 	check_relation_relkind(rel);
 
+	/* Mark ourselves as PROC_IN_VACUUM to exclude our influence on xmin's */
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	MyProc->statusFlags |= PROC_IN_VACUUM;
+	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
+	LWLockRelease(ProcArrayLock);
+
 	if (all_visible)
-		OldestXmin = GetOldestNonRemovableTransactionId(rel);
+		OldestXmin = get_strict_xid_horizon();
 
 	nblocks = RelationGetNumberOfBlocks(rel);
 
@@ -670,12 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 				 * From a concurrency point of view, it sort of sucks to
 				 * retake ProcArrayLock here while we're holding the buffer
 				 * exclusively locked, but it should be safe against
-				 * deadlocks, because surely
-				 * GetOldestNonRemovableTransactionId() should never take a
-				 * buffer lock. And this shouldn't happen often, so it's worth
-				 * being careful so as to avoid false positives.
+				 * deadlocks, because surely get_strict_xid_horizon() should
+				 * never take a buffer lock. And this shouldn't happen often,
+				 * so it's worth being careful so as to avoid false positives.
 				 */
-				RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
+				RecomputedOldestXmin = get_strict_xid_horizon();
 
 				if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
 					record_corrupt_item(items, &tuple.t_self);
@@ -715,6 +773,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 	items->count = items->next;
 	items->next = 0;
 
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	MyProc->statusFlags &= ~PROC_IN_VACUUM;
+	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
+	LWLockRelease(ProcArrayLock);
+
 	return items;
 }
 
-- 
2.39.3 (Apple Git-145)

