From e866d587c6ef9d2fa9aa4a1fdfaedae3ddcfcd55 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Sat, 20 Dec 2025 02:37:08 +0100
Subject: [PATCH v14 5/8] GIST: Fix visibility issues in IOS

Previously, GIST IOS could buffer tuples from pages while VACUUM came
along and cleaned up an ALL_DEAD tuple, marking the tuple's page
ALL_VISIBLE again and making IOS mistakenly believe the tuple is indeed
visible.

With this patch, pins now conflict with GIST vacuum, and we now do
preliminary visibility checks for IOS whilst holding the pin. This
allows us to return tuples to the scan after releasing the pin,
without breaking visibility rules.

Idea from Heikki Linnakangas
---
 src/backend/access/gist/gistget.c    | 125 ++++++++++++++++++++++++++-
 src/backend/access/gist/gistscan.c   |   6 ++
 src/backend/access/gist/gistvacuum.c |   6 +-
 src/include/access/gist_private.h    |  27 ++++--
 4 files changed, 151 insertions(+), 13 deletions(-)

diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 9ba45acfff3..cc193280f74 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -17,6 +17,7 @@
 #include "access/genam.h"
 #include "access/gist_private.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "lib/pairingheap.h"
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -394,7 +395,11 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
 		return;
 	}
 
-	so->nPageData = so->curPageData = 0;
+	if (scan->numberOfOrderBys)
+		so->nsortItems = 0;
+	else
+		so->nPageData = so->curPageData = 0;
+
 	scan->xs_hitup = NULL;		/* might point into pageDataCxt */
 	if (so->pageDataCxt)
 		MemoryContextReset(so->pageDataCxt);
@@ -498,10 +503,16 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
 				item->data.heap.recheckDistances = recheck_distances;
 
 				/*
-				 * In an index-only scan, also fetch the data from the tuple.
+				 * In an index-only scan, also fetch the data from the tuple,
+				 * and keep a reference to the tuple so we can run visibility
+				 * checks on the tuples before we release the buffer.
 				 */
 				if (scan->xs_want_itup)
+				{
 					item->data.heap.recontup = gistFetchTuple(giststate, r, it);
+					so->sortItems[so->nsortItems] = &item->data.heap;
+					so->nsortItems += 1;
+				}
 			}
 			else
 			{
@@ -526,7 +537,104 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
 		}
 	}
 
-	UnlockReleaseBuffer(buffer);
+	/* Allow writes to the buffer, but don't yet allow VACUUM */
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * If we're in an index-only scan, we need to do VM-like visibility checks
+	 * before we release the pin.  This way, VACUUM can't clean up dead tuples
+	 * from this index page and mark the heap page ALL_VISIBLE before the tuple
+	 * was returned; or at least not without the index-only scan knowing to
+	 * still look at the heap page for the visibility of this tuple.
+	 *
+	 * See also docs section "Index Locking Considerations".
+	 */
+	if (scan->xs_want_itup)
+	{
+		TM_IndexVisibilityCheckOp	op;
+		op.vmbuf = &so->vmbuf;
+
+		/* get the number of TIDs we're about to check */
+		if (scan->numberOfOrderBys > 0)
+			op.checkntids = so->nsortItems;
+		else
+			op.checkntids = so->nPageData;
+
+		/* skip the rest of the vischeck code if nothing is to be done. */
+		if (op.checkntids == 0)
+			goto IOSVisChecksDone;
+
+		op.checktids = palloc_array(TM_VisCheck, op.checkntids);
+
+		/* Populate the visibility check items */
+		if (scan->numberOfOrderBys > 0)
+		{
+			for (int off = 0; off < op.checkntids; off++)
+			{
+				Assert(ItemPointerIsValid(&so->sortItems[off]->heapPtr));
+
+				PopulateTMVischeck(&op.checktids[off],
+								   &so->sortItems[off]->heapPtr,
+								   off);
+			}
+		}
+		else
+		{
+			for (int off = 0; off < op.checkntids; off++)
+			{
+				Assert(ItemPointerIsValid(&so->pageData[off].heapPtr));
+
+				PopulateTMVischeck(&op.checktids[off],
+								   &so->pageData[off].heapPtr,
+								   off);
+			}
+		}
+
+		/* ask the table for the visibility status of these tids */
+		table_index_vischeck_tuples(scan->heapRelation, &op);
+
+		/* and copy the visibility status into the GISTSearchItems */
+		if (scan->numberOfOrderBys > 0)
+		{
+			for (int off = 0; off < op.checkntids; off++)
+			{
+				TM_VisCheck *check = &op.checktids[off];
+				GISTSearchHeapItem *item = so->sortItems[check->idxoffnum];
+
+				/* sanity checks */
+				Assert(check->idxoffnum < op.checkntids);
+				Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&item->heapPtr));
+				Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&item->heapPtr));
+
+				item->visrecheck = check->vischeckresult;
+			}
+
+			/* reset the temporary state used for tracking IOS items */
+			so->nsortItems = 0;
+		}
+		else
+		{
+			for (int off = 0; off < op.checkntids; off++)
+			{
+				TM_VisCheck *check = &op.checktids[off];
+				GISTSearchHeapItem *item = &so->pageData[check->idxoffnum];
+
+				Assert(check->idxoffnum < op.checkntids);
+				Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&item->heapPtr));
+				Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&item->heapPtr));
+
+				item->visrecheck = check->vischeckresult;
+			}
+		}
+
+		/* finally, clean up the used resources */
+		pfree(op.checktids);
+	}
+
+IOSVisChecksDone:
+
+	/* Allow VACUUM to process the buffer again */
+	ReleaseBuffer(buffer);
 }
 
 /*
@@ -586,9 +694,15 @@ getNextNearest(IndexScanDesc scan)
 												 item->distances,
 												 item->data.heap.recheckDistances);
 
-			/* in an index-only scan, also return the reconstructed tuple. */
+			/*
+			 * In an index-only scan, also return the reconstructed tuple,
+			 * and store the visibility check's result.
+			 */
 			if (scan->xs_want_itup)
+			{
 				scan->xs_hitup = item->data.heap.recontup;
+				scan->xs_visrecheck = item->data.heap.visrecheck;
+			}
 			res = true;
 		}
 		else
@@ -675,7 +789,10 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
 
 				/* in an index-only scan, also return the reconstructed tuple */
 				if (scan->xs_want_itup)
+				{
 					scan->xs_hitup = so->pageData[so->curPageData].recontup;
+					scan->xs_visrecheck = so->pageData[so->curPageData].visrecheck;
+				}
 
 				so->curPageData++;
 
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 01b8ff0b6fa..bf6b1a82548 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -348,6 +348,12 @@ gistendscan(IndexScanDesc scan)
 {
 	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
 
+	if (BufferIsValid(so->vmbuf))
+	{
+		ReleaseBuffer(so->vmbuf);
+		so->vmbuf = InvalidBuffer;
+	}
+
 	/*
 	 * freeGISTstate is enough to clean up everything made by gistbeginscan,
 	 * as well as the queueCxt if there is a separate context for it.
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 7591ad4da1a..fc541ff5efa 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -326,10 +326,10 @@ restart:
 	recurse_to = InvalidBlockNumber;
 
 	/*
-	 * We are not going to stay here for a long time, aggressively grab an
-	 * exclusive lock.
+	 * We are not going to stay here for a long time, aggressively grab a
+	 * cleanup lock.
 	 */
-	LockBuffer(buffer, GIST_EXCLUSIVE);
+	LockBufferForCleanup(buffer);
 	page = BufferGetPage(buffer);
 
 	if (gistPageRecyclable(page))
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 39404ec7cdb..272c18ea17d 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -22,6 +22,7 @@
 #include "storage/buffile.h"
 #include "utils/hsearch.h"
 #include "access/genam.h"
+#include "tableam.h"
 
 /*
  * Maximum number of "halves" a page can be split into in one operation.
@@ -124,6 +125,8 @@ typedef struct GISTSearchHeapItem
 								 * index-only scans */
 	OffsetNumber offnum;		/* track offset in page to mark tuple as
 								 * LP_DEAD */
+	uint8		visrecheck;		/* Cached visibility check result for this
+								 * heap pointer. */
 } GISTSearchHeapItem;
 
 /* Unvisited item, either index page or heap tuple */
@@ -170,12 +173,24 @@ typedef struct GISTScanOpaqueData
 	BlockNumber curBlkno;		/* current number of block */
 	GistNSN		curPageLSN;		/* pos in the WAL stream when page was read */
 
-	/* In a non-ordered search, returnable heap items are stored here: */
-	GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
-	OffsetNumber nPageData;		/* number of valid items in array */
-	OffsetNumber curPageData;	/* next item to return */
-	MemoryContext pageDataCxt;	/* context holding the fetched tuples, for
-								 * index-only scans */
+	/* info used by Index-Only Scans */
+	Buffer		vmbuf;			/* reusable buffer for IOS' vm lookups */
+
+	union {
+		struct {
+			/* In a non-ordered search, returnable heap items are stored here: */
+			GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
+			OffsetNumber nPageData;		/* number of valid items in array */
+			OffsetNumber curPageData;	/* next item to return */
+			MemoryContext pageDataCxt;	/* context holding the fetched tuples,
+										 * for index-only scans */
+		};
+		struct {
+			/* In an ordered search, we use this as scratch space for IOS */
+			GISTSearchHeapItem *sortItems[BLCKSZ / sizeof(IndexTupleData)];
+			OffsetNumber	nsortItems;	/* number of items in sortData */
+		};
+	};
 } GISTScanOpaqueData;
 
 typedef GISTScanOpaqueData *GISTScanOpaque;
-- 
2.50.1 (Apple Git-155)

