From 00bb5187dba75601c8d3d37db9ce8b56e377a742 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Sat, 8 Mar 2025 01:15:08 +0100
Subject: [PATCH v12 3/5] SP-GIST: Fix visibility issues in IOS

Previously, SP-GIST IOS could buffer tuples from pages while VACUUM came
along and cleaned up an ALL_DEAD tuple, marking the tuple's page
ALL_VISIBLE again and making IOS mistakenly believe the tuple is indeed
visible.

With this patch, pins now conflict with SP-GIST vacuum, and we now do
preliminary visibility checks to be used by IOS so that the IOS
infrastructure knows to recheck the heap page even if that page is now
ALL_VISIBLE.

Idea from Heikki Linnakangas
---
 src/backend/access/spgist/spgscan.c   | 182 ++++++++++++++++++++++++--
 src/backend/access/spgist/spgvacuum.c |   2 +-
 src/include/access/spgist_private.h   |   9 +-
 3 files changed, 179 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index 25893050c58..42784fdc4ca 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -30,7 +30,8 @@
 typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr,
 							   Datum leafValue, bool isNull,
 							   SpGistLeafTuple leafTuple, bool recheck,
-							   bool recheckDistances, double *distances);
+							   bool recheckDistances, double *distances,
+							   TMVC_Result visrecheck);
 
 /*
  * Pairing heap comparison function for the SpGistSearchItem queue.
@@ -142,6 +143,7 @@ spgAddStartItem(SpGistScanOpaque so, bool isnull)
 	startEntry->traversalValue = NULL;
 	startEntry->recheck = false;
 	startEntry->recheckDistances = false;
+	startEntry->visrecheck = TMVC_Unchecked;
 
 	spgAddSearchItemToQueue(so, startEntry);
 }
@@ -386,6 +388,19 @@ spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	if (scankey && scan->numberOfKeys > 0)
 		memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
 
+	/* prepare index-only scan requirements */
+	so->nReorderThisPage = 0;
+	if (scan->xs_want_itup)
+	{
+		if (so->visrecheck == NULL)
+			so->visrecheck = palloc(MaxIndexTuplesPerPage);
+
+		if (scan->numberOfOrderBys > 0 && so->items == NULL)
+		{
+			so->items = palloc_array(SpGistSearchItem *, MaxIndexTuplesPerPage);
+		}
+	}
+
 	/* initialize order-by data if needed */
 	if (orderbys && scan->numberOfOrderBys > 0)
 	{
@@ -453,6 +468,9 @@ spgendscan(IndexScanDesc scan)
 		pfree(scan->xs_orderbynulls);
 	}
 
+	if (BufferIsValid(so->vmbuf))
+		ReleaseBuffer(so->vmbuf);
+
 	pfree(so);
 }
 
@@ -502,6 +520,7 @@ spgNewHeapItem(SpGistScanOpaque so, int level, SpGistLeafTuple leafTuple,
 	item->isLeaf = true;
 	item->recheck = recheck;
 	item->recheckDistances = recheckDistances;
+	item->visrecheck = TMVC_Unchecked;
 
 	return item;
 }
@@ -584,6 +603,14 @@ spgLeafTest(SpGistScanOpaque so, SpGistSearchItem *item,
 														isnull,
 														distances);
 
+			if (so->want_itup)
+			{
+				Assert(PointerIsValid(so->items));
+
+				so->items[so->nReorderThisPage] = heapItem;
+				so->nReorderThisPage++;
+			}
+
 			spgAddSearchItemToQueue(so, heapItem);
 
 			MemoryContextSwitchTo(oldCxt);
@@ -593,7 +620,7 @@ spgLeafTest(SpGistScanOpaque so, SpGistSearchItem *item,
 			/* non-ordered scan, so report the item right away */
 			Assert(!recheckDistances);
 			storeRes(so, &leafTuple->heapPtr, leafValue, isnull,
-					 leafTuple, recheck, false, NULL);
+					 leafTuple, recheck, false, NULL, TMVC_Unchecked);
 			*reportedSome = true;
 		}
 	}
@@ -806,6 +833,88 @@ spgTestLeafTuple(SpGistScanOpaque so,
 	return SGLT_GET_NEXTOFFSET(leafTuple);
 }
 
+/*
+ * Pupulate so->visrecheck based on tuples which are cached for a currently
+ * pinned page.
+ */
+static void
+spgPopulateUnorderedVischecks(IndexScanDesc scan, SpGistScanOpaqueData *so)
+{
+	TM_IndexVisibilityCheckOp op;
+
+	Assert(scan->numberOfOrderBys == 0);
+
+	if (so->nPtrs == 0)
+		return;
+
+	op.checkntids = so->nPtrs;
+	op.checktids = palloc_array(TM_VisCheck, so->nPtrs);
+	op.vmbuf = &so->vmbuf;
+
+	for (int i = 0; i < op.checkntids; i++)
+	{
+		Assert(ItemPointerIsValid(&so->heapPtrs[i]));
+
+		PopulateTMVischeck(&op.checktids[i], &so->heapPtrs[i], i);
+	}
+
+	table_index_vischeck_tuples(scan->heapRelation, &op);
+
+	for (int i = 0; i < op.checkntids; i++)
+	{
+		TM_VisCheck *check = &op.checktids[i];
+
+		Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&so->heapPtrs[check->idxoffnum]));
+		Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&so->heapPtrs[check->idxoffnum]));
+		Assert(check->idxoffnum < op.checkntids);
+
+		so->visrecheck[check->idxoffnum] = check->vischeckresult;
+	}
+
+	pfree(op.checktids);
+}
+
+/* pupulate so->visrecheck based on current cached tuples */
+static void
+spgPopulateOrderedVisChecks(IndexScanDesc scan, SpGistScanOpaqueData *so)
+{
+	TM_IndexVisibilityCheckOp op;
+
+	if (so->nReorderThisPage == 0)
+		return;
+
+	Assert(so->nReorderThisPage > 0);
+	Assert(scan->numberOfOrderBys > 0);
+	Assert(PointerIsValid(so->items));
+
+	op.checkntids = so->nReorderThisPage;
+	op.checktids = palloc_array(TM_VisCheck, so->nReorderThisPage);
+	op.vmbuf = &so->vmbuf;
+
+	for (int i = 0; i < op.checkntids; i++)
+	{
+		PopulateTMVischeck(&op.checktids[i], &so->items[i]->heapPtr, i);
+		Assert(ItemPointerIsValid(&so->items[i]->heapPtr));
+		Assert(so->items[i]->isLeaf);
+	}
+
+	table_index_vischeck_tuples(scan->heapRelation, &op);
+
+	for (int i = 0; i < op.checkntids; i++)
+	{
+		TM_VisCheck *check = &op.checktids[i];
+
+		Assert(check->idxoffnum < op.checkntids);
+		Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&so->items[check->idxoffnum]->heapPtr));
+		Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&so->items[check->idxoffnum]->heapPtr));
+
+		so->items[check->idxoffnum]->visrecheck = check->vischeckresult;
+	}
+
+	pfree(op.checktids);
+	so->nReorderThisPage = 0;
+}
+
 /*
  * Walk the tree and report all tuples passing the scan quals to the storeRes
  * subroutine.
@@ -814,8 +923,8 @@ spgTestLeafTuple(SpGistScanOpaque so,
  * next page boundary once we have reported at least one tuple.
  */
 static void
-spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex,
-		storeRes_func storeRes)
+spgWalk(IndexScanDesc scan, Relation index, SpGistScanOpaque so,
+		bool scanWholeIndex, storeRes_func storeRes)
 {
 	Buffer		buffer = InvalidBuffer;
 	bool		reportedSome = false;
@@ -835,9 +944,23 @@ redirect:
 		{
 			/* We store heap items in the queue only in case of ordered search */
 			Assert(so->numberOfNonNullOrderBys > 0);
+
+			/*
+			 * If an item we found on a page is retrieved immediately after
+			 * processing that page, we won't yet have released the page pin,
+			 * and thus won't yet have processed the visibility data of the
+			 * page's (now) ordered tuples.
+			 * Do that now, so that all tuples on the page we're about to
+			 * unpin were checked for visibility before we returned any. 
+			 */
+			if (so->want_itup && so->nReorderThisPage)
+				spgPopulateOrderedVisChecks(scan, so);
+
+			Assert(!so->want_itup || item->visrecheck != TMVC_Unchecked);
 			storeRes(so, &item->heapPtr, item->value, item->isNull,
 					 item->leafTuple, item->recheck,
-					 item->recheckDistances, item->distances);
+					 item->recheckDistances, item->distances,
+					 item->visrecheck);
 			reportedSome = true;
 		}
 		else
@@ -854,7 +977,15 @@ redirect:
 			}
 			else if (blkno != BufferGetBlockNumber(buffer))
 			{
-				UnlockReleaseBuffer(buffer);
+				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+				Assert(so->numberOfOrderBys >= 0);
+				if (so->numberOfOrderBys == 0)
+					spgPopulateUnorderedVischecks(scan, so);
+				else
+					spgPopulateOrderedVisChecks(scan, so);
+
+				ReleaseBuffer(buffer);
 				buffer = ReadBuffer(index, blkno);
 				LockBuffer(buffer, BUFFER_LOCK_SHARE);
 			}
@@ -922,16 +1053,36 @@ redirect:
 	}
 
 	if (buffer != InvalidBuffer)
-		UnlockReleaseBuffer(buffer);
-}
+	{
+		/* Unlock the buffer for concurrent accesses except VACUUM */
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
+		/*
+		 * If we're in an index-only scan, pre-check visibility of the tuples,
+		 * so we can drop the pin without causing visibility bugs.
+		 */
+		if (so->want_itup)
+		{
+			Assert(scan->numberOfOrderBys >= 0);
+
+			if (scan->numberOfOrderBys == 0)
+				spgPopulateUnorderedVischecks(scan, so);
+			else
+				spgPopulateOrderedVisChecks(scan, so);
+		}
+
+		/* Release the page */
+		ReleaseBuffer(buffer);
+	}
+}
 
 /* storeRes subroutine for getbitmap case */
 static void
 storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr,
 			Datum leafValue, bool isnull,
 			SpGistLeafTuple leafTuple, bool recheck,
-			bool recheckDistances, double *distances)
+			bool recheckDistances, double *distances,
+			TMVC_Result visres)
 {
 	Assert(!recheckDistances && !distances);
 	tbm_add_tuples(so->tbm, heapPtr, 1, recheck);
@@ -949,7 +1100,7 @@ spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	so->tbm = tbm;
 	so->ntids = 0;
 
-	spgWalk(scan->indexRelation, so, true, storeBitmap);
+	spgWalk(scan, scan->indexRelation, so, true, storeBitmap);
 
 	return so->ntids;
 }
@@ -959,12 +1110,15 @@ static void
 storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr,
 			  Datum leafValue, bool isnull,
 			  SpGistLeafTuple leafTuple, bool recheck,
-			  bool recheckDistances, double *nonNullDistances)
+			  bool recheckDistances, double *nonNullDistances,
+			  TMVC_Result visres)
 {
 	Assert(so->nPtrs < MaxIndexTuplesPerPage);
 	so->heapPtrs[so->nPtrs] = *heapPtr;
 	so->recheck[so->nPtrs] = recheck;
 	so->recheckDistances[so->nPtrs] = recheckDistances;
+	if (so->want_itup)
+		so->visrecheck[so->nPtrs] = visres;
 
 	if (so->numberOfOrderBys > 0)
 	{
@@ -1041,6 +1195,10 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
 			scan->xs_heaptid = so->heapPtrs[so->iPtr];
 			scan->xs_recheck = so->recheck[so->iPtr];
 			scan->xs_hitup = so->reconTups[so->iPtr];
+			if (so->want_itup)
+				scan->xs_visrecheck = so->visrecheck[so->iPtr];
+
+			Assert(!scan->xs_want_itup || scan->xs_visrecheck != TMVC_Unchecked);
 
 			if (so->numberOfOrderBys > 0)
 				index_store_float8_orderby_distances(scan, so->orderByTypes,
@@ -1070,7 +1228,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
 		}
 		so->iPtr = so->nPtrs = 0;
 
-		spgWalk(scan->indexRelation, so, false, storeGettuple);
+		spgWalk(scan, scan->indexRelation, so, false, storeGettuple);
 
 		if (so->nPtrs == 0)
 			break;				/* must have completed scan */
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index 81171f35451..04836a29304 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -625,7 +625,7 @@ spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
 	BlockNumber blkno = BufferGetBlockNumber(buffer);
 	Page		page;
 
-	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	LockBufferForCleanup(buffer);
 	page = (Page) BufferGetPage(buffer);
 
 	if (PageIsNew(page))
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index cb43a278f46..63e970468c7 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -21,6 +21,7 @@
 #include "storage/buf.h"
 #include "utils/geo_decls.h"
 #include "utils/relcache.h"
+#include "tableam.h"
 
 
 typedef struct SpGistOptions
@@ -175,7 +176,7 @@ typedef struct SpGistSearchItem
 	bool		isLeaf;			/* SearchItem is heap item */
 	bool		recheck;		/* qual recheck is needed */
 	bool		recheckDistances;	/* distance recheck is needed */
-
+	uint8		visrecheck;		/* IOS: TMVC_Result of contained heap tuple */
 	/* array with numberOfOrderBys entries */
 	double		distances[FLEXIBLE_ARRAY_MEMBER];
 } SpGistSearchItem;
@@ -223,6 +224,7 @@ typedef struct SpGistScanOpaqueData
 
 	/* These fields are only used in amgettuple scans: */
 	bool		want_itup;		/* are we reconstructing tuples? */
+	Buffer		vmbuf;			/* IOS: used for table_index_vischeck_tuples */
 	TupleDesc	reconTupDesc;	/* if so, descriptor for reconstructed tuples */
 	int			nPtrs;			/* number of TIDs found on current page */
 	int			iPtr;			/* index for scanning through same */
@@ -235,6 +237,11 @@ typedef struct SpGistScanOpaqueData
 	/* distances (for recheck) */
 	IndexOrderByDistance *distances[MaxIndexTuplesPerPage];
 
+	/* support for IOS */
+	int			nReorderThisPage;
+	uint8	   *visrecheck;	/* IOS vis check results, counted by nPtrs */
+	SpGistSearchItem **items; /* counted by nReorderThisPage */
+
 	/*
 	 * Note: using MaxIndexTuplesPerPage above is a bit hokey since
 	 * SpGistLeafTuples aren't exactly IndexTuples; however, they are larger,
-- 
2.48.1

