From e7432523481b9b6184d25848a3a722a3cce296cd Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Fri, 19 Dec 2025 23:58:40 +0100
Subject: [PATCH v13 3/8] TableAM: Support AM-specific fast visibility tests

Previously, we assumed VM_ALL_VISIBLE(...) is universal across all
AMs. This is probably not the case, so we introduce a new table
method called "table_index_vischeck_tuples" which allows anyone to
ask the AM whether a tuple (or list of tuples) is definitely visible
to us, or might be deleted or otherwise invisible.

We implement that method directly for HeapAM; usage of the facility
will follow in later commits.
---
 src/backend/access/heap/heapam.c         | 124 ++++++++++++++++++++++
 src/backend/access/heap/heapam_handler.c |   1 +
 src/backend/access/table/tableamapi.c    |   1 +
 src/include/access/heapam.h              |   2 +
 src/include/access/tableam.h             | 125 +++++++++++++++++++++++
 5 files changed, 253 insertions(+)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6daf4a87dec..d29346a2fee 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -106,6 +106,20 @@ static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
 static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
 										bool *copy);
 
+/* sort template for index visibility checks: order TM_VisChecks by tidblkno */
+#define ST_SORT heap_ivc_sortby_tidheapblk
+#define ST_ELEMENT_TYPE TM_VisCheck
+#define ST_DECLARE
+#define ST_DEFINE
+#define ST_SCOPE static inline
+#define ST_COMPARE(a, b) ( \
+	(a)->tidblkno < (b)->tidblkno ? -1 : ( \
+		(a)->tidblkno > (b)->tidblkno ? 1 : 0 \
+	) \
+)
+
+#include "lib/sort_template.h"
+
 
 /*
  * Each tuple lock mode has a corresponding heavyweight lock, and one or two
@@ -8813,6 +8827,116 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
 	return nblocksfavorable;
 }
 
+/*
+ * heapam implementation of tableam's index_vischeck_tuples interface.
+ *
+ * This helper function is called by index AMs during index-only scans,
+ * to do VM-based visibility checks on individual tuples, so that the AM
+ * can hold the tuple in memory for e.g. reordering for extended periods of
+ * time without holding thousands of pins that would conflict with VACUUM.
+ *
+ * It's possible for this to generate a fair amount of I/O, since we may be
+ * checking hundreds of tuples from a single index block, but that is
+ * preferred over holding thousands of pins.
+ *
+ * We sort the TIDs by heap block so that every VM page is visited at most
+ * once, and skip the array bookkeeping when only one heap block is involved.
+ */
+void
+heap_index_vischeck_tuples(Relation rel, TM_IndexVisibilityCheckOp *checkop)
+{
+	TM_VisCheck	   *checks = checkop->checktids;
+	int				checkntids = checkop->checkntids;
+	int				nblocks = 1;
+	BlockNumber	   *blknos;
+	uint8		   *status;
+	TMVC_Result		res;
+
+	if (checkntids == 0)
+		return;
+
+	/*
+	 * Sort the caller's checks in place by heap block number, so that we
+	 * will only need to visit every VM page at most once.
+	 */
+	heap_ivc_sortby_tidheapblk(checks, checkntids);
+
+	for (int i = 0; i < checkntids - 1; i++)
+	{
+		if (checks[i].tidblkno != checks[i + 1].tidblkno)
+		{
+			Assert(checks[i].tidblkno < checks[i + 1].tidblkno);
+			nblocks++;
+		}
+	}
+
+	/*
+	 * No need to allocate arrays or do other (comparatively expensive)
+	 * bookkeeping when we have only one block to check.
+	 */
+	if (nblocks == 1)
+	{
+		if (VM_ALL_VISIBLE(rel, checks[0].tidblkno, checkop->vmbuf))
+			res = TMVC_Visible;
+		else
+			res = TMVC_MaybeVisible;
+
+		for (int i = 0; i < checkntids; i++)
+			checks[i].vischeckresult = res;
+
+		return;
+	}
+
+	blknos = palloc_array(BlockNumber, nblocks);
+	status = palloc_array(uint8, nblocks);
+
+	blknos[0] = checks[0].tidblkno;
+
+	/* fill in the rest of the blknos array with unique block numbers */
+	for (int i = 0, j = 0; i < checkntids; i++)
+	{
+		Assert(BlockNumberIsValid(checks[i].tidblkno));
+
+		if (checks[i].tidblkno != blknos[j])
+			blknos[++j] = checks[i].tidblkno;
+	}
+
+	/* do the actual visibility checks */
+	visibilitymap_get_statusv(rel, blknos, status, nblocks, checkop->vmbuf);
+
+	/*
+	 * 'res' is the current TMVC value for blknos[j] below. It is updated
+	 * inside the loop, but only when j is updated, so we must initialize it
+	 * here, or we'll store uninitialized data instead of a TMVC value for
+	 * the first block's result.
+	 */
+	if (status[0] & VISIBILITYMAP_ALL_VISIBLE)
+		res = TMVC_Visible;
+	else
+		res = TMVC_MaybeVisible;
+
+	/* copy the results of blknos into the TM_VisChecks */
+	for (int i = 0, j = 0; i < checkntids; i++)
+	{
+		if (checks[i].tidblkno != blknos[j])
+		{
+			j += 1;
+			Assert(checks[i].tidblkno == blknos[j]);
+
+			if (status[j] & VISIBILITYMAP_ALL_VISIBLE)
+				res = TMVC_Visible;
+			else
+				res = TMVC_MaybeVisible;
+		}
+
+		checks[i].vischeckresult = res;
+	}
+
+	/* and clean up the resources we'd used */
+	pfree(status);
+	pfree(blknos);
+}
+
 /*
  * Perform XLogInsert for a heap-visible operation.  'block' is the block
  * being marked all-visible, and vm_buffer is the buffer containing the
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index dd4fe6bf62f..6189557cbbb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2648,6 +2648,7 @@ static const TableAmRoutine heapam_methods = {
 	.tuple_tid_valid = heapam_tuple_tid_valid,
 	.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
 	.index_delete_tuples = heap_index_delete_tuples,
+	.index_vischeck_tuples = heap_index_vischeck_tuples,
 
 	.relation_set_new_filelocator = heapam_relation_set_new_filelocator,
 	.relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index 476663b66aa..b3ce90ceaea 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -61,6 +61,7 @@ GetTableAmRoutine(Oid amhandler)
 	Assert(routine->tuple_get_latest_tid != NULL);
 	Assert(routine->tuple_satisfies_snapshot != NULL);
 	Assert(routine->index_delete_tuples != NULL);
+	Assert(routine->index_vischeck_tuples != NULL);
 
 	Assert(routine->tuple_insert != NULL);
 
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index f7e4ae3843c..faf4f3a585a 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -407,6 +407,8 @@ extern void simple_heap_update(Relation relation, const ItemPointerData *otid,
 
 extern TransactionId heap_index_delete_tuples(Relation rel,
 											  TM_IndexDeleteOp *delstate);
+extern void heap_index_vischeck_tuples(Relation rel,
+									   TM_IndexVisibilityCheckOp *checkop);
 
 /* in heap/pruneheap.c */
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 2fa790b6bf5..52acf8c1985 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -254,6 +254,69 @@ typedef struct TM_IndexDeleteOp
 	TM_IndexStatus *status;
 } TM_IndexDeleteOp;
 
+/*
+ * State used when calling table_index_vischeck_tuples()
+ *
+ * Index-only scans need to know the visibility of the associated table tuples
+ * before they can return the index tuple.  If the index tuple is known to be
+ * visible with a cheap check, we can return it directly without requesting
+ * the visibility info from the table AM directly.
+ *
+ * This AM API exposes a cheap bulk visibility checking API to indexes,
+ * allowing these indexes to check multiple tuples worth of visibility info at
+ * once, and allows the AM to store these checks.  This improves the pinning
+ * ergonomics of index AMs by allowing a scan to cache index tuples in memory
+ * without holding pins on these index tuple pages until the index tuples are
+ * returned.
+ *
+ * The method is called with a list of TIDs, and on return each tuple's state
+ * is Visible or MaybeVisible; Unchecked is only the pre-call initial state.
+ *
+ * HeapAM's implementation of visibility maps only allows for cheap checks of
+ * *definitely visible*; all other results are *maybe visible*. A result for
+ * *definitely not visible* aka dead is currently not accounted for for lack of
+ * Table AMs which support such visibility lookups cheaply. However, if a
+ * Table AM were to implement this, it could be used to quickly skip the
+ * current tuple in index scans, without having to ask the Table AM for that
+ * TID's data.
+ */
+typedef enum TMVC_Result
+{
+	TMVC_Unchecked = 0,
+	TMVC_Visible = 1,
+	TMVC_MaybeVisible = 2,
+
+#define TMVC_MAX TMVC_MaybeVisible
+} TMVC_Result;
+
+typedef struct TM_VisCheck
+{
+	/* TID from index tuple; deformed to not waste time during sort ops */
+	BlockNumber		tidblkno;
+	uint16			tidoffset;
+	/* caller-supplied identifier for this TID (see PopulateTMVischeck) */
+	OffsetNumber	idxoffnum;
+	/* the result of the visibility check operation */
+	TMVC_Result		vischeckresult;
+} TM_VisCheck;
+
+static inline void
+PopulateTMVischeck(TM_VisCheck *check, ItemPointer tid, OffsetNumber idxoff)
+{
+	Assert(ItemPointerIsValid(tid));
+	check->tidblkno = ItemPointerGetBlockNumberNoCheck(tid);
+	check->tidoffset = ItemPointerGetOffsetNumberNoCheck(tid);
+	check->idxoffnum = idxoff;
+	check->vischeckresult = TMVC_Unchecked;
+}
+
+typedef struct TM_IndexVisibilityCheckOp
+{
+	int			checkntids;			/* number of TIDs to check */
+	Buffer	   *vmbuf;				/* pointer to VM buffer to reuse across calls */
+	TM_VisCheck *checktids;			/* checks to execute; heapam sorts in place */
+} TM_IndexVisibilityCheckOp;
+
 /* "options" flag bits for table_tuple_insert */
 /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
 #define TABLE_INSERT_SKIP_FSM		0x0002
@@ -500,6 +563,10 @@ typedef struct TableAmRoutine
 	TransactionId (*index_delete_tuples) (Relation rel,
 										  TM_IndexDeleteOp *delstate);
 
+	/* see table_index_vischeck_tuples() */
+	void		(*index_vischeck_tuples) (Relation rel,
+										  TM_IndexVisibilityCheckOp *checkop);
+
 
 	/* ------------------------------------------------------------------------
 	 * Manipulations of physical tuples.
@@ -1333,6 +1400,64 @@ table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
 	return rel->rd_tableam->index_delete_tuples(rel, delstate);
 }
 
+/*
+ * Determine rough visibility information of index tuples based on each TID.
+ *
+ * Determines which entries from index AM caller's TM_IndexVisibilityCheckOp
+ * state point to TMVC_Visible or TMVC_MaybeVisible table tuples, at low IO
+ * overhead.  For the heap AM, the implementation is effectively a wrapper
+ * around VM_ALL_VISIBLE.
+ *
+ * On return, every TM_VisCheck in checkop->checktids has its vischeckresult
+ * set; note that the AM may have reordered the array (heapam sorts it).
+ *
+ * Note that there is no value for "definitely dead" tuples, as the Heap AM
+ * doesn't have an efficient method to determine that a tuple is dead to all
+ * users, as it would have to go into the heap.  If and when AMs are built
+ * that would support VM checks with an equivalent to VM_ALL_DEAD this
+ * decision can be reconsidered.
+ */
+static inline void
+table_index_vischeck_tuples(Relation rel, TM_IndexVisibilityCheckOp *checkop)
+{
+	rel->rd_tableam->index_vischeck_tuples(rel, checkop);
+
+#ifdef USE_ASSERT_CHECKING
+	for (int i = 0; i < checkop->checkntids; i++)
+	{
+		TMVC_Result res = checkop->checktids[i].vischeckresult;
+
+		if (res <= TMVC_Unchecked || res > TMVC_MAX)
+		{
+			elog(PANIC, "Unexpected vischeckresult %d at offset %d/%d, expected value between %d and %d inclusive",
+				 (int) res,
+				 i, checkop->checkntids,
+				 TMVC_Visible,
+				 TMVC_MAX);
+		}
+	}
+#endif
+}
+
+/* Single-TID convenience wrapper around table_index_vischeck_tuples(). */
+static inline TMVC_Result
+table_index_vischeck_tuple(Relation rel, Buffer *vmbuffer, ItemPointer tid)
+{
+	TM_IndexVisibilityCheckOp checkOp;
+	TM_VisCheck		op;
+
+	PopulateTMVischeck(&op, tid, 0);
+
+	checkOp.checktids = &op;
+	checkOp.checkntids = 1;
+	checkOp.vmbuf = vmbuffer;
+
+	/* the bulk entry point validates the result in assert-enabled builds */
+	table_index_vischeck_tuples(rel, &checkOp);
+
+	return op.vischeckresult;
+}
+
 
 /* ----------------------------------------------------------------------------
  *  Functions for manipulations of physical tuples.
-- 
2.50.1 (Apple Git-155)

