From 4838129ccdf3917f71f52035330752e3b5d7416a Mon Sep 17 00:00:00 2001
From: Mark Dilger <mark.dilger@enterprisedb.com>
Date: Mon, 29 Mar 2021 14:31:13 -0700
Subject: [PATCH v13 2/4] Replacing implementation of check_tuple_visibility

Using a modified version of HeapTupleSatisfiesVacuumHorizon.
---
 contrib/amcheck/verify_heapam.c | 480 +++++++++++++++++++++++++-------
 1 file changed, 372 insertions(+), 108 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 9172b5fd81..59b13180d9 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -73,6 +73,8 @@ typedef struct HeapCheckContext
 	TransactionId oldest_xid;	/* ShmemVariableCache->oldestXid */
 	FullTransactionId oldest_fxid;	/* 64-bit version of oldest_xid, computed
 									 * relative to next_fxid */
+	TransactionId safe_xmin;	/* this XID and newer ones can't become
+								 * all-visible while we're running */
 
 	/*
 	 * Cached copy of value from MultiXactState
@@ -114,6 +116,9 @@ typedef struct HeapCheckContext
 	uint32		offset;			/* offset in tuple data */
 	AttrNumber	attnum;
 
+	/* True if toast for this tuple could be vacuumed away */
+	bool		tuple_is_volatile;
+
 	/* Values for iterating over toast for the attribute */
 	int32		chunkno;
 	int32		attrsize;
@@ -249,6 +254,12 @@ verify_heapam(PG_FUNCTION_ARGS)
 	memset(&ctx, 0, sizeof(HeapCheckContext));
 	ctx.cached_xid = InvalidTransactionId;
 
+	/*
+	 * Any xmin newer than the xmin of our snapshot can't become all-visible
+	 * while we're running.
+	 */
+	ctx.safe_xmin = GetTransactionSnapshot()->xmin;
+
 	/*
 	 * If we report corruption when not examining some individual attribute,
 	 * we need attnum to be reported as NULL.  Set that up before any
@@ -640,189 +651,442 @@ check_tuple_header(HeapCheckContext *ctx)
 }
 
 /*
- * Checks whether a tuple is visible for checking.
+ * Checks whether a tuple is visible to our transaction for checking, which is
+ * not a question of whether we should be able to see the tuple relative to any
+ * particular snapshot, but rather a question of whether it is safe and
+ * reasonable to check the tuple attributes.  The caller should already have
+ * checked that the tuple is sufficiently sensible for us to evaluate.
  *
- * Since we do not hold a snapshot, tuple visibility is not a question of
- * whether we should be able to see the tuple relative to any particular
- * snapshot, but rather a question of whether it is safe and reasonable to
- * check the tuple attributes.
+ * If a tuple could have been inserted by a transaction that also added a
+ * column to the table, but which ultimately did not commit, or which has not
+ * yet committed, then the table's current TupleDesc might differ from the one
+ * used to construct this tuple, so we must not check it.
  *
- * For visibility determination not specifically related to corruption, what we
- * want to know is if a tuple is potentially visible to any running
- * transaction.  If you are tempted to replace this function's visibility logic
- * with a call to another visibility checking function, keep in mind that this
- * function does not update hint bits, as it seems imprudent to write hint bits
- * (or anything at all) to a table during a corruption check.  Nor does this
- * function bother classifying tuple visibility beyond a boolean visible vs.
- * not visible.
+ * As a special case, if our own transaction inserted the tuple, even if we
+ * added a column to the table, our TupleDesc should match.  We could check the
+ * tuple, but choose not to do so.
  *
- * Returns whether the tuple is visible for checking.
+ * If a tuple has been updated or deleted, we can still read the old tuple for
+ * corruption checking purposes, as long as we are careful about concurrent
+ * vacuums.  The main table tuple itself cannot be vacuumed away because we
+ * hold a buffer lock on the page, but if the deleting transaction is older
+ * than our transaction snapshot's xmin, then vacuum could remove the toast at
+ * any time, so we must not check the toast.
+ *
+ * If xmin or xmax values are older than can be checked against clog, or appear
+ * to be in the future (possibly due to wrap-around), then we cannot make a
+ * determination about the visibility of the tuple, so we must not check it.
+ *
+ * Returns true if the tuple should be checked, false otherwise.  Sets
+ * ctx->tuple_is_volatile true if the toast might be vacuumed away, false
+ * otherwise.
  */
 static bool
 check_tuple_visibility(HeapCheckContext *ctx)
 {
+	TransactionId xmin;
+	TransactionId xvac;
+	TransactionId xmax;
+	XidCommitStatus xmin_status;
+	XidCommitStatus xvac_status;
+	XidCommitStatus xmax_status;
 	HeapTupleHeader tuphdr = ctx->tuphdr;
-	uint16		infomask = tuphdr->t_infomask;
 
-	if (!HeapTupleHeaderXminCommitted(tuphdr))
+	ctx->tuple_is_volatile = true;	/* have not yet proven otherwise */
+
+	/* If xmin is normal, it should be within valid range */
+	xmin = HeapTupleHeaderGetXmin(tuphdr);
+	switch (get_xid_status(xmin, ctx, &xmin_status))
 	{
-		TransactionId raw_xmin = HeapTupleHeaderGetRawXmin(tuphdr);
+		case XID_INVALID:
+		case XID_BOUNDS_OK:
+			break;
+		case XID_IN_FUTURE:
+			report_corruption(ctx,
+							  psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u",
+									   xmin,
+									   EpochFromFullTransactionId(ctx->next_fxid),
+									   XidFromFullTransactionId(ctx->next_fxid)));
+			return false;		/* corrupt */
+		case XID_PRECEDES_CLUSTERMIN:
+			report_corruption(ctx,
+							  psprintf("xmin %u precedes oldest valid transaction ID %u:%u",
+									   xmin,
+									   EpochFromFullTransactionId(ctx->oldest_fxid),
+									   XidFromFullTransactionId(ctx->oldest_fxid)));
+			return false;		/* corrupt */
+		case XID_PRECEDES_RELMIN:
+			report_corruption(ctx,
+							  psprintf("xmin %u precedes relation freeze threshold %u:%u",
+									   xmin,
+									   EpochFromFullTransactionId(ctx->relfrozenfxid),
+									   XidFromFullTransactionId(ctx->relfrozenfxid)));
+			return false;		/* corrupt */
+	}
 
+	/*
+	 * Has inserting transaction committed?
+	 */
+	if (!HeapTupleHeaderXminCommitted(tuphdr))
+	{
 		if (HeapTupleHeaderXminInvalid(tuphdr))
-			return false;		/* HEAPTUPLE_DEAD */
+
+			/*
+			 * The inserting transaction aborted.  The structure of the tuple
+			 * may not match our relation description, so we cannot check it.
+			 */
+			return false;		/* uncheckable */
 		/* Used by pre-9.0 binary upgrades */
-		else if (infomask & HEAP_MOVED_OFF ||
-				 infomask & HEAP_MOVED_IN)
+		else if (tuphdr->t_infomask & HEAP_MOVED_OFF)
 		{
-			XidCommitStatus status;
-			TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr);
+			xvac = HeapTupleHeaderGetXvac(tuphdr);
 
-			switch (get_xid_status(xvac, ctx, &status))
+			switch (get_xid_status(xvac, ctx, &xvac_status))
 			{
 				case XID_INVALID:
 					report_corruption(ctx,
-									  pstrdup("old-style VACUUM FULL transaction ID is invalid"));
+									  pstrdup("old-style VACUUM FULL transaction ID for moved off tuple is invalid"));
 					return false;	/* corrupt */
 				case XID_IN_FUTURE:
 					report_corruption(ctx,
-									  psprintf("old-style VACUUM FULL transaction ID %u equals or exceeds next valid transaction ID %u:%u",
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple equals or exceeds next valid transaction ID %u:%u",
 											   xvac,
 											   EpochFromFullTransactionId(ctx->next_fxid),
 											   XidFromFullTransactionId(ctx->next_fxid)));
 					return false;	/* corrupt */
 				case XID_PRECEDES_RELMIN:
 					report_corruption(ctx,
-									  psprintf("old-style VACUUM FULL transaction ID %u precedes relation freeze threshold %u:%u",
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes relation freeze threshold %u:%u",
 											   xvac,
 											   EpochFromFullTransactionId(ctx->relfrozenfxid),
 											   XidFromFullTransactionId(ctx->relfrozenfxid)));
 					return false;	/* corrupt */
-					break;
 				case XID_PRECEDES_CLUSTERMIN:
 					report_corruption(ctx,
-									  psprintf("old-style VACUUM FULL transaction ID %u precedes oldest valid transaction ID %u:%u",
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes oldest valid transaction ID %u:%u",
 											   xvac,
 											   EpochFromFullTransactionId(ctx->oldest_fxid),
 											   XidFromFullTransactionId(ctx->oldest_fxid)));
 					return false;	/* corrupt */
-					break;
 				case XID_BOUNDS_OK:
-					switch (status)
-					{
-						case XID_IN_PROGRESS:
-						case XID_IS_CURRENT_XID:
-							return true;	/* HEAPTUPLE_DELETE_IN_PROGRESS */
-						case XID_COMMITTED:
-						case XID_ABORTED:
-							return false;	/* HEAPTUPLE_DEAD */
-					}
+					break;
 			}
-		}
-		else
-		{
-			XidCommitStatus status;
 
-			switch (get_xid_status(raw_xmin, ctx, &status))
+			switch (xvac_status)
 			{
-				case XID_INVALID:
-					report_corruption(ctx,
-									  pstrdup("raw xmin is invalid"));
-					return false;
-				case XID_IN_FUTURE:
+				case XID_IS_CURRENT_XID:
 					report_corruption(ctx,
-									  psprintf("raw xmin %u equals or exceeds next valid transaction ID %u:%u",
-											   raw_xmin,
-											   EpochFromFullTransactionId(ctx->next_fxid),
-											   XidFromFullTransactionId(ctx->next_fxid)));
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple matches our current transaction ID",
+											   xvac));
 					return false;	/* corrupt */
-				case XID_PRECEDES_RELMIN:
+				case XID_IN_PROGRESS:
 					report_corruption(ctx,
-									  psprintf("raw xmin %u precedes relation freeze threshold %u:%u",
-											   raw_xmin,
-											   EpochFromFullTransactionId(ctx->relfrozenfxid),
-											   XidFromFullTransactionId(ctx->relfrozenfxid)));
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple appears to be in progress",
+											   xvac));
 					return false;	/* corrupt */
-				case XID_PRECEDES_CLUSTERMIN:
-					report_corruption(ctx,
-									  psprintf("raw xmin %u precedes oldest valid transaction ID %u:%u",
-											   raw_xmin,
-											   EpochFromFullTransactionId(ctx->oldest_fxid),
-											   XidFromFullTransactionId(ctx->oldest_fxid)));
-					return false;	/* corrupt */
-				case XID_BOUNDS_OK:
-					switch (status)
-					{
-						case XID_COMMITTED:
-							break;
-						case XID_IN_PROGRESS:
-						case XID_IS_CURRENT_XID:
-							return true;	/* insert or delete in progress */
-						case XID_ABORTED:
-							return false;	/* HEAPTUPLE_DEAD */
-					}
+
+				case XID_COMMITTED:
+
+					/*
+					 * The VACUUM FULL committed, so this tuple is dead and
+					 * could be vacuumed away at any time.  It's ok to check
+					 * the tuple because we have a buffer lock for the page,
+					 * but not safe to check the toast.  We don't bother
+					 * comparing against safe_xmin because the VACUUM FULL
+					 * must have committed prior to an upgrade and can't still
+					 * be running.
+					 */
+					return true;	/* checkable */
+
+				case XID_ABORTED:
+					break;
 			}
 		}
-	}
-
-	if (!(infomask & HEAP_XMAX_INVALID) && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
-	{
-		if (infomask & HEAP_XMAX_IS_MULTI)
+		/* Used by pre-9.0 binary upgrades */
+		else if (tuphdr->t_infomask & HEAP_MOVED_IN)
 		{
-			XidCommitStatus status;
-			TransactionId xmax = HeapTupleGetUpdateXid(tuphdr);
+			xvac = HeapTupleHeaderGetXvac(tuphdr);
 
-			switch (get_xid_status(xmax, ctx, &status))
+			switch (get_xid_status(xvac, ctx, &xvac_status))
 			{
-					/* not LOCKED_ONLY, so it has to have an xmax */
 				case XID_INVALID:
 					report_corruption(ctx,
-									  pstrdup("xmax is invalid"));
+									  pstrdup("old-style VACUUM FULL transaction ID for moved in tuple is invalid"));
 					return false;	/* corrupt */
 				case XID_IN_FUTURE:
 					report_corruption(ctx,
-									  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
-											   xmax,
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple equals or exceeds next valid transaction ID %u:%u",
+											   xvac,
 											   EpochFromFullTransactionId(ctx->next_fxid),
 											   XidFromFullTransactionId(ctx->next_fxid)));
 					return false;	/* corrupt */
 				case XID_PRECEDES_RELMIN:
 					report_corruption(ctx,
-									  psprintf("xmax %u precedes relation freeze threshold %u:%u",
-											   xmax,
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes relation freeze threshold %u:%u",
+											   xvac,
 											   EpochFromFullTransactionId(ctx->relfrozenfxid),
 											   XidFromFullTransactionId(ctx->relfrozenfxid)));
 					return false;	/* corrupt */
 				case XID_PRECEDES_CLUSTERMIN:
 					report_corruption(ctx,
-									  psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
-											   xmax,
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes oldest valid transaction ID %u:%u",
+											   xvac,
 											   EpochFromFullTransactionId(ctx->oldest_fxid),
 											   XidFromFullTransactionId(ctx->oldest_fxid)));
 					return false;	/* corrupt */
 				case XID_BOUNDS_OK:
-					switch (status)
-					{
-						case XID_IN_PROGRESS:
-						case XID_IS_CURRENT_XID:
-							return true;	/* HEAPTUPLE_DELETE_IN_PROGRESS */
-						case XID_COMMITTED:
-						case XID_ABORTED:
-							return false;	/* HEAPTUPLE_RECENTLY_DEAD or
-											 * HEAPTUPLE_DEAD */
-					}
+					break;
 			}
 
-			/* Ok, the tuple is live */
+			switch (xvac_status)
+			{
+				case XID_IS_CURRENT_XID:
+					report_corruption(ctx,
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple matches our current transaction ID",
+											   xvac));
+					return false;	/* corrupt */
+				case XID_IN_PROGRESS:
+					report_corruption(ctx,
+									  psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple appears to be in progress",
+											   xvac));
+					return false;	/* corrupt */
+
+				case XID_COMMITTED:
+					break;
+
+				case XID_ABORTED:
+
+					/*
+					 * The VACUUM FULL aborted, so this tuple is dead and
+					 * could be vacuumed away at any time.  It's ok to check
+					 * the tuple because we have a buffer lock for the page,
+					 * but not safe to check the toast.
+					 */
+					return true;	/* checkable */
+			}
+		}
+		else if (xmin_status == XID_IS_CURRENT_XID)
+		{
+			/*
+			 * Don't check tuples from currently running transactions, not
+			 * even our own.
+			 */
+			return false;		/* checkable, but don't check */
+		}
+		else if (xmin_status == XID_IN_PROGRESS)
+		{
+			/* Don't check tuples from currently running transactions */
+			return false;		/* uncheckable */
+		}
+		else if (xmin_status != XID_COMMITTED)
+		{
+			/*
+			 * Inserting transaction is not in progress, and not committed, so
+			 * it either aborted or crashed. We cannot check.
+			 */
+			return false;		/* uncheckable */
 		}
-		else if (!(infomask & HEAP_XMAX_COMMITTED))
-			return true;		/* HEAPTUPLE_DELETE_IN_PROGRESS or
-								 * HEAPTUPLE_LIVE */
-		else
-			return false;		/* HEAPTUPLE_RECENTLY_DEAD or HEAPTUPLE_DEAD */
 	}
-	return true;				/* not dead */
+
+	/*
+	 * Okay, the inserter committed, so it was good at some point.  Now what
+	 * about the deleting transaction?
+	 */
+
+	if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
+	{
+		/*
+		 * xmax is a multixact, so it should be within valid MXID range.  We
+		 * cannot safely look up the update xid if the multixact is out of
+		 * bounds, and must stop checking this tuple.
+		 */
+		xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+		switch (check_mxid_valid_in_rel(xmax, ctx))
+		{
+			case XID_INVALID:
+				report_corruption(ctx,
+								  pstrdup("multitransaction ID is invalid"));
+				return false;	/* corrupt */
+			case XID_PRECEDES_RELMIN:
+				report_corruption(ctx,
+								  psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u",
+										   xmax, ctx->relminmxid));
+				return false;	/* corrupt */
+			case XID_PRECEDES_CLUSTERMIN:
+				report_corruption(ctx,
+								  psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u",
+										   xmax, ctx->oldest_mxact));
+				return false;	/* corrupt */
+			case XID_IN_FUTURE:
+				report_corruption(ctx,
+								  psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u",
+										   xmax,
+										   ctx->next_mxact));
+				return false;	/* corrupt */
+			case XID_BOUNDS_OK:
+				break;
+		}
+	}
+
+	if (tuphdr->t_infomask & HEAP_XMAX_INVALID)
+	{
+		/*
+		 * This tuple is live.  A concurrently running transaction could
+		 * delete it before we get around to checking the toast, but any such
+		 * running transaction is surely not less than our safe_xmin, so the
+		 * toast cannot be vacuumed out from under us.
+		 */
+		ctx->tuple_is_volatile = false;
+		return true;			/* checkable */
+	}
+
+	if (HEAP_XMAX_IS_LOCKED_ONLY(tuphdr->t_infomask))
+	{
+		/*
+		 * "Deleting" xact really only locked it, so the tuple is live in any
+		 * case.  As above, a concurrently running transaction could delete
+		 * it, but it cannot be vacuumed out from under us.
+		 */
+		ctx->tuple_is_volatile = false;
+		return true;			/* checkable */
+	}
+
+	if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI)
+	{
+		/*
+		 * We already checked above that this multixact is within limits for
+		 * this table.  Now check the update xid from this multixact.
+		 */
+		xmax = HeapTupleGetUpdateXid(tuphdr);
+		switch (get_xid_status(xmax, ctx, &xmax_status))
+		{
+				/* not LOCKED_ONLY, so it has to have an xmax */
+			case XID_INVALID:
+				report_corruption(ctx,
+								  pstrdup("update xid is invalid"));
+				return false;	/* corrupt */
+			case XID_IN_FUTURE:
+				report_corruption(ctx,
+								  psprintf("update xid %u equals or exceeds next valid transaction ID %u:%u",
+										   xmax,
+										   EpochFromFullTransactionId(ctx->next_fxid),
+										   XidFromFullTransactionId(ctx->next_fxid)));
+				return false;	/* corrupt */
+			case XID_PRECEDES_RELMIN:
+				report_corruption(ctx,
+								  psprintf("update xid %u precedes relation freeze threshold %u:%u",
+										   xmax,
+										   EpochFromFullTransactionId(ctx->relfrozenfxid),
+										   XidFromFullTransactionId(ctx->relfrozenfxid)));
+				return false;	/* corrupt */
+			case XID_PRECEDES_CLUSTERMIN:
+				report_corruption(ctx,
+								  psprintf("update xid %u precedes oldest valid transaction ID %u:%u",
+										   xmax,
+										   EpochFromFullTransactionId(ctx->oldest_fxid),
+										   XidFromFullTransactionId(ctx->oldest_fxid)));
+				return false;	/* corrupt */
+			case XID_BOUNDS_OK:
+				break;
+		}
+
+		switch (xmax_status)
+		{
+			case XID_IS_CURRENT_XID:
+			case XID_IN_PROGRESS:
+
+				/*
+				 * The delete is in progress, so it cannot be visible to our
+				 * snapshot.
+				 */
+				ctx->tuple_is_volatile = false;
+				return true;	/* checkable */
+			case XID_COMMITTED:
+
+				/*
+				 * The delete committed.  Whether the toast can be vacuumed
+				 * away depends on how old the deleting transaction is.
+				 */
+				ctx->tuple_is_volatile = TransactionIdPrecedes(xmax,
+															   ctx->safe_xmin);
+				return true;	/* checkable */
+			case XID_ABORTED:
+
+				/*
+				 * The delete aborted or crashed.  The tuple is still live.
+				 */
+				ctx->tuple_is_volatile = false;
+				return true;	/* checkable */
+		}
+	}
+
+	/*
+	 * xmax is the plain XID of a deleting or updating transaction.  Whether
+	 * the toast can be vacuumed away depends on its status and age.
+	 */
+	xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+
+	switch (get_xid_status(xmax, ctx, &xmax_status))
+	{
+		case XID_IN_FUTURE:
+			report_corruption(ctx,
+							  psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u",
+									   xmax,
+									   EpochFromFullTransactionId(ctx->next_fxid),
+									   XidFromFullTransactionId(ctx->next_fxid)));
+			return false;		/* corrupt */
+		case XID_PRECEDES_RELMIN:
+			report_corruption(ctx,
+							  psprintf("xmax %u precedes relation freeze threshold %u:%u",
+									   xmax,
+									   EpochFromFullTransactionId(ctx->relfrozenfxid),
+									   XidFromFullTransactionId(ctx->relfrozenfxid)));
+			return false;		/* corrupt */
+		case XID_PRECEDES_CLUSTERMIN:
+			report_corruption(ctx,
+							  psprintf("xmax %u precedes oldest valid transaction ID %u:%u",
+									   xmax,
+									   EpochFromFullTransactionId(ctx->oldest_fxid),
+									   XidFromFullTransactionId(ctx->oldest_fxid)));
+			return false;		/* corrupt */
+		case XID_BOUNDS_OK:
+		case XID_INVALID:
+			break;
+	}
+
+	switch (xmax_status)
+	{
+		case XID_IS_CURRENT_XID:
+		case XID_IN_PROGRESS:
+
+			/*
+			 * The delete is in progress, so it cannot be visible to our
+			 * snapshot.
+			 */
+			ctx->tuple_is_volatile = false;
+			return true;		/* checkable */
+		case XID_COMMITTED:
+
+			/*
+			 * The delete committed.  Whether the toast can be vacuumed away
+			 * depends on how old the deleting transaction is.
+			 */
+			ctx->tuple_is_volatile = TransactionIdPrecedes(xmax,
+														   ctx->safe_xmin);
+			return true;		/* checkable */
+		case XID_ABORTED:
+
+			/*
+			 * The delete aborted or crashed.  The tuple is still live.
+			 */
+			ctx->tuple_is_volatile = false;
+			return true;		/* checkable */
+	}
+
+	return false;				/* not reached */
 }
 
+
 /*
  * Check the current toast tuple against the state tracked in ctx, recording
  * any corruption found in ctx->tupstore.
-- 
2.21.1 (Apple Git-122.3)

