On Tue, Apr 21, 2020 at 2:05 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> As before, these two apply on top of Robert's patches (or at least his
> 0001 and 0002).

While trying to figure out if Robert's 0003 patch was correct, I added
yet another patch to this stack to test it.  0006 does basic xid map
maintenance that exercises the cases 0003 fixes, and I think it
demonstrates that they now work correctly.  Also some minor perl
improvements to 0005.  I'll attach 0001-0004 again but they are
unchanged.

Since confusion about head vs tail seems to have been at the root of
the bugs addressed by 0003, I wonder if we should also rename
head_{timestamp,offset} to oldest_{timestamp,offset}.
From 192ff883fd834371cc1f674d6a4cdfee89729cb0 Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 09:37:31 -0400
Subject: [PATCH v5 1/6] Expose oldSnapshotControl.

---
 src/backend/utils/time/snapmgr.c | 55 +----------------------
 src/include/utils/old_snapshot.h | 75 ++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 53 deletions(-)
 create mode 100644 src/include/utils/old_snapshot.h

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 1c063c592c..abaaea569a 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -63,6 +63,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/old_snapshot.h"
 #include "utils/rel.h"
 #include "utils/resowner_private.h"
 #include "utils/snapmgr.h"
@@ -74,59 +75,7 @@
  */
 int			old_snapshot_threshold; /* number of minutes, -1 disables */
 
-/*
- * Structure for dealing with old_snapshot_threshold implementation.
- */
-typedef struct OldSnapshotControlData
-{
-	/*
-	 * Variables for old snapshot handling are shared among processes and are
-	 * only allowed to move forward.
-	 */
-	slock_t		mutex_current;	/* protect current_timestamp */
-	TimestampTz current_timestamp;	/* latest snapshot timestamp */
-	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
-	TransactionId latest_xmin;	/* latest snapshot xmin */
-	TimestampTz next_map_update;	/* latest snapshot valid up to */
-	slock_t		mutex_threshold;	/* protect threshold fields */
-	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
-	TransactionId threshold_xid;	/* earlier xid may be gone */
-
-	/*
-	 * Keep one xid per minute for old snapshot error handling.
-	 *
-	 * Use a circular buffer with a head offset, a count of entries currently
-	 * used, and a timestamp corresponding to the xid at the head offset.  A
-	 * count_used value of zero means that there are no times stored; a
-	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
-	 * is full and the head must be advanced to add new entries.  Use
-	 * timestamps aligned to minute boundaries, since that seems less
-	 * surprising than aligning based on the first usage timestamp.  The
-	 * latest bucket is effectively stored within latest_xmin.  The circular
-	 * buffer is updated when we get a new xmin value that doesn't fall into
-	 * the same interval.
-	 *
-	 * It is OK if the xid for a given time slot is from earlier than
-	 * calculated by adding the number of minutes corresponding to the
-	 * (possibly wrapped) distance from the head offset to the time of the
-	 * head entry, since that just results in the vacuuming of old tuples
-	 * being slightly less aggressive.  It would not be OK for it to be off in
-	 * the other direction, since it might result in vacuuming tuples that are
-	 * still expected to be there.
-	 *
-	 * Use of an SLRU was considered but not chosen because it is more
-	 * heavyweight than is needed for this, and would probably not be any less
-	 * code to implement.
-	 *
-	 * Persistence is not needed.
-	 */
-	int			head_offset;	/* subscript of oldest tracked time */
-	TimestampTz head_timestamp; /* time corresponding to head xid */
-	int			count_used;		/* how many slots are in use */
-	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotControlData;
-
-static volatile OldSnapshotControlData *oldSnapshotControl;
+volatile OldSnapshotControlData *oldSnapshotControl;
 
 
 /*
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
new file mode 100644
index 0000000000..284af7d508
--- /dev/null
+++ b/src/include/utils/old_snapshot.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * old_snapshot.h
+ *		Data structures for 'snapshot too old'
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/include/utils/old_snapshot.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef OLD_SNAPSHOT_H
+#define OLD_SNAPSHOT_H
+
+#include "datatype/timestamp.h"
+#include "storage/s_lock.h"
+
+/*
+ * Structure for dealing with old_snapshot_threshold implementation.
+ */
+typedef struct OldSnapshotControlData
+{
+	/*
+	 * Variables for old snapshot handling are shared among processes and are
+	 * only allowed to move forward.
+	 */
+	slock_t		mutex_current;	/* protect current_timestamp */
+	TimestampTz current_timestamp;	/* latest snapshot timestamp */
+	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
+	TransactionId latest_xmin;	/* latest snapshot xmin */
+	TimestampTz next_map_update;	/* latest snapshot valid up to */
+	slock_t		mutex_threshold;	/* protect threshold fields */
+	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
+	TransactionId threshold_xid;	/* earlier xid may be gone */
+
+	/*
+	 * Keep one xid per minute for old snapshot error handling.
+	 *
+	 * Use a circular buffer with a head offset, a count of entries currently
+	 * used, and a timestamp corresponding to the xid at the head offset.  A
+	 * count_used value of zero means that there are no times stored; a
+	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
+	 * is full and the head must be advanced to add new entries.  Use
+	 * timestamps aligned to minute boundaries, since that seems less
+	 * surprising than aligning based on the first usage timestamp.  The
+	 * latest bucket is effectively stored within latest_xmin.  The circular
+	 * buffer is updated when we get a new xmin value that doesn't fall into
+	 * the same interval.
+	 *
+	 * It is OK if the xid for a given time slot is from earlier than
+	 * calculated by adding the number of minutes corresponding to the
+	 * (possibly wrapped) distance from the head offset to the time of the
+	 * head entry, since that just results in the vacuuming of old tuples
+	 * being slightly less aggressive.  It would not be OK for it to be off in
+	 * the other direction, since it might result in vacuuming tuples that are
+	 * still expected to be there.
+	 *
+	 * Use of an SLRU was considered but not chosen because it is more
+	 * heavyweight than is needed for this, and would probably not be any less
+	 * code to implement.
+	 *
+	 * Persistence is not needed.
+	 */
+	int			head_offset;	/* subscript of oldest tracked time */
+	TimestampTz head_timestamp; /* time corresponding to head xid */
+	int			count_used;		/* how many slots are in use */
+	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotControlData;
+
+extern volatile OldSnapshotControlData *oldSnapshotControl;
+
+#endif
-- 
2.20.1

From 4dd4e86de6b4435f3cc86af57c54a8862b2d5069 Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:14:32 -0400
Subject: [PATCH v5 2/6] contrib/old_snapshot: time->xid mapping.

---
 contrib/Makefile                           |   1 +
 contrib/old_snapshot/Makefile              |  24 ++++
 contrib/old_snapshot/old_snapshot--1.0.sql |  14 ++
 contrib/old_snapshot/old_snapshot.control  |   5 +
 contrib/old_snapshot/time_mapping.c        | 159 +++++++++++++++++++++
 5 files changed, 203 insertions(+)
 create mode 100644 contrib/old_snapshot/Makefile
 create mode 100644 contrib/old_snapshot/old_snapshot--1.0.sql
 create mode 100644 contrib/old_snapshot/old_snapshot.control
 create mode 100644 contrib/old_snapshot/time_mapping.c

diff --git a/contrib/Makefile b/contrib/Makefile
index 1846d415b6..452ade0782 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -27,6 +27,7 @@ SUBDIRS = \
 		lo		\
 		ltree		\
 		oid2name	\
+		old_snapshot	\
 		pageinspect	\
 		passwordcheck	\
 		pg_buffercache	\
diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
new file mode 100644
index 0000000000..091231f25f
--- /dev/null
+++ b/contrib/old_snapshot/Makefile
@@ -0,0 +1,24 @@
+# contrib/old_snapshot/Makefile
+
+MODULE_big = old_snapshot
+OBJS = \
+	$(WIN32RES) \
+	time_mapping.o
+PG_CPPFLAGS = -I$(libpq_srcdir)
+
+EXTENSION = old_snapshot
+DATA = old_snapshot--1.0.sql
+PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
+
+REGRESS = old_snapshot
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/old_snapshot
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
new file mode 100644
index 0000000000..9ebb8829e3
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot--1.0.sql
@@ -0,0 +1,14 @@
+/* contrib/old_snapshot/old_snapshot--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION old_snapshot" to load this file. \quit
+
+-- Show visibility map and page-level visibility information for each block.
+CREATE FUNCTION pg_old_snapshot_time_mapping(array_offset OUT int4,
+											 end_timestamp OUT timestamptz,
+											 newest_xmin OUT xid)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
+LANGUAGE C STRICT;
+
+-- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/old_snapshot.control b/contrib/old_snapshot/old_snapshot.control
new file mode 100644
index 0000000000..491eec536c
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot.control
@@ -0,0 +1,5 @@
+# old_snapshot extension
+comment = 'utilities in support of old_snapshot_threshold'
+default_version = '1.0'
+module_pathname = '$libdir/old_snapshot'
+relocatable = true
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
new file mode 100644
index 0000000000..37e0055a00
--- /dev/null
+++ b/contrib/old_snapshot/time_mapping.c
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * time_mapping.c
+ *	  time to XID mapping information
+ *
+ * Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ *	  contrib/old_snapshot/time_mapping.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "storage/lwlock.h"
+#include "utils/old_snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+/*
+ * Backend-private copy of the information from oldSnapshotControl which relates
+ * to the time to XID mapping, plus an index so that we can iterate.
+ *
+ * Note that the length of the xid_by_minute array is given by
+ * OLD_SNAPSHOT_TIME_MAP_ENTRIES (which is not a compile-time constant).
+ */
+typedef struct
+{
+	int				current_index;
+	int				head_offset;
+	TimestampTz		head_timestamp;
+	int				count_used;
+	TransactionId	xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotTimeMapping;
+
+#define NUM_TIME_MAPPING_COLUMNS 3
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
+
+static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
+static TupleDesc MakeOldSnapshotTimeMappingTupleDesc(void);
+static HeapTuple MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc,
+												 OldSnapshotTimeMapping *mapping);
+
+/*
+ * SQL-callable set-returning function.
+ */
+Datum
+pg_old_snapshot_time_mapping(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	OldSnapshotTimeMapping *mapping;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext	oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		mapping = GetOldSnapshotTimeMapping();
+		funcctx->user_fctx = mapping;
+		funcctx->tuple_desc = MakeOldSnapshotTimeMappingTupleDesc();
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	mapping = (OldSnapshotTimeMapping *) funcctx->user_fctx;
+
+	while (mapping->current_index < mapping->count_used)
+	{
+		HeapTuple	tuple;
+
+		tuple = MakeOldSnapshotTimeMappingTuple(funcctx->tuple_desc, mapping);
+		++mapping->current_index;
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Get the old snapshot time mapping data from shared memory.
+ */
+static OldSnapshotTimeMapping *
+GetOldSnapshotTimeMapping(void)
+{
+	OldSnapshotTimeMapping *mapping;
+
+	mapping = palloc(offsetof(OldSnapshotTimeMapping, xid_by_minute)
+					 + sizeof(TransactionId) * OLD_SNAPSHOT_TIME_MAP_ENTRIES);
+	mapping->current_index = 0;
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+	mapping->head_offset = oldSnapshotControl->head_offset;
+	mapping->head_timestamp = oldSnapshotControl->head_timestamp;
+	mapping->count_used = oldSnapshotControl->count_used;
+	for (int i = 0; i < OLD_SNAPSHOT_TIME_MAP_ENTRIES; ++i)
+		mapping->xid_by_minute[i] = oldSnapshotControl->xid_by_minute[i];
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	return mapping;
+}
+
+/*
+ * Build a tuple descriptor for the pg_old_snapshot_time_mapping() SRF.
+ */
+static TupleDesc
+MakeOldSnapshotTimeMappingTupleDesc(void)
+{
+	TupleDesc	tupdesc;
+
+	tupdesc = CreateTemplateTupleDesc(NUM_TIME_MAPPING_COLUMNS);
+
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "array_offset",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "end_timestamp",
+					   TIMESTAMPTZOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "newest_xmin",
+					   XIDOID, -1, 0);
+
+	return BlessTupleDesc(tupdesc);
+}
+
+/*
+ * Convert one entry from the old snapshot time mapping to a HeapTuple.
+ */
+static HeapTuple
+MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mapping)
+{
+	Datum	values[NUM_TIME_MAPPING_COLUMNS];
+	bool	nulls[NUM_TIME_MAPPING_COLUMNS];
+	int		array_position;
+	TimestampTz	timestamp;
+
+	/*
+	 * Figure out the array position corresponding to the current index.
+	 *
+	 * Index 0 means the oldest entry in the mapping, which is stored at
+	 * mapping->head_offset. Index 1 means the next-oldest entry, which is a the
+	 * following index, and so on. We wrap around when we reach the end of the array.
+	 */
+	array_position = (mapping->head_offset + mapping->current_index)
+		% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+
+	/*
+	 * No explicit timestamp is stored for any entry other than the oldest one,
+	 * but each entry corresponds to 1-minute period, so we can just add.
+	 */
+	timestamp = TimestampTzPlusMilliseconds(mapping->head_timestamp,
+											mapping->current_index * 60000);
+
+	/* Initialize nulls and values arrays. */
+	memset(nulls, 0, sizeof(nulls));
+	values[0] = Int32GetDatum(array_position);
+	values[1] = TimestampTzGetDatum(timestamp);
+	values[2] = TransactionIdGetDatum(mapping->xid_by_minute[array_position]);
+
+	return heap_form_tuple(tupdesc, values, nulls);
+}
-- 
2.20.1

From 428330154510850075cfa492340ab46d1df3042e Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:15:57 -0400
Subject: [PATCH v5 3/6] Fix bugs in MaintainOldSnapshotTimeMapping.

---
 src/backend/utils/time/snapmgr.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index abaaea569a..72b2c61a07 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1926,10 +1926,32 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	else
 	{
 		/* We need a new bucket, but it might not be the very next one. */
-		int			advance = ((ts - oldSnapshotControl->head_timestamp)
-							   / USECS_PER_MINUTE);
+		int			distance_to_new_tail;
+		int			distance_to_current_tail;
+		int			advance;
 
-		oldSnapshotControl->head_timestamp = ts;
+		/*
+		 * Our goal is for the new "tail" of the mapping, that is, the entry
+		 * which is newest and thus furthest from the "head" entry, to
+		 * correspond to "ts". Since there's one entry per minute, the
+		 * distance between the current head and the new tail is just the
+		 * number of minutes of difference between ts and the current
+		 * head_timestamp.
+		 *
+		 * The distance from the current head to the current tail is one
+		 * less than the number of entries in the mapping, because the
+		 * entry at the head_offset is for 0 minutes after head_timestamp.
+		 *
+		 * The difference between these two values is the number of minutes
+		 * by which we need to advance the mapping, either adding new entries
+		 * or rotating old ones out.
+		 */
+		distance_to_new_tail =
+			(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
+		distance_to_current_tail =
+			oldSnapshotControl->count_used - 1;
+		advance = distance_to_new_tail - distance_to_current_tail;
+		Assert(advance > 0);
 
 		if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
 		{
@@ -1937,6 +1959,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 			oldSnapshotControl->head_offset = 0;
 			oldSnapshotControl->count_used = 1;
 			oldSnapshotControl->xid_by_minute[0] = xmin;
+			oldSnapshotControl->head_timestamp = ts;
 		}
 		else
 		{
@@ -1955,6 +1978,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 					else
 						oldSnapshotControl->head_offset = old_head + 1;
 					oldSnapshotControl->xid_by_minute[old_head] = xmin;
+					oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
 				}
 				else
 				{
-- 
2.20.1

From 9bb6110de36c6df7a8117df602bbb08dbc643b71 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Mon, 20 Apr 2020 16:23:02 +1200
Subject: [PATCH v5 4/6] Rewrite the "snapshot_too_old" tests.

Previously the snapshot too old feature used a special test value for
old_snapshot_threshold.  Instead, use a new approach based on explicitly
advancing the "current" timestamp used in snapshot-too-old book keeping,
so that we can control the timeline precisely without having to resort
to sleeping or special test branches in the code.

Also check that early pruning actually happens, by vacuuming and
inspecting the visibility map at key points in the test schedule.
---
 src/backend/utils/time/snapmgr.c              |  24 ---
 src/test/modules/snapshot_too_old/Makefile    |  23 +--
 .../expected/sto_using_cursor.out             |  75 ++++-----
 .../expected/sto_using_hash_index.out         |  26 ++-
 .../expected/sto_using_select.out             | 157 +++++++++++++++---
 .../specs/sto_using_cursor.spec               |  30 ++--
 .../specs/sto_using_hash_index.spec           |  19 ++-
 .../specs/sto_using_select.spec               |  50 ++++--
 src/test/modules/snapshot_too_old/sto.conf    |   2 +-
 .../snapshot_too_old/test_sto--1.0.sql        |  14 ++
 src/test/modules/snapshot_too_old/test_sto.c  |  74 +++++++++
 .../modules/snapshot_too_old/test_sto.control |   5 +
 12 files changed, 366 insertions(+), 133 deletions(-)
 create mode 100644 src/test/modules/snapshot_too_old/test_sto--1.0.sql
 create mode 100644 src/test/modules/snapshot_too_old/test_sto.c
 create mode 100644 src/test/modules/snapshot_too_old/test_sto.control

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 72b2c61a07..19e6c52b80 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1739,26 +1739,6 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 		update_ts = oldSnapshotControl->next_map_update;
 		SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
 
-		/*
-		 * Zero threshold always overrides to latest xmin, if valid.  Without
-		 * some heuristic it will find its own snapshot too old on, for
-		 * example, a simple UPDATE -- which would make it useless for most
-		 * testing, but there is no principled way to ensure that it doesn't
-		 * fail in this way.  Use a five-second delay to try to get useful
-		 * testing behavior, but this may need adjustment.
-		 */
-		if (old_snapshot_threshold == 0)
-		{
-			if (TransactionIdPrecedes(latest_xmin, MyPgXact->xmin)
-				&& TransactionIdFollows(latest_xmin, xlimit))
-				xlimit = latest_xmin;
-
-			ts -= 5 * USECS_PER_SEC;
-			SetOldSnapshotThresholdTimestamp(ts, xlimit);
-
-			return xlimit;
-		}
-
 		ts = AlignTimestampToMinuteBoundary(ts)
 			- (old_snapshot_threshold * USECS_PER_MINUTE);
 
@@ -1860,10 +1840,6 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	if (!map_update_required)
 		return;
 
-	/* No further tracking needed for 0 (used for testing). */
-	if (old_snapshot_threshold == 0)
-		return;
-
 	/*
 	 * We don't want to do something stupid with unusual values, but we don't
 	 * want to litter the log with warnings or break otherwise normal
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index dfb4537f63..81836e9953 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -1,14 +1,22 @@
 # src/test/modules/snapshot_too_old/Makefile
 
-# Note: because we don't tell the Makefile there are any regression tests,
-# we have to clean those result files explicitly
-EXTRA_CLEAN = $(pg_regress_clean_files)
+MODULE_big = test_sto
+OBJS = \
+	$(WIN32RES) \
+	test_sto.o
+
+EXTENSION = test_sto
+DATA = test_sto--1.0.sql
+PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
+
+EXTRA_INSTALL = contrib/pg_visibility
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
 
-# Disabled because these tests require "old_snapshot_threshold" >= 0, which
-# typical installcheck users do not have (e.g. buildfarm clients).
+# Disabled because these tests require "old_snapshot_threshold" = 10, which
+# typical installcheck users do not have (e.g. buildfarm clients) and also
+# because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
 ifdef USE_PGXS
@@ -21,8 +29,3 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
-
-# But it can nonetheless be very helpful to run tests on preexisting
-# installation, allow to do so, but only if requested explicitly.
-installcheck-force:
-	$(pg_isolation_regress_installcheck) $(ISOLATION)
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out b/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
index 8cc29ec82f..b007e2dc17 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
@@ -1,73 +1,60 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: s1decl s1f1 s1sleep s1f2 s2u
+starting permutation: t00 s1decl s1f1 t10 s2u s1f2 t20 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
 step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
+step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
 step s1f2: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+step t20: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1decl s1f1 s1sleep s2u s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
-c              
+               
+step s1f3: FETCH FIRST FROM cursor1;
+ERROR:  snapshot too old
+test_sto_reset_all_state
 
-1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+               
 
-0                             
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+starting permutation: t00 s1decl s1f1 t10 s1f2 t20 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1decl s1f1 s2u s1sleep s1f2
+               
 step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
 step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
 step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
-
-starting permutation: s1decl s2u s1f1 s1sleep s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
-
-0                             
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+step t20: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s2u s1decl s1f1 s1sleep s1f2
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
+               
+step s1f3: FETCH FIRST FROM cursor1;
 c              
 
-2              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+1              
+test_sto_reset_all_state
 
-0                             
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+               
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out b/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
index bf94054790..091c212adc 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
@@ -1,15 +1,31 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: noseq s1f1 s2sleep s2u s1f2
+starting permutation: t00 noseq s1f1 t10 s2u s2v1 s1f2 t22 s2v2 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step noseq: SET enable_seqscan = false;
 step s1f1: SELECT c FROM sto1 where c = 1000;
 c              
 
 1000           
-step s2sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
 step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1000;
+step s2v1: VACUUM sto1;
 step s1f2: SELECT c FROM sto1 where c = 1001;
+c              
+
+step t22: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:22:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2v2: VACUUM sto1;
+step s1f3: SELECT c FROM sto1 where c = 1001;
 ERROR:  snapshot too old
+test_sto_reset_all_state
+
+               
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_select.out b/src/test/modules/snapshot_too_old/expected/sto_using_select.out
index eb15bc23bf..201c106754 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_select.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_select.out
@@ -1,55 +1,164 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: s1f1 s1sleep s1f2 s2u
+starting permutation: t00 s2vis1 s1f1 t01 s2u s2vis2 s1f2 t11 s2vac1 s2vis3 s1f3 t12 s1f4 s2vac2 s2vis4
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vis1: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
 step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t01: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+step s2vis2: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
 
-0                             
+f              
 step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+step t11: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1f1 s1sleep s2u s1f2
-step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+               
+step s2vac1: VACUUM sto1;
+step s2vis3: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+f              
+step s1f3: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t12: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:12:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+               
+step s1f4: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 ERROR:  snapshot too old
+step s2vac2: VACUUM sto1;
+step s2vis4: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
+test_sto_reset_all_state
+
+               
+
+starting permutation: t00 s2vis1 s1f1 t01 s2u s2vis2 s1f2 t11 s2vac1 s2vis3 s1f3 t12 s2vac2 s2vis4 s1f4
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1f1 s2u s1sleep s1f2
+               
+step s2vis1: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
 step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
+step t01: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step s2vis2: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
 
-0                             
+f              
 step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+step t11: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vac1: VACUUM sto1;
+step s2vis3: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+f              
+step s1f3: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+step t12: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:12:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vac2: VACUUM sto1;
+step s2vis4: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
+step s1f4: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 ERROR:  snapshot too old
+test_sto_reset_all_state
 
-starting permutation: s2u s1f1 s1sleep s1f2
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+               
+
+starting permutation: t00 s2vis1 s1f1 t01 s2vis2 s1f2 t11 s2vac1 s2vis3 s1f3 t12 s2vac2 s2vis4 s1f4
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vis1: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
 step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
-2              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+1              
+step t01: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
+step s2vis2: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
 step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR:  snapshot too old
+c              
+
+1              
+step t11: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vac1: VACUUM sto1;
+step s2vis3: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
+step s1f3: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+step t12: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:12:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2vac2: VACUUM sto1;
+step s2vis4: SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass);
+every          
+
+t              
+step s1f4: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+test_sto_reset_all_state
+
+               
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec b/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
index eac18ca5b9..3be084b8fe 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
@@ -1,19 +1,14 @@
 # This test provokes a "snapshot too old" error using a cursor.
 #
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made.  With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising values isn't so hard
-# to identify.
+# Expects old_snapshot_threshold = 10.  Not suitable for installcheck since it
+# messes with internal snapmgr.c state.
 
 setup
 {
+    CREATE EXTENSION IF NOT EXISTS test_sto;
+    SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
-    CREATE TABLE sto2 (c int NOT NULL);
 }
 setup
 {
@@ -22,16 +17,29 @@ setup
 
 teardown
 {
-    DROP TABLE sto1, sto2;
+    DROP TABLE sto1;
+    SELECT test_sto_reset_all_state();
 }
 
 session "s1"
 setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "s1decl"	{ DECLARE cursor1 CURSOR FOR SELECT c FROM sto1; }
 step "s1f1"		{ FETCH FIRST FROM cursor1; }
-step "s1sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s1f2"		{ FETCH FIRST FROM cursor1; }
+step "s1f3"		{ FETCH FIRST FROM cursor1; }
 teardown		{ COMMIT; }
 
 session "s2"
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1; }
+
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t10"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z'); }
+step "t20"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z'); }
+
+# if there's an update, we get a snapshot too old error at time 00:20 (not before,
+# because we need page pruning to see the xmin level change from 10 minutes earlier)
+permutation "t00" "s1decl" "s1f1" "t10" "s2u" "s1f2" "t20" "s1f3"
+
+# if there's no update, no snapshot too old error at time 00:20
+permutation "t00" "s1decl" "s1f1" "t10"       "s1f2" "t20" "s1f3"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec b/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
index 33d91ff852..f90bca3b7a 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
@@ -1,8 +1,12 @@
 # This test is like sto_using_select, except that we test access via a
-# hash index.
+# hash index.  Explicit vacuuming is required in this version because
+# there is are no incidental calls to heap_page_prune_opt() that can
+# call SetOldSnapshotThresholdTimestamp().
 
 setup
 {
+	CREATE EXTENSION IF NOT EXISTS test_sto;
+	SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
     CREATE INDEX idx_sto1 ON sto1 USING HASH (c);
@@ -15,6 +19,7 @@ setup
 teardown
 {
     DROP TABLE sto1;
+	SELECT test_sto_reset_all_state();
 }
 
 session "s1"
@@ -22,10 +27,18 @@ setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "noseq"	{ SET enable_seqscan = false; }
 step "s1f1"		{ SELECT c FROM sto1 where c = 1000; }
 step "s1f2"		{ SELECT c FROM sto1 where c = 1001; }
+step "s1f3"		{ SELECT c FROM sto1 where c = 1001; }
 teardown		{ ROLLBACK; }
 
 session "s2"
-step "s2sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1000; }
+step "s2v1"		{ VACUUM sto1; }
+step "s2v2"		{ VACUUM sto1; }
 
-permutation "noseq" "s1f1" "s2sleep" "s2u" "s1f2"
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t10"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z'); }
+step "t22"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:22:00Z'); }
+
+# snapshot too old at t22
+permutation "t00" "noseq" "s1f1" "t10" "s2u" "s2v1" "s1f2" "t22" "s2v2" "s1f3"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec b/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
index d7c34f3d89..cd7a97b742 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
@@ -1,19 +1,15 @@
 # This test provokes a "snapshot too old" error using SELECT statements.
 #
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made.  With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising values isn't so hard
-# to identify.
+# Expects old_snapshot_threshold = 10.  Not suitable for installcheck since it
+# messes with internal snapmgr.c state.
 
 setup
 {
+	CREATE EXTENSION IF NOT EXISTS test_sto;
+	CREATE EXTENSION IF NOT EXISTS pg_visibility;
+	SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
-    CREATE TABLE sto2 (c int NOT NULL);
 }
 setup
 {
@@ -22,15 +18,47 @@ setup
 
 teardown
 {
-    DROP TABLE sto1, sto2;
+	DROP TABLE sto1;
+    SELECT test_sto_reset_all_state();
 }
 
 session "s1"
 setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "s1f1"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-step "s1sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s1f2"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
+step "s1f3"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
+step "s1f4"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
 teardown		{ COMMIT; }
 
 session "s2"
+step "s2vis1"	{ SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass); }
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1; }
+step "s2vis2"	{ SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass); }
+step "s2vac1"	{ VACUUM sto1; }
+step "s2vis3"	{ SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass); }
+step "s2vac2"	{ VACUUM sto1; }
+step "s2vis4"	{ SELECT EVERY(all_visible) FROM pg_visibility_map('sto1'::regclass); }
+
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t01"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z'); }
+step "t11"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z'); }
+step "t12"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:12:00Z'); }
+
+# If there's an update, we get a snapshot too old error at time 00:12, and
+# VACUUM is allowed to remove the tuple our snapshot could see, which we know
+# because we see that the relation becomes all visible.  The earlier VACUUMs
+# were unable to remove the tuple we could see, which is is obvious because we
+# can see the row with value 1, and from the relation not being all visible
+# after the VACUUM.
+permutation "t00" "s2vis1" "s1f1" "t01" "s2u" "s2vis2" "s1f2" "t11" "s2vac1" "s2vis3" "s1f3" "t12" "s1f4" "s2vac2" "s2vis4"
+
+# Almost the same schedule, but this time we'll put s2vac2 and s2vis4 before
+# s1f4 just to demonstrate that the early pruning is allowed before the error
+# aborts s1's transaction.
+permutation "t00" "s2vis1" "s1f1" "t01" "s2u" "s2vis2" "s1f2" "t11" "s2vac1" "s2vis3" "s1f3" "t12" "s2vac2" "s2vis4" "s1f4"
+
+# If we run the same schedule as above but without the update, we get no
+# snapshot too old error (even though our snapshot is older than the
+# threshold), and the relation remains all visible.
+permutation "t00" "s2vis1" "s1f1" "t01"       "s2vis2" "s1f2" "t11" "s2vac1" "s2vis3" "s1f3" "t12" "s2vac2" "s2vis4" "s1f4"
diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf
index 7eeaeeb0dc..5ed46b3560 100644
--- a/src/test/modules/snapshot_too_old/sto.conf
+++ b/src/test/modules/snapshot_too_old/sto.conf
@@ -1,2 +1,2 @@
 autovacuum = off
-old_snapshot_threshold = 0
+old_snapshot_threshold = 10
diff --git a/src/test/modules/snapshot_too_old/test_sto--1.0.sql b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
new file mode 100644
index 0000000000..c10afcf23a
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
@@ -0,0 +1,14 @@
+/* src/test/modules/snapshot_too_old/test_sto--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_sto" to load this file. \quit
+
+CREATE FUNCTION test_sto_clobber_snapshot_timestamp(now timestamptz)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'test_sto_clobber_snapshot_timestamp'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION test_sto_reset_all_state()
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'test_sto_reset_all_state'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/snapshot_too_old/test_sto.c b/src/test/modules/snapshot_too_old/test_sto.c
new file mode 100644
index 0000000000..f6c9a1a000
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto.c
@@ -0,0 +1,74 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_sto.c
+ *	  Functions to support isolation tests for snapshot too old.
+ *
+ * These functions are not intended for use in a production database and
+ * could cause corruption.
+ *
+ * Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ *	  src/test/modules/snapshot_too_old/test_sto.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "storage/lwlock.h"
+#include "utils/old_snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(test_sto_reset_all_state);
+PG_FUNCTION_INFO_V1(test_sto_clobber_snapshot_timestamp);
+
+/*
+ * Revert to initial state.  This is not safe except in carefully
+ * controlled tests.
+ */
+Datum
+test_sto_reset_all_state(PG_FUNCTION_ARGS)
+{
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	oldSnapshotControl->count_used = 0;
+	oldSnapshotControl->current_timestamp = 0;
+	oldSnapshotControl->head_offset = 0;
+	oldSnapshotControl->head_timestamp = 0;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
+	oldSnapshotControl->latest_xmin = InvalidTransactionId;
+	oldSnapshotControl->next_map_update = 0;
+	SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_current);
+	oldSnapshotControl->current_timestamp = 0;
+	SpinLockRelease(&oldSnapshotControl->mutex_current);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
+	oldSnapshotControl->threshold_timestamp = 0;
+	oldSnapshotControl->threshold_xid = InvalidTransactionId;
+	SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * Update the minimum time used in snapshot-too-old code.  If set ahead of the
+ * current wall clock time (for example, the year 3000), this allows testing
+ * with arbitrary times.  This is not safe except in carefully controlled
+ * tests.
+ */
+Datum
+test_sto_clobber_snapshot_timestamp(PG_FUNCTION_ARGS)
+{
+	TimestampTz new_current_timestamp = PG_GETARG_TIMESTAMPTZ(0);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_current);
+	oldSnapshotControl->current_timestamp = new_current_timestamp;
+	SpinLockRelease(&oldSnapshotControl->mutex_current);
+
+	PG_RETURN_NULL();
+}
diff --git a/src/test/modules/snapshot_too_old/test_sto.control b/src/test/modules/snapshot_too_old/test_sto.control
new file mode 100644
index 0000000000..e497e5450e
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto.control
@@ -0,0 +1,5 @@
+# test_sto test module
+comment = 'functions for internal testing of snapshot too old errors'
+default_version = '1.0'
+module_pathname = '$libdir/test_sto'
+relocatable = true
-- 
2.20.1

From 49f29afa0f7ee3da13326fb42cc77d00c15160d7 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v5 5/6] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around,
although it's probably very hard to actually reach that state.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |   3 +
 src/backend/utils/time/snapmgr.c              |  21 ++++
 src/include/utils/snapmgr.h                   |   1 +
 src/test/modules/snapshot_too_old/Makefile    |   4 +-
 .../snapshot_too_old/t/001_truncate.pl        | 100 ++++++++++++++++++
 5 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 19e6c52b80..edb47c9664 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1974,6 +1974,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index 81836e9953..f919944984 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -9,7 +9,7 @@ EXTENSION = test_sto
 DATA = test_sto--1.0.sql
 PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
 
-EXTRA_INSTALL = contrib/pg_visibility
+EXTRA_INSTALL = contrib/pg_visibility contrib/old_snapshot
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
@@ -19,6 +19,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..afcca232f2
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,100 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->append_conf("postgresql.conf", "autovacuum=off");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+sub set_time
+{
+	my $time = shift;
+	$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('$time')");
+}
+
+sub advance_xid
+{
+	my $time = shift;
+	$node->psql('postgres', "select pg_current_xact_id()");
+}
+
+sub summarize_mapping
+{
+	my $out;
+	$node->psql('postgres',
+				"select count(*),
+						to_char(min(end_timestamp), 'HH24:MI:SS'),
+						to_char(max(end_timestamp), 'HH24:MI:SS')
+						from pg_old_snapshot_time_mapping()",
+				stdout => \$out);
+	return $out;
+}
+
+sub vacuum_freeze_all
+{
+	$node->psql('postgres', 'vacuum freeze');
+	$node->psql('template0', 'vacuum freeze');
+	$node->psql('template1', 'vacuum freeze');
+}
+
+# build up a time map with 4 entries
+set_time('3000-01-01 00:00:00Z');
+advance_xid();
+set_time('3000-01-01 00:01:00Z');
+advance_xid();
+set_time('3000-01-01 00:02:00Z');
+advance_xid();
+set_time('3000-01-01 00:03:00Z');
+advance_xid();
+is(summarize_mapping(), "4|00:00:00|00:03:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we expect all XIDs to have been truncated
+is(summarize_mapping(), "0||");
+
+# put two more in the map
+set_time('3000-01-01 00:04:00Z');
+advance_xid();
+set_time('3000-01-01 00:05:00Z');
+advance_xid();
+is(summarize_mapping(), "2|00:04:00|00:05:00");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes; we should now have 18
+set_time('3000-01-01 00:21:00Z');
+advance_xid();
+is(summarize_mapping(), "18|00:04:00|00:21:00");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# this should leave just 16, because 2 were truncated
+is(summarize_mapping(), "16|00:06:00|00:21:00");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+vacuum_freeze_all();
+
+# we should now be back to empty
+is(summarize_mapping(), "0||");
+
+$node->stop;
-- 
2.20.1

From 6409467a8c3a2b7507349850906be85abaacbaac Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Tue, 21 Apr 2020 20:48:20 +1200
Subject: [PATCH v5 6/6] Add TAP test for snapshot too old time map
 maintenance.

---
 .../t/002_xid_map_maintenance.pl              | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 src/test/modules/snapshot_too_old/t/002_xid_map_maintenance.pl

diff --git a/src/test/modules/snapshot_too_old/t/002_xid_map_maintenance.pl b/src/test/modules/snapshot_too_old/t/002_xid_map_maintenance.pl
new file mode 100644
index 0000000000..eddd0ce5ae
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/002_xid_map_maintenance.pl
@@ -0,0 +1,63 @@
+# Test xid various time/xid map maintenance edge cases
+# that were historically buggy.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 8;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "autovacuum = off");
+$node->start;
+$node->psql('postgres', 'create extension test_sto');
+$node->psql('postgres', 'create extension old_snapshot');
+
+sub set_time
+{
+	my $time = shift;
+	$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('$time')");
+}
+
+sub summarize_mapping
+{
+	my $out;
+	$node->psql('postgres',
+				"select count(*),
+						to_char(min(end_timestamp), 'HH24:MI:SS'),
+						to_char(max(end_timestamp), 'HH24:MI:SS')
+						from pg_old_snapshot_time_mapping()",
+				stdout => \$out);
+	return $out;
+}
+
+# fill the map up to maximum capacity
+set_time('3000-01-01 00:00:00Z');
+set_time('3000-01-01 00:19:00Z');
+is(summarize_mapping(), "20|00:00:00|00:19:00");
+
+# make a jump larger than capacity; the mapping is blown away,
+# and our new minute is now the only one
+set_time('3000-01-01 02:00:00Z');
+is(summarize_mapping(), "1|02:00:00|02:00:00");
+
+# test adding minutes while the map is not full
+set_time('3000-01-01 02:01:00Z');
+is(summarize_mapping(), "2|02:00:00|02:01:00");
+set_time('3000-01-01 02:05:00Z');
+is(summarize_mapping(), "6|02:00:00|02:05:00");
+set_time('3000-01-01 02:19:00Z');
+is(summarize_mapping(), "20|02:00:00|02:19:00");
+
+# test adding minutes while the map is full
+set_time('3000-01-01 02:20:00Z');
+is(summarize_mapping(), "20|02:01:00|02:20:00");
+set_time('3000-01-01 02:22:00Z');
+is(summarize_mapping(), "20|02:03:00|02:22:00");
+set_time('3000-01-01 02:22:01Z'); # one second past
+is(summarize_mapping(), "20|02:04:00|02:23:00");
+
+$node->stop;
-- 
2.20.1

Reply via email to