On Fri, Apr 17, 2020 at 2:12 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> What about a contrib function that lets you clobber
> oldSnapshotControl->current_timestamp?  It looks like all times in
> this system come ultimately from GetSnapshotCurrentTimestamp(), which
> uses that variable to make sure that time never goes backwards.

Here's a draft TAP test that uses that technique successfully, as a
POC.  It should probably be extended to cover more cases, but I
thought I'd check what people thought of the concept first before
going further.  I didn't see a way to do overlapping transactions with
PostgresNode.pm, so I invented one (please excuse the bad perl); am I
missing something?  Maybe it'd be better to do 002 with an isolation
test instead, but I suppose 001 can't be in an isolation test, since
it needs to connect to multiple databases, and it seemed better to do
them both the same way.  It's also not entirely clear to me that
isolation tests can expect a database to be fresh and then mess with
dangerous internal state, whereas TAP tests set up and tear down a
cluster each time.

I think I found another bug in MaintainOldSnapshotTimeMapping(): if
you make time jump by more than old_snapshot_threshold in one go, then
the map gets cleared and then no early pruning or snapshot-too-old
errors happen.  That's why in 002_too_old.pl it currently advances
time by 10 minutes twice, instead of 20 minutes once.  To be
continued.
From b78644a0f9580934b136ca8413366de91198203f Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 09:37:31 -0400
Subject: [PATCH v2 1/6] Expose oldSnapshotControl.

---
 src/backend/utils/time/snapmgr.c | 55 +----------------------
 src/include/utils/old_snapshot.h | 75 ++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 53 deletions(-)
 create mode 100644 src/include/utils/old_snapshot.h

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 1c063c592c..abaaea569a 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -63,6 +63,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/old_snapshot.h"
 #include "utils/rel.h"
 #include "utils/resowner_private.h"
 #include "utils/snapmgr.h"
@@ -74,59 +75,7 @@
  */
 int			old_snapshot_threshold; /* number of minutes, -1 disables */
 
-/*
- * Structure for dealing with old_snapshot_threshold implementation.
- */
-typedef struct OldSnapshotControlData
-{
-	/*
-	 * Variables for old snapshot handling are shared among processes and are
-	 * only allowed to move forward.
-	 */
-	slock_t		mutex_current;	/* protect current_timestamp */
-	TimestampTz current_timestamp;	/* latest snapshot timestamp */
-	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
-	TransactionId latest_xmin;	/* latest snapshot xmin */
-	TimestampTz next_map_update;	/* latest snapshot valid up to */
-	slock_t		mutex_threshold;	/* protect threshold fields */
-	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
-	TransactionId threshold_xid;	/* earlier xid may be gone */
-
-	/*
-	 * Keep one xid per minute for old snapshot error handling.
-	 *
-	 * Use a circular buffer with a head offset, a count of entries currently
-	 * used, and a timestamp corresponding to the xid at the head offset.  A
-	 * count_used value of zero means that there are no times stored; a
-	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
-	 * is full and the head must be advanced to add new entries.  Use
-	 * timestamps aligned to minute boundaries, since that seems less
-	 * surprising than aligning based on the first usage timestamp.  The
-	 * latest bucket is effectively stored within latest_xmin.  The circular
-	 * buffer is updated when we get a new xmin value that doesn't fall into
-	 * the same interval.
-	 *
-	 * It is OK if the xid for a given time slot is from earlier than
-	 * calculated by adding the number of minutes corresponding to the
-	 * (possibly wrapped) distance from the head offset to the time of the
-	 * head entry, since that just results in the vacuuming of old tuples
-	 * being slightly less aggressive.  It would not be OK for it to be off in
-	 * the other direction, since it might result in vacuuming tuples that are
-	 * still expected to be there.
-	 *
-	 * Use of an SLRU was considered but not chosen because it is more
-	 * heavyweight than is needed for this, and would probably not be any less
-	 * code to implement.
-	 *
-	 * Persistence is not needed.
-	 */
-	int			head_offset;	/* subscript of oldest tracked time */
-	TimestampTz head_timestamp; /* time corresponding to head xid */
-	int			count_used;		/* how many slots are in use */
-	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotControlData;
-
-static volatile OldSnapshotControlData *oldSnapshotControl;
+volatile OldSnapshotControlData *oldSnapshotControl;
 
 
 /*
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
new file mode 100644
index 0000000000..284af7d508
--- /dev/null
+++ b/src/include/utils/old_snapshot.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * old_snapshot.h
+ *		Data structures for 'snapshot too old'
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/include/utils/old_snapshot.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef OLD_SNAPSHOT_H
+#define OLD_SNAPSHOT_H
+
+#include "datatype/timestamp.h"
+#include "storage/s_lock.h"
+
+/*
+ * Structure for dealing with old_snapshot_threshold implementation.
+ */
+typedef struct OldSnapshotControlData
+{
+	/*
+	 * Variables for old snapshot handling are shared among processes and are
+	 * only allowed to move forward.
+	 */
+	slock_t		mutex_current;	/* protect current_timestamp */
+	TimestampTz current_timestamp;	/* latest snapshot timestamp */
+	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
+	TransactionId latest_xmin;	/* latest snapshot xmin */
+	TimestampTz next_map_update;	/* latest snapshot valid up to */
+	slock_t		mutex_threshold;	/* protect threshold fields */
+	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
+	TransactionId threshold_xid;	/* earlier xid may be gone */
+
+	/*
+	 * Keep one xid per minute for old snapshot error handling.
+	 *
+	 * Use a circular buffer with a head offset, a count of entries currently
+	 * used, and a timestamp corresponding to the xid at the head offset.  A
+	 * count_used value of zero means that there are no times stored; a
+	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
+	 * is full and the head must be advanced to add new entries.  Use
+	 * timestamps aligned to minute boundaries, since that seems less
+	 * surprising than aligning based on the first usage timestamp.  The
+	 * latest bucket is effectively stored within latest_xmin.  The circular
+	 * buffer is updated when we get a new xmin value that doesn't fall into
+	 * the same interval.
+	 *
+	 * It is OK if the xid for a given time slot is from earlier than
+	 * calculated by adding the number of minutes corresponding to the
+	 * (possibly wrapped) distance from the head offset to the time of the
+	 * head entry, since that just results in the vacuuming of old tuples
+	 * being slightly less aggressive.  It would not be OK for it to be off in
+	 * the other direction, since it might result in vacuuming tuples that are
+	 * still expected to be there.
+	 *
+	 * Use of an SLRU was considered but not chosen because it is more
+	 * heavyweight than is needed for this, and would probably not be any less
+	 * code to implement.
+	 *
+	 * Persistence is not needed.
+	 */
+	int			head_offset;	/* subscript of oldest tracked time */
+	TimestampTz head_timestamp; /* time corresponding to head xid */
+	int			count_used;		/* how many slots are in use */
+	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotControlData;
+
+extern volatile OldSnapshotControlData *oldSnapshotControl;
+
+#endif
-- 
2.20.1

From e23d46f8482561fc2369525d5625058dad0ded5c Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:14:32 -0400
Subject: [PATCH v2 2/6] contrib/old_snapshot: time->xid mapping.

---
 contrib/Makefile                           |   1 +
 contrib/old_snapshot/Makefile              |  24 ++++
 contrib/old_snapshot/old_snapshot--1.0.sql |  14 ++
 contrib/old_snapshot/old_snapshot.control  |   5 +
 contrib/old_snapshot/time_mapping.c        | 159 +++++++++++++++++++++
 5 files changed, 203 insertions(+)
 create mode 100644 contrib/old_snapshot/Makefile
 create mode 100644 contrib/old_snapshot/old_snapshot--1.0.sql
 create mode 100644 contrib/old_snapshot/old_snapshot.control
 create mode 100644 contrib/old_snapshot/time_mapping.c

diff --git a/contrib/Makefile b/contrib/Makefile
index 1846d415b6..452ade0782 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -27,6 +27,7 @@ SUBDIRS = \
 		lo		\
 		ltree		\
 		oid2name	\
+		old_snapshot	\
 		pageinspect	\
 		passwordcheck	\
 		pg_buffercache	\
diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
new file mode 100644
index 0000000000..091231f25f
--- /dev/null
+++ b/contrib/old_snapshot/Makefile
@@ -0,0 +1,24 @@
+# contrib/old_snapshot/Makefile
+
+MODULE_big = old_snapshot
+OBJS = \
+	$(WIN32RES) \
+	time_mapping.o
+PG_CPPFLAGS = -I$(libpq_srcdir)
+
+EXTENSION = old_snapshot
+DATA = old_snapshot--1.0.sql
+PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
+
+REGRESS = old_snapshot
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/old_snapshot
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
new file mode 100644
index 0000000000..9ebb8829e3
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot--1.0.sql
@@ -0,0 +1,14 @@
+/* contrib/old_snapshot/old_snapshot--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION old_snapshot" to load this file. \quit
+
+-- Show visibility map and page-level visibility information for each block.
+CREATE FUNCTION pg_old_snapshot_time_mapping(array_offset OUT int4,
+											 end_timestamp OUT timestamptz,
+											 newest_xmin OUT xid)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
+LANGUAGE C STRICT;
+
+-- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/old_snapshot.control b/contrib/old_snapshot/old_snapshot.control
new file mode 100644
index 0000000000..491eec536c
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot.control
@@ -0,0 +1,5 @@
+# old_snapshot extension
+comment = 'utilities in support of old_snapshot_threshold'
+default_version = '1.0'
+module_pathname = '$libdir/old_snapshot'
+relocatable = true
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
new file mode 100644
index 0000000000..37e0055a00
--- /dev/null
+++ b/contrib/old_snapshot/time_mapping.c
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * time_mapping.c
+ *	  time to XID mapping information
+ *
+ * Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ *	  contrib/old_snapshot/time_mapping.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "storage/lwlock.h"
+#include "utils/old_snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+/*
+ * Backend-private copy of the information from oldSnapshotControl which relates
+ * to the time to XID mapping, plus an index so that we can iterate.
+ *
+ * Note that the length of the xid_by_minute array is given by
+ * OLD_SNAPSHOT_TIME_MAP_ENTRIES (which is not a compile-time constant).
+ */
+typedef struct
+{
+	int				current_index;
+	int				head_offset;
+	TimestampTz		head_timestamp;
+	int				count_used;
+	TransactionId	xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotTimeMapping;
+
+#define NUM_TIME_MAPPING_COLUMNS 3
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
+
+static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
+static TupleDesc MakeOldSnapshotTimeMappingTupleDesc(void);
+static HeapTuple MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc,
+												 OldSnapshotTimeMapping *mapping);
+
+/*
+ * SQL-callable set-returning function.
+ */
+Datum
+pg_old_snapshot_time_mapping(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	OldSnapshotTimeMapping *mapping;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext	oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		mapping = GetOldSnapshotTimeMapping();
+		funcctx->user_fctx = mapping;
+		funcctx->tuple_desc = MakeOldSnapshotTimeMappingTupleDesc();
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	mapping = (OldSnapshotTimeMapping *) funcctx->user_fctx;
+
+	while (mapping->current_index < mapping->count_used)
+	{
+		HeapTuple	tuple;
+
+		tuple = MakeOldSnapshotTimeMappingTuple(funcctx->tuple_desc, mapping);
+		++mapping->current_index;
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Get the old snapshot time mapping data from shared memory.
+ */
+static OldSnapshotTimeMapping *
+GetOldSnapshotTimeMapping(void)
+{
+	OldSnapshotTimeMapping *mapping;
+
+	mapping = palloc(offsetof(OldSnapshotTimeMapping, xid_by_minute)
+					 + sizeof(TransactionId) * OLD_SNAPSHOT_TIME_MAP_ENTRIES);
+	mapping->current_index = 0;
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+	mapping->head_offset = oldSnapshotControl->head_offset;
+	mapping->head_timestamp = oldSnapshotControl->head_timestamp;
+	mapping->count_used = oldSnapshotControl->count_used;
+	for (int i = 0; i < OLD_SNAPSHOT_TIME_MAP_ENTRIES; ++i)
+		mapping->xid_by_minute[i] = oldSnapshotControl->xid_by_minute[i];
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	return mapping;
+}
+
+/*
+ * Build a tuple descriptor for the pg_old_snapshot_time_mapping() SRF.
+ */
+static TupleDesc
+MakeOldSnapshotTimeMappingTupleDesc(void)
+{
+	TupleDesc	tupdesc;
+
+	tupdesc = CreateTemplateTupleDesc(NUM_TIME_MAPPING_COLUMNS);
+
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "array_offset",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "end_timestamp",
+					   TIMESTAMPTZOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "newest_xmin",
+					   XIDOID, -1, 0);
+
+	return BlessTupleDesc(tupdesc);
+}
+
+/*
+ * Convert one entry from the old snapshot time mapping to a HeapTuple.
+ */
+static HeapTuple
+MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mapping)
+{
+	Datum	values[NUM_TIME_MAPPING_COLUMNS];
+	bool	nulls[NUM_TIME_MAPPING_COLUMNS];
+	int		array_position;
+	TimestampTz	timestamp;
+
+	/*
+	 * Figure out the array position corresponding to the current index.
+	 *
+	 * Index 0 means the oldest entry in the mapping, which is stored at
+	 * mapping->head_offset. Index 1 means the next-oldest entry, which is a the
+	 * following index, and so on. We wrap around when we reach the end of the array.
+	 */
+	array_position = (mapping->head_offset + mapping->current_index)
+		% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+
+	/*
+	 * No explicit timestamp is stored for any entry other than the oldest one,
+	 * but each entry corresponds to 1-minute period, so we can just add.
+	 */
+	timestamp = TimestampTzPlusMilliseconds(mapping->head_timestamp,
+											mapping->current_index * 60000);
+
+	/* Initialize nulls and values arrays. */
+	memset(nulls, 0, sizeof(nulls));
+	values[0] = Int32GetDatum(array_position);
+	values[1] = TimestampTzGetDatum(timestamp);
+	values[2] = TransactionIdGetDatum(mapping->xid_by_minute[array_position]);
+
+	return heap_form_tuple(tupdesc, values, nulls);
+}
-- 
2.20.1

From 73d5a1c55b9c3f0166623d7a89b7fb1a73160c42 Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:15:57 -0400
Subject: [PATCH v2 3/6] Fix bugs in MaintainOldSnapshotTimeMapping.

---
 src/backend/utils/time/snapmgr.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index abaaea569a..72b2c61a07 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1926,10 +1926,32 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	else
 	{
 		/* We need a new bucket, but it might not be the very next one. */
-		int			advance = ((ts - oldSnapshotControl->head_timestamp)
-							   / USECS_PER_MINUTE);
+		int			distance_to_new_tail;
+		int			distance_to_current_tail;
+		int			advance;
 
-		oldSnapshotControl->head_timestamp = ts;
+		/*
+		 * Our goal is for the new "tail" of the mapping, that is, the entry
+		 * which is newest and thus furthest from the "head" entry, to
+		 * correspond to "ts". Since there's one entry per minute, the
+		 * distance between the current head and the new tail is just the
+		 * number of minutes of difference between ts and the current
+		 * head_timestamp.
+		 *
+		 * The distance from the current head to the current tail is one
+		 * less than the number of entries in the mapping, because the
+		 * entry at the head_offset is for 0 minutes after head_timestamp.
+		 *
+		 * The difference between these two values is the number of minutes
+		 * by which we need to advance the mapping, either adding new entries
+		 * or rotating old ones out.
+		 */
+		distance_to_new_tail =
+			(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
+		distance_to_current_tail =
+			oldSnapshotControl->count_used - 1;
+		advance = distance_to_new_tail - distance_to_current_tail;
+		Assert(advance > 0);
 
 		if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
 		{
@@ -1937,6 +1959,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 			oldSnapshotControl->head_offset = 0;
 			oldSnapshotControl->count_used = 1;
 			oldSnapshotControl->xid_by_minute[0] = xmin;
+			oldSnapshotControl->head_timestamp = ts;
 		}
 		else
 		{
@@ -1955,6 +1978,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 					else
 						oldSnapshotControl->head_offset = old_head + 1;
 					oldSnapshotControl->xid_by_minute[old_head] = xmin;
+					oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
 				}
 				else
 				{
-- 
2.20.1

From a594fb6f551ee545f66182c7f2098c73e196416c Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 17 Apr 2020 14:10:35 +1200
Subject: [PATCH v2 4/6] Add pg_clobber_current_snapshot_timestamp().

---
 contrib/old_snapshot/old_snapshot--1.0.sql |  5 +++++
 contrib/old_snapshot/time_mapping.c        | 13 +++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
index 9ebb8829e3..aacf1704b5 100644
--- a/contrib/old_snapshot/old_snapshot--1.0.sql
+++ b/contrib/old_snapshot/old_snapshot--1.0.sql
@@ -11,4 +11,9 @@ RETURNS SETOF record
 AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
 LANGUAGE C STRICT;
 
+CREATE FUNCTION pg_clobber_current_snapshot_timestamp(now timestamptz)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'pg_clobber_current_snapshot_timestamp'
+LANGUAGE C STRICT;
+
 -- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
index 37e0055a00..8728c4ddb5 100644
--- a/contrib/old_snapshot/time_mapping.c
+++ b/contrib/old_snapshot/time_mapping.c
@@ -36,6 +36,7 @@ typedef struct
 
 PG_MODULE_MAGIC;
 PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
+PG_FUNCTION_INFO_V1(pg_clobber_current_snapshot_timestamp);
 
 static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
 static TupleDesc MakeOldSnapshotTimeMappingTupleDesc(void);
@@ -157,3 +158,15 @@ MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mappi
 
 	return heap_form_tuple(tupdesc, values, nulls);
 }
+
+Datum
+pg_clobber_current_snapshot_timestamp(PG_FUNCTION_ARGS)
+{
+	TimestampTz new_current_timestamp = PG_GETARG_TIMESTAMPTZ(0);
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	oldSnapshotControl->current_timestamp = new_current_timestamp;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	PG_RETURN_NULL();
+}
-- 
2.20.1

From 81163b9592f7762d83549962ad4e50e2abb80c07 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 17 Apr 2020 15:18:49 +1200
Subject: [PATCH v2 5/6] Truncate old snapshot XIDs before truncating CLOG.

---
 contrib/old_snapshot/Makefile          |  2 +-
 contrib/old_snapshot/t/001_truncate.pl | 80 ++++++++++++++++++++++++++
 src/backend/commands/vacuum.c          |  3 +
 src/backend/utils/time/snapmgr.c       | 21 +++++++
 src/include/utils/snapmgr.h            |  1 +
 5 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 contrib/old_snapshot/t/001_truncate.pl

diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
index 091231f25f..c839d30346 100644
--- a/contrib/old_snapshot/Makefile
+++ b/contrib/old_snapshot/Makefile
@@ -10,7 +10,7 @@ EXTENSION = old_snapshot
 DATA = old_snapshot--1.0.sql
 PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
 
-REGRESS = old_snapshot
+TAP_TESTS = 1
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/contrib/old_snapshot/t/001_truncate.pl b/contrib/old_snapshot/t/001_truncate.pl
new file mode 100644
index 0000000000..d6c0def00f
--- /dev/null
+++ b/contrib/old_snapshot/t/001_truncate.pl
@@ -0,0 +1,80 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+
+note "check time map is truncated when CLOG is";
+
+# build up a time map with 4 entries
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:00:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:01:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:02:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:03:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+my $count;
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 4, "expected to have 4 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we expect all XIDs to have been truncated
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+# put two more in the map
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:04:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:05:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 2, "expected to have 2 entries in the old snapshot time map");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes (this tests wrapping around the mapping array, which is of size 10 + 10)...
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 18, "expected to have 18 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# this should leave just 16
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 16, "expected to have 16 entries in the old snapshot time map");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we should now be back to empty
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+$node->stop;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 72b2c61a07..d604e69270 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1998,6 +1998,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
-- 
2.20.1

From e2ab8832fa476b14b44f7452985b1b553bd44b6a Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 18 Apr 2020 16:16:06 +1200
Subject: [PATCH v2 6/6] Add TAP test for snapshot-too-old.

---
 contrib/old_snapshot/t/002_too_old.pl |  75 ++++++++++++++++
 src/test/perl/PsqlSession.pm          | 122 ++++++++++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 contrib/old_snapshot/t/002_too_old.pl
 create mode 100644 src/test/perl/PsqlSession.pm

diff --git a/contrib/old_snapshot/t/002_too_old.pl b/contrib/old_snapshot/t/002_too_old.pl
new file mode 100644
index 0000000000..097a464763
--- /dev/null
+++ b/contrib/old_snapshot/t/002_too_old.pl
@@ -0,0 +1,75 @@
+# Simple test of early pruning and snapshot-too-old errors.
+use strict;
+use warnings;
+use PostgresNode;
+use PsqlSession;
+use TestLib;
+use Test::More tests => 5;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'create extension old_snapshot');
+$node->psql('postgres', 'create table t (i int)');
+$node->psql('postgres', 'insert into t select generate_series(1, 42)');
+
+# start an interactive session that we can use to interleave statements
+my $session = PsqlSession->new($node, "postgres");
+$session->send("\\set PROMPT1 ''\n", 2);
+$session->send("\\set PROMPT2 ''\n", 1);
+
+my @lines;
+my $command_tag;
+my $result;
+
+# begin a session that we can interleave with vacuum activity
+@lines = $session->send("begin transaction isolation level repeatable read;\n", 2);
+shift @lines;
+$command_tag = shift @lines;
+is($command_tag, "BEGIN");
+
+# take a snapshot at time 0
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:00:00Z')");
+@lines = $session->send("select * from t order by i limit 1;\n", 2);
+shift @lines;
+$result = shift @lines;
+is($result, "1");
+
+# advance time by 10 minutes, then UPDATE and VACUUM the table
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:10:00Z')");
+$node->psql('postgres', "update t set i = 1001 where i = 1");
+$node->psql('postgres', "vacuum analyze t");
+
+# our snapshot is not too old yet, so we can still use it
+@lines = $session->send("select * from t order by i limit 1;\n", 2);
+shift @lines;
+$result = shift @lines;
+is($result, "1");
+
+# advance time by 10 more minutes, then UPDATE and VACUUM the table
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:20:00Z')");
+$node->psql('postgres', "update t set i = 1001 where i = 1");
+$node->psql('postgres', "vacuum analyze t");
+
+# our snapshot is not too old yet, so we can still use it
+@lines = $session->send("select * from t order by i limit 1;\n", 2);
+shift @lines;
+$result = shift @lines;
+is($result, "1");
+
+# advance time by just one more minute, then UPDATE and VACUUM the table
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "update t set i = 1002 where i = 1");
+$node->psql('postgres', "vacuum analyze t");
+
+# our snapshot is too old!  the thing it wants to see has been removed
+@lines = $session->send("select * from t order by i limit 1;\n", 2);
+shift @lines;
+$result = shift @lines;
+is($result, "ERROR:  snapshot too old");
+
+$session->close;
+$node->stop;
diff --git a/src/test/perl/PsqlSession.pm b/src/test/perl/PsqlSession.pm
new file mode 100644
index 0000000000..b34dcca502
--- /dev/null
+++ b/src/test/perl/PsqlSession.pm
@@ -0,0 +1,122 @@
+=pod
+
+=head1 NAME
+
+PsqlSession - class representing psql connection
+
+=head1 SYNOPSIS
+
+  use PsqlSession;
+
+  my $node = PostgresNode->get_new_node('mynode');
+  my $session = PsqlSession->new($node, "dbname");
+
+  # send simple query and wait for one line response
+  my $result = $session->send("SELECT 42;", 1);
+
+  # close connection
+  $session->close();
+
+=head1 DESCRIPTION
+
+PsqlSession allows for tests of interleaved operations, similar to
+isolation tests.
+
+=cut
+
+package PsqlSession;
+
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use IPC::Run qw(pump finish timer);
+
+our @EXPORT = qw(
+  new
+  send
+  close
+);
+
+=pod
+
+=head1 METHODS
+
+=over
+
+=item PsqlSession::new($class, $node, $dbname)
+
+Create a new PsqlSession instance, connected to a database.
+
+=cut
+
+sub new
+{
+	my ($class, $node, $dbname) = @_;
+	my $timer = timer(5);
+	my $stdin = '';
+	my $stdout = '';
+	my $harness = $node->interactive_psql($dbname, \$stdin, \$stdout, $timer);
+	my $self = {
+		_harness => $harness,
+		_stdin => \$stdin,
+		_stdout => \$stdout,
+		_timer => $timer
+	};
+	bless $self, $class;
+	return $self;
+}
+
+=pod
+
+=item $session->send($input, $lines)
+
+Send the given input to psql, and then wait for the given number of lines
+of output, or a timeout.
+
+=cut
+
+sub count_lines
+{
+	my ($s) = @_;
+	return $s =~ tr/\n//;
+}
+
+sub send
+{
+	my ($self, $statement, $lines) = @_;
+	${$self->{_stdout}} = '';
+	${$self->{_stdin}} .= $statement;
+	$self->{_timer}->start(5);
+	pump $self->{_harness} until count_lines(${$self->{_stdout}}) == $lines || $self->{_timer}->is_expired;
+	die "expected ${lines} lines but after timeout, received only: ${$self->{_stdout}}" if $self->{_timer}->is_expired;
+	my @result = split /\n/, ${$self->{_stdout}};
+	chop(@result);
+	return @result;
+}
+
+=pod
+
+=item $session->close()
+
+Close a PsqlSession connection.
+
+=cut
+
+sub close
+{
+	my ($self) = @_;
+	$self->{_timer}->start(5);
+	${$self->{_stdin}} .= "\\q\n";
+	finish $self->{_harness} or die "psql returned $?";
+	$self->{_timer}->reset;
+}
+
+=pod
+
+=back
+
+=cut
+
+1;
-- 
2.20.1

Reply via email to