Hi,

I've, for a while, pondered whether we couldn't find a easier way than
CSN to make snapshots cheaper as GetSnapshotData() very frequently is
one of the top profile entries. Especially on bigger servers, where the
pretty much guaranteed cachemisses are quite visibile.

My idea is based on the observation that even in very write heavy
environments the frequency of relevant PGXACT changes is noticeably
lower than GetSnapshotData() calls.

My idea is to simply cache the results of a GetSnapshotData() result in
shared memory and invalidate it everytime something happens that affects
the results. Then GetSnapshotData() can do a couple of memcpy() calls to
get the snapshot - which will be significantly faster in a large number
of cases. For one often enough there's many transactions without an xid
assigned (and thus xip/subxip are small), for another, even if that's
not the case it's linear copies instead of unpredicable random accesses
through PGXACT/PGPROC.

Now, that idea is pretty handwavy. After talking about it with a couple
of people I've decided to write a quick POC to check whether it's
actually beneficial. That POC isn't anything close to being ready or
complete. I just wanted to evaluate whether the idea has some merit or
not. That said, it survives make installcheck-parallel.

Some very preliminary performance results indicate a growth of between
25% (pgbench -cj 796 -m prepared -f 'SELECT 1'), 15% (pgbench -s 300 -S
-cj 796), 2% (pgbench -cj 96 -s 300) on a 4 x E5-4620 system. Even on my
laptop I can measure benefits in a readonly, highly concurrent,
workload; although unsurprisingly much smaller.

Now, these are all somewhat extreme workloads, but still. It's a nice
improvement for a quick POC.

So far the implemented idea is to just completely wipe the cached
snapshot everytime somebody commits. I've afterwards not been able to
see GetSnapshotData() in the profile at all - so that possibly is
actually sufficient?

This implementation probably has major holes. Like it probably ends up
not really increasing the xmin horizon when a longrunning readonly
transaction without an xid commits...

Comments about the idea?

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From 3f800e9363909d2fcf80cb5f9b4f68579a3cb328 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 1 Feb 2015 21:04:42 +0100
Subject: [PATCH] Heavily-WIP: Cache snapshots in GetSnapshotData()

---
 src/backend/commands/cluster.c      |  1 +
 src/backend/storage/ipc/procarray.c | 67 ++++++++++++++++++++++++++++++++-----
 src/backend/storage/lmgr/proc.c     | 13 +++++++
 src/include/storage/proc.h          |  6 ++++
 4 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index dc1b37c..3def86a 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1558,6 +1558,7 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
 			elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
 		relform = (Form_pg_class) GETSTRUCT(reltup);
 
+		Assert(TransactionIdIsNormal(frozenXid));
 		relform->relfrozenxid = frozenXid;
 		relform->relminmxid = cutoffMulti;
 
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index a1ebc72..66be489 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -421,6 +421,8 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
 								  latestXid))
 			ShmemVariableCache->latestCompletedXid = latestXid;
 
+		ProcGlobal->cached_snapshot_valid = false;
+
 		LWLockRelease(ProcArrayLock);
 	}
 	else
@@ -1403,6 +1405,8 @@ GetSnapshotData(Snapshot snapshot)
 					 errmsg("out of memory")));
 	}
 
+	snapshot->takenDuringRecovery = RecoveryInProgress();
+
 	/*
 	 * It is sufficient to get shared lock on ProcArrayLock, even if we are
 	 * going to set MyPgXact->xmin.
@@ -1417,9 +1421,32 @@ GetSnapshotData(Snapshot snapshot)
 	/* initialize xmin calculation with xmax */
 	globalxmin = xmin = xmax;
 
-	snapshot->takenDuringRecovery = RecoveryInProgress();
+	if (!snapshot->takenDuringRecovery && ProcGlobal->cached_snapshot_valid)
+	{
+		Snapshot csnap = &ProcGlobal->cached_snapshot;
+		TransactionId *saved_xip;
+		TransactionId *saved_subxip;
+
+		saved_xip = snapshot->xip;
+		saved_subxip = snapshot->subxip;
+
+		memcpy(snapshot, csnap, sizeof(SnapshotData));
+
+		snapshot->xip = saved_xip;
+		snapshot->subxip = saved_subxip;
+
+		memcpy(snapshot->xip, csnap->xip,
+			   sizeof(TransactionId) * csnap->xcnt);
+		memcpy(snapshot->subxip, csnap->subxip,
+			   sizeof(TransactionId) * csnap->subxcnt);
 
-	if (!snapshot->takenDuringRecovery)
+		globalxmin = ProcGlobal->cached_snapshot_globalxmin;
+		xmin = csnap->xmin;
+
+		Assert(TransactionIdIsValid(globalxmin));
+		Assert(TransactionIdIsValid(xmin));
+	}
+	else if (!snapshot->takenDuringRecovery)
 	{
 		int		   *pgprocnos = arrayP->pgprocnos;
 		int			numProcs;
@@ -1437,14 +1464,11 @@ GetSnapshotData(Snapshot snapshot)
 			TransactionId xid;
 
 			/*
-			 * Backend is doing logical decoding which manages xmin
-			 * separately, check below.
+			 * Ignore procs running LAZY VACUUM (which don't need to retain
+			 * rows) and backends doing logical decoding (which manages xmin
+			 * separately, check below).
 			 */
-			if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
-				continue;
-
-			/* Ignore procs running LAZY VACUUM */
-			if (pgxact->vacuumFlags & PROC_IN_VACUUM)
+			if (pgxact->vacuumFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
 				continue;
 
 			/* Update globalxmin to be the smallest valid xmin */
@@ -1513,6 +1537,31 @@ GetSnapshotData(Snapshot snapshot)
 				}
 			}
 		}
+
+		/* upate cache */
+		{
+			Snapshot csnap = &ProcGlobal->cached_snapshot;
+			TransactionId *saved_xip;
+			TransactionId *saved_subxip;
+
+			ProcGlobal->cached_snapshot_globalxmin = globalxmin;
+
+			saved_xip = csnap->xip;
+			saved_subxip = csnap->subxip;
+			memcpy(csnap, snapshot, sizeof(SnapshotData));
+			csnap->xip = saved_xip;
+			csnap->subxip = saved_subxip;
+
+			/* not yet stored. Yuck */
+			csnap->xmin = xmin;
+
+			memcpy(csnap->xip, snapshot->xip,
+				   sizeof(TransactionId) * csnap->xcnt);
+			memcpy(csnap->subxip, snapshot->subxip,
+				   sizeof(TransactionId) * csnap->subxcnt);
+			ProcGlobal->cached_snapshot_valid = true;
+		}
+
 	}
 	else
 	{
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 65e8afe..a6ef687 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -112,6 +112,13 @@ ProcGlobalShmemSize(void)
 	size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGXACT)));
 	size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGXACT)));
 
+	/* for the cached snapshot */
+#define PROCARRAY_MAXPROCS	(MaxBackends + max_prepared_xacts)
+	size = add_size(size, mul_size(sizeof(TransactionId), PROCARRAY_MAXPROCS));
+#define TOTAL_MAX_CACHED_SUBXIDS \
+	((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
+	size = add_size(size, mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS));
+
 	return size;
 }
 
@@ -269,6 +276,12 @@ InitProcGlobal(void)
 	/* Create ProcStructLock spinlock, too */
 	ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t));
 	SpinLockInit(ProcStructLock);
+
+	/* cached snapshot */
+	ProcGlobal->cached_snapshot_valid = false;
+	ProcGlobal->cached_snapshot.xip = ShmemAlloc(PROCARRAY_MAXPROCS * sizeof(TransactionId));
+	ProcGlobal->cached_snapshot.subxip = ShmemAlloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
+	ProcGlobal->cached_snapshot_globalxmin = InvalidTransactionId;
 }
 
 /*
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index d194f38..f483d3b 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -16,6 +16,7 @@
 
 #include "access/xlogdefs.h"
 #include "lib/ilist.h"
+#include "utils/snapshot.h"
 #include "storage/latch.h"
 #include "storage/lock.h"
 #include "storage/pg_sema.h"
@@ -206,6 +207,11 @@ typedef struct PROC_HDR
 	int			startupProcPid;
 	/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
 	int			startupBufferPinWaitBufId;
+
+	/* Cached snapshot */
+	bool		cached_snapshot_valid;
+	SnapshotData cached_snapshot;
+	TransactionId cached_snapshot_globalxmin;
 } PROC_HDR;
 
 extern PROC_HDR *ProcGlobal;
-- 
2.2.1.212.gc5b9256

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to