Andrey Borodin wrote 2022-08-18 06:35:

I like the idea of one knob instead of one per each SLRU. Maybe we
even could deduce sane value from NBuffers? That would effectively
lead to 0 knobs :)

Your patch have a prefix "v22-0006", does it mean there are 5 previous
steps of the patchset?

Thank you!


Best regards, Andrey Borodin.

Not sure it's possible to deduce from NBuffers only. slru_buffers_scale_shift looks like relief valve for systems with ultra scaled NBuffers.

Regarding v22-0006 I just tried to choose index unique for this thread so now it's fixed to 0001 indexing.
Index: src/include/miscadmin.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
--- a/src/include/miscadmin.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/miscadmin.h	(date 1660678108730)
@@ -176,6 +176,7 @@
 extern PGDLLIMPORT int MaxConnections;
 extern PGDLLIMPORT int max_worker_processes;
 extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int slru_buffers_size_scale;
 
 extern PGDLLIMPORT int MyProcPid;
 extern PGDLLIMPORT pg_time_t MyStartTime;
Index: src/include/access/subtrans.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
--- a/src/include/access/subtrans.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/subtrans.h	(date 1660678108698)
@@ -12,7 +12,7 @@
 #define SUBTRANS_H
 
 /* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS	32
+#define NUM_SUBTRANS_BUFFERS	(32 << slru_buffers_size_scale)
 
 extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
Index: src/backend/utils/init/globals.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
--- a/src/backend/utils/init/globals.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/init/globals.c	(date 1660678064048)
@@ -150,3 +150,5 @@
 
 int			VacuumCostBalance = 0;	/* working state for vacuum */
 bool		VacuumCostActive = false;
+
+int			slru_buffers_size_scale = 2;	/* power 2 scale for SLRU buffers */
Index: src/backend/access/transam/slru.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
--- a/src/backend/access/transam/slru.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/slru.c	(date 1660678063980)
@@ -59,6 +59,7 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "storage/shmem.h"
+#include "port/pg_bitutils.h"
 
 #define SlruFileName(ctl, path, seg) \
 	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -71,6 +72,17 @@
  */
 #define MAX_WRITEALL_BUFFERS	16
 
+/*
+ * To avoid overflowing internal arithmetic and the size_t data type, the
+ * number of buffers should not exceed this number.
+ */
+#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
+
+/*
+ * SLRU bank size for slotno hash banks
+ */
+#define SLRU_BANK_SIZE 8
+
 typedef struct SlruWriteAllData
 {
 	int			num_files;		/* # files actually open */
@@ -134,7 +146,7 @@
 static SlruErrorCause slru_errcause;
 static int	slru_errno;
 
-
+static void SlruAdjustNSlots(int *nslots, int *bankmask);
 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
@@ -148,6 +160,25 @@
 									  int segpage, void *data);
 static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
 
+/*
+ * Pick number of slots and bank size optimal for hashed associative SLRU buffers.
+ * We declare SLRU nslots is always power of 2.
+ * We split SLRU to 8-sized hash banks, after some performance benchmarks.
+ * We hash pageno to banks by pageno masked by 3 upper bits.
+ */
+static void
+SlruAdjustNSlots(int *nslots, int *bankmask)
+{
+	Assert(*nslots > 0);
+	Assert(*nslots <= SLRU_MAX_ALLOWED_BUFFERS);
+
+	*nslots = (int) pg_nextpower2_32(Max(SLRU_BANK_SIZE, Min(*nslots, NBuffers / 256)));
+
+	*bankmask = *nslots / SLRU_BANK_SIZE - 1;
+
+	elog(DEBUG5, "nslots %d banksize %d nbanks %d bankmask %x", *nslots, SLRU_BANK_SIZE, *nslots / SLRU_BANK_SIZE, *bankmask);
+}
+
 /*
  * Initialization of shared memory
  */
@@ -156,6 +187,9 @@
 SimpleLruShmemSize(int nslots, int nlsns)
 {
 	Size		sz;
+	int			bankmask_ignore;
+
+	SlruAdjustNSlots(&nslots, &bankmask_ignore);
 
 	/* we assume nslots isn't so large as to risk overflow */
 	sz = MAXALIGN(sizeof(SlruSharedData));
@@ -190,6 +224,9 @@
 {
 	SlruShared	shared;
 	bool		found;
+	int			bankmask;
+
+	SlruAdjustNSlots(&nslots, &bankmask);
 
 	shared = (SlruShared) ShmemInitStruct(name,
 										  SimpleLruShmemSize(nslots, nlsns),
@@ -257,7 +294,10 @@
 		Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
 	}
 	else
+	{
 		Assert(found);
+		Assert(shared->num_slots == nslots);
+	}
 
 	/*
 	 * Initialize the unshared control struct, including directory path. We
@@ -265,6 +305,7 @@
 	 */
 	ctl->shared = shared;
 	ctl->sync_handler = sync_handler;
+	ctl->bank_mask = bankmask;
 	strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
 }
 
@@ -496,12 +537,14 @@
 {
 	SlruShared	shared = ctl->shared;
 	int			slotno;
+	int			bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+	int			bankend = bankstart + SLRU_BANK_SIZE;
 
 	/* Try to find the page while holding only shared lock */
 	LWLockAcquire(shared->ControlLock, LW_SHARED);
 
 	/* See if page is already in a buffer */
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
+	for (slotno = bankstart; slotno < bankend; slotno++)
 	{
 		if (shared->page_number[slotno] == pageno &&
 			shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
@@ -1030,7 +1073,10 @@
 		int			best_invalid_page_number = 0;	/* keep compiler quiet */
 
 		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		int			bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+		int			bankend = bankstart + SLRU_BANK_SIZE;
+
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			if (shared->page_number[slotno] == pageno &&
 				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
@@ -1065,7 +1111,7 @@
 		 * multiple pages with the same lru_count.
 		 */
 		cur_count = (shared->cur_lru_count)++;
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			int			this_delta;
 			int			this_page_number;
Index: src/backend/access/transam/clog.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
--- a/src/backend/access/transam/clog.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/clog.c	(date 1660677867877)
@@ -74,6 +74,8 @@
 #define GetLSNIndex(slotno, xid)	((slotno) * CLOG_LSNS_PER_PAGE + \
 	((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
 
+#define NUM_CLOG_BUFFERS 	(128 << slru_buffers_size_scale)
+
 /*
  * The number of subtransactions below which we consider to apply clog group
  * update optimization.  Testing reveals that the number higher than this can
@@ -661,42 +663,20 @@
 	return status;
 }
 
-/*
- * Number of shared CLOG buffers.
- *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory.  Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations.  The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
- */
-Size
-CLOGShmemBuffers(void)
-{
-	return Min(128, Max(4, NBuffers / 512));
-}
-
 /*
  * Initialization of shared memory for CLOG
  */
 Size
 CLOGShmemSize(void)
 {
-	return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
+	return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
 }
 
 void
 CLOGShmemInit(void)
 {
 	XactCtl->PagePrecedes = CLOGPagePrecedes;
-	SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
+	SimpleLruInit(XactCtl, "Xact", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
 				  XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
 				  SYNC_HANDLER_CLOG);
 	SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
Index: src/include/storage/predicate.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
--- a/src/include/storage/predicate.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/storage/predicate.h	(date 1660678108718)
@@ -28,7 +28,7 @@
 
 
 /* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS		16
+#define NUM_SERIAL_BUFFERS	(16 << slru_buffers_size_scale)
 
 /*
  * A handle used for sharing SERIALIZABLEXACT objects between the participants
Index: src/backend/utils/misc/postgresql.conf.sample
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
--- a/src/backend/utils/misc/postgresql.conf.sample	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/postgresql.conf.sample	(date 1660678108650)
@@ -155,6 +155,8 @@
 					#   mmap
 					# (change requires restart)
 #min_dynamic_shared_memory = 0MB	# (change requires restart)
+#slru_buffers_size_scale = 2 # SLRU buffers size scale of power 2, range 0..7
+					# (change requires restart)
 
 # - Disk -
 
Index: src/include/access/multixact.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
--- a/src/include/access/multixact.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/multixact.h	(date 1660678108678)
@@ -30,8 +30,8 @@
 #define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
 
 /* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS		8
-#define NUM_MULTIXACTMEMBER_BUFFERS		16
+#define NUM_MULTIXACTOFFSET_BUFFERS		(16 << slru_buffers_size_scale)
+#define NUM_MULTIXACTMEMBER_BUFFERS		(32 << slru_buffers_size_scale)
 
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
Index: src/backend/utils/misc/guc.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
--- a/src/backend/utils/misc/guc.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/guc.c	(date 1660678064216)
@@ -32,9 +32,11 @@
 #endif
 #include <unistd.h>
 
+#include "access/clog.h"
 #include "access/commit_ts.h"
 #include "access/gin.h"
 #include "access/rmgr.h"
+#include "access/slru.h"
 #include "access/tableam.h"
 #include "access/toast_compression.h"
 #include "access/transam.h"
@@ -2375,6 +2377,16 @@
 		-1, -1, INT_MAX,
 		NULL, NULL, NULL
 	},
+
+	{
+		{"slru_buffers_size_scale", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("SLRU buffers size scale of power 2"),
+			NULL
+		},
+		&slru_buffers_size_scale,
+		2, 0, 7,
+		NULL, NULL, NULL
+	},
 
 	{
 		{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
Index: src/include/commands/async.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
--- a/src/include/commands/async.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/commands/async.h	(date 1660678108702)
@@ -18,7 +18,7 @@
 /*
  * The number of SLRU page buffers we use for the notification queue.
  */
-#define NUM_NOTIFY_BUFFERS	8
+#define NUM_NOTIFY_BUFFERS	(16 << slru_buffers_size_scale)
 
 extern bool Trace_notify;
 extern volatile sig_atomic_t notifyInterruptPending;
Index: src/include/access/slru.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
--- a/src/include/access/slru.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/slru.h	(date 1660678108690)
@@ -134,6 +134,11 @@
 	 * it's always the same, it doesn't need to be in shared memory.
 	 */
 	char		Dir[64];
+
+	/*
+	 * mask for slotno hash bank
+	 */
+	Size		bank_mask;
 } SlruCtlData;
 
 typedef SlruCtlData *SlruCtl;
Index: src/backend/access/transam/subtrans.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
--- a/src/backend/access/transam/subtrans.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/subtrans.c	(date 1660678064032)
@@ -31,6 +31,7 @@
 #include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
+#include "miscadmin.h"
 #include "pg_trace.h"
 #include "utils/snapmgr.h"
 
Index: doc/src/sgml/config.sgml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
--- a/doc/src/sgml/config.sgml	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/doc/src/sgml/config.sgml	(date 1660677867861)
@@ -1953,6 +1953,37 @@
       </listitem>
      </varlistentry>
 
+    <varlistentry id="guc-slru-buffers-size-scale" xreflabel="slru_buffers_size_scale">
+     <term><varname>slru_buffers_size_scale</varname> (<type>integer</type>)
+     <indexterm>
+      <primary><varname>slru_buffers_size_scale</varname> configuration parameter</primary>
+     </indexterm>
+     </term>
+     <listitem>
+      <para>
+       Specifies power 2 scale for all SLRU shared memory buffers sizes. Buffers sizes depends on
+       both <literal>guc_slru_buffers_size_scale</literal> and <literal>shared_buffers</literal> params.
+      </para>
+      <para>
+       This affects on buffers in the list below (see also <xref linkend="pgdata-contents-table"/>):
+        <itemizedlist>
+         <listitem><para><literal>NUM_MULTIXACTOFFSET_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_MULTIXACTMEMBER_BUFFERS = Min(64 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_SUBTRANS_BUFFERS = Min(64 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_NOTIFY_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_SERIAL_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_CLOG_BUFFERS = Min(128 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_COMMIT_TS_BUFFERS = Min(128 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+        </itemizedlist>
+      </para>
+      <para>
+       Value is in <literal>0..7</literal> bounds.
+       The default value is <literal>2</literal>.
+       This parameter can only be set at server start.
+      </para>
+     </listitem>
+    </varlistentry>
+
      <varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
       <term><varname>max_stack_depth</varname> (<type>integer</type>)
       <indexterm>
Index: src/backend/access/transam/commit_ts.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
--- a/src/backend/access/transam/commit_ts.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/commit_ts.c	(date 1660677867889)
@@ -73,6 +73,8 @@
 #define TransactionIdToCTsEntry(xid)	\
 	((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
 
+#define NUM_COMMIT_TS_BUFFERS	(128 << slru_buffers_size_scale)
+
 /*
  * Link to shared-memory data structures for CommitTs control
  */
@@ -508,26 +510,13 @@
 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
 }
 
-/*
- * Number of shared CommitTS buffers.
- *
- * We use a very similar logic as for the number of CLOG buffers (except we
- * scale up twice as fast with shared buffers, and the maximum is twice as
- * high); see comments in CLOGShmemBuffers.
- */
-Size
-CommitTsShmemBuffers(void)
-{
-	return Min(256, Max(4, NBuffers / 256));
-}
-
 /*
  * Shared memory sizing for CommitTs
  */
 Size
 CommitTsShmemSize(void)
 {
-	return SimpleLruShmemSize(CommitTsShmemBuffers(), 0) +
+	return SimpleLruShmemSize(NUM_COMMIT_TS_BUFFERS, 0) +
 		sizeof(CommitTimestampShared);
 }
 
@@ -541,7 +530,7 @@
 	bool		found;
 
 	CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
-	SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,
+	SimpleLruInit(CommitTsCtl, "CommitTs", NUM_COMMIT_TS_BUFFERS, 0,
 				  CommitTsSLRULock, "pg_commit_ts",
 				  LWTRANCHE_COMMITTS_BUFFER,
 				  SYNC_HANDLER_COMMIT_TS);

Reply via email to