Andrey Borodin wrote 2022-08-18 06:35:
I like the idea of one knob instead of one per each SLRU. Maybe we
even could deduce sane value from NBuffers? That would effectively
lead to 0 knobs :)
Your patch have a prefix "v22-0006", does it mean there are 5 previous
steps of the patchset?
Thank you!
Best regards, Andrey Borodin.
Not sure it's possible to deduce from NBuffers only.
slru_buffers_scale_shift looks like relief valve for systems with ultra
scaled NBuffers.
Regarding v22-0006 I just tried to choose index unique for this thread
so now it's fixed to 0001 indexing.Index: src/include/miscadmin.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
--- a/src/include/miscadmin.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/miscadmin.h (date 1660678108730)
@@ -176,6 +176,7 @@
extern PGDLLIMPORT int MaxConnections;
extern PGDLLIMPORT int max_worker_processes;
extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int slru_buffers_size_scale;
extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
Index: src/include/access/subtrans.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
--- a/src/include/access/subtrans.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/subtrans.h (date 1660678108698)
@@ -12,7 +12,7 @@
#define SUBTRANS_H
/* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS 32
+#define NUM_SUBTRANS_BUFFERS (32 << slru_buffers_size_scale)
extern void SubTransSetParent(TransactionId xid, TransactionId parent);
extern TransactionId SubTransGetParent(TransactionId xid);
Index: src/backend/utils/init/globals.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
--- a/src/backend/utils/init/globals.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/init/globals.c (date 1660678064048)
@@ -150,3 +150,5 @@
int VacuumCostBalance = 0; /* working state for vacuum */
bool VacuumCostActive = false;
+
+int slru_buffers_size_scale = 2; /* power 2 scale for SLRU buffers */
Index: src/backend/access/transam/slru.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
--- a/src/backend/access/transam/slru.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/slru.c (date 1660678063980)
@@ -59,6 +59,7 @@
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
+#include "port/pg_bitutils.h"
#define SlruFileName(ctl, path, seg) \
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -71,6 +72,17 @@
*/
#define MAX_WRITEALL_BUFFERS 16
+/*
+ * To avoid overflowing internal arithmetic and the size_t data type, the
+ * number of buffers should not exceed this number.
+ */
+#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
+
+/*
+ * SLRU bank size for slotno hash banks
+ */
+#define SLRU_BANK_SIZE 8
+
typedef struct SlruWriteAllData
{
int num_files; /* # files actually open */
@@ -134,7 +146,7 @@
static SlruErrorCause slru_errcause;
static int slru_errno;
-
+static void SlruAdjustNSlots(int *nslots, int *bankmask);
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
@@ -148,6 +160,25 @@
int segpage, void *data);
static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
+/*
+ * Pick number of slots and bank size optimal for hashed associative SLRU buffers.
+ * We declare SLRU nslots is always power of 2.
+ * We split SLRU to 8-sized hash banks, after some performance benchmarks.
+ * We hash pageno to banks by pageno masked by 3 upper bits.
+ */
+static void
+SlruAdjustNSlots(int *nslots, int *bankmask)
+{
+ Assert(*nslots > 0);
+ Assert(*nslots <= SLRU_MAX_ALLOWED_BUFFERS);
+
+ *nslots = (int) pg_nextpower2_32(Max(SLRU_BANK_SIZE, Min(*nslots, NBuffers / 256)));
+
+ *bankmask = *nslots / SLRU_BANK_SIZE - 1;
+
+ elog(DEBUG5, "nslots %d banksize %d nbanks %d bankmask %x", *nslots, SLRU_BANK_SIZE, *nslots / SLRU_BANK_SIZE, *bankmask);
+}
+
/*
* Initialization of shared memory
*/
@@ -156,6 +187,9 @@
SimpleLruShmemSize(int nslots, int nlsns)
{
Size sz;
+ int bankmask_ignore;
+
+ SlruAdjustNSlots(&nslots, &bankmask_ignore);
/* we assume nslots isn't so large as to risk overflow */
sz = MAXALIGN(sizeof(SlruSharedData));
@@ -190,6 +224,9 @@
{
SlruShared shared;
bool found;
+ int bankmask;
+
+ SlruAdjustNSlots(&nslots, &bankmask);
shared = (SlruShared) ShmemInitStruct(name,
SimpleLruShmemSize(nslots, nlsns),
@@ -257,7 +294,10 @@
Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
}
else
+ {
Assert(found);
+ Assert(shared->num_slots == nslots);
+ }
/*
* Initialize the unshared control struct, including directory path. We
@@ -265,6 +305,7 @@
*/
ctl->shared = shared;
ctl->sync_handler = sync_handler;
+ ctl->bank_mask = bankmask;
strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
}
@@ -496,12 +537,14 @@
{
SlruShared shared = ctl->shared;
int slotno;
+ int bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+ int bankend = bankstart + SLRU_BANK_SIZE;
/* Try to find the page while holding only shared lock */
LWLockAcquire(shared->ControlLock, LW_SHARED);
/* See if page is already in a buffer */
- for (slotno = 0; slotno < shared->num_slots; slotno++)
+ for (slotno = bankstart; slotno < bankend; slotno++)
{
if (shared->page_number[slotno] == pageno &&
shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
@@ -1030,7 +1073,10 @@
int best_invalid_page_number = 0; /* keep compiler quiet */
/* See if page already has a buffer assigned */
- for (slotno = 0; slotno < shared->num_slots; slotno++)
+ int bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+ int bankend = bankstart + SLRU_BANK_SIZE;
+
+ for (slotno = bankstart; slotno < bankend; slotno++)
{
if (shared->page_number[slotno] == pageno &&
shared->page_status[slotno] != SLRU_PAGE_EMPTY)
@@ -1065,7 +1111,7 @@
* multiple pages with the same lru_count.
*/
cur_count = (shared->cur_lru_count)++;
- for (slotno = 0; slotno < shared->num_slots; slotno++)
+ for (slotno = bankstart; slotno < bankend; slotno++)
{
int this_delta;
int this_page_number;
Index: src/backend/access/transam/clog.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
--- a/src/backend/access/transam/clog.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/clog.c (date 1660677867877)
@@ -74,6 +74,8 @@
#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \
((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
+#define NUM_CLOG_BUFFERS (128 << slru_buffers_size_scale)
+
/*
* The number of subtransactions below which we consider to apply clog group
* update optimization. Testing reveals that the number higher than this can
@@ -661,42 +663,20 @@
return status;
}
-/*
- * Number of shared CLOG buffers.
- *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory. Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations. The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
- */
-Size
-CLOGShmemBuffers(void)
-{
- return Min(128, Max(4, NBuffers / 512));
-}
-
/*
* Initialization of shared memory for CLOG
*/
Size
CLOGShmemSize(void)
{
- return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
+ return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
}
void
CLOGShmemInit(void)
{
XactCtl->PagePrecedes = CLOGPagePrecedes;
- SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
+ SimpleLruInit(XactCtl, "Xact", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
SYNC_HANDLER_CLOG);
SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
Index: src/include/storage/predicate.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
--- a/src/include/storage/predicate.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/storage/predicate.h (date 1660678108718)
@@ -28,7 +28,7 @@
/* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS 16
+#define NUM_SERIAL_BUFFERS (16 << slru_buffers_size_scale)
/*
* A handle used for sharing SERIALIZABLEXACT objects between the participants
Index: src/backend/utils/misc/postgresql.conf.sample
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
--- a/src/backend/utils/misc/postgresql.conf.sample (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/postgresql.conf.sample (date 1660678108650)
@@ -155,6 +155,8 @@
# mmap
# (change requires restart)
#min_dynamic_shared_memory = 0MB # (change requires restart)
+#slru_buffers_size_scale = 2 # SLRU buffers size scale of power 2, range 0..7
+ # (change requires restart)
# - Disk -
Index: src/include/access/multixact.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
--- a/src/include/access/multixact.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/multixact.h (date 1660678108678)
@@ -30,8 +30,8 @@
#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
/* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS 8
-#define NUM_MULTIXACTMEMBER_BUFFERS 16
+#define NUM_MULTIXACTOFFSET_BUFFERS (16 << slru_buffers_size_scale)
+#define NUM_MULTIXACTMEMBER_BUFFERS (32 << slru_buffers_size_scale)
/*
* Possible multixact lock modes ("status"). The first four modes are for
Index: src/backend/utils/misc/guc.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
--- a/src/backend/utils/misc/guc.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/guc.c (date 1660678064216)
@@ -32,9 +32,11 @@
#endif
#include <unistd.h>
+#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/gin.h"
#include "access/rmgr.h"
+#include "access/slru.h"
#include "access/tableam.h"
#include "access/toast_compression.h"
#include "access/transam.h"
@@ -2375,6 +2377,16 @@
-1, -1, INT_MAX,
NULL, NULL, NULL
},
+
+ {
+ {"slru_buffers_size_scale", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("SLRU buffers size scale of power 2"),
+ NULL
+ },
+ &slru_buffers_size_scale,
+ 2, 0, 7,
+ NULL, NULL, NULL
+ },
{
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
Index: src/include/commands/async.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
--- a/src/include/commands/async.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/commands/async.h (date 1660678108702)
@@ -18,7 +18,7 @@
/*
* The number of SLRU page buffers we use for the notification queue.
*/
-#define NUM_NOTIFY_BUFFERS 8
+#define NUM_NOTIFY_BUFFERS (16 << slru_buffers_size_scale)
extern bool Trace_notify;
extern volatile sig_atomic_t notifyInterruptPending;
Index: src/include/access/slru.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
--- a/src/include/access/slru.h (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/slru.h (date 1660678108690)
@@ -134,6 +134,11 @@
* it's always the same, it doesn't need to be in shared memory.
*/
char Dir[64];
+
+ /*
+ * mask for slotno hash bank
+ */
+ Size bank_mask;
} SlruCtlData;
typedef SlruCtlData *SlruCtl;
Index: src/backend/access/transam/subtrans.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
--- a/src/backend/access/transam/subtrans.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/subtrans.c (date 1660678064032)
@@ -31,6 +31,7 @@
#include "access/slru.h"
#include "access/subtrans.h"
#include "access/transam.h"
+#include "miscadmin.h"
#include "pg_trace.h"
#include "utils/snapmgr.h"
Index: doc/src/sgml/config.sgml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
--- a/doc/src/sgml/config.sgml (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/doc/src/sgml/config.sgml (date 1660677867861)
@@ -1953,6 +1953,37 @@
</listitem>
</varlistentry>
+ <varlistentry id="guc-slru-buffers-size-scale" xreflabel="slru_buffers_size_scale">
+ <term><varname>slru_buffers_size_scale</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>slru_buffers_size_scale</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies power 2 scale for all SLRU shared memory buffers sizes. Buffers sizes depends on
+ both <literal>guc_slru_buffers_size_scale</literal> and <literal>shared_buffers</literal> params.
+ </para>
+ <para>
+ This affects on buffers in the list below (see also <xref linkend="pgdata-contents-table"/>):
+ <itemizedlist>
+ <listitem><para><literal>NUM_MULTIXACTOFFSET_BUFFERS = Min(32 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_MULTIXACTMEMBER_BUFFERS = Min(64 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_SUBTRANS_BUFFERS = Min(64 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_NOTIFY_BUFFERS = Min(32 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_SERIAL_BUFFERS = Min(32 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_CLOG_BUFFERS = Min(128 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ <listitem><para><literal>NUM_COMMIT_TS_BUFFERS = Min(128 << slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Value is in <literal>0..7</literal> bounds.
+ The default value is <literal>2</literal>.
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
<term><varname>max_stack_depth</varname> (<type>integer</type>)
<indexterm>
Index: src/backend/access/transam/commit_ts.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
--- a/src/backend/access/transam/commit_ts.c (revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/commit_ts.c (date 1660677867889)
@@ -73,6 +73,8 @@
#define TransactionIdToCTsEntry(xid) \
((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
+#define NUM_COMMIT_TS_BUFFERS (128 << slru_buffers_size_scale)
+
/*
* Link to shared-memory data structures for CommitTs control
*/
@@ -508,26 +510,13 @@
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
}
-/*
- * Number of shared CommitTS buffers.
- *
- * We use a very similar logic as for the number of CLOG buffers (except we
- * scale up twice as fast with shared buffers, and the maximum is twice as
- * high); see comments in CLOGShmemBuffers.
- */
-Size
-CommitTsShmemBuffers(void)
-{
- return Min(256, Max(4, NBuffers / 256));
-}
-
/*
* Shared memory sizing for CommitTs
*/
Size
CommitTsShmemSize(void)
{
- return SimpleLruShmemSize(CommitTsShmemBuffers(), 0) +
+ return SimpleLruShmemSize(NUM_COMMIT_TS_BUFFERS, 0) +
sizeof(CommitTimestampShared);
}
@@ -541,7 +530,7 @@
bool found;
CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
- SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,
+ SimpleLruInit(CommitTsCtl, "CommitTs", NUM_COMMIT_TS_BUFFERS, 0,
CommitTsSLRULock, "pg_commit_ts",
LWTRANCHE_COMMITTS_BUFFER,
SYNC_HANDLER_COMMIT_TS);