sorry, forgot to add a patch to the letter
чт, 20 мая 2021 г. в 13:52, Кирилл Решке <[email protected]>:
> Hi,
> I recently ran into a problem in one of our production postgresql cluster.
> I had noticed lock contention on procarray lock on standby, which causes
> WAL replay lag growth.
> To reproduce this, you can do the following:
>
> 1) set max_connections to big number, like 100000
> 2) begin a transaction on primary
> 3) start pgbench workload on primary and on standby
>
> After a while it will be possible to see KnownAssignedXidsGetAndSetXmin in
> perf top consuming abount 75 % of CPU.
>
> %%
> PerfTop: 1060 irqs/sec kernel: 0.0% exact: 0.0% [4000Hz cycles:u],
> (target_pid: 273361)
>
> -------------------------------------------------------------------------------
>
> 73.92% postgres [.] KnownAssignedXidsGetAndSetXmin
> 1.40% postgres [.] base_yyparse
> 0.96% postgres [.] LWLockAttemptLock
> 0.84% postgres [.] hash_search_with_hash_value
> 0.84% postgres [.] AtEOXact_GUC
> 0.72% postgres [.] ResetAllOptions
> 0.70% postgres [.] AllocSetAlloc
> 0.60% postgres [.] _bt_compare
> 0.55% postgres [.] core_yylex
> 0.42% libc-2.27.so [.] __strlen_avx2
> 0.23% postgres [.] LWLockRelease
> 0.19% postgres [.] MemoryContextAllocZeroAligned
> 0.18% postgres [.] expression_tree_walker.part.3
> 0.18% libc-2.27.so [.] __memmove_avx_unaligned_erms
> 0.17% postgres [.] PostgresMain
> 0.17% postgres [.] palloc
> 0.17% libc-2.27.so [.] _int_malloc
> 0.17% postgres [.] set_config_option
> 0.17% postgres [.] ScanKeywordLookup
> 0.16% postgres [.] _bt_checkpage
>
> %%
>
>
> We have tried to fix this by using BitMapSet instead of boolean array
> KnownAssignedXidsValid, but this does not help too much.
>
> Instead, using a doubly linked list helps a little more, we got +1000 tps
> on pgbench workload with patched postgresql. The general idea of this patch
> is that, instead of memorizing which elements in KnownAssignedXids are
> valid, lets maintain a doubly linked list of them. This solution will work
> in exactly the same way, except that taking a snapshot on the replica is
> now O(running transaction) instead of O(head - tail) which is significantly
> faster under some workloads. The patch helps to reduce CPU usage of
> KnownAssignedXidsGetAndSetXmin to ~48% instead of ~74%, but does eliminate
> it from perf top.
>
> The problem is better reproduced on PG13 since PG14 has some snapshot
> optimization.
>
> Thanks!
>
> Best regards, reshke
>
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 5ff8cab394..165cf56ea9 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -255,9 +255,18 @@ static PGPROC *allProcs;
* Bookkeeping for tracking emulated transactions in recovery
*/
static TransactionId *KnownAssignedXids;
-static bool *KnownAssignedXidsValid;
+
+typedef struct {
+ int prv;
+ int nxt;
+} KnownAssignedXidValidNode;
+
+// Doubly linked list of valid xids
+static KnownAssignedXidValidNode *KnownAssignedXidsValidDLL;
static TransactionId latestObservedXid = InvalidTransactionId;
+
+
/*
* If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
* the highest xid that might still be running that we don't have in
@@ -348,6 +357,9 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
/*
* Report shared-memory space needed by CreateSharedProcArray.
*/
+
+static KnownAssignedXidValidNode KAX_E_INVALID;
+
Size
ProcArrayShmemSize(void)
{
@@ -380,13 +392,20 @@ ProcArrayShmemSize(void)
size = add_size(size,
mul_size(sizeof(TransactionId),
TOTAL_MAX_CACHED_SUBXIDS));
- size = add_size(size,
- mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
+ size = add_size(size,
+ mul_size(sizeof(KnownAssignedXidValidNode), TOTAL_MAX_CACHED_SUBXIDS));
}
+ KAX_E_INVALID.prv = -1;
+ KAX_E_INVALID.nxt = TOTAL_MAX_CACHED_SUBXIDS;
+
return size;
}
+#define KAX_DLL_INDEX_VALID(i) (-1 < i && i < TOTAL_MAX_CACHED_SUBXIDS)
+#define KAX_DLL_ENTRY_INVALID(i) (-1 == KnownAssignedXidsValidDLL[i].prv && KnownAssignedXidsValidDLL[i].nxt == TOTAL_MAX_CACHED_SUBXIDS)
+
+
/*
* Initialize the shared PGPROC array during postmaster startup.
*/
@@ -431,10 +450,15 @@ CreateSharedProcArray(void)
mul_size(sizeof(TransactionId),
TOTAL_MAX_CACHED_SUBXIDS),
&found);
- KnownAssignedXidsValid = (bool *)
- ShmemInitStruct("KnownAssignedXidsValid",
- mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
+
+ KnownAssignedXidsValidDLL = (KnownAssignedXidValidNode*)
+ ShmemInitStruct("KnownAssignedXidsValidDLL",
+ mul_size(sizeof(KnownAssignedXidValidNode), TOTAL_MAX_CACHED_SUBXIDS),
&found);
+
+ for (int i = 0; i < TOTAL_MAX_CACHED_SUBXIDS; ++ i) {
+ KnownAssignedXidsValidDLL[i] = KAX_E_INVALID;
+ }
}
}
@@ -4545,15 +4569,17 @@ KnownAssignedXidsCompress(bool force)
* re-aligning data to 0th element.
*/
compress_index = 0;
- for (i = tail; i < head; i++)
+ int prv = -1;
+ for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
{
- if (KnownAssignedXidsValid[i])
- {
- KnownAssignedXids[compress_index] = KnownAssignedXids[i];
- KnownAssignedXidsValid[compress_index] = true;
- compress_index++;
- }
+ KnownAssignedXids[compress_index] = KnownAssignedXids[i];
+ KnownAssignedXidsValidDLL[compress_index].prv = prv;
+ KnownAssignedXidsValidDLL[compress_index].nxt = compress_index + 1;
+
+ compress_index++;
}
+ // fix last entry
+ KnownAssignedXidsValidDLL[compress_index - 1].nxt = TOTAL_MAX_CACHED_SUBXIDS;
pArray->tailKnownAssignedXids = 0;
pArray->headKnownAssignedXids = compress_index;
@@ -4652,7 +4678,8 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
for (i = 0; i < nxids; i++)
{
KnownAssignedXids[head] = next_xid;
- KnownAssignedXidsValid[head] = true;
+ KnownAssignedXidsValidDLL[head].nxt = 1 + head;
+ KnownAssignedXidsValidDLL[head + 1].prv = head;
TransactionIdAdvance(next_xid);
head++;
}
@@ -4741,12 +4768,23 @@ KnownAssignedXidsSearch(TransactionId xid, bool remove)
if (result_index < 0)
return false; /* not in array */
- if (!KnownAssignedXidsValid[result_index])
+ if (KAX_DLL_ENTRY_INVALID(result_index))
return false; /* in array, but invalid */
if (remove)
{
- KnownAssignedXidsValid[result_index] = false;
+ int prv = KnownAssignedXidsValidDLL[result_index].prv;
+ int nxt = KnownAssignedXidsValidDLL[result_index].nxt;
+
+ KnownAssignedXidsValidDLL[result_index] = KAX_E_INVALID;
+
+ if (KAX_DLL_INDEX_VALID(prv)) {
+ KnownAssignedXidsValidDLL[prv].nxt = nxt;
+ }
+
+ if (KAX_DLL_INDEX_VALID(nxt)) {
+ KnownAssignedXidsValidDLL[nxt].prv = prv;
+ }
pArray->numKnownAssignedXids--;
Assert(pArray->numKnownAssignedXids >= 0);
@@ -4757,9 +4795,7 @@ KnownAssignedXidsSearch(TransactionId xid, bool remove)
*/
if (result_index == tail)
{
- tail++;
- while (tail < head && !KnownAssignedXidsValid[tail])
- tail++;
+ tail = KnownAssignedXidsValidDLL[tail].nxt;
if (tail >= head)
{
/* Array is empty, so we can reset both pointers */
@@ -4868,21 +4904,23 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
tail = pArray->tailKnownAssignedXids;
head = pArray->headKnownAssignedXids;
- for (i = tail; i < head; i++)
+ for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
{
- if (KnownAssignedXidsValid[i])
- {
- TransactionId knownXid = KnownAssignedXids[i];
+ TransactionId knownXid = KnownAssignedXids[i];
- if (TransactionIdFollowsOrEquals(knownXid, removeXid))
- break;
+ if (TransactionIdFollowsOrEquals(knownXid, removeXid))
+ break;
- if (!StandbyTransactionIdIsPrepared(knownXid))
- {
- KnownAssignedXidsValid[i] = false;
- count++;
- }
- }
+ if (!StandbyTransactionIdIsPrepared(knownXid))
+ {
+ int nxt = KnownAssignedXidsValidDLL[i].nxt;
+ KnownAssignedXidsValidDLL[i] = KAX_E_INVALID;
+
+ if (KAX_DLL_INDEX_VALID(nxt)) {
+ KnownAssignedXidsValidDLL[i].prv = -1;
+ }
+ count++;
+ }
}
pArray->numKnownAssignedXids -= count;
@@ -4891,21 +4929,20 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
/*
* Advance the tail pointer if we've marked the tail item invalid.
*/
- for (i = tail; i < head; i++)
- {
- if (KnownAssignedXidsValid[i])
- break;
- }
- if (i >= head)
- {
- /* Array is empty, so we can reset both pointers */
- pArray->headKnownAssignedXids = 0;
- pArray->tailKnownAssignedXids = 0;
- }
- else
- {
- pArray->tailKnownAssignedXids = i;
- }
+ if (KAX_DLL_ENTRY_INVALID(tail)) {
+ tail = KnownAssignedXidsValidDLL[tail].nxt;
+
+ if (!KAX_DLL_INDEX_VALID(tail))
+ {
+ /* Array is empty, so we can reset both pointers */
+ pArray->headKnownAssignedXids = 0;
+ pArray->tailKnownAssignedXids = 0;
+ }
+ else
+ {
+ pArray->tailKnownAssignedXids = tail;
+ }
+ }
/* Opportunistically compress the array */
KnownAssignedXidsCompress(false);
@@ -4957,32 +4994,29 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
head = procArray->headKnownAssignedXids;
SpinLockRelease(&procArray->known_assigned_xids_lck);
- for (i = tail; i < head; i++)
+ for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
{
/* Skip any gaps in the array */
- if (KnownAssignedXidsValid[i])
- {
- TransactionId knownXid = KnownAssignedXids[i];
-
- /*
- * Update xmin if required. Only the first XID need be checked,
- * since the array is sorted.
- */
- if (count == 0 &&
- TransactionIdPrecedes(knownXid, *xmin))
- *xmin = knownXid;
-
- /*
- * Filter out anything >= xmax, again relying on sorted property
- * of array.
- */
- if (TransactionIdIsValid(xmax) &&
- TransactionIdFollowsOrEquals(knownXid, xmax))
- break;
-
- /* Add knownXid into output array */
- xarray[count++] = knownXid;
- }
+ TransactionId knownXid = KnownAssignedXids[i];
+
+ /*
+ * Update xmin if required. Only the first XID need be checked,
+ * since the array is sorted.
+ */
+ if (count == 0 &&
+ TransactionIdPrecedes(knownXid, *xmin))
+ *xmin = knownXid;
+
+ /*
+ * Filter out anything >= xmax, again relying on sorted property
+ * of array.
+ */
+ if (TransactionIdIsValid(xmax) &&
+ TransactionIdFollowsOrEquals(knownXid, xmax))
+ break;
+
+ /* Add knownXid into output array */
+ xarray[count++] = knownXid;
}
return count;
@@ -5007,12 +5041,12 @@ KnownAssignedXidsGetOldestXmin(void)
head = procArray->headKnownAssignedXids;
SpinLockRelease(&procArray->known_assigned_xids_lck);
- for (i = tail; i < head; i++)
- {
- /* Skip any gaps in the array */
- if (KnownAssignedXidsValid[i])
- return KnownAssignedXids[i];
- }
+ i = KnownAssignedXidsValidDLL[i].nxt;
+
+ /* Skip any gaps in the array */
+ if (i < head) {
+ return KnownAssignedXids[i];
+ }
return InvalidTransactionId;
}
@@ -5042,13 +5076,10 @@ KnownAssignedXidsDisplay(int trace_level)
initStringInfo(&buf);
- for (i = tail; i < head; i++)
+ for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
{
- if (KnownAssignedXidsValid[i])
- {
- nxids++;
- appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
- }
+ nxids++;
+ appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
}
elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",