Hi Hackers,
This patch addresses severe LWLock contention observed on high-core-count
systems, where hundreds of processors concurrently access frequently shared
locks.
Specifically for ProcArrayLock (exhibiting 93.5% shared-mode acquires), we
implement a new ReadBiasedLWLock mechanism to eliminate the atomic operation
bottleneck.
Key aspects:
1. Problem: Previous optimizations[1] left LWLockAttemptLock/Release
consuming ~25% of total CPU cycles on 384-vCPU systems due to contention on
a single lock-state cache line. Shared lock attempts showed 37x higher
cumulative latency than exclusive mode for ProcArrayLock.
2. Solution: ReadBiasedLWLock partitions lock state across 16 cache lines
(READ_BIASED_LOCK_STATE_COUNT):
- Readers acquire/release only their designated LWLock (indexed by
pid % 16) using a single atomic operation
- Writers pay higher cost by acquiring all 16 sub-locks exclusively
- Maintains LWLock's "acquiring process must release" semantics
3. Performance: HammerDB/TPCC shows a 35.3% improvement in NOPM (new orders
per minute) over the baseline
- Lock acquisition CPU cycles reduced from 16.7% to 7.4%
- Lock release cycles reduced from 7.9% to 2.2%
4. Implementation:
- Core infrastructure for ReadBiasedLWLock
- ProcArrayLock converted as proof-of-concept
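- Maintains full LWLock API compatibility (the existing LWLock functions are
unchanged; converted locks are accessed through the parallel
ReadBiasedLWLock* functions)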
Known considerations:
- Increased writer acquisition cost (acceptable given rarity of exclusive
acquisitions for biased locks like ProcArrayLock)
- Memory overhead: 16x size increase per converted lock
- Currently validated for ProcArrayLock; other heavily-shared locks may be
candidates after further analysis
This is a preliminary version for community feedback. We're actively:
1. Refining the implementation details
2. Expanding test coverage
3. Investigating additional lock candidates
4. Exploring fast-path optimizations for writers
Test results, profiling data, and design details can be shared upon request.
We appreciate all comments and suggestions for improvement.
[1] Optimize shared LWLock acquisition for high-core-count systems:
https://www.postgresql.org/message-id/flat/73d53acf-4f66-41df-b438-5c2e6115d4de%40intel.com
Regards,
Zhiguo
From b571df421a30a37d3d693822754f3beaa495b798 Mon Sep 17 00:00:00 2001
From: Zhiguo Zhou <zhiguo.z...@intel.com>
Date: Thu, 26 Jun 2025 13:11:48 +0800
Subject: [PATCH v0] Optimize lock acquisition/release with ReadBiasedLWLock
- Introduced the ReadBiasedLWLock structure and related API for
read-biased locking, including initialization, acquire, conditional
acquire, release, and held-by-me checks.
- Allocated and initialized a global MainReadBiasedLWLockArray for
read-biased locks.
- Replaced all uses of ProcArrayLock with ReadBiasedLWLock API in
backend code, including xlog, vacuum, logical decoding, replication,
standby, and procarray.
- Updated lock acquisition and release logic to use the new read-biased
lock for ProcArrayLock, including conditional and assertion checks.
---
src/backend/access/transam/xlog.c | 4 +-
src/backend/commands/indexcmds.c | 4 +-
src/backend/commands/vacuum.c | 4 +-
src/backend/replication/logical/logical.c | 8 +-
src/backend/replication/logical/slotsync.c | 4 +-
src/backend/replication/logical/snapbuild.c | 4 +-
src/backend/replication/slot.c | 4 +-
src/backend/replication/walsender.c | 4 +-
src/backend/storage/ipc/procarray.c | 176 +++++++++---------
src/backend/storage/ipc/standby.c | 4 +-
.../storage/lmgr/generate-lwlocknames.pl | 1 +
src/backend/storage/lmgr/lock.c | 4 +-
src/backend/storage/lmgr/lwlock.c | 130 +++++++++++++
src/backend/storage/lmgr/proc.c | 4 +-
.../utils/activity/wait_event_names.txt | 2 +-
src/include/storage/lwlock.h | 15 ++
src/include/storage/lwlocklist.h | 2 +-
17 files changed, 260 insertions(+), 114 deletions(-)
diff --git a/src/backend/access/transam/xlog.c
b/src/backend/access/transam/xlog.c
index 1914859b2ee..f41083ea0f8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6268,10 +6268,10 @@ StartupXLOG(void)
XLogCtl->lastSegSwitchLSN = EndOfLog;
/* also initialize latestCompletedXid, to nextXid - 1 */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
TransamVariables->latestCompletedXid = TransamVariables->nextXid;
FullTransactionIdRetreat(&TransamVariables->latestCompletedXid);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/*
* Start up subtrans, if not already done for hot standby. (commit
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d962fe392cd..4cf17ae17cb 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -4585,8 +4585,8 @@ set_indexsafe_procflags(void)
Assert(MyProc->xid == InvalidTransactionId &&
MyProc->xmin == InvalidTransactionId);
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_SAFE_IC;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..8f12cac64f9 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2038,12 +2038,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams
*params,
* set PROC_IN_VACUUM *before* taking our own snapshot, so that
our
* xmin doesn't become visible ahead of setting the flag.)
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_VACUUM;
if (params->is_wraparound)
MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
ProcGlobal->statusFlags[MyProc->pgxactoff] =
MyProc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
diff --git a/src/backend/replication/logical/logical.c
b/src/backend/replication/logical/logical.c
index 1d56d0c4ef3..1ab19d84073 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -195,10 +195,10 @@ StartupDecodingContext(List *output_plugin_options,
*/
if (!IsTransactionOrTransactionBlock())
{
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_LOGICAL_DECODING;
ProcGlobal->statusFlags[MyProc->pgxactoff] =
MyProc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
ctx->slot = slot;
@@ -420,7 +420,7 @@ CreateInitDecodingContext(const char *plugin,
*
* ----
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
@@ -433,7 +433,7 @@ CreateInitDecodingContext(const char *plugin,
ReplicationSlotsComputeRequiredXmin(true);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
ReplicationSlotMarkDirty();
ReplicationSlotSave();
diff --git a/src/backend/replication/logical/slotsync.c
b/src/backend/replication/logical/slotsync.c
index 656e66e0ae0..51b84372a4d 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -776,14 +776,14 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid
remote_dbid)
reserve_wal_for_local_slot(remote_slot->restart_lsn);
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
xmin_horizon = GetOldestSafeDecodingTransactionId(true);
SpinLockAcquire(&slot->mutex);
slot->effective_catalog_xmin = xmin_horizon;
slot->data.catalog_xmin = xmin_horizon;
SpinLockRelease(&slot->mutex);
ReplicationSlotsComputeRequiredXmin(true);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
update_and_persist_local_synced_slot(remote_slot, remote_dbid);
diff --git a/src/backend/replication/logical/snapbuild.c
b/src/backend/replication/logical/snapbuild.c
index 0d7bddbe4ed..cd89a67f3ea 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -475,9 +475,9 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
* horizon would have bad consequences, therefore always double-check
that
* the horizon is enforced.
*/
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
safeXid = GetOldestSafeDecodingTransactionId(false);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
if (TransactionIdFollows(safeXid, snap->xmin))
elog(ERROR, "cannot build an initial slot snapshot as oldest
safe xid %u follows snapshot's xmin %u",
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 600b87fa9cb..11f2b338069 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -747,10 +747,10 @@ ReplicationSlotRelease(void)
MyReplicationSlot = NULL;
/* might not have been set when we've been a plain slot */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags &= ~PROC_IN_LOGICAL_DECODING;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
if (am_walsender)
{
diff --git a/src/backend/replication/walsender.c
b/src/backend/replication/walsender.c
index 9fa8beb6103..7f1bd5bc98d 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -309,10 +309,10 @@ InitWalSender(void)
if (MyDatabaseId == InvalidOid)
{
Assert(MyProc->xmin == InvalidTransactionId);
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS;
ProcGlobal->statusFlags[MyProc->pgxactoff] =
MyProc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/* Initialize empty timestamp buffer for lag tracking. */
diff --git a/src/backend/storage/ipc/procarray.c
b/src/backend/storage/ipc/procarray.c
index e5b945a9ee3..328c0a025e0 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -473,7 +473,7 @@ ProcArrayAdd(PGPROC *proc)
int movecount;
/* See ProcGlobal comment explaining why both locks are held */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
if (arrayP->numProcs >= arrayP->maxProcs)
@@ -548,7 +548,7 @@ ProcArrayAdd(PGPROC *proc)
* wait for XidGenLock while holding ProcArrayLock.
*/
LWLockRelease(XidGenLock);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -575,7 +575,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
#endif
/* See ProcGlobal comment explaining why both locks are held */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
myoff = proc->pgxactoff;
@@ -646,7 +646,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
* wait for XidGenLock while holding ProcArrayLock.
*/
LWLockRelease(XidGenLock);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
@@ -681,10 +681,10 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId
latestXid)
* and release the lock. If not, use group XID clearing to
improve
* efficiency.
*/
- if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
+ if (ReadBiasedLWLockConditionalAcquire(ProcArrayLock,
LW_EXCLUSIVE))
{
ProcArrayEndTransactionInternal(proc, latestXid);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
else
ProcArrayGroupClearXid(proc, latestXid);
@@ -712,12 +712,12 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId
latestXid)
/* avoid unnecessarily dirtying shared cachelines */
if (proc->statusFlags & PROC_VACUUM_STATE_MASK)
{
- Assert(!LWLockHeldByMe(ProcArrayLock));
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ Assert(!ReadBiasedLWLockHeldByMe(ProcArrayLock));
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
Assert(proc->statusFlags ==
ProcGlobal->statusFlags[proc->pgxactoff]);
proc->statusFlags &= ~PROC_VACUUM_STATE_MASK;
ProcGlobal->statusFlags[proc->pgxactoff] =
proc->statusFlags;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
}
}
@@ -736,7 +736,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId
latestXid)
* Note: we need exclusive lock here because we're going to change other
* processes' PGPROC entries.
*/
- Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
+ Assert(ReadBiasedLWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
@@ -844,7 +844,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId
latestXid)
}
/* We are the leader. Acquire the lock on behalf of everyone. */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* Now that we've got the lock, clear the list of processes waiting for
@@ -869,7 +869,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId
latestXid)
}
/* We're done with the lock now. */
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/*
* Now that we've released the lock, go back and wake everybody up. We
@@ -922,7 +922,7 @@ ProcArrayClearTransaction(PGPROC *proc)
* bottleneck it may also be worth considering to combine this with the
* subsequent ProcArrayRemove()
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
pgxactoff = proc->pgxactoff;
@@ -956,7 +956,7 @@ ProcArrayClearTransaction(PGPROC *proc)
proc->subxidStatus.overflowed = false;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -970,7 +970,7 @@ MaintainLatestCompletedXid(TransactionId latestXid)
Assert(FullTransactionIdIsValid(cur_latest));
Assert(!RecoveryInProgress());
- Assert(LWLockHeldByMe(ProcArrayLock));
+ Assert(ReadBiasedLWLockHeldByMe(ProcArrayLock));
if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest),
latestXid))
{
@@ -992,7 +992,7 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid)
FullTransactionId rel;
Assert(AmStartupProcess() || !IsUnderPostmaster);
- Assert(LWLockHeldByMe(ProcArrayLock));
+ Assert(ReadBiasedLWLockHeldByMe(ProcArrayLock));
/*
* Need a FullTransactionId to compare latestXid with. Can't rely on
@@ -1144,7 +1144,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
/*
* Nobody else is running yet, but take locks anyhow
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* KnownAssignedXids is sorted so we cannot just add the xids, we have
to
@@ -1188,7 +1188,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
{
if (procArray->numKnownAssignedXids != 0)
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
elog(ERROR, "KnownAssignedXids is not empty");
}
@@ -1297,7 +1297,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
* nobody can see it yet.
*/
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
KnownAssignedXidsDisplay(DEBUG3);
if (standbyState == STANDBY_SNAPSHOT_READY)
@@ -1356,7 +1356,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
/*
* Uses same locking as transaction commit
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* Remove subxids from known-assigned-xacts.
@@ -1369,7 +1369,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
procArray->lastOverflowedXid = max_xid;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -1468,7 +1468,7 @@ TransactionIdIsInProgress(TransactionId xid)
other_xids = ProcGlobal->xids;
other_subxidstates = ProcGlobal->subxidStates;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
/*
* Now that we have the lock, we can check latestCompletedXid; if the
@@ -1478,7 +1478,7 @@ TransactionIdIsInProgress(TransactionId xid)
XidFromFullTransactionId(TransamVariables->latestCompletedXid);
if (TransactionIdPrecedes(latestCompletedXid, xid))
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
xc_by_latest_xid_inc();
return true;
}
@@ -1508,7 +1508,7 @@ TransactionIdIsInProgress(TransactionId xid)
*/
if (TransactionIdEquals(pxid, xid))
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
xc_by_main_xid_inc();
return true;
}
@@ -1534,7 +1534,7 @@ TransactionIdIsInProgress(TransactionId xid)
if (TransactionIdEquals(cxid, xid))
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
xc_by_child_xid_inc();
return true;
}
@@ -1562,7 +1562,7 @@ TransactionIdIsInProgress(TransactionId xid)
if (KnownAssignedXidExists(xid))
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
xc_by_known_assigned_inc();
return true;
}
@@ -1578,7 +1578,7 @@ TransactionIdIsInProgress(TransactionId xid)
nxids = KnownAssignedXidsGet(xids, xid);
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/*
* If none of the relevant caches overflowed, we know the Xid is not
@@ -1645,7 +1645,7 @@ TransactionIdIsActive(TransactionId xid)
if (TransactionIdPrecedes(xid, RecentXmin))
return false;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (i = 0; i < arrayP->numProcs; i++)
{
@@ -1669,7 +1669,7 @@ TransactionIdIsActive(TransactionId xid)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -1742,7 +1742,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
/* inferred after ProcArrayLock is released */
h->catalog_oldest_nonremovable = InvalidTransactionId;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
h->latest_completed = TransamVariables->latestCompletedXid;
@@ -1875,7 +1875,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* No other information from shared state is needed, release the lock
* immediately. The rest of the computations can be done without a lock.
*/
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
if (in_recovery)
{
@@ -2096,7 +2096,7 @@ GetSnapshotDataReuse(Snapshot snapshot)
{
uint64 curXactCompletionCount;
- Assert(LWLockHeldByMe(ProcArrayLock));
+ Assert(ReadBiasedLWLockHeldByMe(ProcArrayLock));
if (unlikely(snapshot->snapXactCompletionCount == 0))
return false;
@@ -2228,11 +2228,11 @@ GetSnapshotData(Snapshot snapshot)
* It is sufficient to get shared lock on ProcArrayLock, even if we are
* going to set MyProc->xmin.
*/
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
if (GetSnapshotDataReuse(snapshot))
{
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return snapshot;
}
@@ -2413,7 +2413,7 @@ GetSnapshotData(Snapshot snapshot)
if (!TransactionIdIsValid(MyProc->xmin))
MyProc->xmin = TransactionXmin = xmin;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/* maintain state for GlobalVis* */
{
@@ -2541,7 +2541,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
return false;
/* Get lock so source xact can't end while we're doing this */
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
/*
* Find the PGPROC entry of the source transaction. (This could use
@@ -2594,7 +2594,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
break;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -2624,7 +2624,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC
*proc)
/*
* Get an exclusive lock so that we can copy statusFlags from source
proc.
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* Be certain that the referenced PGPROC has an advertised xmin which is
@@ -2649,7 +2649,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC
*proc)
result = true;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -2736,7 +2736,7 @@ GetRunningTransactionData(void)
* Ensure that no xids enter or leave the procarray while we obtain
* snapshot.
*/
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
LWLockAcquire(XidGenLock, LW_SHARED);
latestCompletedXid =
@@ -2900,7 +2900,7 @@ GetOldestActiveTransactionId(void)
/*
* Spin over procArray collecting all xids and subxids.
*/
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
TransactionId xid;
@@ -2920,7 +2920,7 @@ GetOldestActiveTransactionId(void)
* smaller than oldestRunningXid
*/
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return oldestRunningXid;
}
@@ -2949,7 +2949,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
int index;
bool recovery_in_progress = RecoveryInProgress();
- Assert(LWLockHeldByMe(ProcArrayLock));
+ Assert(ReadBiasedLWLockHeldByMe(ProcArrayLock));
/*
* Acquire XidGenLock, so no transactions can acquire an xid while we're
@@ -3053,7 +3053,7 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
vxids = (VirtualTransactionId *)
palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3070,7 +3070,7 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
*nvxids = count;
return vxids;
@@ -3094,7 +3094,7 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids,
int nvxids, int type)
Assert(type != 0);
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3122,7 +3122,7 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids,
int nvxids, int type)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -3172,7 +3172,7 @@ ProcNumberGetTransactionIds(ProcNumber procNumber,
TransactionId *xid,
proc = GetPGProcByNumber(procNumber);
/* Need to lock out additions/removals of backends */
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
if (proc->pid != 0)
{
@@ -3182,7 +3182,7 @@ ProcNumberGetTransactionIds(ProcNumber procNumber,
TransactionId *xid,
*overflowed = proc->subxidStatus.overflowed;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -3200,11 +3200,11 @@ BackendPidGetProc(int pid)
if (pid == 0) /* never match dummy PGPROCs */
return NULL;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
result = BackendPidGetProcWithLock(pid);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -3263,7 +3263,7 @@ BackendXidGetPid(TransactionId xid)
if (xid == InvalidTransactionId) /* never match invalid xid */
return 0;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3277,7 +3277,7 @@ BackendXidGetPid(TransactionId xid)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return result;
}
@@ -3334,7 +3334,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool
excludeXmin0,
vxids = (VirtualTransactionId *)
palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3372,7 +3372,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool
excludeXmin0,
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
*nvxids = count;
return vxids;
@@ -3436,7 +3436,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid
dbOid)
errmsg("out of memory")));
}
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3473,7 +3473,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid
dbOid)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/* add the terminator */
vxids[count].procNumber = INVALID_PROC_NUMBER;
@@ -3501,7 +3501,7 @@ SignalVirtualTransaction(VirtualTransactionId vxid,
ProcSignalReason sigmode,
int index;
pid_t pid = 0;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3528,7 +3528,7 @@ SignalVirtualTransaction(VirtualTransactionId vxid,
ProcSignalReason sigmode,
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return pid;
}
@@ -3602,7 +3602,7 @@ CountDBBackends(Oid databaseid)
int count = 0;
int index;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3616,7 +3616,7 @@ CountDBBackends(Oid databaseid)
count++;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return count;
}
@@ -3631,7 +3631,7 @@ CountDBConnections(Oid databaseid)
int count = 0;
int index;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3647,7 +3647,7 @@ CountDBConnections(Oid databaseid)
count++;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return count;
}
@@ -3662,7 +3662,7 @@ CancelDBBackends(Oid databaseid, ProcSignalReason
sigmode, bool conflictPending)
int index;
/* tell all backends to die */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3689,7 +3689,7 @@ CancelDBBackends(Oid databaseid, ProcSignalReason
sigmode, bool conflictPending)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -3703,7 +3703,7 @@ CountUserBackends(Oid roleid)
int count = 0;
int index;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3718,7 +3718,7 @@ CountUserBackends(Oid roleid)
count++;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return count;
}
@@ -3766,7 +3766,7 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int
*nprepared)
*nbackends = *nprepared = 0;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
{
@@ -3792,7 +3792,7 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int
*nprepared)
}
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
if (!found)
return false; /* no conflicting backends, so
done */
@@ -3832,7 +3832,7 @@ TerminateOtherDBBackends(Oid databaseId)
int nprepared = 0;
int i;
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
for (i = 0; i < procArray->numProcs; i++)
{
@@ -3850,7 +3850,7 @@ TerminateOtherDBBackends(Oid databaseId)
nprepared++;
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
if (nprepared > 0)
ereport(ERROR,
@@ -3943,16 +3943,16 @@ void
ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
bool
already_locked)
{
- Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
+ Assert(!already_locked || ReadBiasedLWLockHeldByMe(ProcArrayLock));
if (!already_locked)
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
procArray->replication_slot_xmin = xmin;
procArray->replication_slot_catalog_xmin = catalog_xmin;
if (!already_locked)
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
elog(DEBUG1, "xmin required by slots: data %u, catalog %u",
xmin, catalog_xmin);
@@ -3968,7 +3968,7 @@ void
ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
TransactionId
*catalog_xmin)
{
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
if (xmin != NULL)
*xmin = procArray->replication_slot_xmin;
@@ -3976,7 +3976,7 @@ ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
if (catalog_xmin != NULL)
*catalog_xmin = procArray->replication_slot_catalog_xmin;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -4010,7 +4010,7 @@ XidCacheRemoveRunningXids(TransactionId xid,
* relevant fields of MyProc/ProcGlobal->xids[]. But we do have to be
* careful about our own writes being well ordered.
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
@@ -4067,7 +4067,7 @@ XidCacheRemoveRunningXids(TransactionId xid,
/* ... and xactCompletionCount */
TransamVariables->xactCompletionCount++;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
#ifdef XIDCACHE_DEBUG
@@ -4477,7 +4477,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
int nsubxids,
/*
* Uses same locking as transaction commit
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
@@ -4487,7 +4487,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
int nsubxids,
/* ... and xactCompletionCount */
TransamVariables->xactCompletionCount++;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -4499,7 +4499,7 @@ ExpireAllKnownAssignedTransactionIds(void)
{
FullTransactionId latestXid;
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
KnownAssignedXidsRemovePreceding(InvalidTransactionId);
/* Reset latestCompletedXid to nextXid - 1 */
@@ -4520,7 +4520,7 @@ ExpireAllKnownAssignedTransactionIds(void)
* ExpireOldKnownAssignedTransactionIds() do.
*/
procArray->lastOverflowedXid = InvalidTransactionId;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -4533,7 +4533,7 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
{
TransactionId latestXid;
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/* As in ProcArrayEndTransaction, advance latestCompletedXid */
latestXid = xid;
@@ -4552,7 +4552,7 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
if (TransactionIdPrecedes(procArray->lastOverflowedXid, xid))
procArray->lastOverflowedXid = InvalidTransactionId;
KnownAssignedXidsRemovePreceding(xid);
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
/*
@@ -4739,7 +4739,7 @@ KnownAssignedXidsCompress(KAXCompressReason reason, bool
haveLock)
/* Need to compress, so get the lock if we don't have it. */
if (!haveLock)
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
/*
* We compress the array by reading the valid values from tail to head,
@@ -4761,7 +4761,7 @@ KnownAssignedXidsCompress(KAXCompressReason reason, bool
haveLock)
pArray->headKnownAssignedXids = compress_index;
if (!haveLock)
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/* Update timestamp for maintenance. No need to hold lock for this. */
lastCompressTs = GetCurrentTimestamp();
@@ -5257,11 +5257,11 @@ KnownAssignedXidsReset(void)
{
ProcArrayStruct *pArray = procArray;
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
pArray->numKnownAssignedXids = 0;
pArray->tailKnownAssignedXids = 0;
pArray->headKnownAssignedXids = 0;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 7fa8d9247e0..dd35a821a13 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1326,13 +1326,13 @@ LogStandbySnapshot(void)
* only a shared lock.
*/
if (wal_level < WAL_LEVEL_LOGICAL)
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
recptr = LogCurrentRunningXacts(running);
/* Release lock if we kept it longer ... */
if (wal_level >= WAL_LEVEL_LOGICAL)
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/* GetRunningTransactionData() acquired XidGenLock, we must release it */
LWLockRelease(XidGenLock);
diff --git a/src/backend/storage/lmgr/generate-lwlocknames.pl b/src/backend/storage/lmgr/generate-lwlocknames.pl
index 4441b7cba0c..c60af382950 100644
--- a/src/backend/storage/lmgr/generate-lwlocknames.pl
+++ b/src/backend/storage/lmgr/generate-lwlocknames.pl
@@ -119,6 +119,7 @@ die
print $h "\n";
printf $h "#define NUM_INDIVIDUAL_LWLOCKS %s\n", $lastlockidx + 1;
+print $h "#define ProcArrayLock (MainReadBiasedLWLockArray)\n";
close $h;
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..4245bfdf009 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -3994,7 +3994,7 @@ GetBlockerStatusData(int blocked_pid)
* does have the advantage that we're guaranteed to return a
* self-consistent instantaneous state.
*/
- LWLockAcquire(ProcArrayLock, LW_SHARED);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_SHARED);
proc = BackendPidGetProcWithLock(blocked_pid);
@@ -4036,7 +4036,7 @@ GetBlockerStatusData(int blocked_pid)
Assert(data->nprocs <= data->maxprocs);
}
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
return data;
}
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 4c29016ce35..fe929ba5751 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -199,6 +199,7 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_XACT_SLRU] = "XactSLRU",
[LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
[LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
+ [LWTRANCHE_READ_BIASED] = "ReadBiased",
};
StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
@@ -219,6 +220,7 @@ static int LWLockTrancheNamesAllocated = 0;
* where we have special measures to pass it down).
*/
LWLockPadded *MainLWLockArray = NULL;
+ReadBiasedLWLock *MainReadBiasedLWLockArray = NULL;
/*
* We use this structure to keep track of locked LWLocks for release
@@ -464,6 +466,8 @@ LWLockShmemSize(void)
/* Space for the LWLock array. */
size = mul_size(numLocks, sizeof(LWLockPadded));
+ size = add_size(size, mul_size(1, sizeof(ReadBiasedLWLock)));
+
/* Space for dynamic allocation counter, plus room for alignment. */
size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
@@ -489,7 +493,9 @@ CreateLWLocks(void)
Size spaceLocks = LWLockShmemSize();
int *LWLockCounter;
char *ptr;
+ int numLocks = NUM_FIXED_LWLOCKS;
+ numLocks += NumLWLocksForNamedTranches();
/* Allocate space */
ptr = (char *) ShmemAlloc(spaceLocks);
@@ -508,8 +514,12 @@ CreateLWLocks(void)
LWLockCounter = (int *) ((char *) MainLWLockArray -
sizeof(int));
*LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
+ ptr += mul_size(numLocks, sizeof(LWLockPadded));
+
+ MainReadBiasedLWLockArray = (ReadBiasedLWLock *) ptr;
/* Initialize all LWLocks */
InitializeLWLocks();
+ ReadBiasedLWLockInitialize(MainReadBiasedLWLockArray);
}
/* Register named extension LWLock tranches in the current process. */
@@ -2076,3 +2086,123 @@ LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
}
return false;
}
+
+/*
+ * ReadBiasedLWLockInitialize - set up every sub-lock of a ReadBiasedLWLock.
+ *
+ * Each of the READ_BIASED_LOCK_STATE_COUNT member LWLocks is initialized
+ * into the LWTRANCHE_READ_BIASED tranche.
+ */
+void
+ReadBiasedLWLockInitialize(ReadBiasedLWLock *lock)
+{
+	int			slot = 0;
+
+	while (slot < READ_BIASED_LOCK_STATE_COUNT)
+	{
+		LWLockInitialize(&lock->lwlocks[slot].lock, LWTRANCHE_READ_BIASED);
+		slot++;
+	}
+}
+
+/*
+ * ReadBiasedLWLockAcquire - acquire a ReadBiasedLWLock in the given mode.
+ *
+ * LW_SHARED takes only the sub-lock selected by MyProc->pid modulo
+ * READ_BIASED_LOCK_STATE_COUNT.  LW_EXCLUSIVE takes every sub-lock, always
+ * in ascending index order; using one consistent order keeps two exclusive
+ * lockers from deadlocking against each other.
+ *
+ * The return value is the logical AND of the LWLockAcquire() results for
+ * the sub-locks that were taken.
+ *
+ * PANICs if the calling process has no PGPROC (MyProc == NULL), since the
+ * sub-lock index is derived from it.
+ */
+bool
+ReadBiasedLWLockAcquire(ReadBiasedLWLock *lock, LWLockMode mode)
+{
+	PGPROC	   *proc = MyProc;
+	bool		result = true;
+
+	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
+
+	if (proc == NULL)
+		elog(PANIC, "cannot acquire ReadBiasedLWLock without a PGPROC structure");
+
+	if (mode == LW_SHARED)
+	{
+		/* Readers touch only their own designated sub-lock. */
+		int			slot = proc->pid % READ_BIASED_LOCK_STATE_COUNT;
+
+		return LWLockAcquire(&lock->lwlocks[slot].lock, mode);
+	}
+
+	/* Writers must own every sub-lock to exclude all readers. */
+	for (int i = 0; i < READ_BIASED_LOCK_STATE_COUNT; i++)
+		result = LWLockAcquire(&lock->lwlocks[i].lock, mode) && result;
+
+	return result;
+}
+
+/*
+ * ReadBiasedLWLockConditionalAcquire - acquire a ReadBiasedLWLock only if
+ * that is possible without LWLockConditionalAcquire() failing.
+ *
+ * LW_SHARED tries only the sub-lock selected by MyProc->pid modulo
+ * READ_BIASED_LOCK_STATE_COUNT.  LW_EXCLUSIVE tries every sub-lock in
+ * ascending index order; if any attempt fails, the sub-locks already taken
+ * are released again in reverse order, so the caller holds nothing.
+ *
+ * Returns true if the lock was acquired, false otherwise.
+ *
+ * PANICs if the calling process has no PGPROC (MyProc == NULL).
+ */
+bool
+ReadBiasedLWLockConditionalAcquire(ReadBiasedLWLock *lock, LWLockMode mode)
+{
+	PGPROC	   *proc = MyProc;
+	int			nacquired;
+
+	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
+
+	if (proc == NULL)
+		elog(PANIC, "cannot acquire ReadBiasedLWLock without a PGPROC structure");
+
+	if (mode == LW_SHARED)
+	{
+		/* Readers touch only their own designated sub-lock. */
+		int			slot = proc->pid % READ_BIASED_LOCK_STATE_COUNT;
+
+		return LWLockConditionalAcquire(&lock->lwlocks[slot].lock, mode);
+	}
+
+	/* Try to take all sub-locks; stop at the first one that is busy. */
+	for (nacquired = 0; nacquired < READ_BIASED_LOCK_STATE_COUNT; nacquired++)
+	{
+		if (!LWLockConditionalAcquire(&lock->lwlocks[nacquired].lock, mode))
+			break;
+	}
+
+	if (nacquired == READ_BIASED_LOCK_STATE_COUNT)
+		return true;
+
+	/* Back out: drop what we got, newest first. */
+	while (--nacquired >= 0)
+		LWLockRelease(&lock->lwlocks[nacquired].lock);
+
+	return false;
+}
+
+/*
+ * ReadBiasedLWLockRelease - release a ReadBiasedLWLock held by this process.
+ *
+ * The mode being released is not passed in; it is inferred from the state
+ * word of the caller's own sub-lock (index MyProc->pid modulo
+ * READ_BIASED_LOCK_STATE_COUNT).  That sub-lock is held by the caller in
+ * either mode, so if its exclusive bit is set the caller is the exclusive
+ * holder and all sub-locks are released in descending index order;
+ * otherwise only the caller's own sub-lock is released.  This relies on
+ * the caller actually holding the lock.
+ *
+ * PANICs if the calling process has no PGPROC (MyProc == NULL).
+ */
+void
+ReadBiasedLWLockRelease(ReadBiasedLWLock *lock)
+{
+	PGPROC	   *proc = MyProc;
+	LWLock	   *mylock;
+	uint32		lockstate;
+
+	if (proc == NULL)
+		elog(PANIC, "cannot release ReadBiasedLWLock without a PGPROC structure");
+
+	mylock = &lock->lwlocks[proc->pid % READ_BIASED_LOCK_STATE_COUNT].lock;
+	lockstate = pg_atomic_read_u32(&mylock->state);
+
+	/* Parenthesized: "!=" binds tighter than "&". */
+	Assert((lockstate & LW_LOCK_MASK) != 0);
+
+	if ((lockstate & LW_VAL_EXCLUSIVE) == 0)
+	{
+		/* Shared holder: only our own sub-lock was taken. */
+		LWLockRelease(mylock);
+		return;
+	}
+
+	/* Exclusive holder: drop every sub-lock, in reverse acquisition order. */
+	for (int i = READ_BIASED_LOCK_STATE_COUNT - 1; i >= 0; i--)
+		LWLockRelease(&lock->lwlocks[i].lock);
+}
+
+/*
+ * ReadBiasedLWLockHeldByMe - does this process hold the lock in any mode?
+ *
+ * Only the caller's own sub-lock (index MyProc->pid modulo
+ * READ_BIASED_LOCK_STATE_COUNT) is consulted: a shared holder holds exactly
+ * that sub-lock, and an exclusive holder holds all of them, including it.
+ *
+ * PANICs if the calling process has no PGPROC (MyProc == NULL).
+ */
+bool
+ReadBiasedLWLockHeldByMe(ReadBiasedLWLock *lock)
+{
+	PGPROC	   *proc = MyProc;
+
+	if (proc == NULL)
+		elog(PANIC, "cannot check ReadBiasedLWLock without a PGPROC structure");
+
+	return LWLockHeldByMe(&lock->lwlocks[proc->pid % READ_BIASED_LOCK_STATE_COUNT].lock);
+}
+
+/*
+ * ReadBiasedLWLockHeldByMeInMode - does this process hold the lock in the
+ * given mode?
+ *
+ * Only the caller's own sub-lock (index MyProc->pid modulo
+ * READ_BIASED_LOCK_STATE_COUNT) is consulted; the answer is whatever
+ * LWLockHeldByMeInMode() reports for that sub-lock.
+ *
+ * PANICs if the calling process has no PGPROC (MyProc == NULL).
+ */
+bool
+ReadBiasedLWLockHeldByMeInMode(ReadBiasedLWLock *lock, LWLockMode mode)
+{
+	PGPROC	   *proc = MyProc;
+
+	if (proc == NULL)
+		elog(PANIC, "cannot check ReadBiasedLWLock without a PGPROC structure");
+
+	return LWLockHeldByMeInMode(&lock->lwlocks[proc->pid % READ_BIASED_LOCK_STATE_COUNT].lock,
+								mode);
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e9ef0fbfe32..94c8e7d651a 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1489,11 +1489,11 @@ ProcSleep(LOCALLOCK *locallock)
* that could happen in any case unless we were to do kill() with
* the lock held, which is much more undesirable.
*/
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ ReadBiasedLWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
statusFlags =
ProcGlobal->statusFlags[autovac->pgxactoff];
lockmethod_copy = lock->tag.locktag_lockmethodid;
locktag_copy = lock->tag;
- LWLockRelease(ProcArrayLock);
+ ReadBiasedLWLockRelease(ProcArrayLock);
/*
* Only do it if the worker is not working to protect against Xid
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 5d9e04d6823..00db07a027e 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -313,7 +313,7 @@ Section: ClassName - WaitEventLWLock
ShmemIndex "Waiting to find or allocate space in shared memory."
OidGen "Waiting to allocate a new OID."
XidGen "Waiting to allocate a new transaction ID."
-ProcArray "Waiting to access the shared per-process data structures (typically, to get a snapshot or report a session's transaction ID)."
+# ProcArray "Waiting to access the shared per-process data structures (typically, to get a snapshot or report a session's transaction ID)."
SInvalRead "Waiting to retrieve messages from the shared catalog invalidation queue."
SInvalWrite "Waiting to add a message to the shared catalog invalidation queue."
WALWrite "Waiting for WAL buffers to be written to disk."
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 08a72569ae5..57168d34f61 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -71,7 +71,14 @@ typedef union LWLockPadded
char pad[LWLOCK_PADDED_SIZE];
} LWLockPadded;
+#define READ_BIASED_LOCK_STATE_COUNT 16 /* number of padded sub-locks making up one ReadBiasedLWLock */
+typedef struct ReadBiasedLWLock
+{
+ LWLockPadded lwlocks[READ_BIASED_LOCK_STATE_COUNT]; /* shared lockers use slot (pid % count); exclusive lockers take every slot */
+} ReadBiasedLWLock;
+
extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
+extern PGDLLIMPORT ReadBiasedLWLock *MainReadBiasedLWLockArray;
/* struct for storing named tranche information */
typedef struct NamedLWLockTranche
@@ -172,6 +179,13 @@ extern int LWLockNewTrancheId(void);
extern void LWLockRegisterTranche(int tranche_id, const char *tranche_name);
extern void LWLockInitialize(LWLock *lock, int tranche_id);
+/* ReadBiasedLWLock API: a lock made of READ_BIASED_LOCK_STATE_COUNT LWLocks */
+extern void ReadBiasedLWLockInitialize(ReadBiasedLWLock *lock);
+extern bool ReadBiasedLWLockAcquire(ReadBiasedLWLock *lock, LWLockMode mode);
+extern bool ReadBiasedLWLockConditionalAcquire(ReadBiasedLWLock *lock,
+											   LWLockMode mode);
+extern void ReadBiasedLWLockRelease(ReadBiasedLWLock *lock);
+extern bool ReadBiasedLWLockHeldByMe(ReadBiasedLWLock *lock);
+extern bool ReadBiasedLWLockHeldByMeInMode(ReadBiasedLWLock *lock,
+										   LWLockMode mode);
+
/*
* Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also,
* we reserve additional tranche IDs for builtin tranches not included in
@@ -221,6 +235,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_XACT_SLRU,
LWTRANCHE_PARALLEL_VACUUM_DSA,
LWTRANCHE_AIO_URING_COMPLETION,
+ LWTRANCHE_READ_BIASED,
LWTRANCHE_FIRST_USER_DEFINED,
} BuiltinTrancheIds;
diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h
index a9681738146..ecdbb7deb41 100644
--- a/src/include/storage/lwlocklist.h
+++ b/src/include/storage/lwlocklist.h
@@ -34,7 +34,7 @@
PG_LWLOCK(1, ShmemIndex)
PG_LWLOCK(2, OidGen)
PG_LWLOCK(3, XidGen)
-PG_LWLOCK(4, ProcArray)
+/* 4 was ProcArray */
PG_LWLOCK(5, SInvalRead)
PG_LWLOCK(6, SInvalWrite)
/* 7 was WALBufMapping */
--
2.43.0