Patch to reduce the contention on SInvalLock, as discussed here:
http://archives.postgresql.org/pgsql-hackers/2007-09/msg00501.php
and
http://archives.postgresql.org/pgsql-performance/2008-01/msg00023.php
For discussion.
--
Simon Riggs
2ndQuadrant http://www.2ndQuadrant.com
Index: src/backend/storage/ipc/sinval.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/ipc/sinval.c,v
retrieving revision 1.83
diff -c -r1.83 sinval.c
*** src/backend/storage/ipc/sinval.c 1 Jan 2008 19:45:51 -0000 1.83
--- src/backend/storage/ipc/sinval.c 25 Jan 2008 23:19:24 -0000
***************
*** 113,119 ****
{
SharedInvalidationMessage data;
int getResult;
! bool gotMessage = false;
for (;;)
{
--- 113,120 ----
{
SharedInvalidationMessage data;
int getResult;
! bool delMessages = false;
! int numMsgs = 0;
for (;;)
{
***************
*** 137,145 ****
getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data);
LWLockRelease(SInvalLock);
! if (getResult == 0)
break; /* nothing more to do */
! if (getResult < 0)
{
/* got a reset message */
elog(DEBUG4, "cache state reset");
--- 138,147 ----
getResult = SIGetDataEntry(shmInvalBuffer, MyBackendId, &data);
LWLockRelease(SInvalLock);
! if (getResult == SI_GET_NO_MSG)
break; /* nothing more to do */
!
! if (getResult == SI_GET_NEED_RESET)
{
/* got a reset message */
elog(DEBUG4, "cache state reset");
***************
*** 147,160 ****
}
else
{
/* got a normal data message */
invalFunction(&data);
}
! gotMessage = true;
}
! /* If we got any messages, try to release dead messages */
! if (gotMessage)
{
LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
SIDelExpiredDataEntries(shmInvalBuffer);
--- 149,183 ----
}
else
{
+ Assert(getResult == SI_GET_MSG || getResult == SI_GET_MIN_MSG);
+
/* got a normal data message */
invalFunction(&data);
+
+ /*
+ * If getResult is SI_GET_MIN_MSG, then we have just read the
+ * lowest-numbered message. In that case it's a fair bet that we
+ * will finish last reading our messages, so by the time we do
+ * that we should be able to grab ExclusiveLock and delete
+ * messages fairly easily. Doing this might mean the backends
+ * reading the lowest messages all go away before pruning the
+ * queue, which could lead to queue growth, so we also prune
+ * the queue at insert time, when the queue is long.
+ */
+ if (getResult == SI_GET_MIN_MSG)
+ delMessages = true;
}
! numMsgs++;
}
! /*
! * If we read the lowest-numbered message and we've just worked
! * through a long queue, assume it was because of a pm signal, so
! * attempt to prune expired entries. Don't worry about this normally; just let
! * the queue grow until it gets pruned during SendSharedInvalidMessage()
! * so the invalidator pays the cost for dirtying the cache.
! */
! if (delMessages && (numMsgs > (MAXNUMMESSAGES / 4)))
{
LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
SIDelExpiredDataEntries(shmInvalBuffer);
Index: src/backend/storage/ipc/sinvaladt.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/ipc/sinvaladt.c,v
retrieving revision 1.66
diff -c -r1.66 sinvaladt.c
*** src/backend/storage/ipc/sinvaladt.c 1 Jan 2008 19:45:51 -0000 1.66
--- src/backend/storage/ipc/sinvaladt.c 25 Jan 2008 23:18:07 -0000
***************
*** 78,83 ****
--- 78,85 ----
segP->maxBackends = MaxBackends;
segP->freeBackends = MaxBackends;
+ segP->allow_pm_signals = true;
+
/* The buffer[] array is initially all unused, so we need not fill it */
/* Mark all backends inactive, and initialize nextLXID */
***************
*** 233,238 ****
--- 235,256 ----
}
/*
+ * Try to prevent table overflow. When the table is >50% full,
+ * prune expired entries every MAXNUMMESSAGES / 16 (~6%) message insertions.
+ * We do this to avoid hitting the next higher limit where we wake everybody.
+ */
+ if ((numMsgs > (MAXNUMMESSAGES / 2)) && (segP->maxMsgNum % (MAXNUMMESSAGES / 16) == 0))
+ SIDelExpiredDataEntries(segP);
+
+ /*
+ * Re-arm the ability to send pm signals once the queue length has
+ * dropped below 30% of capacity since the last time we sent a signal.
+ */
+ if (!segP->allow_pm_signals &&
+ numMsgs < (MAXNUMMESSAGES * 30 / 100))
+ segP->allow_pm_signals = true;
+
+ /*
* Try to prevent table overflow. When the table is 70% full send a
* WAKEN_CHILDREN request to the postmaster. The postmaster will send a
* SIGUSR1 signal to all the backends, which will cause sinval.c to read
***************
*** 242,252 ****
* queries, but if a backend is sitting idle then it won't be starting
* transactions and so won't be reading SI entries.
*/
! if (numMsgs == (MAXNUMMESSAGES * 70 / 100) &&
IsUnderPostmaster)
{
elog(DEBUG4, "SI table is 70%% full, signaling postmaster");
SendPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN);
}
/*
--- 260,285 ----
* queries, but if a backend is sitting idle then it won't be starting
* transactions and so won't be reading SI entries.
*/
! if (segP->allow_pm_signals &&
! numMsgs == (MAXNUMMESSAGES * 70 / 100) &&
IsUnderPostmaster)
{
elog(DEBUG4, "SI table is 70%% full, signaling postmaster");
SendPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN);
+
+ /*
+ * Disarm the ability to send pm signals until we
+ * have reduced the queue size sufficiently to re-arm it.
+ * One signal is sufficient to reduce the queue length to
+ * a minimum, though that will take some time.
+ *
+ * While that is happening, we want to avoid re-triggering
+ * the pm signal, to reduce contention as much as possible.
+ * Otherwise a steady flow of incoming messages may cause
+ * the queue to fluctuate around the 70th percentile,
+ * causing repeated retriggering of pm signals.
+ */
+ segP->allow_pm_signals = false;
}
/*
***************
*** 272,277 ****
--- 305,312 ----
segP->minMsgNum = 0;
segP->maxMsgNum = 0;
+ segP->allow_pm_signals = true;
+
for (i = 0; i < segP->lastBackend; i++)
{
if (segP->procState[i].nextMsgNum >= 0) /* active backend? */
***************
*** 287,296 ****
* get next SI message for specified backend, if there is one
*
* Possible return values:
! * 0: no SI message available
! * 1: next SI message has been extracted into *data
! * (there may be more messages available after this one!)
! * -1: SI reset message extracted
*
* NB: this can run in parallel with other instances of SIGetDataEntry
* executing on behalf of other backends. See comments in sinval.c in
--- 322,333 ----
* get next SI message for specified backend, if there is one
*
* Possible return values:
! * SI_GET_NO_MSG: no SI message available
! * SI_GET_MSG: next SI message has been extracted into *data
! * there may be more messages available after this one!
! * SI_GET_MIN_MSG: next SI message extracted, but we are the ones
! * holding up the queue (possibly multiple backends)
! * SI_GET_NEED_RESET: SI reset message extracted
*
* NB: this can run in parallel with other instances of SIGetDataEntry
* executing on behalf of other backends. See comments in sinval.c in
***************
*** 301,306 ****
--- 338,344 ----
SharedInvalidationMessage *data)
{
ProcState *stateP = &segP->procState[backendId - 1];
+ bool min_message = false;
if (stateP->resetState)
{
***************
*** 310,320 ****
*/
stateP->resetState = false;
stateP->nextMsgNum = segP->maxMsgNum;
! return -1;
}
if (stateP->nextMsgNum >= segP->maxMsgNum)
! return 0; /* nothing to read */
/*
* Retrieve message and advance my counter.
--- 348,361 ----
*/
stateP->resetState = false;
stateP->nextMsgNum = segP->maxMsgNum;
! return SI_GET_NEED_RESET;
}
if (stateP->nextMsgNum >= segP->maxMsgNum)
! return SI_GET_NO_MSG; /* nothing to read */
!
! if (stateP->nextMsgNum == segP->minMsgNum)
! min_message = true;
/*
* Retrieve message and advance my counter.
***************
*** 327,333 ****
* delete it here. SIDelExpiredDataEntries() should be called to remove
* dead messages.
*/
! return 1; /* got a message */
}
/*
--- 368,374 ----
* delete it here. SIDelExpiredDataEntries() should be called to remove
* dead messages.
*/
! return (min_message ? SI_GET_MIN_MSG : SI_GET_MSG); /* got a message */
}
/*
Index: src/include/storage/sinvaladt.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/sinvaladt.h,v
retrieving revision 1.45
diff -c -r1.45 sinvaladt.h
*** src/include/storage/sinvaladt.h 1 Jan 2008 19:45:59 -0000 1.45
--- src/include/storage/sinvaladt.h 25 Jan 2008 23:22:00 -0000
***************
*** 85,90 ****
--- 85,92 ----
int maxBackends; /* size of procState array */
int freeBackends; /* number of empty procState slots */
+ bool allow_pm_signals;
+
/*
* Next LocalTransactionId to use for each idle backend slot. We keep
* this here because it is indexed by BackendId and it is convenient to
***************
*** 119,124 ****
--- 121,134 ----
extern bool SIInsertDataEntry(SISeg *segP, SharedInvalidationMessage *data);
extern int SIGetDataEntry(SISeg *segP, int backendId,
SharedInvalidationMessage *data);
+
+ /* Return values from SIGetDataEntry() */
+ #define SI_GET_NO_MSG 0
+ #define SI_GET_MSG 1
+ #define SI_GET_MIN_MSG 2
+ #define SI_GET_NEED_RESET 3
+
+
extern void SIDelExpiredDataEntries(SISeg *segP);
extern LocalTransactionId GetNextLocalTransactionId(void);
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend