As part of the dynamic background worker mechanism, I introduced a flag set_latch_on_sigusr1. When set, a process sets its own latch any time it receives SIGUSR1. The original motivation for this was that the postmaster sends SIGUSR1 to a process to indicate the death of a background worker, but cannot directly set the process latch since we want to minimize the postmaster's contact surface with the main shared memory segment. The reason I introduced a new flag instead of just *always* setting the latch was that I thought somebody might complain about the cost of setting the latch every time. But now I think that's exactly what we should do: forget the flag, always set the latch. If you get a SIGUSR1, somebody is trying to get your attention. At worst, setting the latch will cause whatever latch-wait loop got interrupted to re-execute without making forward progress. Most of the time, though, you'll do something like CHECK_FOR_INTERRUPTS() in that loop, and you'll probably find that you've got one.
The code we're talking about here is in procsignal_sigusr1_handler. That function can potentially call HandleCatchupInterrupt, HandleNotifyInterrupt, HandleParallelMessageInterrupt, or RecoveryConflictInterrupt. And all of those functions set the process latch. So the only thing we're gaining by having set_latch_on_sigusr1 is the possibility that we might avoid setting the latch in response to either (1) a postmaster signal related to a background worker that we happen not to care about at the moment or (2) a totally spurious SIGUSR1. But neither of those events should happen very often, so what's the point? The cost of setting the latch when we don't need to is typically small, but NOT setting it when we should have done can lead to an indefinite hang. As it happens, the TupleQueueFunnelNext function I recently committed has such a hazard, which I failed to spot during review and testing. If people don't like this, I can instead cause that function to set the flag. But every place that sets the flag has to use a PG_TRY()/PG_CATCH() block to make sure the old value of the flag gets restored. I'm pretty sure that's going to burn more cycles than the flag can ever hope to save, not to mention the risk of bugs due to people forgetting to add necessary volatile qualifiers. We've already got four PG_TRY() blocks in the code to cater to this stupid flag, and if we keep it around I'm sure we'll accumulate at least a few more. Patch attached. Objections? Suggestions? Comments? -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 68c9505..c38d486 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -954,45 +954,31 @@ WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp) { BgwHandleStatus status; int rc; - bool save_set_latch_on_sigusr1; - save_set_latch_on_sigusr1 = set_latch_on_sigusr1; - set_latch_on_sigusr1 = true; - - PG_TRY(); + for (;;) { - for (;;) - { - pid_t pid; + pid_t pid; - CHECK_FOR_INTERRUPTS(); + CHECK_FOR_INTERRUPTS(); - status = GetBackgroundWorkerPid(handle, &pid); - if (status == BGWH_STARTED) - *pidp = pid; - if (status != BGWH_NOT_YET_STARTED) - break; - - rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_POSTMASTER_DEATH, 0); + status = GetBackgroundWorkerPid(handle, &pid); + if (status == BGWH_STARTED) + *pidp = pid; + if (status != BGWH_NOT_YET_STARTED) + break; - if (rc & WL_POSTMASTER_DEATH) - { - status = BGWH_POSTMASTER_DIED; - break; - } + rc = WaitLatch(MyLatch, + WL_LATCH_SET | WL_POSTMASTER_DEATH, 0); - ResetLatch(MyLatch); + if (rc & WL_POSTMASTER_DEATH) + { + status = BGWH_POSTMASTER_DIED; + break; } + + ResetLatch(MyLatch); } - PG_CATCH(); - { - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; - PG_RE_THROW(); - } - PG_END_TRY(); - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; return status; } @@ -1009,40 +995,26 @@ WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle) { BgwHandleStatus status; int rc; - bool save_set_latch_on_sigusr1; - - save_set_latch_on_sigusr1 = set_latch_on_sigusr1; - set_latch_on_sigusr1 = true; - PG_TRY(); + for (;;) { - for (;;) - { - pid_t pid; + pid_t pid; - CHECK_FOR_INTERRUPTS(); + CHECK_FOR_INTERRUPTS(); - status = GetBackgroundWorkerPid(handle, &pid); - if (status == BGWH_STOPPED) - return status; + status = GetBackgroundWorkerPid(handle, &pid); + if (status == BGWH_STOPPED) + return status; - rc = WaitLatch(&MyProc->procLatch, - WL_LATCH_SET | WL_POSTMASTER_DEATH, 0); 
+ rc = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_POSTMASTER_DEATH, 0); - if (rc & WL_POSTMASTER_DEATH) - return BGWH_POSTMASTER_DIED; + if (rc & WL_POSTMASTER_DEATH) + return BGWH_POSTMASTER_DIED; - ResetLatch(&MyProc->procLatch); - } - } - PG_CATCH(); - { - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; - PG_RE_THROW(); + ResetLatch(&MyProc->procLatch); } - PG_END_TRY(); - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; return status; } diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 0abde43..acfb4e9 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -59,14 +59,6 @@ typedef struct */ #define NumProcSignalSlots (MaxBackends + NUM_AUXPROCTYPES) -/* - * If this flag is set, the process latch will be set whenever SIGUSR1 - * is received. This is useful when waiting for a signal from the postmaster. - * Spurious wakeups must be expected. Make sure that the flag is cleared - * in the error path. 
- */ -bool set_latch_on_sigusr1; - static ProcSignalSlot *ProcSignalSlots = NULL; static volatile ProcSignalSlot *MyProcSignalSlot = NULL; @@ -296,8 +288,7 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); - if (set_latch_on_sigusr1) - SetLatch(MyLatch); + SetLatch(MyLatch); latch_sigusr1_handler(); diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c index c78f165..80956ce 100644 --- a/src/backend/storage/ipc/shm_mq.c +++ b/src/backend/storage/ipc/shm_mq.c @@ -962,63 +962,49 @@ static bool shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr, BackgroundWorkerHandle *handle) { - bool save_set_latch_on_sigusr1; bool result = false; - save_set_latch_on_sigusr1 = set_latch_on_sigusr1; - if (handle != NULL) - set_latch_on_sigusr1 = true; - - PG_TRY(); + for (;;) { - for (;;) + BgwHandleStatus status; + pid_t pid; + bool detached; + + /* Acquire the lock just long enough to check the pointer. */ + SpinLockAcquire(&mq->mq_mutex); + detached = mq->mq_detached; + result = (*ptr != NULL); + SpinLockRelease(&mq->mq_mutex); + + /* Fail if detached; else succeed if initialized. */ + if (detached) + { + result = false; + break; + } + if (result) + break; + + if (handle != NULL) { - BgwHandleStatus status; - pid_t pid; - bool detached; - - /* Acquire the lock just long enough to check the pointer. */ - SpinLockAcquire(&mq->mq_mutex); - detached = mq->mq_detached; - result = (*ptr != NULL); - SpinLockRelease(&mq->mq_mutex); - - /* Fail if detached; else succeed if initialized. */ - if (detached) + /* Check for unexpected worker death. */ + status = GetBackgroundWorkerPid(handle, &pid); + if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) { result = false; break; } - if (result) - break; - - if (handle != NULL) - { - /* Check for unexpected worker death. 
*/ - status = GetBackgroundWorkerPid(handle, &pid); - if (status != BGWH_STARTED && status != BGWH_NOT_YET_STARTED) - { - result = false; - break; - } - } + } - /* Wait to be signalled. */ - WaitLatch(MyLatch, WL_LATCH_SET, 0); + /* Wait to be signalled. */ + WaitLatch(MyLatch, WL_LATCH_SET, 0); - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); - /* Reset the latch so we don't spin. */ - ResetLatch(MyLatch); - } - } - PG_CATCH(); - { - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; - PG_RE_THROW(); + /* Reset the latch so we don't spin. */ + ResetLatch(MyLatch); } - PG_END_TRY(); return result; } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index aee13ae..d30fe35 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -2825,9 +2825,7 @@ RecoveryConflictInterrupt(ProcSignalReason reason) /* * Set the process latch. This function essentially emulates signal * handlers like die() and StatementCancelHandler() and it seems prudent - * to behave similarly as they do. Alternatively all plain backend code - * waiting on that latch, expecting to get interrupted by query cancels et - * al., would also need to set set_latch_on_sigusr1. + * to behave similarly as they do. 
*/ SetLatch(MyLatch); diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index af1a0cd..50ffb13 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -55,6 +55,5 @@ extern int SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); -extern PGDLLIMPORT bool set_latch_on_sigusr1; #endif /* PROCSIGNAL_H */ diff --git a/src/test/modules/test_shm_mq/setup.c b/src/test/modules/test_shm_mq/setup.c index 7f2f5fd..dedd72f 100644 --- a/src/test/modules/test_shm_mq/setup.c +++ b/src/test/modules/test_shm_mq/setup.c @@ -255,51 +255,38 @@ static void wait_for_workers_to_become_ready(worker_state *wstate, volatile test_shm_mq_header *hdr) { - bool save_set_latch_on_sigusr1; bool result = false; - save_set_latch_on_sigusr1 = set_latch_on_sigusr1; - set_latch_on_sigusr1 = true; - - PG_TRY(); + for (;;) { - for (;;) + int workers_ready; + + /* If all the workers are ready, we have succeeded. */ + SpinLockAcquire(&hdr->mutex); + workers_ready = hdr->workers_ready; + SpinLockRelease(&hdr->mutex); + if (workers_ready >= wstate->nworkers) { - int workers_ready; - - /* If all the workers are ready, we have succeeded. */ - SpinLockAcquire(&hdr->mutex); - workers_ready = hdr->workers_ready; - SpinLockRelease(&hdr->mutex); - if (workers_ready >= wstate->nworkers) - { - result = true; - break; - } - - /* If any workers (or the postmaster) have died, we have failed. */ - if (!check_worker_status(wstate)) - { - result = false; - break; - } - - /* Wait to be signalled. */ - WaitLatch(MyLatch, WL_LATCH_SET, 0); - - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); - - /* Reset the latch so we don't spin. */ - ResetLatch(MyLatch); + result = true; + break; } + + /* If any workers (or the postmaster) have died, we have failed. */ + if (!check_worker_status(wstate)) + { + result = false; + break; + } + + /* Wait to be signalled. 
*/ + WaitLatch(MyLatch, WL_LATCH_SET, 0); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); + + /* Reset the latch so we don't spin. */ + ResetLatch(MyLatch); } - PG_CATCH(); - { - set_latch_on_sigusr1 = save_set_latch_on_sigusr1; - PG_RE_THROW(); - } - PG_END_TRY(); if (!result) ereport(ERROR,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers