On 4 January 2012 07:24, Heikki Linnakangas <heikki.linnakan...@enterprisedb.com> wrote: > I think SetBufferCommitInfoNeedsSave() needs the same treatment as > MarkBufferDirty(). And it would probably be good to only set the latch if > the buffer wasn't dirty already. Setting a latch that's already set is fast, > but surely it's even faster to not even try.
That seems reasonable. Revised patch is attached. > Yeah, I'd like to see a micro-benchmark of a worst-case scenario. I'm a bit > worried about the impact on systems with a lot of CPUs. If you have a lot of > CPUs writing to the same cache line that contains the latch's flag, that > might get expensive. Also reasonable, but I don't think that I'll get around to it until after the final commitfest deadline. -- Peter Geoghegan http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training and Services
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c new file mode 100644 index e3ae92d..0fcaf01 *** a/src/backend/bootstrap/bootstrap.c --- b/src/backend/bootstrap/bootstrap.c *************** AuxiliaryProcessMain(int argc, char *arg *** 416,421 **** --- 416,422 ---- case BgWriterProcess: /* don't set signals, bgwriter has its own agenda */ + ProcGlobal->bgwriterLatch = &MyProc->procLatch; /* Expose */ BackgroundWriterMain(); proc_exit(1); /* should never return */ diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c new file mode 100644 index fc1a579..a5248ba *** a/src/backend/port/unix_latch.c --- b/src/backend/port/unix_latch.c *************** *** 50,55 **** --- 50,56 ---- #include "miscadmin.h" #include "postmaster/postmaster.h" #include "storage/latch.h" + #include "storage/proc.h" #include "storage/shmem.h" /* Are we currently in WaitLatch? The signal handler would like to know. */ diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c new file mode 100644 index 1f8d2d6..59e5180 *** a/src/backend/postmaster/bgwriter.c --- b/src/backend/postmaster/bgwriter.c *************** *** 51,56 **** --- 51,58 ---- #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" + #include "storage/proc.h" + #include "storage/procsignal.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" *************** static volatile sig_atomic_t shutdown_re *** 73,78 **** --- 75,82 ---- /* * Private state */ + #define BGWRITER_HIBERNATE_MS 10000 + static bool am_bg_writer = false; /* Prototypes for private functions */ *************** BackgroundWriterMain(void) *** 123,129 **** pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); ! pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ pqsignal(SIGUSR2, SIG_IGN); /* --- 127,133 ---- pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); ! pqsignal(SIGUSR1, procsignal_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* *************** BackgroundWriterMain(void) *** 238,278 **** for (;;) { /* ! * Emergency bailout if postmaster has died. This is to avoid the ! * necessity for manual cleanup of all postmaster children. */ ! if (!PostmasterIsAlive()) ! exit(1); ! ! if (got_SIGHUP) { ! got_SIGHUP = false; ! ProcessConfigFile(PGC_SIGHUP); ! /* update global shmem state for sync rep */ } ! if (shutdown_requested) { /* ! * From here on, elog(ERROR) should end with exit(1), not send ! * control back to the sigsetjmp block above */ ! ExitOnAnyError = true; ! /* Normal exit from the bgwriter is here */ ! proc_exit(0); /* done */ ! } ! /* ! * Do one cycle of dirty-buffer writing. ! */ ! BgBufferSync(); ! /* Nap for the configured time. */ ! BgWriterNap(); } } /* * BgWriterNap -- Nap for the configured time or until a signal is received. */ static void BgWriterNap(void) --- 242,337 ---- for (;;) { /* ! * Do one cycle of dirty-buffer writing, potentially hibernating if ! * there have been no buffers to write. */ ! if (!BgBufferSync()) { ! /* Clock sweep was not "lapped" - nap for the configured time. */ ! BgWriterNap(); } ! else { + int res = 0; /* ! * Initiate hibernation by "arming" the process latch. This usage ! * differs from the standard pattern for latches outlined in ! * latch.h, where the latch is generally reset immediately after ! * WaitLatch returns, in the next iteration of an infinite loop. ! * ! * It should only be possible to *really* set the latch from client ! * backends while the bgwriter is idle, and not during productive ! * cycles where buffers are written, or shortly thereafter. It's ! * important that the SetLatch() call within the buffer manager ! * usually inexpensively returns having found that the latch is ! * already set. For write-heavy workloads, it's quite possible that ! * those calls will never actually get the opportunity to really ! * set the latch. ! * ! * By only giving backends the opportunity to really set the ! * latch at this point via ResetLatch(), we avoid the overhead of ! * sending a signal perhaps as much as once per bgwriter cycle, or ! * maybe even more frequently than that due to the asynchronous ! * nature of signals. */ ! ResetLatch(&MyProc->procLatch); ! /* ! * Another nap and sync, in case some buffers were dirtied since ! * the immediately prior BgBufferSync() call and we quite ! * naturally didn't hear about it because the SetLatch calls ! * found the latch already set (i.e. it was "disarmed"). ! */ ! BgWriterNap(); ! /* ! * When syncing, take the opportunity to check one last time if ! * we're still "lapping" the clock sweep, and only proceed if we are. ! */ ! if (BgBufferSync()) ! /* ! * Set a timeout so that we can still update BufferAlloc stats ! * reasonably promptly. ! */ ! res = WaitLatch(&MyProc->procLatch, ! WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, ! BGWRITER_HIBERNATE_MS); ! else ! /* ! * Changed our mind, set own proc latch, thereby often avoiding ! * signaling from a backend. ! */ ! SetLatch(&MyProc->procLatch); ! ! /* ! * Ensure that no less than BgWriterDelay ms has passed between ! * BgBufferSyncs - WaitLatch() might have returned instantaneously. ! * ! * Napping here rather than immediately prior to the WaitLatch() ! * call ensures that a sudden burst of dirtied buffers won't result ! * in the initial background writer cycle ineffectually cleaning ! * only a few pages dirtied earlier in the burst, rather than ! * cleaning a number that is somewhere closer to optimal for the ! * cycle, as modeled by the background writing strategy. We'll also ! * want to nap if we changed our minds, before starting a new cycle. ! * ! * We wake on a buffer being dirtied. It's possible that some ! * useful work will become available for the bgwriter to do without ! * a buffer actually being dirtied, as when a dirty buffer's usage ! * count is decremented to zero or it's unpinned. This corner case ! * is judged as too marginal to justify adding additional SetLatch() ! * calls in very hot code paths, cheap though those calls may be. ! */ ! if (!(res & WL_TIMEOUT)) ! BgWriterNap(); ! } } } /* * BgWriterNap -- Nap for the configured time or until a signal is received. + * + * Respond to a signal handler having set a flag for us. */ static void BgWriterNap(void) *************** BgWriterNap(void) *** 285,302 **** pgstat_send_bgwriter(); /* ! * Nap for the configured time, or sleep for 10 seconds if there is no ! * bgwriter activity configured. * * On some platforms, signals won't interrupt the sleep. To ensure we * respond reasonably promptly when someone signals us, break down the * sleep into 1-second increments, and check for interrupts after each * nap. */ ! if (bgwriter_lru_maxpages > 0) ! udelay = BgWriterDelay * 1000L; ! else ! udelay = 10000000L; /* Ten seconds */ while (udelay > 999999L) { --- 344,381 ---- pgstat_send_bgwriter(); /* ! * Emergency bailout if postmaster has died. This is to avoid the ! * necessity for manual cleanup of all postmaster children. ! */ ! if (!PostmasterIsAlive()) ! exit(1); ! ! if (got_SIGHUP) ! { ! got_SIGHUP = false; ! ProcessConfigFile(PGC_SIGHUP); ! /* update global shmem state for sync rep */ ! } ! if (shutdown_requested) ! { ! /* ! * From here on, elog(ERROR) should end with exit(1), not send ! * control back to the sigsetjmp block above ! */ ! ExitOnAnyError = true; ! /* Normal exit from the bgwriter is here */ ! proc_exit(0); /* done */ ! } ! ! /* ! * Nap for the configured time. * * On some platforms, signals won't interrupt the sleep. To ensure we * respond reasonably promptly when someone signals us, break down the * sleep into 1-second increments, and check for interrupts after each * nap. */ ! udelay = BgWriterDelay * 1000L; while (udelay > 999999L) { *************** bg_quickdie(SIGNAL_ARGS) *** 351,362 **** --- 430,453 ---- static void BgSigHupHandler(SIGNAL_ARGS) { + int save_errno = errno; + got_SIGHUP = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; } /* SIGTERM: set flag to shutdown and exit */ static void ReqShutdownHandler(SIGNAL_ARGS) { + int save_errno = errno; + shutdown_requested = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c new file mode 100644 index 91cc001..15ddf83 *** a/src/backend/storage/buffer/bufmgr.c --- b/src/backend/storage/buffer/bufmgr.c *************** MarkBufferDirty(Buffer buffer) *** 981,986 **** --- 981,989 ---- VacuumPageDirty++; if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; + /* The bgwriter may need to be woken. */ + if (ProcGlobal->bgwriterLatch) + SetLatch(ProcGlobal->bgwriterLatch); } bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); *************** BufferSync(int flags) *** 1307,1314 **** * BgBufferSync -- Write out some dirty buffers in the pool. * * This is called periodically by the background writer process. */ ! void BgBufferSync(void) { /* info obtained from freelist.c */ --- 1310,1321 ---- * BgBufferSync -- Write out some dirty buffers in the pool. * * This is called periodically by the background writer process. + * + * Returns a value indicating if the clocksweep has been "lapped", or if the + * bgwriter has been effectively disabled due to finding bgwriter_lru_maxpages + * at 0. */ ! bool BgBufferSync(void) { /* info obtained from freelist.c */ *************** BgBufferSync(void) *** 1365,1371 **** if (bgwriter_lru_maxpages <= 0) { saved_info_valid = false; ! return; } /* --- 1372,1378 ---- if (bgwriter_lru_maxpages <= 0) { saved_info_valid = false; ! return true; } /* *************** BgBufferSync(void) *** 1584,1589 **** --- 1591,1598 ---- recent_alloc, strategy_delta, scans_per_alloc, smoothed_density); #endif } + + return bufs_to_lap == 0; } /* *************** SetBufferCommitInfoNeedsSave(Buffer buff *** 2348,2353 **** --- 2357,2365 ---- VacuumPageDirty++; if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; + /* The bgwriter may need to be woken. */ + if (ProcGlobal->bgwriterLatch) + SetLatch(ProcGlobal->bgwriterLatch); } bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnlockBufHdr(bufHdr); diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h new file mode 100644 index 6cc4b62..970faa2 *** a/src/include/postmaster/bgwriter.h --- b/src/include/postmaster/bgwriter.h *************** *** 13,18 **** --- 13,19 ---- #define _BGWRITER_H #include "storage/block.h" + #include "storage/latch.h" #include "storage/relfilenode.h" diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h new file mode 100644 index a03c068..de1bbd0 *** a/src/include/storage/bufmgr.h --- b/src/include/storage/bufmgr.h *************** extern bool HoldingBufferPinThatDelaysRe *** 213,219 **** extern void AbortBufferIO(void); extern void BufmgrCommit(void); ! extern void BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); --- 213,219 ---- extern void AbortBufferIO(void); extern void BufmgrCommit(void); ! extern bool BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h new file mode 100644 index 358d1a4..4520f27 *** a/src/include/storage/proc.h --- b/src/include/storage/proc.h *************** typedef struct PROC_HDR *** 188,193 **** --- 188,195 ---- PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC *autovacFreeProcs; + /* BGWriter process latch */ + Latch *bgwriterLatch; /* Current shared estimate of appropriate spins_per_delay value */ int spins_per_delay; /* The proc of the Startup process, since not in ProcArray */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers