On 4 January 2012 07:24, Heikki Linnakangas
<heikki.linnakan...@enterprisedb.com> wrote:
> I think SetBufferCommitInfoNeedsSave() needs the same treatment as
> MarkBufferDirty(). And it would probably be good to only set the latch if
> the buffer wasn't dirty already. Setting a latch that's already set is fast,
> but surely it's even faster to not even try.

That seems reasonable. Revised patch is attached.

> Yeah, I'd like to see a micro-benchmark of a worst-case scenario. I'm a bit
> worried about the impact on systems with a lot of CPUs. If you have a lot of
> CPUs writing to the same cache line that contains the latch's flag, that
> might get expensive.

Also reasonable, but I don't think that I'll get around to it until
after the final commitfest deadline.

-- 
Peter Geoghegan       http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training and Services
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
new file mode 100644
index e3ae92d..0fcaf01
*** a/src/backend/bootstrap/bootstrap.c
--- b/src/backend/bootstrap/bootstrap.c
*************** AuxiliaryProcessMain(int argc, char *arg
*** 416,421 ****
--- 416,422 ----
  
  		case BgWriterProcess:
  			/* don't set signals, bgwriter has its own agenda */
+ 			ProcGlobal->bgwriterLatch = &MyProc->procLatch; /* Expose */
  			BackgroundWriterMain();
  			proc_exit(1);		/* should never return */
  
diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c
new file mode 100644
index fc1a579..a5248ba
*** a/src/backend/port/unix_latch.c
--- b/src/backend/port/unix_latch.c
***************
*** 50,55 ****
--- 50,56 ----
  #include "miscadmin.h"
  #include "postmaster/postmaster.h"
  #include "storage/latch.h"
+ #include "storage/proc.h"
  #include "storage/shmem.h"
  
  /* Are we currently in WaitLatch? The signal handler would like to know. */
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
new file mode 100644
index 1f8d2d6..59e5180
*** a/src/backend/postmaster/bgwriter.c
--- b/src/backend/postmaster/bgwriter.c
***************
*** 51,56 ****
--- 51,58 ----
  #include "storage/ipc.h"
  #include "storage/lwlock.h"
  #include "storage/pmsignal.h"
+ #include "storage/proc.h"
+ #include "storage/procsignal.h"
  #include "storage/shmem.h"
  #include "storage/smgr.h"
  #include "storage/spin.h"
*************** static volatile sig_atomic_t shutdown_re
*** 73,78 ****
--- 75,82 ----
  /*
   * Private state
   */
+ #define BGWRITER_HIBERNATE_MS			10000
+ 
  static bool am_bg_writer = false;
  
  /* Prototypes for private functions */
*************** BackgroundWriterMain(void)
*** 123,129 ****
  	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
  	pqsignal(SIGALRM, SIG_IGN);
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, SIG_IGN);			/* reserve for ProcSignal */
  	pqsignal(SIGUSR2, SIG_IGN);
  
  	/*
--- 127,133 ----
  	pqsignal(SIGQUIT, bg_quickdie);		/* hard crash time */
  	pqsignal(SIGALRM, SIG_IGN);
  	pqsignal(SIGPIPE, SIG_IGN);
! 	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
  	pqsignal(SIGUSR2, SIG_IGN);
  
  	/*
*************** BackgroundWriterMain(void)
*** 238,278 ****
  	for (;;)
  	{
  		/*
! 		 * Emergency bailout if postmaster has died.  This is to avoid the
! 		 * necessity for manual cleanup of all postmaster children.
  		 */
! 		if (!PostmasterIsAlive())
! 			exit(1);
! 
! 		if (got_SIGHUP)
  		{
! 			got_SIGHUP = false;
! 			ProcessConfigFile(PGC_SIGHUP);
! 			/* update global shmem state for sync rep */
  		}
! 		if (shutdown_requested)
  		{
  			/*
! 			 * From here on, elog(ERROR) should end with exit(1), not send
! 			 * control back to the sigsetjmp block above
  			 */
! 			ExitOnAnyError = true;
! 			/* Normal exit from the bgwriter is here */
! 			proc_exit(0);		/* done */
! 		}
  
! 		/*
! 		 * Do one cycle of dirty-buffer writing.
! 		 */
! 		BgBufferSync();
  
! 		/* Nap for the configured time. */
! 		BgWriterNap();
  	}
  }
  
  /*
   * BgWriterNap -- Nap for the configured time or until a signal is received.
   */
  static void
  BgWriterNap(void)
--- 242,337 ----
  	for (;;)
  	{
  		/*
! 		 * Do one cycle of dirty-buffer writing, potentially hibernating if
! 		 * there have been no buffers to write.
  		 */
! 		if (!BgBufferSync())
  		{
! 			/* Clock sweep was not "lapped" - nap for the configured time. */
! 			BgWriterNap();
  		}
! 		else
  		{
+ 			int res = 0;
  			/*
! 			 * Initiate hibernation by "arming" the process latch. This usage
! 			 * differs from the standard pattern for latches outlined in
! 			 * latch.h, where the latch is generally reset immediately after
! 			 * WaitLatch returns, in the next iteration of an infinite loop.
! 			 *
! 			 * It should only be possible to *really* set the latch from client
! 			 * backends while the bgwriter is idle, and not during productive
! 			 * cycles where buffers are written, or shortly thereafter. It's
! 			 * important that the SetLatch() call within the buffer manager
! 			 * usually inexpensively returns having found that the latch is
! 			 * already set. For write-heavy workloads, it's quite possible that
! 			 * those calls will never actually get the opportunity to really
! 			 * set the latch.
! 			 *
! 			 * By only giving backends the opportunity to really set the
! 			 * latch at this point via ResetLatch(), we avoid the overhead of
! 			 * sending a signal perhaps as much as once per bgwriter cycle, or
! 			 * maybe even more frequently than that due to the asynchronous
! 			 * nature of signals.
  			 */
! 			ResetLatch(&MyProc->procLatch);
  
! 			/*
! 			 * Another nap and sync, in case some buffers were dirtied since
! 			 * the immediately prior BgBufferSync() call and we quite
! 			 * naturally didn't hear about it because the SetLatch calls
! 			 * found the latch already set (i.e. it was "disarmed").
! 			 */
! 			BgWriterNap();
  
! 			/*
! 			 * When syncing, take the opportunity to check one last time if
! 			 * we're still "lapping" the clock sweep, and only proceed if we are.
! 			 */
! 			if (BgBufferSync())
! 				/*
! 				 * Set a timeout so that we can still update BufferAlloc stats
! 				 * reasonably promptly.
! 				 */
! 				res = WaitLatch(&MyProc->procLatch,
! 						WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
! 						BGWRITER_HIBERNATE_MS);
! 			else
! 				/*
! 				 * Changed our mind, set own proc latch, thereby often avoiding
! 				 * signaling from a backend.
! 				 */
! 				SetLatch(&MyProc->procLatch);
! 
! 			/*
! 			 * Ensure that no less than BgWriterDelay ms has passed between
! 			 * BgBufferSyncs - WaitLatch() might have returned instantaneously.
! 			 *
! 			 * Napping here rather than immediately prior to the WaitLatch()
! 			 * call ensures that a sudden burst of dirtied buffers won't result
! 			 * in the initial background writer cycle ineffectually cleaning
! 			 * only a few pages dirtied earlier in the burst, rather than
! 			 * cleaning a number that is somewhere closer to optimal for the
! 			 * cycle, as modeled by the background writing strategy. We'll also
! 			 * want to nap if we changed our minds, before starting a new cycle.
! 			 *
! 			 * We wake on a buffer being dirtied. It's possible that some
! 			 * useful work will become available for the bgwriter to do without
! 			 * a buffer actually being dirtied, as when a dirty buffer's usage
! 			 * count is decremented to zero or it's unpinned. This corner case
! 			 * is judged as too marginal to justify adding additional SetLatch()
! 			 * calls in very hot code paths, cheap though those calls may be.
! 			 */
! 			if (!(res & WL_TIMEOUT))
! 				BgWriterNap();
! 		}
  	}
  }
  
  /*
   * BgWriterNap -- Nap for the configured time or until a signal is received.
+  *
+  * Respond to a signal handler having set a flag for us.
   */
  static void
  BgWriterNap(void)
*************** BgWriterNap(void)
*** 285,302 ****
  	pgstat_send_bgwriter();
  
  	/*
! 	 * Nap for the configured time, or sleep for 10 seconds if there is no
! 	 * bgwriter activity configured.
  	 *
  	 * On some platforms, signals won't interrupt the sleep.  To ensure we
  	 * respond reasonably promptly when someone signals us, break down the
  	 * sleep into 1-second increments, and check for interrupts after each
  	 * nap.
  	 */
! 	if (bgwriter_lru_maxpages > 0)
! 		udelay = BgWriterDelay * 1000L;
! 	else
! 		udelay = 10000000L;		/* Ten seconds */
  
  	while (udelay > 999999L)
  	{
--- 344,381 ----
  	pgstat_send_bgwriter();
  
  	/*
! 	 * Emergency bailout if postmaster has died.  This is to avoid the
! 	 * necessity for manual cleanup of all postmaster children.
! 	 */
! 	if (!PostmasterIsAlive())
! 		exit(1);
! 
! 	if (got_SIGHUP)
! 	{
! 		got_SIGHUP = false;
! 		ProcessConfigFile(PGC_SIGHUP);
! 		/* update global shmem state for sync rep */
! 	}
! 	if (shutdown_requested)
! 	{
! 		/*
! 		 * From here on, elog(ERROR) should end with exit(1), not send
! 		 * control back to the sigsetjmp block above
! 		 */
! 		ExitOnAnyError = true;
! 		/* Normal exit from the bgwriter is here */
! 		proc_exit(0);		/* done */
! 	}
! 
! 	/*
! 	 * Nap for the configured time.
  	 *
  	 * On some platforms, signals won't interrupt the sleep.  To ensure we
  	 * respond reasonably promptly when someone signals us, break down the
  	 * sleep into 1-second increments, and check for interrupts after each
  	 * nap.
  	 */
! 	udelay = BgWriterDelay * 1000L;
  
  	while (udelay > 999999L)
  	{
*************** bg_quickdie(SIGNAL_ARGS)
*** 351,362 ****
--- 430,453 ----
  static void
  BgSigHupHandler(SIGNAL_ARGS)
  {
+ 	int save_errno = errno;
+ 
  	got_SIGHUP = true;
+ 	if (MyProc)
+ 		SetLatch(&MyProc->procLatch);
+ 
+ 	errno = save_errno;
  }
  
  /* SIGTERM: set flag to shutdown and exit */
  static void
  ReqShutdownHandler(SIGNAL_ARGS)
  {
+ 	int save_errno = errno;
+ 
  	shutdown_requested = true;
+ 	if (MyProc)
+ 		SetLatch(&MyProc->procLatch);
+ 
+ 	errno = save_errno;
  }
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
new file mode 100644
index 91cc001..15ddf83
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
*************** MarkBufferDirty(Buffer buffer)
*** 981,986 ****
--- 981,989 ----
  		VacuumPageDirty++;
  		if (VacuumCostActive)
  			VacuumCostBalance += VacuumCostPageDirty;
+ 		/* The bgwriter may need to be woken. */
+ 		if (ProcGlobal->bgwriterLatch)
+ 			SetLatch(ProcGlobal->bgwriterLatch);
  	}
  
  	bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
*************** BufferSync(int flags)
*** 1307,1314 ****
   * BgBufferSync -- Write out some dirty buffers in the pool.
   *
   * This is called periodically by the background writer process.
   */
! void
  BgBufferSync(void)
  {
  	/* info obtained from freelist.c */
--- 1310,1321 ----
   * BgBufferSync -- Write out some dirty buffers in the pool.
   *
   * This is called periodically by the background writer process.
+  *
+  * Returns a value indicating if the clocksweep has been "lapped", or if the
+  * bgwriter has been effectively disabled due to finding bgwriter_lru_maxpages
+  * at 0.
   */
! bool
  BgBufferSync(void)
  {
  	/* info obtained from freelist.c */
*************** BgBufferSync(void)
*** 1365,1371 ****
  	if (bgwriter_lru_maxpages <= 0)
  	{
  		saved_info_valid = false;
! 		return;
  	}
  
  	/*
--- 1372,1378 ----
  	if (bgwriter_lru_maxpages <= 0)
  	{
  		saved_info_valid = false;
! 		return true;
  	}
  
  	/*
*************** BgBufferSync(void)
*** 1584,1589 ****
--- 1591,1598 ----
  			 recent_alloc, strategy_delta, scans_per_alloc, smoothed_density);
  #endif
  	}
+ 
+ 	return bufs_to_lap == 0;
  }
  
  /*
*************** SetBufferCommitInfoNeedsSave(Buffer buff
*** 2348,2353 ****
--- 2357,2365 ----
  			VacuumPageDirty++;
  			if (VacuumCostActive)
  				VacuumCostBalance += VacuumCostPageDirty;
+ 			/* The bgwriter may need to be woken. */
+ 			if (ProcGlobal->bgwriterLatch)
+ 				SetLatch(ProcGlobal->bgwriterLatch);
  		}
  		bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
  		UnlockBufHdr(bufHdr);
diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h
new file mode 100644
index 6cc4b62..970faa2
*** a/src/include/postmaster/bgwriter.h
--- b/src/include/postmaster/bgwriter.h
***************
*** 13,18 ****
--- 13,19 ----
  #define _BGWRITER_H
  
  #include "storage/block.h"
+ #include "storage/latch.h"
  #include "storage/relfilenode.h"
  
  
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
new file mode 100644
index a03c068..de1bbd0
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
*************** extern bool HoldingBufferPinThatDelaysRe
*** 213,219 ****
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern void BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);
  
--- 213,219 ----
  extern void AbortBufferIO(void);
  
  extern void BufmgrCommit(void);
! extern bool BgBufferSync(void);
  
  extern void AtProcExit_LocalBuffers(void);
  
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
new file mode 100644
index 358d1a4..4520f27
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
*************** typedef struct PROC_HDR
*** 188,193 ****
--- 188,195 ----
  	PGPROC	   *freeProcs;
  	/* Head of list of autovacuum's free PGPROC structures */
  	PGPROC	   *autovacFreeProcs;
+ 	/* BGWriter process latch */
+ 	Latch *bgwriterLatch;
  	/* Current shared estimate of appropriate spins_per_delay value */
  	int			spins_per_delay;
  	/* The proc of the Startup process, since not in ProcArray */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to