On Wed, Feb 17, 2010 at 4:07 PM, Fujii Masao <masao.fu...@gmail.com> wrote:
> On Wed, Feb 17, 2010 at 3:03 PM, Magnus Hagander <mag...@hagander.net> wrote:
>> In that case, O_DIRECT would be counterproductive, no? It maps to
>> FILE_FLAG_NO_BUFFERING, which makes sure it doesn't go into the
>> cache. So the read in the startup proc is actually guaranteed to
>> require a physical read - of something we just wrote, so it'll almost
>> certainly end up waiting for a rotation, no?
>>
>> Seems like getting rid of O_DIRECT here is the right thing to do,
>> regardless of this.
>
> Agreed. I'll remove O_DIRECT from walreceiver.

Here is the patch to do that.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 1627,1633 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
  			/* create/use new log file */
  			use_existent = true;
  			openLogFile = XLogFileInit(openLogId, openLogSeg,
! 									   &use_existent, true);
  			openLogOff = 0;
  		}
  
--- 1627,1633 ----
  			/* create/use new log file */
  			use_existent = true;
  			openLogFile = XLogFileInit(openLogId, openLogSeg,
! 									   &use_existent, true, true);
  			openLogOff = 0;
  		}
  
***************
*** 2184,2189 **** XLogNeedsFlush(XLogRecPtr record)
--- 2184,2195 ----
   * place.  This should be TRUE except during bootstrap log creation.  The
   * caller must *not* hold the lock at call.
   *
+  * allow_direct_io: if TRUE, allow a WAL write to bypass the kernel cache
+  * by using PG_O_DIRECT for opening a file. Otherwise, PG_O_DIRECT is
+  * forcibly removed from the sync flag of open(). This should be FALSE
+  * only when walreceiver process writes WAL data because it's read
+  * immediately by the startup process.
+  *
   * Returns FD of opened file.
   *
   * Note: errors here are ERROR not PANIC because we might or might not be
***************
*** 2193,2199 **** XLogNeedsFlush(XLogRecPtr record)
   */
  int
  XLogFileInit(uint32 log, uint32 seg,
! 			 bool *use_existent, bool use_lock)
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
--- 2199,2205 ----
   */
  int
  XLogFileInit(uint32 log, uint32 seg,
! 			 bool *use_existent, bool use_lock, bool allow_direct_io)
  {
  	char		path[MAXPGPATH];
  	char		tmppath[MAXPGPATH];
***************
*** 2203,2208 **** XLogFileInit(uint32 log, uint32 seg,
--- 2209,2217 ----
  	int			max_advance;
  	int			fd;
  	int			nbytes;
+ 	int			sync_bit;
+ 
+ 	sync_bit = get_sync_bit(sync_method) & (allow_direct_io ? ~0 : ~PG_O_DIRECT);
  
  	XLogFilePath(path, ThisTimeLineID, log, seg);
  
***************
*** 2211,2217 **** XLogFileInit(uint32 log, uint32 seg,
  	 */
  	if (*use_existent)
  	{
! 		fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
  						   S_IRUSR | S_IWUSR);
  		if (fd < 0)
  		{
--- 2220,2226 ----
  	 */
  	if (*use_existent)
  	{
! 		fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
  						   S_IRUSR | S_IWUSR);
  		if (fd < 0)
  		{
***************
*** 2237,2243 **** XLogFileInit(uint32 log, uint32 seg,
  
  	unlink(tmppath);
  
! 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
  	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
--- 2246,2252 ----
  
  	unlink(tmppath);
  
! 	/* do not use sync_bit here --- want to fsync only at end of fill */
  	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
***************
*** 2317,2323 **** XLogFileInit(uint32 log, uint32 seg,
  	*use_existent = false;
  
  	/* Now open original target segment (might not be file I just made) */
! 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method),
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
  		ereport(ERROR,
--- 2326,2332 ----
  	*use_existent = false;
  
  	/* Now open original target segment (might not be file I just made) */
! 	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit,
  					   S_IRUSR | S_IWUSR);
  	if (fd < 0)
  		ereport(ERROR,
***************
*** 3121,3127 **** PreallocXlogFiles(XLogRecPtr endptr)
  	{
  		NextLogSeg(_logId, _logSeg);
  		use_existent = true;
! 		lf = XLogFileInit(_logId, _logSeg, &use_existent, true);
  		close(lf);
  		if (!use_existent)
  			CheckpointStats.ckpt_segs_added++;
--- 3130,3136 ----
  	{
  		NextLogSeg(_logId, _logSeg);
  		use_existent = true;
! 		lf = XLogFileInit(_logId, _logSeg, &use_existent, true, true);
  		close(lf);
  		if (!use_existent)
  			CheckpointStats.ckpt_segs_added++;
***************
*** 4794,4800 **** BootStrapXLOG(void)
  
  	/* Create first XLOG segment file */
  	use_existent = false;
! 	openLogFile = XLogFileInit(0, 0, &use_existent, false);
  
  	/* Write the first page with the initial record */
  	errno = 0;
--- 4803,4809 ----
  
  	/* Create first XLOG segment file */
  	use_existent = false;
! 	openLogFile = XLogFileInit(0, 0, &use_existent, false, true);
  
  	/* Write the first page with the initial record */
  	errno = 0;
*** a/src/backend/replication/walreceiver.c
--- b/src/backend/replication/walreceiver.c
***************
*** 446,452 **** XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
  			XLByteToSeg(recptr, recvId, recvSeg);
  			use_existent = true;
  			recvFile = XLogFileInit(recvId, recvSeg,
! 									&use_existent, true);
  			recvOff = 0;
  		}
  
--- 446,452 ----
  			XLByteToSeg(recptr, recvId, recvSeg);
  			use_existent = true;
  			recvFile = XLogFileInit(recvId, recvSeg,
! 									&use_existent, true, false);
  			recvOff = 0;
  		}
  
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 251,257 **** extern void XLogFlush(XLogRecPtr RecPtr);
  extern void XLogBackgroundFlush(void);
  extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
  extern int	XLogFileInit(uint32 log, uint32 seg,
! 						 bool *use_existent, bool use_lock);
  extern int	XLogFileOpen(uint32 log, uint32 seg);
  
  
--- 251,257 ----
  extern void XLogBackgroundFlush(void);
  extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
  extern int	XLogFileInit(uint32 log, uint32 seg,
! 						 bool *use_existent, bool use_lock, bool allow_direct_io);
  extern int	XLogFileOpen(uint32 log, uint32 seg);
  
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to