On Wed, Feb 17, 2010 at 4:07 PM, Fujii Masao <masao.fu...@gmail.com> wrote: > On Wed, Feb 17, 2010 at 3:03 PM, Magnus Hagander <mag...@hagander.net> wrote: >> In that case, O_DIRECT would be counterproductive, no? It maps to >> FILE_FLAG_NO_BUFFERING, which makes sure it doesn't go into the >> cache. So the read in the startup proc is actually guaranteed to >> require a physical read - of something we just wrote, so it'll almost >> certainly end up waiting for a rotation, no? >> >> Seems like getting rid of O_DIRECT here is the right thing to do, >> regardless of this. > > Agreed. I'll remove O_DIRECT from walreceiver.
Here is the patch to do that. Regards, -- Fujii Masao NIPPON TELEGRAPH AND TELEPHONE CORPORATION NTT Open Source Software Center
*** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** *** 1627,1633 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch) /* create/use new log file */ use_existent = true; openLogFile = XLogFileInit(openLogId, openLogSeg, ! &use_existent, true); openLogOff = 0; } --- 1627,1633 ---- /* create/use new log file */ use_existent = true; openLogFile = XLogFileInit(openLogId, openLogSeg, ! &use_existent, true, true); openLogOff = 0; } *************** *** 2184,2189 **** XLogNeedsFlush(XLogRecPtr record) --- 2184,2195 ---- * place. This should be TRUE except during bootstrap log creation. The * caller must *not* hold the lock at call. * + * allow_direct_io: if TRUE, allow a WAL write to bypass the kernel cache + * by using PG_O_DIRECT for opening a file. Otherwise, PG_O_DIRECT is + * forcibly removed from the sync flag of open(). This should be FALSE + * only when walreceiver process writes WAL data because it's read + * immediately by the startup process. + * * Returns FD of opened file. * * Note: errors here are ERROR not PANIC because we might or might not be *************** *** 2193,2199 **** XLogNeedsFlush(XLogRecPtr record) */ int XLogFileInit(uint32 log, uint32 seg, ! bool *use_existent, bool use_lock) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; --- 2199,2205 ---- */ int XLogFileInit(uint32 log, uint32 seg, ! bool *use_existent, bool use_lock, bool allow_direct_io) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; *************** *** 2203,2208 **** XLogFileInit(uint32 log, uint32 seg, --- 2209,2217 ---- int max_advance; int fd; int nbytes; + int sync_bit; + + sync_bit = get_sync_bit(sync_method) & (allow_direct_io ? 0 : ~PG_O_DIRECT); XLogFilePath(path, ThisTimeLineID, log, seg); *************** *** 2211,2217 **** XLogFileInit(uint32 log, uint32 seg, */ if (*use_existent) { ! 
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method), S_IRUSR | S_IWUSR); if (fd < 0) { --- 2220,2226 ---- */ if (*use_existent) { ! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit, S_IRUSR | S_IWUSR); if (fd < 0) { *************** *** 2237,2243 **** XLogFileInit(uint32 log, uint32 seg, unlink(tmppath); ! /* do not use get_sync_bit() here --- want to fsync only at end of fill */ fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) --- 2246,2252 ---- unlink(tmppath); ! /* do not use sync_bit here --- want to fsync only at end of fill */ fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) *************** *** 2317,2323 **** XLogFileInit(uint32 log, uint32 seg, *use_existent = false; /* Now open original target segment (might not be file I just made) */ ! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method), S_IRUSR | S_IWUSR); if (fd < 0) ereport(ERROR, --- 2326,2332 ---- *use_existent = false; /* Now open original target segment (might not be file I just made) */ ! fd = BasicOpenFile(path, O_RDWR | PG_BINARY | sync_bit, S_IRUSR | S_IWUSR); if (fd < 0) ereport(ERROR, *************** *** 3121,3127 **** PreallocXlogFiles(XLogRecPtr endptr) { NextLogSeg(_logId, _logSeg); use_existent = true; ! lf = XLogFileInit(_logId, _logSeg, &use_existent, true); close(lf); if (!use_existent) CheckpointStats.ckpt_segs_added++; --- 3130,3136 ---- { NextLogSeg(_logId, _logSeg); use_existent = true; ! lf = XLogFileInit(_logId, _logSeg, &use_existent, true, true); close(lf); if (!use_existent) CheckpointStats.ckpt_segs_added++; *************** *** 4794,4800 **** BootStrapXLOG(void) /* Create first XLOG segment file */ use_existent = false; ! openLogFile = XLogFileInit(0, 0, &use_existent, false); /* Write the first page with the initial record */ errno = 0; --- 4803,4809 ---- /* Create first XLOG segment file */ use_existent = false; ! 
openLogFile = XLogFileInit(0, 0, &use_existent, false, true); /* Write the first page with the initial record */ errno = 0; *** a/src/backend/replication/walreceiver.c --- b/src/backend/replication/walreceiver.c *************** *** 446,452 **** XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) XLByteToSeg(recptr, recvId, recvSeg); use_existent = true; recvFile = XLogFileInit(recvId, recvSeg, ! &use_existent, true); recvOff = 0; } --- 446,452 ---- XLByteToSeg(recptr, recvId, recvSeg); use_existent = true; recvFile = XLogFileInit(recvId, recvSeg, ! &use_existent, true, false); recvOff = 0; } *** a/src/include/access/xlog.h --- b/src/include/access/xlog.h *************** *** 251,257 **** extern void XLogFlush(XLogRecPtr RecPtr); extern void XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(uint32 log, uint32 seg, ! bool *use_existent, bool use_lock); extern int XLogFileOpen(uint32 log, uint32 seg); --- 251,257 ---- extern void XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(uint32 log, uint32 seg, ! bool *use_existent, bool use_lock, bool allow_direct_io); extern int XLogFileOpen(uint32 log, uint32 seg);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers