diff -rcN pgsql.posix_fadvise/src/backend/access/gin/ginxlog.c pgsql/src/backend/access/gin/ginxlog.c
*** pgsql.posix_fadvise/src/backend/access/gin/ginxlog.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/gin/ginxlog.c	2009-01-09 20:32:13.000000000 +0900
***************
*** 15,20 ****
--- 15,21 ----
  
  #include "access/gin.h"
  #include "access/xlogutils.h"
+ #include "access/readahead.h"
  #include "storage/bufmgr.h"
  #include "utils/memutils.h"
  
***************
*** 519,524 ****
--- 520,629 ----
  	}
  }
  
+ /*
+  *    gin_readahead  - enqueue information about data pages
+  *
+  * Queues each data page named by this GIN WAL record with the readahead
+  * module.  Returns false, queuing nothing, when the queue lacks room for all
+  * of the record's pages; returns true otherwise.
+  */
+ bool
+ gin_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);		/* must precede the first dereference */
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_GIN_CREATE_INDEX:
+ 			{
+ 				RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(*node, GIN_ROOT_BLKNO, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIN_CREATE_PTREE:
+ 			{
+ 				ginxlogCreatePostingTree *data =
+ 					(ginxlogCreatePostingTree *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIN_INSERT:
+ 			{
+ 				ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIN_SPLIT:
+ 			{
+ 				int readahead_cnt;
+ 				ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 2;
+ 				if (data->isRootSplit)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				ReadAheadAddEntry(data->node, data->lblkno, lsn.xrecoff, false);
+ 				ReadAheadAddEntry(data->node, data->rblkno, lsn.xrecoff, false);
+ 				if (data->isRootSplit)
+ 				{
+ 					ReadAheadAddEntry(data->node, data->rootBlkno,
+ 						lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_GIN_VACUUM_PAGE:
+ 			{
+ 				ginxlogVacuumPage *data =
+ 					(ginxlogVacuumPage *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIN_DELETE_PAGE:
+ 			{
+ 				int readahead_cnt;
+ 				ginxlogDeletePage *data =
+ 					(ginxlogDeletePage *) XLogRecGetData(record);
+ 				readahead_cnt = 2;
+ 				if (data->leftBlkno != InvalidBlockNumber)
+ 					readahead_cnt++;
+ 				/* Reserve room for the optional left sibling as well. */
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				ReadAheadAddEntry(data->node, data->parentBlkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				if (data->leftBlkno != InvalidBlockNumber)
+ 				{
+ 					ReadAheadAddEntry(data->node, data->leftBlkno,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_3);
+ 				}
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  void
  gin_xlog_startup(void)
  {
diff -rcN pgsql.posix_fadvise/src/backend/access/gist/gistxlog.c pgsql/src/backend/access/gist/gistxlog.c
*** pgsql.posix_fadvise/src/backend/access/gist/gistxlog.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/gist/gistxlog.c	2009-01-09 20:32:13.000000000 +0900
***************
*** 15,20 ****
--- 15,21 ----
  
  #include "access/gist_private.h"
  #include "access/xlogutils.h"
+ #include "access/readahead.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
  #include "utils/memutils.h"
***************
*** 500,505 ****
--- 501,584 ----
  	}
  }
  
+ /*
+  *    gist_readahead   - enqueue information about data pages
+  *
+  * Queues each data page named by this GiST WAL record with the readahead
+  * module.  Returns false, queuing nothing, when the queue lacks room for all
+  * of the record's pages; returns true otherwise.
+  */
+ bool
+ gist_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);		/* must precede the first dereference */
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_GIST_PAGE_UPDATE:
+ 		case XLOG_GIST_NEW_ROOT:
+ 			{
+ 				PageUpdateRecord xlrec;
+ 
+ 				decodePageUpdateRecord(&xlrec, record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec.data->node, xlrec.data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIST_PAGE_SPLIT:
+ 			{
+ 				int i;
+ 				/* NOTE(review): confirm the decoder allocates nothing we must free. */
+ 				PageSplitRecord rec;
+ 				decodePageSplitRecord(&rec, record);
+ 
+ 				if (!ReadAheadHasRoom(rec.data->npage))
+ 					return false;
+ 				for (i = 0; i < rec.data->npage; i++)
+ 				{
+ 					ReadAheadAddEntry(rec.data->node, rec.page[i].header->blkno,
+ 						lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_GIST_INSERT_COMPLETE:
+ 			{
+ 				/*
+ 				 * This WAL record never touches a data page, so there is
+ 				 * nothing to do.
+ 				 */
+ 				break;
+ 			}
+ 		case XLOG_GIST_CREATE_INDEX:
+ 			{
+ 				RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(*node, GIST_ROOT_BLKNO, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIST_PAGE_DELETE:
+ 			{
+ 				gistxlogPageDelete *xldata =
+ 					(gistxlogPageDelete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xldata->node, xldata->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  IndexTuple
  gist_form_invalid_tuple(BlockNumber blkno)
  {
diff -rcN pgsql.posix_fadvise/src/backend/access/heap/heapam.c pgsql/src/backend/access/heap/heapam.c
*** pgsql.posix_fadvise/src/backend/access/heap/heapam.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/heap/heapam.c	2009-01-09 20:32:11.000000000 +0900
***************
*** 42,47 ****
--- 42,48 ----
  #include "access/heapam.h"
  #include "access/hio.h"
  #include "access/multixact.h"
+ #include "access/readahead.h"
  #include "access/relscan.h"
  #include "access/sysattr.h"
  #include "access/transam.h"
***************
*** 4970,4975 ****
--- 4971,5122 ----
  }
  
  /*
+  *	heap_readahead	- enqueue information about data pages
+  *
+  * Queues each heap page named by this WAL record with the readahead module.
+  * Returns false, queuing nothing, when the queue lacks room for all of the
+  * record's pages; returns true otherwise.
+  */
+ bool
+ heap_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);		/* must precede the first dereference */
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info & XLOG_HEAP_OPMASK)
+ 	{
+ 		case XLOG_HEAP_INSERT:
+ 			{
+ 				xl_heap_insert *xlrec =
+ 					(xl_heap_insert *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_DELETE:
+ 			{
+ 				xl_heap_delete *xlrec =
+ 					(xl_heap_delete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_UPDATE:
+ 		case XLOG_HEAP_MOVE:
+ 		case XLOG_HEAP_HOT_UPDATE:
+ 			{
+ 				bool samepage;
+ 				xl_heap_update *xlrec =
+ 					(xl_heap_update *) XLogRecGetData(record);
+ 
+ 				samepage = ItemPointerGetBlockNumber(&xlrec->newtid) ==
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid);
+ 
+ 				if (!ReadAheadHasRoom(1 + (samepage ? 0 : 1)))
+ 					return false;
+ 				/* Queue the page that holds the old tuple version. */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				/* Queue the new tuple's page when it is a different block. */
+ 				if (!samepage)
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						ItemPointerGetBlockNumber(&xlrec->newtid),
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_NEWPAGE:
+ 			{
+ 				xl_heap_newpage *xlrec =
+ 					(xl_heap_newpage *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->blkno,
+ 					lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_LOCK:
+ 			{
+ 				xl_heap_lock *xlrec =
+ 					(xl_heap_lock *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_INPLACE:
+ 			{
+ 				xl_heap_inplace *xlrec =
+ 					(xl_heap_inplace *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
+ /*
+  *	heap2_readahead	- enqueue information about data pages
+  *
+  * Queues the heap page named by this WAL record with the readahead module.
+  * Returns false when the queue lacks room, true otherwise.  The
+  * XLR_INFO_MASK bits are stripped from xl_info before dispatching on it.
+  */
+ bool
+ heap2_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Assert(record);
+ 
+ 	switch (record->xl_info & ~XLR_INFO_MASK)
+ 	{
+ 		case XLOG_HEAP2_FREEZE:
+ 			{
+ 				xl_heap_freeze *xlrec =
+ 					(xl_heap_freeze *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP2_CLEAN:
+ 		case XLOG_HEAP2_CLEAN_MOVE:
+ 			{
+ 				xl_heap_clean *xlrec =
+ 					(xl_heap_clean *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
+ /*
   *	heap_sync		- sync a heap, for use when no WAL has been written
   *
   * This forces the heap contents (including TOAST heap if any) down to disk.
diff -rcN pgsql.posix_fadvise/src/backend/access/nbtree/nbtxlog.c pgsql/src/backend/access/nbtree/nbtxlog.c
*** pgsql.posix_fadvise/src/backend/access/nbtree/nbtxlog.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/nbtree/nbtxlog.c	2009-01-09 20:32:13.000000000 +0900
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/nbtree.h"
  #include "access/transam.h"
+ #include "access/readahead.h"
  #include "storage/bufmgr.h"
  
  /*
***************
*** 878,883 ****
--- 879,1014 ----
  	}
  }
  
+ /*
+  * btree_readahead	- enqueue information about data pages
+  * Returns false, queuing nothing, when the read-ahead queue lacks room.
+  */
+ bool
+ btree_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);		/* must precede the first dereference */
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_BTREE_INSERT_LEAF:
+ 		case XLOG_BTREE_INSERT_UPPER:
+ 		case XLOG_BTREE_INSERT_META:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_insert *xlrec =
+ 					(xl_btree_insert *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 1;
+ 				if (info == XLOG_BTREE_INSERT_META)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					BlockIdGetBlockNumber(&xlrec->target.tid.ip_blkid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				if (info == XLOG_BTREE_INSERT_META)
+ 				{
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						BTREE_METAPAGE, lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_BTREE_SPLIT_L:
+ 		case XLOG_BTREE_SPLIT_L_ROOT:
+ 		case XLOG_BTREE_SPLIT_R:
+ 		case XLOG_BTREE_SPLIT_R_ROOT:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_split *xlrec =
+ 					(xl_btree_split *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 2;
+ 				if (xlrec->rnext != P_NONE)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				ReadAheadAddEntry(xlrec->node, xlrec->rightsib,
+ 					lsn.xrecoff, false);
+ 				ReadAheadAddEntry(xlrec->node, xlrec->leftsib,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				if (xlrec->rnext != P_NONE)
+ 				{
+ 					ReadAheadAddEntry(xlrec->node, xlrec->rnext,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_BTREE_DELETE:
+ 			{
+ 				xl_btree_delete *xlrec =
+ 					(xl_btree_delete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_BTREE_DELETE_PAGE:
+ 		case XLOG_BTREE_DELETE_PAGE_META:
+ 		case XLOG_BTREE_DELETE_PAGE_HALF:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_delete_page *xlrec =
+ 					(xl_btree_delete_page *) XLogRecGetData(record);
+ 
+ 				/* parent, rightsib and target, plus optional meta/leftsib */
+ 				readahead_cnt = 3 + (info == XLOG_BTREE_DELETE_PAGE_META) +
+ 					(xlrec->leftblk != P_NONE);
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				/* parent page */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&(xlrec->target.tid)),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				/* rightsib page */
+ 				ReadAheadAddEntry(xlrec->target.node, xlrec->rightblk,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				/* leftsib page, if exists */
+ 				if (xlrec->leftblk != P_NONE)
+ 				{
+ 					ReadAheadAddEntry(xlrec->target.node, xlrec->leftblk,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_3);
+ 				}
+ 				/* target page */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					xlrec->deadblk, lsn.xrecoff, false);
+ 				/* metapage, if exists */
+ 				if (info == XLOG_BTREE_DELETE_PAGE_META)
+ 				{
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						BTREE_METAPAGE, lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_BTREE_NEWROOT:
+ 			{
+ 				xl_btree_newroot *xlrec =
+ 					(xl_btree_newroot *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				/* FPW does not exist. */
+ 				ReadAheadAddEntry(xlrec->node, xlrec->rootblk,
+ 					lsn.xrecoff, false);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  void
  btree_xlog_startup(void)
  {
diff -rcN pgsql.posix_fadvise/src/backend/access/transam/Makefile pgsql/src/backend/access/transam/Makefile
*** pgsql.posix_fadvise/src/backend/access/transam/Makefile	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/transam/Makefile	2009-01-09 20:32:13.000000000 +0900
***************
*** 12,18 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o
  
  include $(top_srcdir)/src/backend/common.mk
  
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o readahead.o
  
  include $(top_srcdir)/src/backend/common.mk
  
diff -rcN pgsql.posix_fadvise/src/backend/access/transam/readahead.c pgsql/src/backend/access/transam/readahead.c
*** pgsql.posix_fadvise/src/backend/access/transam/readahead.c	1970-01-01 09:00:00.000000000 +0900
--- pgsql/src/backend/access/transam/readahead.c	2009-01-09 20:36:13.000000000 +0900
***************
*** 0 ****
--- 1,209 ----
+ /*-------------------------------------------------------------------------
+  *
+  * readahead.c
+  *		Store information of data pages which should be read ahead.
+  *
+  * Portions Copyright (c) 2008, Nippon Telegraph and Telephone Corporation
+  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *-------------------------------------------------------------------------
+  */
+ 
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ 
+ #include "postgres.h"
+ #include "access/readahead.h"
+ #include "access/xlog_internal.h"
+ #include "catalog/catalog.h"
+ #include "storage/relfilenode.h"
+ #include "storage/block.h"
+ #include "storage/smgr.h"
+ 
+ /*
+  * One queued request: a data page that may be prefetched before redo.
+  */
+ struct XLogReadAhead {
+ 	/*
+ 	 * The physical location of the data page (relation file and block).
+ 	 */
+ 	RelFileNode node;
+ 	BlockNumber blkno;
+ 
+ 	/*
+ 	 * xrecoff is the xrecoff field of the WAL record's LSN; callers pass
+ 	 * lsn.xrecoff.  Only xrecoff is kept, on the assumption that xlogid does
+ 	 * not change within one read-ahead batch.  NOTE(review): confirm that a
+ 	 * batch can never span an xlogid boundary.
+ 	 */
+ 	uint32 xrecoff;
+ 
+ 	/*
+ 	 * has_fpw indicates whether the WAL record contains a full-page write for
+ 	 * this page.  Entries with has_fpw set are not prefetched.
+ 	 */
+ 	bool has_fpw;
+ }; 
+ typedef struct XLogReadAhead XLogReadAhead;
+ 
+ /*
+  * ReadAheadQueueSize is the fixed capacity of the XLogReadAhead queue.
+  * When the number of queued entries reaches this amount, we execute readahead.
+  * The queue occupies 16MB.
+  */
+ #define ReadAheadQueueSize	(16 * 1024 * 1024 / sizeof(XLogReadAhead))
+ 
+ /* The queue for XLogReadAhead entries. */
+ static XLogReadAhead ReadAheadQueue[ReadAheadQueueSize];
+ 
+ /* The number of XLogReadAhead entries currently used. */
+ static uint32 ReadAheadQueueUsed = 0;
+ 
+ /* prototype of local function */
+ static int ReadAheadCompare(const void *l, const void *r);
+ 
+ /*
+  * Append a new XLogReadAhead entry to the queue.
+  *
+  * If the queue is already full, ReadAheadExecute() is called first, which
+  * prefetches the queued pages and empties the queue; the entry is then added.
+  */
+ void
+ ReadAheadAddEntry(RelFileNode node, BlockNumber blkno, uint32 xrecoff,
+ 	bool has_fpw)
+ {
+ 	/* All slots are used, so prefetch the queued pages and empty the queue. */
+ 	if (ReadAheadQueueUsed >= ReadAheadQueueSize)
+ 	{
+ 		ReadAheadExecute();
+ 	}
+ 
+ 	/* Fill in the next free slot of ReadAheadQueue. */
+ 	ReadAheadQueue[ReadAheadQueueUsed].node = node;
+ 	ReadAheadQueue[ReadAheadQueueUsed].blkno = blkno;
+ 	ReadAheadQueue[ReadAheadQueueUsed].xrecoff = xrecoff;
+ 	ReadAheadQueue[ReadAheadQueueUsed].has_fpw = has_fpw;
+ 
+ 	ReadAheadQueueUsed++;
+ }
+ 
+ /*
+  * ReadAhead queue availability check
+  *
+  * Returns true if the XLogReadAhead queue has room for num more entries,
+  * false otherwise.  The queue is a fixed-size array and is never enlarged
+  * here; callers that get false are expected to have the queue drained
+  * (see ReadAheadExecute) before retrying.
+  */
+ bool
+ ReadAheadHasRoom(int num)
+ {
+ 	return (ReadAheadQueueUsed + num <= ReadAheadQueueSize);
+ }
+ 
+ /*
+  * Check whether info1 and info2 refer to the same data page.
+  */
+ #define IS_SAME_PAGE(info1, info2) \
+ 	(RelFileNodeEquals((info1).node, (info2).node) && \
+ 		(info1).blkno == (info2).blkno)
+  
+ /*
+  * Prefetch the queued data pages and empty the queue.
+  *
+  * The queue is sorted first so that entries for the same page are adjacent
+  * (avoiding duplicate requests) and, hopefully, seek time is reduced.  An
+  * entry is skipped when it repeats the previous entry's page or when it has
+  * a full page write.
+  * Each prefetch goes through smgropen()/smgrprefetch() on the main fork.
+  */
+ void
+ ReadAheadExecute(void)
+ {
+ 	uint32 i;		/* same type as ReadAheadQueueUsed */
+ 	SMgrRelation reln;
+ 	XLogReadAhead last_entry = { { 0, 0, 0, }, 0, 0, false };
+ 
+ 	ereport(DEBUG1, (errmsg("%u blocks are prefetch canditate",
+ 		ReadAheadQueueUsed)));
+ 
+ 	/* Sort the XLogReadAhead queue for effective disk access. */
+ 	qsort(ReadAheadQueue, ReadAheadQueueUsed, sizeof(XLogReadAhead),
+ 		ReadAheadCompare);
+ 
+ 	for (i = 0; i < ReadAheadQueueUsed; i++)
+ 	{
+ 		/* Prefetch once per page, and only if it has no full page write. */
+ 		if (IS_SAME_PAGE(last_entry, ReadAheadQueue[i]) ||
+ 				ReadAheadQueue[i].has_fpw)
+ 		{
+ 			last_entry = ReadAheadQueue[i];
+ 			continue;
+ 		}
+ 
+ 		/* Create SMgrRelation object */
+ 		reln = smgropen(ReadAheadQueue[i].node);
+ 
+ 		/* Read ahead with prefetch API */
+ 		smgrprefetch(reln, MAIN_FORKNUM, ReadAheadQueue[i].blkno);
+ 
+ 		/* Remember this entry so later duplicates of its page are skipped. */
+ 		last_entry = ReadAheadQueue[i];
+ 	}
+ 
+ 	ReadAheadQueueUsed = 0;
+ }
+ 
+ /*
+  * Compare two XLogReadAhead objects (qsort comparator)
+  *
+  * Returns 1 when l > r, 0 when l == r, and -1 when l < r.
+  * The fields are compared in the following order of priority:
+  *    1. node.spcNode
+  *    2. node.dbNode
+  *    3. node.relNode
+  *    4. blkno
+  *    5. xrecoff
+  */
+ static int
+ ReadAheadCompare(const void *l, const void *r)
+ {
+ 	const XLogReadAhead *left = (const XLogReadAhead *) l;
+ 	const XLogReadAhead *right = (const XLogReadAhead *) r;
+ 
+ 	/* compare node.spcNode */
+ 	if (left->node.spcNode > right->node.spcNode)
+ 		return 1;
+ 	else if (left->node.spcNode < right->node.spcNode)
+ 		return -1;
+ 
+ 	/* compare node.dbNode */
+ 	if (left->node.dbNode > right->node.dbNode)
+ 		return 1;
+ 	else if (left->node.dbNode < right->node.dbNode)
+ 		return -1;
+ 
+ 	/* compare node.relNode */
+ 	if (left->node.relNode > right->node.relNode)
+ 		return 1;
+ 	else if (left->node.relNode < right->node.relNode)
+ 		return -1;
+ 
+ 	/* compare blkno */
+ 	if (left->blkno > right->blkno)
+ 		return 1;
+ 	else if (left->blkno < right->blkno)
+ 		return -1;
+ 
+ 	/* compare xrecoff */
+ 	if (left->xrecoff > right->xrecoff)
+ 		return 1;
+ 	else if (left->xrecoff < right->xrecoff)
+ 		return -1;
+ 
+ 	/* The two XLogReadAhead entries are equal. */
+ 	return 0;
+ }
diff -rcN pgsql.posix_fadvise/src/backend/access/transam/rmgr.c pgsql/src/backend/access/transam/rmgr.c
*** pgsql.posix_fadvise/src/backend/access/transam/rmgr.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/transam/rmgr.c	2009-01-09 20:32:12.000000000 +0900
***************
*** 24,43 ****
  
  
  const RmgrData RmgrTable[RM_MAX_ID + 1] = {
! 	{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL},
! 	{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL},
! 	{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL},
! 	{"CLOG", clog_redo, clog_desc, NULL, NULL, NULL},
! 	{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL},
! 	{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL},
! 	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL},
! 	{"Reserved 7", NULL, NULL, NULL, NULL, NULL},
! 	{"Reserved 8", NULL, NULL, NULL, NULL, NULL},
! 	{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL},
! 	{"Heap", heap_redo, heap_desc, NULL, NULL, NULL},
! 	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint},
! 	{"Hash", hash_redo, hash_desc, NULL, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint},
! 	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL, NULL}
  };
--- 24,43 ----
  
  
  const RmgrData RmgrTable[RM_MAX_ID + 1] = {
! 	{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL, NULL},
! 	{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL, NULL},
! 	{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL, NULL},
! 	{"CLOG", clog_redo, clog_desc, NULL, NULL, NULL, NULL},
! 	{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL, NULL},
! 	{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL, NULL},
! 	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL, NULL},
! 	{"Reserved 7", NULL, NULL, NULL, NULL, NULL, NULL},
! 	{"Reserved 8", NULL, NULL, NULL, NULL, NULL, NULL},
! 	{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL, heap2_readahead},
! 	{"Heap", heap_redo, heap_desc, NULL, NULL, NULL, heap_readahead},
! 	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint, btree_readahead},
! 	{"Hash", hash_redo, hash_desc, NULL, NULL, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint, gin_readahead},
! 	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint, gist_readahead},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL, NULL, seq_readahead}
  };
diff -rcN pgsql.posix_fadvise/src/backend/access/transam/xlog.c pgsql/src/backend/access/transam/xlog.c
*** pgsql.posix_fadvise/src/backend/access/transam/xlog.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/access/transam/xlog.c	2009-01-09 20:32:12.000000000 +0900
***************
*** 24,29 ****
--- 24,30 ----
  
  #include "access/clog.h"
  #include "access/multixact.h"
+ #include "access/readahead.h"
  #include "access/subtrans.h"
  #include "access/transam.h"
  #include "access/tuptoaster.h"
***************
*** 381,386 ****
--- 382,398 ----
  static char *readRecordBuf = NULL;
  static uint32 readRecordBufSize = 0;
  
+ /*
+  * Buffer for queued WAL records (fixed size)
+  *
+  * This buffer holds WAL records together with their LSNs.  Once all WAL
+  * records of one WAL segment file have been read, they are redone and the
+  * buffer is emptied.  Twice XLogSegSize, allowing for the total size of the
+  * WAL records plus their LSNs, is therefore assumed to be enough.
+  */
+ static char RecordQueueBuf[XLogSegSize * 2];
+ static uint32 RecordQueueBufUsed = 0;
+ 
  /* State information for XLOG reading */
  static XLogRecPtr ReadRecPtr;	/* start of last record read */
  static XLogRecPtr EndRecPtr;	/* end+1 of last record read */
***************
*** 441,446 ****
--- 453,461 ----
  static void rm_redo_error_callback(void *arg);
  static int get_sync_bit(int method);
  
+ static void PushRecord(XLogRecPtr lsn, XLogRecord *record);
+ static bool PushReadAhead(XLogRecPtr lsn, XLogRecord *record);
+ static void RedoRecords(void);
  
  /*
   * Insert an XLOG record having the specified RMID and info bytes,
***************
*** 2364,2369 ****
--- 2379,2386 ----
  	ListCell   *cell;
  	int			fd;
  
+ 	ereport(DEBUG1, (errmsg("XLOG switch to %X/%X", log, seg)));
+ 
  	/*
  	 * Loop looking for a suitable timeline ID: we might need to read any of
  	 * the timelines listed in expectedTLIs.
***************
*** 3424,3429 ****
--- 3441,3453 ----
  	return (XLogRecord *) buffer;
  
  next_record_is_invalid:;
+ 	/*
+ 	 * Reached to unused area of current WAL segment file, redo all of WAL
+ 	 * records in the queue.
+ 	 */
+ 	ereport(DEBUG1, (errmsg("next record is invalid(maybe unused area)")));
+ 	RedoRecords();
+ 
  	if (readFile >= 0)
  	{
  		close(readFile);
***************
*** 5154,5160 ****
  		{
  			bool		recoveryContinue = true;
  			bool		recoveryApply = true;
- 			ErrorContextCallback errcontext;
  
  			InRedo = true;
  			ereport(LOG,
--- 5178,5183 ----
***************
*** 5196,5228 ****
  						break;
  				}
  
! 				/* Setup error traceback support for ereport() */
! 				errcontext.callback = rm_redo_error_callback;
! 				errcontext.arg = (void *) record;
! 				errcontext.previous = error_context_stack;
! 				error_context_stack = &errcontext;
! 
! 				/* nextXid must be beyond record's xid */
! 				if (TransactionIdFollowsOrEquals(record->xl_xid,
! 												 ShmemVariableCache->nextXid))
  				{
! 					ShmemVariableCache->nextXid = record->xl_xid;
! 					TransactionIdAdvance(ShmemVariableCache->nextXid);
  				}
  
! 				if (record->xl_info & XLR_BKP_BLOCK_MASK)
! 					RestoreBkpBlocks(record, EndRecPtr);
! 
! 				RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
! 
! 				/* Pop the error context stack */
! 				error_context_stack = errcontext.previous;
  
  				LastRec = ReadRecPtr;
  
  				record = ReadRecord(NULL, LOG);
  			} while (record != NULL && recoveryContinue);
  
  			/*
  			 * end of main redo apply loop
  			 */
--- 5219,5262 ----
  						break;
  				}
  
! 				/*
! 				 * Push WAL record in WAL record buffer with its LSN for
! 				 * delayed redo. 
! 				 * If the WAL record queue is full, redo all WAL records in the
! 				 * queue and make the queue empty.
! 				 */
! 				ereport(DEBUG1,
! 					(errmsg("WAL record queue is used %d(%d) bytes at %X/%08X.",
! 						RecordQueueBufUsed, record->xl_tot_len,
! 						EndRecPtr.xlogid, EndRecPtr.xrecoff)));
! 				if (RecordQueueBufUsed + MAXALIGN(sizeof(XLogRecPtr)) +
! 						MAXALIGN(record->xl_tot_len) > sizeof(RecordQueueBuf))
  				{
! 					ereport(DEBUG1, (errmsg("WAL record queue is full")));
! 					RedoRecords();
  				}
+ 				PushRecord(EndRecPtr, record);
  
! 				/*
! 				 * Push page information to prefetch later.
! 				 * If no more space, redo all records in queue and make the
! 				 * queue empty.
! 				 */
! 				while (!PushReadAhead(EndRecPtr, record))
! 				{
! 					ereport(DEBUG1, (errmsg("ReadAhead queue is full.")));
! 					RedoRecords();
! 				}
  
  				LastRec = ReadRecPtr;
  
  				record = ReadRecord(NULL, LOG);
  			} while (record != NULL && recoveryContinue);
  
+ 			/* All WAL records are read, redo all queued WAL records.  */
+ 			ereport(DEBUG1, (errmsg("end of redo apply loop")));
+ 			RedoRecords();
+ 
  			/*
  			 * end of main redo apply loop
  			 */
***************
*** 5447,5452 ****
--- 5481,5568 ----
  }
  
  /*
+  * Append an LSN and its WAL record to RecordQueueBuf.  Each piece is padded
+  * to a MAXALIGN boundary.  No bounds check here: the caller ensures room.
+  */
+ static void
+ PushRecord(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Assert(record);
+ 
+ 	memcpy(RecordQueueBuf + RecordQueueBufUsed, &lsn, sizeof(XLogRecPtr));
+ 	RecordQueueBufUsed += MAXALIGN(sizeof(XLogRecPtr));
+ 	memcpy(RecordQueueBuf + RecordQueueBufUsed, record, record->xl_tot_len);
+ 	RecordQueueBufUsed += MAXALIGN(record->xl_tot_len);
+ }
+ 
+ /*
+  * Pass the record to its resource manager's rm_readahead hook, if it has one.
+  */
+ static bool
+ PushReadAhead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Assert(record);
+ 
+ 	if (!RmgrTable[record->xl_rmid].rm_readahead)
+ 		return true;	/* no hook for this rmgr: nothing to queue */
+ 
+ 	return RmgrTable[record->xl_rmid].rm_readahead(lsn, record);
+ }
+ 
+ /*
+  * Prefetch queued pages, then redo every queued WAL record and empty the queue.
+  */
+ static void
+ RedoRecords(void)
+ {
+ 	ErrorContextCallback errcontext;
+ 	uint32 off = 0;
+ 
+ 	/* Readahead data pages which will be modified during redo. */
+ 	ReadAheadExecute();
+ 
+ 	while (off < RecordQueueBufUsed)
+ 	{
+ 		XLogRecPtr lsn;
+ 		XLogRecord *record;
+ 
+ 		/* Extract LSN and WAL record image from local buffer. */
+ 		memcpy(&lsn, RecordQueueBuf + off, sizeof(XLogRecPtr));
+ 		off += MAXALIGN(sizeof(XLogRecPtr));
+ 		record = (XLogRecord *)(RecordQueueBuf + off);
+ 
+ 		/* Setup error traceback support for ereport() */
+ 		errcontext.callback = rm_redo_error_callback;
+ 		errcontext.arg = (void *) record;
+ 		errcontext.previous = error_context_stack;
+ 		error_context_stack = &errcontext;
+ 
+ 		/* nextXid must be beyond record's xid */
+ 		if (TransactionIdFollowsOrEquals(record->xl_xid,
+ 										 ShmemVariableCache->nextXid))
+ 		{
+ 			ShmemVariableCache->nextXid = record->xl_xid;
+ 			TransactionIdAdvance(ShmemVariableCache->nextXid);
+ 		}
+ 
+ 		if (record->xl_info & XLR_BKP_BLOCK_MASK)
+ 			RestoreBkpBlocks(record, lsn);
+ 
+ 		/* Redo with WAL record and its LSN. */
+ 		RmgrTable[record->xl_rmid].rm_redo(lsn, record);
+ 
+ 		/* Pop the error context stack */
+ 		error_context_stack = errcontext.previous;
+ 
+ 		off += MAXALIGN(record->xl_tot_len);
+ 	}
+ 
+ 	/* Empty the queue; only the first RecordQueueBufUsed bytes were written. */
+ 	MemSet(RecordQueueBuf, 0, RecordQueueBufUsed);
+ 	RecordQueueBufUsed = 0;
+ }
+ 
+ /*
   * Subroutine to try to fetch and validate a prior checkpoint record.
   *
   * whichChkpt identifies the checkpoint (merely for reporting purposes).
diff -rcN pgsql.posix_fadvise/src/backend/commands/sequence.c pgsql/src/backend/commands/sequence.c
*** pgsql.posix_fadvise/src/backend/commands/sequence.c	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/backend/commands/sequence.c	2009-01-09 20:32:07.000000000 +0900
***************
*** 15,20 ****
--- 15,21 ----
  #include "postgres.h"
  
  #include "access/heapam.h"
+ #include "access/readahead.h"
  #include "access/transam.h"
  #include "access/xact.h"
  #include "access/xlogutils.h"
***************
*** 1382,1384 ****
--- 1383,1415 ----
  	appendStringInfo(buf, "rel %u/%u/%u",
  			   xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
  }
+ 
+ /*
+  *    seq_readahead  - enqueue information about data pages
+  *
+  * For an XLOG_SEQ_LOG record, enqueue block 0 of the relation it names.
+  * Returns false, adding nothing, if ReadAheadHasRoom(1) fails; else true.
+  */
+ bool
+ seq_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);		/* must precede the first dereference below */
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 
+ 	switch (info)
+ 	{
+ 		case XLOG_SEQ_LOG:
+ 			{
+ 				xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, 0, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
diff -rcN pgsql.posix_fadvise/src/backend/storage/smgr/md.c pgsql/src/backend/storage/smgr/md.c
*** pgsql.posix_fadvise/src/backend/storage/smgr/md.c	2009-01-09 20:43:22.000000000 +0900
--- pgsql/src/backend/storage/smgr/md.c	2009-01-09 20:32:09.000000000 +0900
***************
*** 559,565 ****
  	long		seekpos;
  	MdfdVec    *v;
  
! 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
  
  	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
  	Assert(seekpos < BLCKSZ * RELSEG_SIZE);
--- 559,567 ----
  	long		seekpos;
  	MdfdVec    *v;
  
! 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_RETURN_NULL);
! 	if (!v)
! 		return;
  
  	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
  	Assert(seekpos < BLCKSZ * RELSEG_SIZE);
diff -rcN pgsql.posix_fadvise/src/include/access/gin.h pgsql/src/include/access/gin.h
*** pgsql.posix_fadvise/src/include/access/gin.h	2009-01-09 20:43:22.000000000 +0900
--- pgsql/src/include/access/gin.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 256,261 ****
--- 256,262 ----
  /* ginxlog.c */
  extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool gin_readahead(XLogRecPtr lsn, XLogRecord *record);
  extern void gin_xlog_startup(void);
  extern void gin_xlog_cleanup(void);
  extern bool gin_safe_restartpoint(void);
diff -rcN pgsql.posix_fadvise/src/include/access/gist_private.h pgsql/src/include/access/gist_private.h
*** pgsql.posix_fadvise/src/include/access/gist_private.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/access/gist_private.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 250,255 ****
--- 250,256 ----
  /* gistxlog.c */
  extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void gist_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool gist_readahead(XLogRecPtr lsn, XLogRecord *record);
  extern void gist_xlog_startup(void);
  extern void gist_xlog_cleanup(void);
  extern bool gist_safe_restartpoint(void);
diff -rcN pgsql.posix_fadvise/src/include/access/heapam.h pgsql/src/include/access/heapam.h
*** pgsql.posix_fadvise/src/include/access/heapam.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/access/heapam.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 124,131 ****
--- 124,133 ----
  
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool heap_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool heap2_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
  			  ItemPointerData from,
diff -rcN pgsql.posix_fadvise/src/include/access/nbtree.h pgsql/src/include/access/nbtree.h
*** pgsql.posix_fadvise/src/include/access/nbtree.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/access/nbtree.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 591,596 ****
--- 591,597 ----
   */
  extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void btree_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool btree_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  extern void btree_xlog_startup(void);
  extern void btree_xlog_cleanup(void);
  extern bool btree_safe_restartpoint(void);
diff -rcN pgsql.posix_fadvise/src/include/access/readahead.h pgsql/src/include/access/readahead.h
*** pgsql.posix_fadvise/src/include/access/readahead.h	1970-01-01 09:00:00.000000000 +0900
--- pgsql/src/include/access/readahead.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 0 ****
--- 1,31 ----
+ /*-------------------------------------------------------------------------
+  *
+  * readahead.h
+  *		Store information of data pages which should be read ahead.
+  *
+  * Portions Copyright (c) 2008, Nippon Telegraph and Telephone Corporation
+  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *-------------------------------------------------------------------------
+  */
+ 
+ #ifndef READAHEAD_H
+ #define READAHEAD_H
+ 
+ #include "postgres.h"
+ #include "storage/relfilenode.h"
+ #include "storage/block.h"
+ #include "access/xlogdefs.h"
+ #include "access/xlog.h"
+ 
+ /*
+  * Prototypes of public functions (callers test ReadAheadHasRoom() first).
+  */
+ extern void ReadAheadAddEntry(RelFileNode node, BlockNumber blkno,
+ 							  uint32 xrecoff, bool has_fpw);
+ extern bool ReadAheadHasRoom(int num);
+ extern void ReadAheadExecute(void);
+ 
+ #endif /* READAHEAD_H */
+ 
diff -rcN pgsql.posix_fadvise/src/include/access/xlog.h pgsql/src/include/access/xlog.h
*** pgsql.posix_fadvise/src/include/access/xlog.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/access/xlog.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 196,201 ****
--- 196,202 ----
  
  extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool xlog_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  
  extern void UpdateControlFile(void);
  extern Size XLOGShmemSize(void);
diff -rcN pgsql.posix_fadvise/src/include/access/xlog_internal.h pgsql/src/include/access/xlog_internal.h
*** pgsql.posix_fadvise/src/include/access/xlog_internal.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/access/xlog_internal.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 235,240 ****
--- 235,241 ----
  	void		(*rm_startup) (void);
  	void		(*rm_cleanup) (void);
  	bool		(*rm_safe_restartpoint) (void);
+ 	bool		(*rm_readahead) (XLogRecPtr lsn, XLogRecord *rptr);
  } RmgrData;
  
  extern const RmgrData RmgrTable[];
diff -rcN pgsql.posix_fadvise/src/include/commands/sequence.h pgsql/src/include/commands/sequence.h
*** pgsql.posix_fadvise/src/include/commands/sequence.h	2009-01-09 20:42:53.000000000 +0900
--- pgsql/src/include/commands/sequence.h	2009-01-09 20:32:07.000000000 +0900
***************
*** 98,102 ****
--- 98,103 ----
  
  extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void seq_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool seq_readahead(XLogRecPtr lsn, XLogRecord *record);
  
  #endif   /* SEQUENCE_H */
