Index: src/backend/storage/buffer/bufmgr.c
===================================================================
--- src/backend/storage/buffer/bufmgr.c	(revision 1864)
+++ src/backend/storage/buffer/bufmgr.c	(working copy)
@@ -985,6 +985,30 @@
 	}
 }
 
+typedef struct BufAndTag
+{
+	int			buf_id;
+	BufferTag	tag;
+} BufAndTag;
+
+/* Sort by rnode (field-wise; memcmp order is endian-dependent), then blockNum. */
+static int
+bufcmp(const void *a, const void *b)
+{
+	const BufAndTag *lhs = (const BufAndTag *) a;
+	const BufAndTag *rhs = (const BufAndTag *) b;
+
+	if (lhs->tag.rnode.spcNode != rhs->tag.rnode.spcNode)
+		return (lhs->tag.rnode.spcNode < rhs->tag.rnode.spcNode) ? -1 : 1;
+	if (lhs->tag.rnode.dbNode != rhs->tag.rnode.dbNode)
+		return (lhs->tag.rnode.dbNode < rhs->tag.rnode.dbNode) ? -1 : 1;
+	if (lhs->tag.rnode.relNode != rhs->tag.rnode.relNode)
+		return (lhs->tag.rnode.relNode < rhs->tag.rnode.relNode) ? -1 : 1;
+	if (lhs->tag.blockNum < rhs->tag.blockNum)
+		return -1;
+	return (lhs->tag.blockNum > rhs->tag.blockNum) ? 1 : 0;
+}
+
 /*
  * BufferSync -- Write out all dirty buffers in the pool.
  *
@@ -996,9 +1020,10 @@
 BufferSync(int flags)
 {
 	int			buf_id;
-	int			num_to_scan;
 	int			num_to_write;
 	int			num_written;
+	int			i;
+	BufAndTag  *buf_to_write;
 
 	/* Make sure we can handle the pin inside SyncOneBuffer */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
@@ -1019,6 +1044,7 @@
 	 * BM_CHECKPOINT_NEEDED still set.	This is OK since any such buffer would
 	 * certainly need to be written for the next checkpoint attempt, too.
 	 */
+	buf_to_write = (BufAndTag *) palloc(NBuffers * sizeof(BufAndTag));
 	num_to_write = 0;
 	for (buf_id = 0; buf_id < NBuffers; buf_id++)
 	{
@@ -1033,6 +1059,8 @@
 		if (bufHdr->flags & BM_DIRTY)
 		{
 			bufHdr->flags |= BM_CHECKPOINT_NEEDED;
+			buf_to_write[num_to_write].buf_id = buf_id;
+			buf_to_write[num_to_write].tag = bufHdr->tag;
 			num_to_write++;
 		}
 
@@ -1044,19 +1072,21 @@
 
 	/*
 	 * Loop over all buffers again, and write the ones (still) marked with
-	 * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep point
-	 * since we might as well dump soon-to-be-recycled buffers first.
+	 * BM_CHECKPOINT_NEEDED.  In this loop, we write buffers in relation
+	 * and block-number order, so writes within a relation are sequential.
 	 *
 	 * Note that we don't read the buffer alloc count here --- that should be
 	 * left untouched till the next BgBufferSync() call.
 	 */
-	buf_id = StrategySyncStart(NULL, NULL);
-	num_to_scan = NBuffers;
+	qsort(buf_to_write, num_to_write, sizeof(BufAndTag), bufcmp);
 	num_written = 0;
-	while (num_to_scan-- > 0)
+	for (i = 0; i < num_to_write; i++)
 	{
-		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+		volatile BufferDesc *bufHdr;
 
+		buf_id = buf_to_write[i].buf_id;
+		bufHdr = &BufferDescriptors[buf_id];
+
 		/*
 		 * We don't need to acquire the lock here, because we're only looking
 		 * at a single bit. It's possible that someone else writes the buffer
@@ -1098,9 +1128,6 @@
 									 (double) num_written / num_to_write);
 			}
 		}
-
-		if (++buf_id >= NBuffers)
-			buf_id = 0;
 	}
 
 	/*
@@ -1108,6 +1135,7 @@
 	 * buffers written by other backends or bgwriter scan.
 	 */
 	CheckpointStats.ckpt_bufs_written += num_written;
+	pfree(buf_to_write);
 }
 
 /*
