Hi,
I'm trying to see if it makes sense to do the double-buffering of page
writes before going further ahead with CRC checking. I came up with the
attached patch; it does the double-buffering inconditionally, because as
it was said, it allows releasing the io_in_progress lock (and resetting
BM_IO_IN_PROGRESS) early.
So far I have not managed to convince me that this is a correct change
to make; the io_in_progress bits are pretty convoluted -- for example, I
wonder how does "releasing" the buffer early (before actually sending
the write to the kernel and marking it not dirty) interact with
checkpoint and a possible full-page image.
Basically the change is to take the unsetting of BM_DIRTY out of
TerminateBufferIO and into its own routine; and in FlushBuffer, release
io_in_progress just after copying the buffer contents elsewhere, and
mark the buffer not dirty after actually doing the write.
Thoughts?
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.239
diff -c -p -r1.239 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 20 Oct 2008 21:11:15 -0000 1.239
--- src/backend/storage/buffer/bufmgr.c 21 Oct 2008 15:30:38 -0000
*************** static void BufferSync(int flags);
*** 84,91 ****
static int SyncOneBuffer(int buf_id, bool skip_recently_used);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
! static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
! int set_flag_bits);
static void buffer_write_error_callback(void *arg);
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum,
--- 84,91 ----
static int SyncOneBuffer(int buf_id, bool skip_recently_used);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
! static void TerminateBufferIO(volatile BufferDesc *buf, int set_flag_bits);
! static void MarkBufferNotDirty(volatile BufferDesc *buf);
static void buffer_write_error_callback(void *arg);
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum,
*************** ReadBuffer_common(SMgrRelation smgr, boo
*** 395,401 ****
else
{
/* Set BM_VALID, terminate IO, and wake up any waiters */
! TerminateBufferIO(bufHdr, false, BM_VALID);
}
if (VacuumCostActive)
--- 395,401 ----
else
{
/* Set BM_VALID, terminate IO, and wake up any waiters */
! TerminateBufferIO(bufHdr, BM_VALID);
}
if (VacuumCostActive)
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1792,1797 ****
--- 1792,1798 ----
{
XLogRecPtr recptr;
ErrorContextCallback errcontext;
+ char dblbuf[BLCKSZ];
/*
* Acquire the buffer's io_in_progress lock. If StartBufferIO returns
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1834,1856 ****
buf->flags &= ~BM_JUST_DIRTIED;
UnlockBufHdr(buf);
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
! (char *) BufHdrGetBlock(buf),
false);
BufferFlushCount++;
TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode);
- /*
- * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
- * end the io_in_progress state.
- */
- TerminateBufferIO(buf, true, 0);
-
/* Pop the error context stack */
error_context_stack = errcontext.previous;
}
--- 1835,1863 ----
buf->flags &= ~BM_JUST_DIRTIED;
UnlockBufHdr(buf);
+ /*
+ * We make a copy of the buffer to write. This allows us to release the
+ * io_in_progress lock early, before actually doing the write.
+ */
+ memcpy(dblbuf, BufHdrGetBlock(buf), BLCKSZ);
+
+ /* End the io_in_progress state. */
+ TerminateBufferIO(buf, 0);
+
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
! (char *) dblbuf,
false);
+ /* Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) */
+ MarkBufferNotDirty(buf);
+
BufferFlushCount++;
TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode);
/* Pop the error context stack */
error_context_stack = errcontext.previous;
}
*************** StartBufferIO(volatile BufferDesc *buf,
*** 2578,2595 ****
* We hold the buffer's io_in_progress lock
* The buffer is Pinned
*
- * If clear_dirty is TRUE and BM_JUST_DIRTIED is not set, we clear the
- * buffer's BM_DIRTY flag. This is appropriate when terminating a
- * successful write. The check on BM_JUST_DIRTIED is necessary to avoid
- * marking the buffer clean if it was re-dirtied while we were writing.
- *
* set_flag_bits gets ORed into the buffer's flags. It must include
* BM_IO_ERROR in a failure case. For successful completion it could
* be 0, or BM_VALID if we just finished reading in the page.
*/
static void
! TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
! int set_flag_bits)
{
Assert(buf == InProgressBuf);
--- 2585,2596 ----
* We hold the buffer's io_in_progress lock
* The buffer is Pinned
*
* set_flag_bits gets ORed into the buffer's flags. It must include
* BM_IO_ERROR in a failure case. For successful completion it could
* be 0, or BM_VALID if we just finished reading in the page.
*/
static void
! TerminateBufferIO(volatile BufferDesc *buf, int set_flag_bits)
{
Assert(buf == InProgressBuf);
*************** TerminateBufferIO(volatile BufferDesc *b
*** 2597,2604 ****
Assert(buf->flags & BM_IO_IN_PROGRESS);
buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
- if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
- buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
buf->flags |= set_flag_bits;
UnlockBufHdr(buf);
--- 2598,2603 ----
*************** TerminateBufferIO(volatile BufferDesc *b
*** 2609,2614 ****
--- 2608,2631 ----
}
/*
+ * Clear the buffer's BM_DIRTY flag, unless BM_JUST_DIRTIED is set.
+ *
+ * This is appropriate when terminating a successful write. The check on
+ * BM_JUST_DIRTIED is necessary to avoid marking the buffer clean if it was
+ * re-dirtied while we were writing.
+ */
+ static void
+ MarkBufferNotDirty(volatile BufferDesc *buf)
+ {
+ LockBufHdr(buf);
+
+ if (!(buf->flags & BM_JUST_DIRTIED))
+ buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
+
+ UnlockBufHdr(buf);
+ }
+
+ /*
* AbortBufferIO: Clean up any active buffer I/O after an error.
*
* All LWLocks we might have held have been released,
*************** AbortBufferIO(void)
*** 2662,2668 ****
errdetail("Multiple failures --- write error might be permanent.")));
}
}
! TerminateBufferIO(buf, false, BM_IO_ERROR);
}
}
--- 2679,2685 ----
errdetail("Multiple failures --- write error might be permanent.")));
}
}
! TerminateBufferIO(buf, BM_IO_ERROR);
}
}
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers