On Sat, Mar 11, 2017 at 12:03 AM, Artur Zakirov
<[email protected]> wrote:
> Because BM_PERMANENT is used for init forks of unlogged indexes now.
Yes, indeed.
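The patch below makes blbuildempty(), btbuildempty() and spgbuildempty()
build their init-fork pages through shared buffers instead of calling
smgrwrite() directly, and flags buffers of INIT_FORKNUM as BM_PERMANENT
so that checkpoints write them out. That is what allows the removal of
the smgrimmedsync() calls. All three routines now follow the same shape,
roughly (a sketch, not the patch itself):

    Buffer      metabuf;

    /* Extend the init fork through shared buffers */
    metabuf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW,
                                 RBM_NORMAL, NULL);
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    START_CRIT_SECTION();
    /* ... fill in the page held in the buffer ... */
    MarkBufferDirty(metabuf);
    log_newpage_buffer(metabuf, false);
    END_CRIT_SECTION();

    UnlockReleaseBuffer(metabuf);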
--
Michael
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index 913f1f8a51..3557b106d8 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -158,31 +158,24 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
void
blbuildempty(Relation index)
{
- Page metapage;
+ Buffer MetaBuffer;
- /* Construct metapage. */
- metapage = (Page) palloc(BLCKSZ);
- BloomFillMetapage(index, metapage);
+ /* An empty bloom index has one meta page */
+ MetaBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
- /*
- * Write the page and log it. It might seem that an immediate sync
- * would be sufficient to guarantee that the file exists on disk, but
- * recovery itself might remove it while replaying, for example, an
- * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
- * need this even when wal_level=minimal.
- */
- PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
- smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
- (char *) metapage, true);
- log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
- BLOOM_METAPAGE_BLKNO, metapage, false);
+ /* Initialize the meta buffer */
+ BloomFillMetapage(index, BufferGetPage(MetaBuffer));
- /*
- * An immediate sync is required even if we xlog'd the page, because the
- * write did not go through shared_buffers and therefore a concurrent
- * checkpoint may have moved the redo pointer past our xlog record.
- */
- smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+ /* And log it */
+ START_CRIT_SECTION();
+ MarkBufferDirty(MetaBuffer);
+ log_newpage_buffer(MetaBuffer, false);
+ END_CRIT_SECTION();
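+
+ /*
+ * Note that no smgrimmedsync() is needed here, contrary to the
+ * previous coding: the write goes through shared buffers and the
+ * buffer is flagged BM_PERMANENT, so the next checkpoint will write
+ * it out, while the WAL record above covers crash recovery.
+ */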
+
+ /* Unlock and release the buffer */
+ UnlockReleaseBuffer(MetaBuffer);
}
/*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 775f2ff1f8..e1801ea939 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -23,6 +23,7 @@
#include "access/xlog.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
+#include "miscadmin.h"
#include "pgstat.h"
#include "storage/condition_variable.h"
#include "storage/indexfsm.h"
@@ -282,31 +283,22 @@ btbuildCallback(Relation index,
void
btbuildempty(Relation index)
{
- Page metapage;
-
- /* Construct metapage. */
- metapage = (Page) palloc(BLCKSZ);
- _bt_initmetapage(metapage, P_NONE, 0);
-
- /*
- * Write the page and log it. It might seem that an immediate sync
- * would be sufficient to guarantee that the file exists on disk, but
- * recovery itself might remove it while replaying, for example, an
- * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
- * need this even when wal_level=minimal.
- */
- PageSetChecksumInplace(metapage, BTREE_METAPAGE);
- smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
- (char *) metapage, true);
- log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
- BTREE_METAPAGE, metapage, false);
-
- /*
- * An immediate sync is required even if we xlog'd the page, because the
- * write did not go through shared_buffers and therefore a concurrent
- * checkpoint may have moved the redo pointer past our xlog record.
- */
- smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+ Buffer metabuffer;
+
+ /* An empty btree index has one meta page */
+ metabuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize and xlog meta buffer */
+ START_CRIT_SECTION();
+ _bt_initmetapage(BufferGetPage(metabuffer), P_NONE, 0);
+ MarkBufferDirty(metabuffer);
+ log_newpage_buffer(metabuffer, false);
+ END_CRIT_SECTION();
+
+ /* Unlock and release the buffer */
+ UnlockReleaseBuffer(metabuffer);
}
/*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 00a0ab4438..4252c2eb53 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -156,49 +156,50 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
void
spgbuildempty(Relation index)
{
- Page page;
-
- /* Construct metapage. */
- page = (Page) palloc(BLCKSZ);
- SpGistInitMetapage(page);
+ Buffer MetaBuffer,
+ RootBuffer,
+ TupleBuffer;
/*
- * Write the page and log it unconditionally. This is important
- * particularly for indexes created on tablespaces and databases
- * whose creation happened after the last redo pointer as recovery
- * removes any of their existing content when the corresponding
- * create records are replayed.
+ * An empty SP-GiST index has three pages:
+ * - one meta page.
+ * - one root page.
+ * - one null-tuples root page.
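+ *
+ * The three P_NEW calls below extend the fork in order, so the
+ * buffers map to SPGIST_METAPAGE_BLKNO, SPGIST_ROOT_BLKNO and
+ * SPGIST_NULL_BLKNO respectively.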
*/
- PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
- smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
- (char *) page, true);
- log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
- SPGIST_METAPAGE_BLKNO, page, false);
-
- /* Likewise for the root page. */
- SpGistInitPage(page, SPGIST_LEAF);
-
- PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
- smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_ROOT_BLKNO,
- (char *) page, true);
- log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
- SPGIST_ROOT_BLKNO, page, true);
-
- /* Likewise for the null-tuples root page. */
- SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
-
- PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
- smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_NULL_BLKNO,
- (char *) page, true);
- log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
- SPGIST_NULL_BLKNO, page, true);
+ MetaBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
+ RootBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
+ TupleBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(TupleBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize and log all the pages */
+ START_CRIT_SECTION();
- /*
- * An immediate sync is required even if we xlog'd the pages, because the
- * writes did not go through shared buffers and therefore a concurrent
- * checkpoint may have moved the redo pointer past our xlog record.
- */
- smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+ /* Construct and log the meta page */
+ SpGistInitMetapage(BufferGetPage(MetaBuffer));
+ MarkBufferDirty(MetaBuffer);
+ log_newpage_buffer(MetaBuffer, false);
+
+ /* Likewise for the root page */
+ SpGistInitPage(BufferGetPage(RootBuffer), SPGIST_LEAF);
+ MarkBufferDirty(RootBuffer);
+ log_newpage_buffer(RootBuffer, true);
+
+ /* Likewise for the null-tuples root page */
+ SpGistInitPage(BufferGetPage(TupleBuffer), SPGIST_LEAF | SPGIST_NULLS);
+ MarkBufferDirty(TupleBuffer);
+ log_newpage_buffer(TupleBuffer, true);
+
+ END_CRIT_SECTION();
+
+ /* Unlock and release the buffers. */
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
+ UnlockReleaseBuffer(TupleBuffer);
}
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3cb51204dc..66fe9ea529 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1302,6 +1302,14 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
else
buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
+ /*
+ * Buffers of the init fork of an unlogged relation must be flagged
+ * as permanent, so that checkpoints write them out. Otherwise a
+ * crash happening after the redo pointer has moved past the WAL
+ * record logging the page would lose its contents.
+ */
+ if (forkNum == INIT_FORKNUM)
+ buf_state |= BM_PERMANENT;
+
UnlockBufHdr(buf, buf_state);
if (oldPartitionLock != NULL)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index d117b66537..23054a6f6f 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -64,8 +64,9 @@
#define BM_JUST_DIRTIED (1U << 28) /* dirtied since write started */
#define BM_PIN_COUNT_WAITER (1U << 29) /* have waiter for sole pin */
#define BM_CHECKPOINT_NEEDED (1U << 30) /* must write for checkpoint */
-#define BM_PERMANENT (1U << 31) /* permanent relation (not
- * unlogged) */
+#define BM_PERMANENT (1U << 31) /* permanent relation (also set
+ * for init forks of unlogged
+ * relations) */
+
/*
* The maximum allowed value of usage_count represents a tradeoff between
* accuracy and speed of the clock-sweep buffer management algorithm. A