From 621a67a172fe2eab6fc59f2e388e0379e4837278 Mon Sep 17 00:00:00 2001
From: Amit Kapila <amit.kapila@enterprisedb.com>
Date: Thu, 16 Feb 2017 19:05:12 +0530
Subject: [PATCH 5/5] Restructure hash index creation.

Restructure the code that initializes the metapage, the initial bucket
pages, and the first bitmap page so that each page is set up by a
buffer-level helper function.  This allows each page initialization to
be performed atomically and to be WAL-logged later.
---
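Reviewer note (not part of the commit message): the new buffer-level
helpers _hash_init_metabuffer(), _hash_initbuf(), and
_hash_initbitmapbuffer() take an explicit "initpage" flag plus all the
values they need as arguments, so a later WAL patch can replay each page
initialization atomically from logged parameters alone.  The sketch below
shows roughly what such a redo routine could look like; it is an
illustration only, and the record type xl_hash_init_meta_page and its
fields are assumptions that this patch does not define.

/*
 * Hypothetical redo sketch.  xl_hash_init_meta_page is NOT part of this
 * patch; its fields are assumed to mirror the arguments of
 * _hash_init_metabuffer().
 */
static void
hash_xlog_init_meta_page(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	xl_hash_init_meta_page *xlrec;
	Buffer		metabuf;
	Page		page;

	xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);

	/* Get a zeroed, exclusively locked buffer for the metapage. */
	metabuf = XLogInitBufferForRedo(record, 0);

	/* Rebuild the metapage purely from the logged parameters. */
	_hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid,
						  xlrec->ffactor, true);

	page = BufferGetPage(metabuf);
	PageSetLSN(page, lsn);
	MarkBufferDirty(metabuf);
	UnlockReleaseBuffer(metabuf);
}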
 src/backend/access/hash/hash.c     |   4 +-
 src/backend/access/hash/hashovfl.c |  62 -----------
 src/backend/access/hash/hashpage.c | 203 +++++++++++++++++++++++++------------
 src/include/access/hash.h          |  10 +-
 4 files changed, 144 insertions(+), 135 deletions(-)

diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 24510e7..1f8a7f6 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -120,7 +120,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
 
 	/* Initialize the hash index metadata page and initial buckets */
-	num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
+	num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
 
 	/*
 	 * If we just insert the tuples into the index in scan order, then
@@ -182,7 +182,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 hashbuildempty(Relation index)
 {
-	_hash_metapinit(index, 0, INIT_FORKNUM);
+	_hash_init(index, 0, INIT_FORKNUM);
 }
 
 /*
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 3c2383f..d35089c 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -574,68 +574,6 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
 
 
 /*
- *	_hash_initbitmap()
- *
- *	 Initialize a new bitmap page.  The metapage has a write-lock upon
- *	 entering the function, and must be written by caller after return.
- *
- * 'blkno' is the block number of the new bitmap page.
- *
- * All bits in the new bitmap page are set to "1", indicating "in use".
- */
-void
-_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
-				 ForkNumber forkNum)
-{
-	Buffer		buf;
-	Page		pg;
-	HashPageOpaque op;
-	uint32	   *freep;
-
-	/*
-	 * It is okay to write-lock the new bitmap page while holding metapage
-	 * write lock, because no one else could be contending for the new page.
-	 * Also, the metapage lock makes it safe to extend the index using
-	 * _hash_getnewbuf.
-	 *
-	 * There is some loss of concurrency in possibly doing I/O for the new
-	 * page while holding the metapage lock, but this path is taken so seldom
-	 * that it's not worth worrying about.
-	 */
-	buf = _hash_getnewbuf(rel, blkno, forkNum);
-	pg = BufferGetPage(buf);
-
-	/* initialize the page's special space */
-	op = (HashPageOpaque) PageGetSpecialPointer(pg);
-	op->hasho_prevblkno = InvalidBlockNumber;
-	op->hasho_nextblkno = InvalidBlockNumber;
-	op->hasho_bucket = -1;
-	op->hasho_flag = LH_BITMAP_PAGE;
-	op->hasho_page_id = HASHO_PAGE_ID;
-
-	/* set all of the bits to 1 */
-	freep = HashPageGetBitmap(pg);
-	MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
-
-	/* dirty the new bitmap page, and release write lock and pin */
-	MarkBufferDirty(buf);
-	_hash_relbuf(rel, buf);
-
-	/* add the new bitmap page to the metapage's list of bitmaps */
-	/* metapage already has a write lock */
-	if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
-		ereport(ERROR,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("out of overflow pages in hash index \"%s\"",
-						RelationGetRelationName(rel))));
-
-	metap->hashm_mapp[metap->hashm_nmaps] = blkno;
-
-	metap->hashm_nmaps++;
-}
-
-
-/*
  *	_hash_initbitmapbuffer()
  *
  *	 Initialize a new bitmap page.  All bits in the new bitmap page are set to
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index fd7344b..07cb4f2 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -157,6 +157,36 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
 }
 
 /*
+ *	_hash_initbuf() -- Initialize the hash page in the given buffer.
+ */
+void
+_hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag,
+			  bool initpage)
+{
+	HashPageOpaque pageopaque;
+	Page		page;
+
+	page = BufferGetPage(buf);
+
+	/* initialize the page */
+	if (initpage)
+		_hash_pageinit(page, BufferGetPageSize(buf));
+
+	pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+	/*
+	 * Set hasho_prevblkno with current hashm_maxbucket. This value will
+	 * be used to validate cached HashMetaPageData. See
+	 * _hash_getbucketbuf_from_hashkey().
+	 */
+	pageopaque->hasho_prevblkno = max_bucket;
+	pageopaque->hasho_nextblkno = InvalidBlockNumber;
+	pageopaque->hasho_bucket = num_bucket;
+	pageopaque->hasho_flag = flag;
+	pageopaque->hasho_page_id = HASHO_PAGE_ID;
+}
+
+/*
  *	_hash_getnewbuf() -- Get a new page at the end of the index.
  *
  *		This has the same API as _hash_getinitbuf, except that we are adding
@@ -288,7 +318,7 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
 
 
 /*
- *	_hash_metapinit() -- Initialize the metadata page of a hash index,
+ *	_hash_init() -- Initialize the metadata page of a hash index,
  *				the initial buckets, and the initial bitmap page.
  *
  * The initial number of buckets is dependent on num_tuples, an estimate
@@ -300,19 +330,18 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
  * multiple buffer locks is ignored.
  */
 uint32
-_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
+_hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 {
-	HashMetaPage metap;
-	HashPageOpaque pageopaque;
 	Buffer		metabuf;
 	Buffer		buf;
+	Buffer		bitmapbuf;
 	Page		pg;
+	HashMetaPage metap;
+	RegProcedure procid;
 	int32		data_width;
 	int32		item_width;
 	int32		ffactor;
-	double		dnumbuckets;
 	uint32		num_buckets;
-	uint32		log2_num_buckets;
 	uint32		i;
 
 	/* safety check */
@@ -334,6 +363,96 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	if (ffactor < 10)
 		ffactor = 10;
 
+	procid = index_getprocid(rel, 1, HASHPROC);
+
+	/*
+	 * We initialize the metapage, the first N bucket pages, and the first
+	 * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
+	 * calls to occur.  This ensures that the smgr level has the right idea of
+	 * the physical index length.
+	 *
+	 * Critical section not required, because on error the creation of the
+	 * whole relation will be rolled back.
+	 */
+	metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
+	_hash_init_metabuffer(metabuf, num_tuples, procid, ffactor, false);
+	MarkBufferDirty(metabuf);
+
+	pg = BufferGetPage(metabuf);
+	metap = HashPageGetMeta(pg);
+
+	num_buckets = metap->hashm_maxbucket + 1;
+
+	/*
+	 * Release buffer lock on the metapage while we initialize buckets.
+	 * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
+	 * won't accomplish anything.  It's a bad idea to hold buffer locks for
+	 * long intervals in any case, since that can block the bgwriter.
+	 */
+	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * Initialize the first N buckets
+	 */
+	for (i = 0; i < num_buckets; i++)
+	{
+		BlockNumber blkno;
+
+		/* Allow interrupts, in case N is huge */
+		CHECK_FOR_INTERRUPTS();
+
+		blkno = BUCKET_TO_BLKNO(metap, i);
+		buf = _hash_getnewbuf(rel, blkno, forkNum);
+		_hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
+		MarkBufferDirty(buf);
+		_hash_relbuf(rel, buf);
+	}
+
+	/* Now reacquire buffer lock on metapage */
+	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
+
+	/*
+	 * Initialize the first bitmap page
+	 */
+	bitmapbuf = _hash_getnewbuf(rel, num_buckets + 1, forkNum);
+	_hash_initbitmapbuffer(bitmapbuf, metap->hashm_bmsize, false);
+	MarkBufferDirty(bitmapbuf);
+
+	/* add the new bitmap page to the metapage's list of bitmaps */
+	/* metapage already has a write lock */
+	if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("out of overflow pages in hash index \"%s\"",
+						RelationGetRelationName(rel))));
+
+	metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1;
+
+	metap->hashm_nmaps++;
+	MarkBufferDirty(metabuf);
+
+	/* all done */
+	_hash_relbuf(rel, bitmapbuf);
+	_hash_relbuf(rel, metabuf);
+
+	return num_buckets;
+}
+
+/*
+ *	_hash_init_metabuffer() -- Initialize the metadata page of a hash index.
+ */
+void
+_hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
+					  uint16 ffactor, bool initpage)
+{
+	HashMetaPage metap;
+	HashPageOpaque pageopaque;
+	Page		page;
+	double		dnumbuckets;
+	uint32		num_buckets;
+	uint32		log2_num_buckets;
+	uint32		i;
+
 	/*
 	 * Choose the number of initial bucket pages to match the fill factor
 	 * given the estimated number of tuples.  We round up the result to the
@@ -353,30 +472,25 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
 	Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
 
-	/*
-	 * We initialize the metapage, the first N bucket pages, and the first
-	 * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
-	 * calls to occur.  This ensures that the smgr level has the right idea of
-	 * the physical index length.
-	 */
-	metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
-	pg = BufferGetPage(metabuf);
+	page = BufferGetPage(buf);
+	if (initpage)
+		_hash_pageinit(page, BufferGetPageSize(buf));
 
-	pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
+	pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
 	pageopaque->hasho_prevblkno = InvalidBlockNumber;
 	pageopaque->hasho_nextblkno = InvalidBlockNumber;
 	pageopaque->hasho_bucket = -1;
 	pageopaque->hasho_flag = LH_META_PAGE;
 	pageopaque->hasho_page_id = HASHO_PAGE_ID;
 
-	metap = HashPageGetMeta(pg);
+	metap = HashPageGetMeta(page);
 
 	metap->hashm_magic = HASH_MAGIC;
 	metap->hashm_version = HASH_VERSION;
 	metap->hashm_ntuples = 0;
 	metap->hashm_nmaps = 0;
 	metap->hashm_ffactor = ffactor;
-	metap->hashm_bsize = HashGetMaxBitmapSize(pg);
+	metap->hashm_bsize = HashGetMaxBitmapSize(page);
 	/* find largest bitmap array size that will fit in page size */
 	for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
 	{
@@ -393,7 +507,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	 * pretty useless for normal operation (in fact, hashm_procid is not used
 	 * anywhere), but it might be handy for forensic purposes so we keep it.
 	 */
-	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
+	metap->hashm_procid = procid;
 
 	/*
 	 * We initialize the index with N buckets, 0 .. N-1, occupying physical
@@ -411,54 +525,9 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	metap->hashm_ovflpoint = log2_num_buckets;
 	metap->hashm_firstfree = 0;
 
-	/*
-	 * Release buffer lock on the metapage while we initialize buckets.
-	 * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
-	 * won't accomplish anything.  It's a bad idea to hold buffer locks for
-	 * long intervals in any case, since that can block the bgwriter.
-	 */
-	MarkBufferDirty(metabuf);
-	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-
-	/*
-	 * Initialize the first N buckets
-	 */
-	for (i = 0; i < num_buckets; i++)
-	{
-		/* Allow interrupts, in case N is huge */
-		CHECK_FOR_INTERRUPTS();
-
-		buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
-		pg = BufferGetPage(buf);
-		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
-
-		/*
-		 * Set hasho_prevblkno with current hashm_maxbucket. This value will
-		 * be used to validate cached HashMetaPageData. See
-		 * _hash_getbucketbuf_from_hashkey().
-		 */
-		pageopaque->hasho_prevblkno = metap->hashm_maxbucket;
-		pageopaque->hasho_nextblkno = InvalidBlockNumber;
-		pageopaque->hasho_bucket = i;
-		pageopaque->hasho_flag = LH_BUCKET_PAGE;
-		pageopaque->hasho_page_id = HASHO_PAGE_ID;
-		MarkBufferDirty(buf);
-		_hash_relbuf(rel, buf);
-	}
-
-	/* Now reacquire buffer lock on metapage */
-	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
-
-	/*
-	 * Initialize first bitmap page
-	 */
-	_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
-
-	/* all done */
-	MarkBufferDirty(metabuf);
-	_hash_relbuf(rel, metabuf);
-
-	return num_buckets;
+	/* Set pd_lower just past the end of the metadata. */
+	((PageHeader) page)->pd_lower =
+		((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
 }
 
 /*
@@ -535,7 +604,7 @@ restart_expand:
 	 * than a disk block then this would be an independent constraint.
 	 *
 	 * If you change this, see also the maximum initial number of buckets in
-	 * _hash_metapinit().
+	 * _hash_init().
 	 */
 	if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
 		goto fail;
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 9c0b79f..bfdfed8 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -311,8 +311,6 @@ extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool r
 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
 				   Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
 			 Size *tups_size, uint16 nitups, BufferAccessStrategy bstrategy);
-extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
-				 BlockNumber blkno, ForkNumber forkNum);
 extern void _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage);
 extern void _hash_squeezebucket(Relation rel,
 					Bucket bucket, BlockNumber bucket_blkno,
@@ -331,6 +329,8 @@ extern Buffer _hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey,
 								int access,
 								HashMetaPage *cachedmetap);
 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
+extern void _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket,
+				uint32 flag, bool initpage);
 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
 				ForkNumber forkNum);
 extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
@@ -339,8 +339,10 @@ extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so);
-extern uint32 _hash_metapinit(Relation rel, double num_tuples,
-				ForkNumber forkNum);
+extern uint32 _hash_init(Relation rel, double num_tuples,
+		   ForkNumber forkNum);
+extern void _hash_init_metabuffer(Buffer buf, double num_tuples,
+					  RegProcedure procid, uint16 ffactor, bool initpage);
 extern void _hash_pageinit(Page page, Size size);
 extern void _hash_expandtable(Relation rel, Buffer metabuf);
 extern void _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf,
-- 
1.8.4.msysgit.0

