On Tue, Dec 27, 2016 at 1:36 PM, Mithun Cy <mithun...@enterprisedb.com> wrote:
Oops, the patch number should be 08; re-attaching the same patch after renaming it.

-- 
Thanks and Regards
Mithun C Y
EnterpriseDB: http://www.enterprisedb.com
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 46df589..c45b3f0 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -32,19 +32,13 @@ _hash_doinsert(Relation rel, IndexTuple itup)
 	Buffer		bucket_buf;
 	Buffer		metabuf;
 	HashMetaPage metap;
-	BlockNumber blkno;
-	BlockNumber oldblkno;
-	bool		retry;
+	HashMetaPage usedmetap;
 	Page		metapage;
 	Page		page;
 	HashPageOpaque pageopaque;
 	Size		itemsz;
 	bool		do_expand;
 	uint32		hashkey;
-	Bucket		bucket;
-	uint32		maxbucket;
-	uint32		highmask;
-	uint32		lowmask;
 
 	/*
 	 * Get the hash key for the item (it's stored in the index tuple itself).
@@ -57,10 +51,15 @@ _hash_doinsert(Relation rel, IndexTuple itup)
 								 * need to be consistent */
 
 restart_insert:
-	/* Read the metapage */
-	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+
+	/*
+	 * Pin the metapage without locking it.  For now we only read the page
+	 * header field pd_pagesize_version (via HashMaxItemSize()), which is
+	 * constant and cannot change under us.  We keep the pin so that metabuf
+	 * can be locked and written to further below.
+	 */
+	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
 	metapage = BufferGetPage(metabuf);
-	metap = HashPageGetMeta(metapage);
 
 	/*
 	 * Check whether the item can fit on a hash page at all. (Eventually, we
@@ -76,59 +75,7 @@ restart_insert:
 						itemsz, HashMaxItemSize(metapage)),
 			errhint("Values larger than a buffer page cannot be indexed.")));
 
-	oldblkno = InvalidBlockNumber;
-	retry = false;
-
-	/*
-	 * Loop until we get a lock on the correct target bucket.
-	 */
-	for (;;)
-	{
-		/*
-		 * Compute the target bucket number, and convert to block number.
-		 */
-		bucket = _hash_hashkey2bucket(hashkey,
-									  metap->hashm_maxbucket,
-									  metap->hashm_highmask,
-									  metap->hashm_lowmask);
-
-		blkno = BUCKET_TO_BLKNO(metap, bucket);
-
-		/*
-		 * Copy bucket mapping info now; refer the comment in
-		 * _hash_expandtable where we copy this information before calling
-		 * _hash_splitbucket to see why this is okay.
-		 */
-		maxbucket = metap->hashm_maxbucket;
-		highmask = metap->hashm_highmask;
-		lowmask = metap->hashm_lowmask;
-
-		/* Release metapage lock, but keep pin. */
-		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-
-		/*
-		 * If the previous iteration of this loop locked the primary page of
-		 * what is still the correct target bucket, we are done.  Otherwise,
-		 * drop any old lock before acquiring the new one.
-		 */
-		if (retry)
-		{
-			if (oldblkno == blkno)
-				break;
-			_hash_relbuf(rel, buf);
-		}
-
-		/* Fetch and lock the primary bucket page for the target bucket */
-		buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
-
-		/*
-		 * Reacquire metapage lock and check that no bucket split has taken
-		 * place while we were awaiting the bucket lock.
-		 */
-		LockBuffer(metabuf, BUFFER_LOCK_SHARE);
-		oldblkno = blkno;
-		retry = true;
-	}
+	buf = _hash_getbucketbuf_from_hashkey(hashkey, rel, HASH_WRITE, &usedmetap);
 
 	/* remember the primary bucket buffer to release the pin on it at end. */
 	bucket_buf = buf;
@@ -152,7 +99,9 @@ restart_insert:
 		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 
 		_hash_finish_split(rel, metabuf, buf, pageopaque->hasho_bucket,
-						   maxbucket, highmask, lowmask);
+						   usedmetap->hashm_maxbucket,
+						   usedmetap->hashm_highmask,
+						   usedmetap->hashm_lowmask);
 
 		/* release the pin on old and meta buffer.  retry for insert. */
 		_hash_dropbuf(rel, buf);
@@ -225,6 +174,7 @@ restart_insert:
 	 */
 	LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
+	metap = HashPageGetMeta(metapage);
 	metap->hashm_ntuples += 1;
 
 	/* Make sure this stays in sync with _hash_expandtable() */
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 45e184c..c726909 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -434,7 +434,13 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 		buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
 		pg = BufferGetPage(buf);
 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
-		pageopaque->hasho_prevblkno = InvalidBlockNumber;
+
+		/*
+		 * Store the current maxbucket number in hasho_prevblkno of the bucket
+		 * page to mark when the bucket was initialized; a cached copy of the
+		 * metapage with a smaller maxbucket is stale and must be refreshed.
+		 */
+		pageopaque->hasho_prevblkno = metap->hashm_maxbucket;
 		pageopaque->hasho_nextblkno = InvalidBlockNumber;
 		pageopaque->hasho_bucket = i;
 		pageopaque->hasho_flag = LH_BUCKET_PAGE;
@@ -845,6 +851,12 @@ _hash_splitbucket(Relation rel,
 	 */
 	oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT;
 
+	/*
+	 * Store the latest maxbucket number in hasho_prevblkno of the old bucket
+	 * page to record the split, so that older cached metapage copies get
+	 * refreshed.  The new bucket page is stamped the same way below.
+	 */
+	oopaque->hasho_prevblkno = maxbucket;
 	npage = BufferGetPage(nbuf);
 
 	/*
@@ -852,7 +864,7 @@ _hash_splitbucket(Relation rel,
 	 * split is in progress.
 	 */
 	nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
-	nopaque->hasho_prevblkno = InvalidBlockNumber;
+	nopaque->hasho_prevblkno = maxbucket;
 	nopaque->hasho_nextblkno = InvalidBlockNumber;
 	nopaque->hasho_bucket = nbucket;
 	nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED;
@@ -1191,3 +1203,124 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket,
 	LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
 	hash_destroy(tidhtab);
 }
+
+
+/*
+ *	_hash_getbucketbuf_from_hashkey() -- Get the bucket's buffer for the given
+ *										 hashkey.
+ *
+ *	Bucket pages do not move or get removed once they are allocated.  This
+ *	gives us an opportunity to use previously saved metapage contents to reach
+ *	the target bucket buffer, instead of reading the metapage buffer every
+ *	time.  That saves one buffer access each time we need to reach the target
+ *	bucket buffer, which is a useful reduction in bufmgr traffic and
+ *	contention.
+ *
+ *	The access type parameter (HASH_READ or HASH_WRITE) indicates whether the
+ *	bucket buffer has to be locked for reading or writing.
+ *
+ *	If cachedmetap is not NULL, it is set to point to the cached metapage
+ *	contents used for the hashkey-to-bucket mapping.  Some callers need this
+ *	to reach the old bucket when a split is in progress; see _hash_doinsert().
+ */
+Buffer
+_hash_getbucketbuf_from_hashkey(uint32 hashkey, Relation rel, int access,
+								HashMetaPage *cachedmetap)
+{
+	HashMetaPage metap;
+	Buffer		buf;
+	Buffer		metabuf = InvalidBuffer;
+	Page		page;
+	Bucket		bucket;
+	BlockNumber blkno;
+	HashPageOpaque opaque;
+
+	if (rel->rd_amcache != NULL)
+	{
+		metap = (HashMetaPage) rel->rd_amcache;
+	}
+	else
+	{
+		/* Read the metapage */
+		metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+		page = BufferGetPage(metabuf);
+		metap = HashPageGetMeta(page);
+
+		/* Cache the metapage data for next time. */
+		rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
+											 sizeof(HashMetaPageData));
+		memcpy(rel->rd_amcache, metap, sizeof(HashMetaPageData));
+		metap = (HashMetaPage) rel->rd_amcache;
+
+		/* Release metapage lock, but keep pin. */
+		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/*
+	 * Loop until we get a lock on the correct target bucket.
+	 */
+	for (;;)
+	{
+		/*
+		 * Compute the target bucket number, and convert to block number.
+		 */
+		bucket = _hash_hashkey2bucket(hashkey,
+									  metap->hashm_maxbucket,
+									  metap->hashm_highmask,
+									  metap->hashm_lowmask);
+
+		blkno = BUCKET_TO_BLKNO(metap, bucket);
+
+		/* Fetch the primary bucket page for the bucket */
+		buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
+		page = BufferGetPage(buf);
+		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+		Assert(opaque->hasho_bucket == bucket);
+
+		/*
+		 * Check whether this bucket has been split since we cached the hash
+		 * metapage data.  Bucket page initialization and splits store the
+		 * then-current maxbucket number in hasho_prevblkno, so if that value
+		 * is less than or equal to our cached hashm_maxbucket, the cache is
+		 * still good for this bucket.  After an upgrade, hasho_prevblkno of a
+		 * pre-existing bucket page is still InvalidBlockNumber.  Since the
+		 * largest value hashm_maxbucket can take is one less than
+		 * InvalidBlockNumber (see _hash_expandtable), an explicit check for
+		 * InvalidBlockNumber identifies such a bucket, which has not been
+		 * split since the metapage data was cached.
+		 */
+		if (opaque->hasho_prevblkno == InvalidBlockNumber ||
+			opaque->hasho_prevblkno <= metap->hashm_maxbucket)
+		{
+			/* OK, we now have the right bucket; proceed to use it. */
+			break;
+		}
+
+		/* First drop any locks held on bucket buffers. */
+		_hash_relbuf(rel, buf);
+
+		/* Cached metapage data is stale; refresh it and try again. */
+		if (BufferIsInvalid(metabuf))
+			metabuf =
+				_hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+		else
+			LockBuffer(metabuf, BUFFER_LOCK_SHARE);
+
+		metap = HashPageGetMeta(BufferGetPage(metabuf));
+
+		/* Cache the metapage data for next time. */
+		memcpy(rel->rd_amcache, metap, sizeof(HashMetaPageData));
+		metap = (HashMetaPage) rel->rd_amcache;
+
+		/* Release metapage lock, but keep pin. */
+		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/* done with the metapage */
+	if (!BufferIsInvalid(metabuf))
+		_hash_dropbuf(rel, metabuf);
+
+	if (cachedmetap)
+		*cachedmetap = metap;
+	return buf;
+}
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 913b87c..cdc54e1 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -152,6 +152,11 @@ _hash_readprev(IndexScanDesc scan,
 		_hash_relbuf(rel, *bufp);
 
 	*bufp = InvalidBuffer;
+
+	/* If it is a bucket page there will not be a prevblkno. */
+	if ((*opaquep)->hasho_flag & LH_BUCKET_PAGE)
+		return;
+
 	/* check for interrupts while we're not holding any buffer lock */
 	CHECK_FOR_INTERRUPTS();
 	if (BlockNumberIsValid(blkno))
@@ -216,13 +221,9 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 	uint32		hashkey;
 	Bucket		bucket;
 	BlockNumber blkno;
-	BlockNumber oldblkno = InvalidBuffer;
-	bool		retry = false;
 	Buffer		buf;
-	Buffer		metabuf;
 	Page		page;
 	HashPageOpaque opaque;
-	HashMetaPage metap;
 	IndexTuple	itup;
 	ItemPointer current;
 	OffsetNumber offnum;
@@ -277,56 +278,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 
 	so->hashso_sk_hash = hashkey;
 
-	/* Read the metapage */
-	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
-	page = BufferGetPage(metabuf);
-	metap = HashPageGetMeta(page);
-
-	/*
-	 * Loop until we get a lock on the correct target bucket.
-	 */
-	for (;;)
-	{
-		/*
-		 * Compute the target bucket number, and convert to block number.
-		 */
-		bucket = _hash_hashkey2bucket(hashkey,
-									  metap->hashm_maxbucket,
-									  metap->hashm_highmask,
-									  metap->hashm_lowmask);
-
-		blkno = BUCKET_TO_BLKNO(metap, bucket);
-
-		/* Release metapage lock, but keep pin. */
-		LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-
-		/*
-		 * If the previous iteration of this loop locked what is still the
-		 * correct target bucket, we are done.  Otherwise, drop any old lock
-		 * and lock what now appears to be the correct bucket.
-		 */
-		if (retry)
-		{
-			if (oldblkno == blkno)
-				break;
-			_hash_relbuf(rel, buf);
-		}
-
-		/* Fetch the primary bucket page for the bucket */
-		buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE);
-
-		/*
-		 * Reacquire metapage lock and check that no bucket split has taken
-		 * place while we were awaiting the bucket lock.
-		 */
-		LockBuffer(metabuf, BUFFER_LOCK_SHARE);
-		oldblkno = blkno;
-		retry = true;
-	}
-
-	/* done with the metapage */
-	_hash_dropbuf(rel, metabuf);
-
+	buf = _hash_getbucketbuf_from_hashkey(hashkey, rel, HASH_READ, NULL);
 	page = BufferGetPage(buf);
 	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 	Assert(opaque->hasho_bucket == bucket);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index bc08f81..fe98157 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -60,6 +60,14 @@ typedef uint32 Bucket;
 
 typedef struct HashPageOpaqueData
 {
+	/*
+	 * On an overflow page, this stores the previous overflow (or bucket)
+	 * block number.  On a bucket page it serves a special purpose: it holds
+	 * the hashm_maxbucket value as of when the page was initialized or last
+	 * split.  That lets us tell whether the bucket has been split since the
+	 * metapage data was cached.  See _hash_doinsert() and _hash_first() for
+	 * how this is used.
+	 */
 	BlockNumber hasho_prevblkno;	/* previous ovfl (or bucket) blkno */
 	BlockNumber hasho_nextblkno;	/* next ovfl blkno */
 	Bucket		hasho_bucket;	/* bucket number this pg belongs to */
@@ -327,6 +335,10 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
 			 int access, int flags);
 extern Buffer _hash_getbuf_with_condlock_cleanup(Relation rel,
 								   BlockNumber blkno, int flags);
+extern Buffer
+_hash_getbucketbuf_from_hashkey(uint32 hashkey, Relation rel,
+								int access,
+								HashMetaPage *cachedmetap);
 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
 				ForkNumber forkNum);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to