On Mon, Dec 12, 2016 at 9:21 PM, Amit Kapila <amit.kapil...@gmail.com> wrote:
> The reason is to make the operations consistent in master and standby.
> In WAL patch, for clearing the SPLIT_CLEANUP flag, we write a WAL and
> if we don't release the lock after writing a WAL, the operation can
appear on the standby even before it appears on the master.   Currently, without WAL,
> there is no benefit of doing so and we can fix by using
> MarkBufferDirty, however, I thought it might be simpler to keep it
> this way as this is required for enabling WAL.  OTOH, we can leave
> that for WAL patch.  Let me know which way you prefer?

It's not required for enabling WAL.  You don't *have* to release and
reacquire the buffer lock; in fact, that just adds overhead.  You *do*
have to be aware that the standby will perhaps see the intermediate
state, because it won't hold the lock throughout.  But that does not
mean that the master must release the lock.

>> I don't think we should be afraid to call MarkBufferDirty() instead of
>> going through these (fairly stupid) hash-AM-specific APIs.
>
> Right and anyway we need to use it at many more call sites when we
> enable WAL for hash index.

I propose the attached patch, which removes _hash_wrtbuf() and causes
those functions which previously called it to do MarkBufferDirty()
directly.  Aside from hopefully fixing the bug we're talking about
here, this makes the logic in several places noticeably less
contorted.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index f1511d0..0eeb37d 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -635,7 +635,8 @@ loop_top:
 		num_index_tuples = metap->hashm_ntuples;
 	}
 
-	_hash_wrtbuf(rel, metabuf);
+	MarkBufferDirty(metabuf);
+	_hash_relbuf(rel, metabuf);
 
 	/* return statistics */
 	if (stats == NULL)
@@ -724,7 +725,6 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		OffsetNumber deletable[MaxOffsetNumber];
 		int			ndeletable = 0;
 		bool		retain_pin = false;
-		bool		curr_page_dirty = false;
 
 		vacuum_delay_point();
 
@@ -805,7 +805,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		{
 			PageIndexMultiDelete(page, deletable, ndeletable);
 			bucket_dirty = true;
-			curr_page_dirty = true;
+			MarkBufferDirty(buf);
 		}
 
 		/* bail out if there are no more pages to scan. */
@@ -820,15 +820,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		 * release the lock on previous page after acquiring the lock on next
 		 * page
 		 */
-		if (curr_page_dirty)
-		{
-			if (retain_pin)
-				_hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK);
-			else
-				_hash_wrtbuf(rel, buf);
-			curr_page_dirty = false;
-		}
-		else if (retain_pin)
+		if (retain_pin)
 			_hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
 		else
 			_hash_relbuf(rel, buf);
@@ -862,6 +854,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 
 		bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
+		MarkBufferDirty(bucket_buf);
 	}
 
 	/*
@@ -873,7 +866,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		_hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
 							bstrategy);
 	else
-		_hash_chgbufaccess(rel, bucket_buf, HASH_WRITE, HASH_NOLOCK);
+		_hash_chgbufaccess(rel, bucket_buf, HASH_READ, HASH_NOLOCK);
 }
 
 void
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 572146a..59c4213 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -208,11 +208,12 @@ restart_insert:
 	(void) _hash_pgaddtup(rel, buf, itemsz, itup);
 
 	/*
-	 * write and release the modified page.  if the page we modified was an
+	 * dirty and release the modified page.  if the page we modified was an
 	 * overflow page, we also need to separately drop the pin we retained on
 	 * the primary bucket page.
 	 */
-	_hash_wrtbuf(rel, buf);
+	MarkBufferDirty(buf);
+	_hash_relbuf(rel, buf);
 	if (buf != bucket_buf)
 		_hash_dropbuf(rel, bucket_buf);
 
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index e2d208e..8fbf494 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -149,10 +149,11 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
 
 	/* logically chain overflow page to previous page */
 	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
+	MarkBufferDirty(buf);
 	if ((pageopaque->hasho_flag & LH_BUCKET_PAGE) && retain_pin)
-		_hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK);
+		_hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
 	else
-		_hash_wrtbuf(rel, buf);
+		_hash_relbuf(rel, buf);
 
 	return ovflbuf;
 }
@@ -304,7 +305,8 @@ found:
 
 	/* mark page "in use" in the bitmap */
 	SETBIT(freep, bit);
-	_hash_wrtbuf(rel, mapbuf);
+	MarkBufferDirty(mapbuf);
+	_hash_relbuf(rel, mapbuf);
 
 	/* Reacquire exclusive lock on the meta page */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
@@ -416,7 +418,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	 * in _hash_pageinit() when the page is reused.)
 	 */
 	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
-	_hash_wrtbuf(rel, ovflbuf);
+	MarkBufferDirty(ovflbuf);
+	_hash_relbuf(rel, ovflbuf);
 
 	/*
 	 * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
@@ -445,7 +448,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 		prevopaque->hasho_nextblkno = nextblkno;
 
 		if (prevblkno != writeblkno)
-			_hash_wrtbuf(rel, prevbuf);
+		{
+			MarkBufferDirty(prevbuf);
+			_hash_relbuf(rel, prevbuf);
+		}
 	}
 
 	/* write and unlock the write buffer */
@@ -466,7 +472,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 
 		Assert(nextopaque->hasho_bucket == bucket);
 		nextopaque->hasho_prevblkno = prevblkno;
-		_hash_wrtbuf(rel, nextbuf);
+		MarkBufferDirty(nextbuf);
+		_hash_relbuf(rel, nextbuf);
 	}
 
 	/* Note: bstrategy is intentionally not used for metapage and bitmap */
@@ -494,7 +501,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	freep = HashPageGetBitmap(mappage);
 	Assert(ISSET(freep, bitmapbit));
 	CLRBIT(freep, bitmapbit);
-	_hash_wrtbuf(rel, mapbuf);
+	MarkBufferDirty(mapbuf);
+	_hash_relbuf(rel, mapbuf);
 
 	/* Get write-lock on metapage to update firstfree */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
@@ -503,13 +511,9 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
 	if (ovflbitno < metap->hashm_firstfree)
 	{
 		metap->hashm_firstfree = ovflbitno;
-		_hash_wrtbuf(rel, metabuf);
-	}
-	else
-	{
-		/* no need to change metapage */
-		_hash_relbuf(rel, metabuf);
+		MarkBufferDirty(metabuf);
 	}
+	_hash_relbuf(rel, metabuf);
 
 	return nextblkno;
 }
@@ -559,8 +563,9 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
 	freep = HashPageGetBitmap(pg);
 	MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));
 
-	/* write out the new bitmap page (releasing write lock and pin) */
-	_hash_wrtbuf(rel, buf);
+	/* dirty the new bitmap page, and release write lock and pin */
+	MarkBufferDirty(buf);
+	_hash_relbuf(rel, buf);
 
 	/* add the new bitmap page to the metapage's list of bitmaps */
 	/* metapage already has a write lock */
@@ -724,13 +729,8 @@ _hash_squeezebucket(Relation rel,
 				 * on next page
 				 */
 				if (wbuf_dirty)
-				{
-					if (retain_pin)
-						_hash_chgbufaccess(rel, wbuf, HASH_WRITE, HASH_NOLOCK);
-					else
-						_hash_wrtbuf(rel, wbuf);
-				}
-				else if (retain_pin)
+					MarkBufferDirty(wbuf);
+				if (retain_pin)
 					_hash_chgbufaccess(rel, wbuf, HASH_READ, HASH_NOLOCK);
 				else
 					_hash_relbuf(rel, wbuf);
@@ -742,10 +742,9 @@ _hash_squeezebucket(Relation rel,
 					{
 						/* Delete tuples we already moved off read page */
 						PageIndexMultiDelete(rpage, deletable, ndeletable);
-						_hash_wrtbuf(rel, rbuf);
+						MarkBufferDirty(rbuf);
 					}
-					else
-						_hash_relbuf(rel, rbuf);
+					_hash_relbuf(rel, rbuf);
 					return;
 				}
 
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 44332e7..a3d2138 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -290,25 +290,6 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
 }
 
 /*
- *	_hash_wrtbuf() -- write a hash page to disk.
- *
- *		This routine releases the lock held on the buffer and our refcount
- *		for it.  It is an error to call _hash_wrtbuf() without a write lock
- *		and a pin on the buffer.
- *
- * NOTE: this routine should go away when/if hash indexes are WAL-ified.
- * The correct sequence of operations is to mark the buffer dirty, then
- * write the WAL record, then release the lock and pin; so marking dirty
- * can't be combined with releasing.
- */
-void
-_hash_wrtbuf(Relation rel, Buffer buf)
-{
-	MarkBufferDirty(buf);
-	UnlockReleaseBuffer(buf);
-}
-
-/*
  * _hash_chgbufaccess() -- Change the lock type on a buffer, without
  *			dropping our pin on it.
  *
@@ -483,7 +464,8 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 		pageopaque->hasho_bucket = i;
 		pageopaque->hasho_flag = LH_BUCKET_PAGE;
 		pageopaque->hasho_page_id = HASHO_PAGE_ID;
-		_hash_wrtbuf(rel, buf);
+		MarkBufferDirty(buf);
+		_hash_relbuf(rel, buf);
 	}
 
 	/* Now reacquire buffer lock on metapage */
@@ -495,7 +477,8 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
 	_hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
 
 	/* all done */
-	_hash_wrtbuf(rel, metabuf);
+	MarkBufferDirty(metabuf);
+	_hash_relbuf(rel, metabuf);
 
 	return num_buckets;
 }
@@ -1075,7 +1058,10 @@ _hash_splitbucket_guts(Relation rel,
 	if (nbuf == bucket_nbuf)
 		_hash_chgbufaccess(rel, bucket_nbuf, HASH_WRITE, HASH_NOLOCK);
 	else
-		_hash_wrtbuf(rel, nbuf);
+	{
+		MarkBufferDirty(nbuf);
+		_hash_relbuf(rel, nbuf);
+	}
 
 	_hash_chgbufaccess(rel, bucket_obuf, HASH_NOLOCK, HASH_WRITE);
 	opage = BufferGetPage(bucket_obuf);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 6dfc41f..9ce44a7 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -336,7 +336,6 @@ extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_dropscanbuf(Relation rel, HashScanOpaque so);
-extern void _hash_wrtbuf(Relation rel, Buffer buf);
 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
 				   int to_access);
 extern uint32 _hash_metapinit(Relation rel, double num_tuples,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to