From af33daa2d0a81c5fd4d57e8fccfbd10138503c3e Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Sun, 15 Mar 2020 15:27:52 -0700
Subject: [PATCH v1] Refactor _bt_doinsert(), fastpath optimization.

Move the fastpath optimization logic out of _bt_doinsert() and into a new
routine, _bt_search_insert(), which is implemented as a _bt_search()
wrapper.
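
The wrapper only attempts the fastpath when the cached rightmost block can
be locked right away with ConditionalLockBuffer(); otherwise it forgets the
cached block and falls back to a regular _bt_search() descent from the
root.  In outline, _bt_doinsert() now looks like this (a condensed sketch
of the new control flow only; the page-level fastpath checks and cleanup
are elided):

    search:
        /* fastpath if possible, else descend from the root via _bt_search() */
        stack = _bt_search_insert(rel, &insertstate);

        if (checkingunique)
        {
            xwait = _bt_check_unique(rel, &insertstate, heapRel, checkUnique,
                                     &is_unique, &speculativeToken);
            if (unlikely(TransactionIdIsValid(xwait)))
            {
                /* wait for the conflicting inserter, then search again */
                ...
                goto search;
            }
        }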
---
 src/backend/access/nbtree/nbtinsert.c | 203 +++++++++++++-------------
 1 file changed, 101 insertions(+), 102 deletions(-)

diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index f503be6422..78b69d7e2e 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -29,6 +29,7 @@
 #define BTREE_FASTPATH_MIN_LEVEL	2
 
 
+static BTStack _bt_search_insert(Relation rel, BTInsertState insertstate);
 static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate,
 									  Relation heapRel,
 									  IndexUniqueCheck checkUnique, bool *is_unique,
@@ -84,9 +85,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
 	bool		is_unique = false;
 	BTInsertStateData insertstate;
 	BTScanInsert itup_key;
-	BTStack		stack = NULL;
-	Buffer		buf;
-	bool		fastpath;
+	BTStack		stack;
 	bool		checkingunique = (checkUnique != UNIQUE_CHECK_NO);
 
 	/* we need an insertion scan key to do our search, so build one */
@@ -137,98 +136,14 @@ _bt_doinsert(Relation rel, IndexTuple itup,
 	insertstate.buf = InvalidBuffer;
 	insertstate.postingoff = 0;
 
+search:
+
 	/*
-	 * It's very common to have an index on an auto-incremented or
-	 * monotonically increasing value. In such cases, every insertion happens
-	 * towards the end of the index. We try to optimize that case by caching
-	 * the right-most leaf of the index. If our cached block is still the
-	 * rightmost leaf, has enough free space to accommodate a new entry and
-	 * the insertion key is strictly greater than the first key in this page,
-	 * then we can safely conclude that the new key will be inserted in the
-	 * cached block. So we simply search within the cached block and insert
-	 * the key at the appropriate location. We call it a fastpath.
-	 *
-	 * Testing has revealed, though, that the fastpath can result in increased
-	 * contention on the exclusive-lock on the rightmost leaf page. So we
-	 * conditionally check if the lock is available. If it's not available
-	 * then we simply abandon the fastpath and take the regular path. This
-	 * makes sense because unavailability of the lock also signals that some
-	 * other backend might be concurrently inserting into the page, thus
-	 * reducing our chances to finding an insertion place in this page.
+	 * Find and lock the leaf page that the new tuple should be added to by
+	 * searching the tree for itup_key.  insertstate.buf will be set to a
+	 * buffer that is locked in exclusive mode afterwards.
 	 */
-top:
-	fastpath = false;
-	if (RelationGetTargetBlock(rel) != InvalidBlockNumber)
-	{
-		Page		page;
-		BTPageOpaque lpageop;
-
-		/*
-		 * Conditionally acquire exclusive lock on the buffer before doing any
-		 * checks. If we don't get the lock, we simply follow slowpath. If we
-		 * do get the lock, this ensures that the index state cannot change,
-		 * as far as the rightmost part of the index is concerned.
-		 */
-		buf = ReadBuffer(rel, RelationGetTargetBlock(rel));
-
-		if (ConditionalLockBuffer(buf))
-		{
-			_bt_checkpage(rel, buf);
-
-			page = BufferGetPage(buf);
-
-			lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
-
-			/*
-			 * Check if the page is still the rightmost leaf page, has enough
-			 * free space to accommodate the new tuple, and the insertion scan
-			 * key is strictly greater than the first key on the page.  Note
-			 * that _bt_insert_parent() has an assertion that catches leaf
-			 * page splits that somehow follow from a fastpath insert.
-			 */
-			if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
-				!P_IGNORE(lpageop) &&
-				PageGetFreeSpace(page) > insertstate.itemsz &&
-				PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
-				_bt_compare(rel, itup_key, page, P_FIRSTDATAKEY(lpageop)) > 0)
-			{
-				fastpath = true;
-			}
-			else
-			{
-				_bt_relbuf(rel, buf);
-
-				/*
-				 * Something did not work out. Just forget about the cached
-				 * block and follow the normal path. It might be set again if
-				 * the conditions are favourable.
-				 */
-				RelationSetTargetBlock(rel, InvalidBlockNumber);
-			}
-		}
-		else
-		{
-			ReleaseBuffer(buf);
-
-			/*
-			 * If someone's holding a lock, it's likely to change anyway, so
-			 * don't try again until we get an updated rightmost leaf.
-			 */
-			RelationSetTargetBlock(rel, InvalidBlockNumber);
-		}
-	}
-
-	if (!fastpath)
-	{
-		/*
-		 * Find the first page containing this key.  Buffer returned by
-		 * _bt_search() is locked in exclusive mode.
-		 */
-		stack = _bt_search(rel, itup_key, &buf, BT_WRITE, NULL);
-	}
-
-	insertstate.buf = buf;
-	buf = InvalidBuffer;		/* insertstate.buf now owns the buffer */
+	stack = _bt_search_insert(rel, &insertstate);
 
 	/*
 	 * If we're not allowing duplicates, make sure the key isn't already in
@@ -260,7 +175,7 @@ top:
 		xwait = _bt_check_unique(rel, &insertstate, heapRel, checkUnique,
 								 &is_unique, &speculativeToken);
 
-		if (TransactionIdIsValid(xwait))
+		if (unlikely(TransactionIdIsValid(xwait)))
 		{
 			/* Have to wait for the other guy ... */
 			_bt_relbuf(rel, insertstate.buf);
@@ -276,10 +191,10 @@ top:
 			else
 				XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
 
-			/* start over... */
+			/* start over with a new search... */
 			if (stack)
 				_bt_freestack(stack);
-			goto top;
+			goto search;
 		}
 
 		/* Uniqueness is established -- restore heap tid as scantid */
@@ -325,6 +240,91 @@ top:
 	return is_unique;
 }
 
+/*
+ *	_bt_search_insert() -- _bt_search() wrapper for inserts
+ *
+ * It's very common to have an index on an auto-incremented or monotonically
+ * increasing value. In such cases, every insertion happens towards the end of
+ * the index. We try to optimize that case by caching the rightmost leaf page
+ * of the index. If our cached block is still the rightmost leaf, has enough
+ * free space to accommodate a new entry, and the insertion key is strictly
+ * greater than the first key on the page, then we can safely conclude that
+ * the new key will be inserted in the cached block. So we simply search
+ * within the cached block and insert the key at the appropriate location. We
+ * call this the fastpath.
+ *
+ * The fastpath can result in increased contention on the exclusive lock on
+ * the rightmost leaf page.  We conditionally check if the lock is available.
+ * If it's not available, we simply abandon the fastpath and take the regular
+ * path. This makes sense because failing to acquire the lock signals that
+ * some other backend might be concurrently inserting into the page, reducing
+ * our chances of finding an insertion place on this page.
+ */
+static BTStack
+_bt_search_insert(Relation rel, BTInsertState insertstate)
+{
+	bool		fastpath = false;
+	BTStack		stack = NULL;
+
+	Assert(!insertstate->bounds_valid);
+	Assert(insertstate->postingoff == 0);
+	Assert(insertstate->buf == InvalidBuffer);
+
+	if (RelationGetTargetBlock(rel) != InvalidBlockNumber)
+	{
+		Page		page;
+		BTPageOpaque lpageop;
+
+		insertstate->buf = ReadBuffer(rel, RelationGetTargetBlock(rel));
+		if (ConditionalLockBuffer(insertstate->buf))
+		{
+			_bt_checkpage(rel, insertstate->buf);
+
+			page = BufferGetPage(insertstate->buf);
+
+			lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
+
+			/*
+			 * Check if the page is still the rightmost leaf page, has enough
+			 * free space to accommodate the new tuple, and the insertion scan
+			 * key is strictly greater than the first key on the page.  Note
+			 * that _bt_insert_parent() has an assertion that catches leaf
+			 * page splits that somehow follow from a fastpath insert.
+			 */
+			if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
+				!P_IGNORE(lpageop) &&
+				PageGetFreeSpace(page) > insertstate->itemsz &&
+				PageGetMaxOffsetNumber(page) >= P_HIKEY &&
+				_bt_compare(rel, insertstate->itup_key, page, P_HIKEY) > 0)
+			{
+				fastpath = true;
+			}
+			else
+			{
+				/* Forget about the cached block */
+				_bt_relbuf(rel, insertstate->buf);
+				RelationSetTargetBlock(rel, InvalidBlockNumber);
+			}
+		}
+		else
+		{
+			/* Forget about the cached block */
+			ReleaseBuffer(insertstate->buf);
+			RelationSetTargetBlock(rel, InvalidBlockNumber);
+		}
+	}
+
+	/*
+	 * Find the first leaf page that this key could be on.  The buffer
+	 * returned by _bt_search() is locked in exclusive mode.
+	 */
+	if (!fastpath)
+		stack = _bt_search(rel, insertstate->itup_key, &insertstate->buf,
+						   BT_WRITE, NULL);
+
+	return stack;
+}
+
 /*
  *	_bt_check_unique() -- Check for violation of unique index constraint
  *
@@ -1245,9 +1245,8 @@ _bt_insertonpg(Relation rel,
 
 		/*
 		 * Cache the block information if we just inserted into the rightmost
-		 * leaf page of the index and it's not the root page.  For very small
-		 * index where root is also the leaf, there is no point trying for any
-		 * optimization.
+		 * leaf page of the index.  A future inserter may then use it to try
+		 * the fastpath within _bt_search_insert().
 		 */
 		if (P_RIGHTMOST(lpageop) && P_ISLEAF(lpageop) && !P_ISROOT(lpageop))
 			cachedBlock = BufferGetBlockNumber(buf);
@@ -2059,9 +2058,9 @@ _bt_insert_parent(Relation rel,
 			 * This is more of a performance issue than a correctness issue.
 			 * The fastpath won't have a descent stack.  Using a phony stack
 			 * here works, but never rely on that.  The fastpath should be
-			 * rejected when the rightmost leaf page will split, since it's
-			 * faster to go through _bt_search() and get a stack in the usual
-			 * way.
+			 * rejected within _bt_search_insert() when the rightmost leaf
+			 * page will split, since it's faster to go through _bt_search()
+			 * and get a stack in the usual way.
 			 */
 			Assert(!(P_ISLEAF(lpageop) &&
 					 BlockNumberIsValid(RelationGetTargetBlock(rel))));
-- 
2.25.1

