From b95d7881a5932b88d199511afad329b2f5c8147d Mon Sep 17 00:00:00 2001
From: Pavel Borisov <pashkin.elfe@gmail.com>
Date: Mon, 25 Sep 2023 13:45:22 +0400
Subject: [PATCH] Skip checking of scan keys required for directional scan in
 B-tree

Currently, B-tree code matches every scan key to every item on the page.
Imagine the ordered B-tree scan for the query like this.

SELECT * FROM tbl WHERE col > 'a' AND col < 'b' ORDER BY col;

The (col > 'a') scan key will be always matched once we find the location to
start the scan.  The (col < 'b') scan key will match every item on the page
as long as it matches the last item on the page.

This patch implements prechecking of the scan keys required for directional
scan on beginning of page scan.  If precheck is successful we can skip this
scan keys check for the items on the page.  That could lead to significant
acceleration especially if the comparison operator is expensive.

Idea from patch by Konstantin Knizhnik.

Discussion: https://postgr.es/m/079c3f8e-3371-abe2-e93c-fc8a0ae3f571%40garret.ru
Reviewed-by: Peter Geoghegan, Pavel Borisov
---
 src/backend/access/nbtree/nbtsearch.c | 69 +++++++++++++++++++++++++--
 src/backend/access/nbtree/nbtutils.c  | 59 ++++++++++++++++++-----
 src/include/access/nbtree.h           |  6 ++-
 3 files changed, 117 insertions(+), 17 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 17ad89749d5..283190c7cf9 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1429,6 +1429,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	/* remember which buffer we have pinned, if any */
 	Assert(!BTScanPosIsValid(so->currPos));
 	so->currPos.buf = buf;
+	so->firstPage = true;
 
 	/*
 	 * Now load data from the first page of the scan.
@@ -1539,6 +1540,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	int			itemIndex;
 	bool		continuescan;
 	int			indnatts;
+	bool		requiredMatchedByPrecheck;
 
 	/*
 	 * We must have the buffer pinned and locked, but the usual macro can't be
@@ -1592,6 +1594,46 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	 */
 	Assert(BTScanPosIsPinned(so->currPos));
 
+	/*
+	 * Prechecking the page with scan keys required for direction scan.  We
+	 * check these keys with the last item on the page (according to our scan
+	 * direction).  If these keys are matched, we can skip checking them with
+	 * every item on the page.  Scan keys for our scan direction would
+	 * necessarily match the previous items.  Scan keys required for opposite
+	 * direction scan are already matched by the _bt_first() call.
+	 *
+	 * With the forward scan, we do this check for the last item on the page
+	 * instead of the high key.  It's relatively likely that the most
+	 * significant column in the high key will be different from the
+	 * corresponding value from the last item on the page.  So checking with
+	 * the last item on the page would give a more precise answer.
+	 *
+	 * We skip this for the first page in the scan to evade the possible
+	 * slowdown of the point queries.
+	 */
+	if (!so->firstPage && minoff < maxoff)
+	{
+		ItemId		iid;
+		IndexTuple	itup;
+
+		iid = PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff);
+		itup = (IndexTuple) PageGetItem(page, iid);
+
+		/*
+		 * Do the precheck.  Note that we pass the pointer to
+		 * 'requiredMatchedByPrecheck' to 'continuescan' argument.  That will
+		 * set flag to true if all required keys are satisfied and false
+		 * otherwise.
+		 */
+		(void) _bt_checkkeys(scan, itup, indnatts, dir,
+							 &requiredMatchedByPrecheck, false);
+	}
+	else
+	{
+		so->firstPage = false;
+		requiredMatchedByPrecheck = false;
+	}
+
 	if (ScanDirectionIsForward(dir))
 	{
 		/* load items[] in ascending order */
@@ -1603,6 +1645,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 		{
 			ItemId		iid = PageGetItemId(page, offnum);
 			IndexTuple	itup;
+			bool		passes_quals;
 
 			/*
 			 * If the scan specifies not to return killed tuples, then we
@@ -1616,7 +1659,18 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 
 			itup = (IndexTuple) PageGetItem(page, iid);
 
-			if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
+			passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
+										 &continuescan, requiredMatchedByPrecheck);
+
+			/*
+			 * If the result of prechecking required keys was true, then in
+			 * assert-enabled builds we also recheck that _bt_checkkeys()
+			 * result is is the same.
+			 */
+			Assert(!requiredMatchedByPrecheck ||
+				   passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
+												 &continuescan, false));
+			if (passes_quals)
 			{
 				/* tuple passes all scan key conditions */
 				if (!BTreeTupleIsPosting(itup))
@@ -1673,7 +1727,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 			int			truncatt;
 
 			truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
-			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
+			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
 		}
 
 		if (!continuescan)
@@ -1725,7 +1779,16 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 			itup = (IndexTuple) PageGetItem(page, iid);
 
 			passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
-										 &continuescan);
+										 &continuescan, requiredMatchedByPrecheck);
+
+			/*
+			 * If the result of prechecking required keys was true, then in
+			 * assert-enabled builds we also recheck that _bt_checkkeys()
+			 * result is is the same.
+			 */
+			Assert(!requiredMatchedByPrecheck ||
+				   passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
+												 &continuescan, false));
 			if (passes_quals && tuple_alive)
 			{
 				/* tuple passes all scan key conditions */
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 7da499c4dd5..2a9c25b03ed 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -1357,10 +1357,13 @@ _bt_mark_scankey_required(ScanKey skey)
  * tupnatts: number of attributes in tupnatts (high key may be truncated)
  * dir: direction we are scanning in
  * continuescan: output parameter (will be set correctly in all cases)
+ * requiredMatchedByPrecheck: indicates that scan keys required for
+ * 							  direction scan are already matched
  */
 bool
 _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
-			  ScanDirection dir, bool *continuescan)
+			  ScanDirection dir, bool *continuescan,
+			  bool requiredMatchedByPrecheck)
 {
 	TupleDesc	tupdesc;
 	BTScanOpaque so;
@@ -1381,6 +1384,29 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 		Datum		datum;
 		bool		isNull;
 		Datum		test;
+		bool		requiredSameDir = false,
+					requiredOppositeDir = false;
+
+		/*
+		 * Check if the key is required for ordered scan in the same or
+		 * opposite direction.  Save as flag variables for future usage.
+		 */
+		if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
+			((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
+			requiredSameDir = true;
+		else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
+				 ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
+			requiredOppositeDir = true;
+
+		/*
+		 * Is the key required for scanning for either forward or backward
+		 * direction?  If so and called told us that these types of keys are
+		 * known to be matched, skip the check.  Except for the row keys,
+		 * where NULLs could be found in the middle of matching values.
+		 */
+		if ((requiredSameDir || requiredOppositeDir) &&
+			!(key->sk_flags & SK_ROW_HEADER) && requiredMatchedByPrecheck)
+			continue;
 
 		if (key->sk_attno > tupnatts)
 		{
@@ -1429,11 +1455,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			 * scan direction, then we can conclude no further tuples will
 			 * pass, either.
 			 */
-			if ((key->sk_flags & SK_BT_REQFWD) &&
-				ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((key->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
+			if (requiredSameDir)
 				*continuescan = false;
 
 			/*
@@ -1483,8 +1505,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			return false;
 		}
 
-		test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
-								 datum, key->sk_argument);
+		/*
+		 * Apply the key checking function.  When the key is required for
+		 * opposite direction scan, it must be already satisfied by
+		 * _bt_first() except for the NULLs checking, which have already done
+		 * above.
+		 */
+		if (!requiredOppositeDir)
+		{
+			test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
+									 datum, key->sk_argument);
+		}
+		else
+		{
+			test = true;
+			Assert(test == FunctionCall2Coll(&key->sk_func, key->sk_collation,
+											 datum, key->sk_argument));
+		}
 
 		if (!DatumGetBool(test))
 		{
@@ -1498,11 +1535,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			 * initial positioning in _bt_first() when they are available. See
 			 * comments in _bt_first().
 			 */
-			if ((key->sk_flags & SK_BT_REQFWD) &&
-				ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((key->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
+			if (requiredSameDir)
 				*continuescan = false;
 
 			/*
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index f5c66964ca0..14893653588 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1054,6 +1054,9 @@ typedef struct BTScanOpaqueData
 	int		   *killedItems;	/* currPos.items indexes of killed items */
 	int			numKilled;		/* number of currently stored items */
 
+	/* flag indicating the first page in the scan */
+	bool		firstPage;
+
 	/*
 	 * If we are doing an index-only scan, these are the tuple storage
 	 * workspaces for the currPos and markPos respectively.  Each is of size
@@ -1253,7 +1256,8 @@ extern void _bt_mark_array_keys(IndexScanDesc scan);
 extern void _bt_restore_array_keys(IndexScanDesc scan);
 extern void _bt_preprocess_keys(IndexScanDesc scan);
 extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
-						  int tupnatts, ScanDirection dir, bool *continuescan);
+						  int tupnatts, ScanDirection dir, bool *continuescan,
+						  bool requiredMatchedByPrecheck);
 extern void _bt_killitems(IndexScanDesc scan);
 extern BTCycleId _bt_vacuum_cycleid(Relation rel);
 extern BTCycleId _bt_start_vacuum(Relation rel);
-- 
2.37.1 (Apple Git-137.1)

