From e98287b05969e776236dad3ed5f16738842822b5 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Thu, 14 Sep 2023 13:18:09 +0300
Subject: [PATCH] Skip checking of scan keys required for directional scan in
 B-tree

Currently, B-tree code matches every scan key to every item on the page.
Imagine the ordered B-tree scan for the query like this.

SELECT * FROM tbl WHERE col > 'a' AND col < 'b' ORDER BY col;

The (col > 'a') scan key will be always matched once we find the location to
start the scan.  The (col < 'b') scan key will match every item on the page
as long as it matches the last item on the page.

This patch implements prechecking of the scan keys required for directional
scan on beginning of page scan.  If precheck is successful we can skip this
scan keys check for the items on the page.  That could lead to significant
acceleration especially if the comparison operator is expensive.

Idea from patch by Konstantin Knizhnik.

Discussion: https://postgr.es/m/079c3f8e-3371-abe2-e93c-fc8a0ae3f571%40garret.ru
Reviewed-by: Peter Geoghegan
---
 src/backend/access/nbtree/nbtsearch.c | 40 +++++++++++++++++++++++++--
 src/backend/access/nbtree/nbtutils.c  | 31 +++++++++++++--------
 src/include/access/nbtree.h           |  6 +++-
 3 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 17ad89749d5..2a1d1505df3 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1429,6 +1429,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	/* remember which buffer we have pinned, if any */
 	Assert(!BTScanPosIsValid(so->currPos));
 	so->currPos.buf = buf;
+	so->firstPage = true;
 
 	/*
 	 * Now load data from the first page of the scan.
@@ -1539,6 +1540,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	int			itemIndex;
 	bool		continuescan;
 	int			indnatts;
+	bool		requiredDirMatched;
 
 	/*
 	 * We must have the buffer pinned and locked, but the usual macro can't be
@@ -1592,6 +1594,37 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	 */
 	Assert(BTScanPosIsPinned(so->currPos));
 
+	/*
+	 * Prechecking the page with scan keys required for direction scan.  We
+	 * check these keys with the last item on the page (according to our scan
+	 * direction).  If these keys are matched, we can skip checking them with
+	 * every item on the page.  Scan keys for our scan direction would
+	 * necessarily match the previous items.  Scan keys required for opposite
+	 * direction scan are already matched by the _bt_first() call.
+	 *
+	 * With the forward scan, we do this check for the last item on the page
+	 * instead of the high key.  It's relatively likely that the most
+	 * significant column in the high key will be different from the
+	 * corresponding value from the last item on the page.  So checking with
+	 * the last item on the page would give a more precise answer.
+	 *
+	 * We skip this for the first page in the scan to evade the possible
+	 * slowdown of the point queries.
+	 */
+	if (!so->firstPage)
+	{
+		ItemId		iid;
+		IndexTuple	itup;
+
+		iid = PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff);
+		itup = (IndexTuple) PageGetItem(page, iid);
+		(void) _bt_checkkeys(scan, itup, indnatts, dir, &requiredDirMatched, false);
+	}
+	else
+	{
+		so->firstPage = false;
+	}
+
 	if (ScanDirectionIsForward(dir))
 	{
 		/* load items[] in ascending order */
@@ -1616,7 +1649,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 
 			itup = (IndexTuple) PageGetItem(page, iid);
 
-			if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
+			if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan,
+							  requiredDirMatched))
 			{
 				/* tuple passes all scan key conditions */
 				if (!BTreeTupleIsPosting(itup))
@@ -1673,7 +1707,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 			int			truncatt;
 
 			truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
-			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
+			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
 		}
 
 		if (!continuescan)
@@ -1725,7 +1759,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 			itup = (IndexTuple) PageGetItem(page, iid);
 
 			passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
-										 &continuescan);
+										 &continuescan, requiredDirMatched);
 			if (passes_quals && tuple_alive)
 			{
 				/* tuple passes all scan key conditions */
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 7da499c4dd5..96c1674c82e 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -1357,10 +1357,13 @@ _bt_mark_scankey_required(ScanKey skey)
  * tupnatts: number of attributes in tupnatts (high key may be truncated)
  * dir: direction we are scanning in
  * continuescan: output parameter (will be set correctly in all cases)
+ * requiredDirMatched: indicates that scan keys required for direction scan
+ *					   are already matched
  */
 bool
 _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
-			  ScanDirection dir, bool *continuescan)
+			  ScanDirection dir, bool *continuescan,
+			  bool requiredDirMatched)
 {
 	TupleDesc	tupdesc;
 	BTScanOpaque so;
@@ -1381,6 +1384,20 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 		Datum		datum;
 		bool		isNull;
 		Datum		test;
+		bool		requiredDir = false;
+
+		/*
+		 * Is the key required for scanning for either forward or backward
+		 * direction?  If so and called told us that these types of keys are
+		 * known to be matched, skip the check.  Except for the row keys,
+		 * where NULLs could be found in the middle of matching values.
+		 */
+		if (key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD) &&
+			!(key->sk_flags & SK_ROW_HEADER))
+			requiredDir = true;
+
+		if (requiredDir && requiredDirMatched)
+			continue;
 
 		if (key->sk_attno > tupnatts)
 		{
@@ -1429,11 +1446,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			 * scan direction, then we can conclude no further tuples will
 			 * pass, either.
 			 */
-			if ((key->sk_flags & SK_BT_REQFWD) &&
-				ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((key->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
+			if (requiredDir)
 				*continuescan = false;
 
 			/*
@@ -1498,11 +1511,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
 			 * initial positioning in _bt_first() when they are available. See
 			 * comments in _bt_first().
 			 */
-			if ((key->sk_flags & SK_BT_REQFWD) &&
-				ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((key->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
+			if (requiredDir)
 				*continuescan = false;
 
 			/*
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index f5c66964ca0..66e6a7775bf 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1054,6 +1054,9 @@ typedef struct BTScanOpaqueData
 	int		   *killedItems;	/* currPos.items indexes of killed items */
 	int			numKilled;		/* number of currently stored items */
 
+	/* flag inficating the first page in the scan */
+	bool		firstPage;
+
 	/*
 	 * If we are doing an index-only scan, these are the tuple storage
 	 * workspaces for the currPos and markPos respectively.  Each is of size
@@ -1253,7 +1256,8 @@ extern void _bt_mark_array_keys(IndexScanDesc scan);
 extern void _bt_restore_array_keys(IndexScanDesc scan);
 extern void _bt_preprocess_keys(IndexScanDesc scan);
 extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
-						  int tupnatts, ScanDirection dir, bool *continuescan);
+						  int tupnatts, ScanDirection dir, bool *continuescan,
+						  bool requiredMatched);
 extern void _bt_killitems(IndexScanDesc scan);
 extern BTCycleId _bt_vacuum_cycleid(Relation rel);
 extern BTCycleId _bt_start_vacuum(Relation rel);
-- 
2.37.1 (Apple Git-137.1)

