From 98813d0b0f6c852e5117a677195c8f1b9471a9a9 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Thu, 1 May 2025 14:58:02 -0400
Subject: [PATCH v2 2/2] Prevent premature nbtree array advancement.

Prevent forcenonrequired from prematurely advancing the scan's array
keys beyond key space that the scan has yet to read tuples from.  Do
this by defensively resetting the scan's array keys (to the first array
elements for the current scan direction) before the _bt_checkkeys call
for pstate.finaltup.  Otherwise, it's possible for the scan to fail to
return matching tuples in rare edge cases.

Oversight in commit 8a510275, which optimized nbtree search scan key
comparisons.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-WzmodSE+gpTd1CRGU9ez8ytyyDS+Kns2r9NzgUp1s56kpw@mail.gmail.com
---
 src/backend/access/nbtree/nbtsearch.c |  8 ++++-
 src/backend/access/nbtree/nbtutils.c  | 42 ++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index eb9163f46..57fdce7da 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -2279,9 +2279,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 			IndexTuple	itup = (IndexTuple) PageGetItem(page, iid);
 			int			truncatt;
 
-			truncatt = BTreeTupleGetNAtts(itup, rel);
+			/* Reset arrays, per _bt_set_startikey contract */
+			if (pstate.forcenonrequired)
+				_bt_start_array_keys(scan, dir);
 			pstate.forcenonrequired = false;
 			pstate.startikey = 0;	/* _bt_set_startikey ignores P_HIKEY */
+
+			truncatt = BTreeTupleGetNAtts(itup, rel);
 			_bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt);
 		}
 
@@ -2461,8 +2465,10 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 			pstate.offnum = offnum;
 			if (arrayKeys && offnum == minoff && pstate.forcenonrequired)
 			{
+				/* Reset arrays, per _bt_set_startikey contract */
 				pstate.forcenonrequired = false;
 				pstate.startikey = 0;
+				_bt_start_array_keys(scan, dir);
 			}
 			passes_quals = _bt_checkkeys(scan, &pstate, arrayKeys,
 										 itup, indnatts);
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 686adb9bb..94e230d7e 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -2634,13 +2634,14 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
  * primscan's first page would mislead _bt_advance_array_keys, which expects
  * pstate.nskipadvances to be representative of every first page's key space.)
  *
- * Caller must reset startikey and forcenonrequired ahead of the _bt_checkkeys
- * call for pstate.finaltup iff we set forcenonrequired=true.  This will give
- * _bt_checkkeys the opportunity to call _bt_advance_array_keys once more,
- * with sktrig_required=true, to advance the arrays that were ignored during
- * checks of all of the page's prior tuples.  Caller doesn't need to do this
- * on the rightmost/leftmost page in the index (where pstate.finaltup isn't
- * set), since forcenonrequired won't be set here by us in the first place.
+ * Caller must call _bt_start_array_keys and reset startikey/forcenonrequired
+ * ahead of the finaltup _bt_checkkeys call when we set forcenonrequired=true.
+ * This will give _bt_checkkeys the opportunity to call _bt_advance_array_keys
+ * with sktrig_required=true, restoring the invariant that the scan's required
+ * arrays always track the scan's progress through the index's key space.
+ * Caller won't need to do this on the rightmost/leftmost page in the index
+ * (where pstate.finaltup isn't ever set), since forcenonrequired will never
+ * be set here in the first place.
  */
 void
 _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
@@ -2704,10 +2705,31 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
 			/*
-			 * Can't let pstate.startikey get set to an ikey beyond a
-			 * RowCompare inequality
+			 * RowCompare inequality.
+			 *
+			 * Only the first subkey from a RowCompare can ever be marked
+			 * required (that happens when the row header is marked required).
+			 * There is no simple, general way for us to transitively deduce
+			 * whether or not every tuple on the page satisfies a RowCompare
+			 * key based only on firsttup and lasttup -- so we just give up.
 			 */
-			break;				/* unsafe */
+			if (!start_past_saop_eq && !so->skipScan)
+				break;			/* unsafe to go further */
+
+			/*
+			 * We have to be even more careful with RowCompares that come
+			 * after an array: we assume it's unsafe to even bypass the array.
+			 * Calling _bt_start_array_keys to recover the scan's arrays
+			 * following use of forcenonrequired mode isn't compatible with
+			 * _bt_check_rowcompare's continuescan=false behavior with NULL
+			 * row compare members.  _bt_advance_array_keys must not make a
+			 * decision on the basis of a key not being satisfied in the
+			 * opposite-to-scan direction until the scan reaches a leaf page
+			 * where the same key begins to be satisfied in scan direction.
+			 * The _bt_first !used_all_subkeys behavior makes this limitation
+			 * hard to work around some other way.
+			 */
+			return;				/* completely unsafe to set pstate.startikey */
 		}
 		if (key->sk_strategy != BTEqualStrategyNumber)
 		{
-- 
2.49.0

