From ad123a3f8da3d95262b2553e90dd9c8fbb8d2335 Mon Sep 17 00:00:00 2001
From: Alexandre Felipe <o.alexandre.felipe@gmail.com>
Date: Thu, 5 Feb 2026 05:09:48 +0000
Subject: [PATCH 3/3] [MERGE-SCAN] Planner integration

---
 src/backend/access/index/genam.c          |   2 +
 src/backend/access/nbtree/nbtmergescan.c  |  60 ++++++-
 src/backend/access/nbtree/nbtree.c        | 129 +++++++++++++++
 src/backend/executor/nodeIndexonlyscan.c  |   5 +-
 src/backend/executor/nodeIndexscan.c      |  11 ++
 src/backend/optimizer/path/indxpath.c     | 188 ++++++++++++++++++++++
 src/backend/optimizer/plan/createplan.c   |   8 +
 src/backend/optimizer/util/pathnode.c     |   2 +
 src/include/access/relscan.h              |   3 +
 src/include/nodes/execnodes.h             |   5 +
 src/include/nodes/pathnodes.h             |   1 +
 src/include/nodes/plannodes.h             |   4 +
 src/test/regress/expected/btree_merge.out |  16 +-
 src/test/regress/sql/btree_merge.sql      |   9 ++
 14 files changed, 437 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 5e89b86a62c..53615fb08d2 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -126,6 +126,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 	scan->xs_hitup = NULL;
 	scan->xs_hitupdesc = NULL;
 
+	scan->xs_num_merge_prefixes = 0;
+
 	return scan;
 }
 
diff --git a/src/backend/access/nbtree/nbtmergescan.c b/src/backend/access/nbtree/nbtmergescan.c
index 70828dc73d3..eda1e683525 100644
--- a/src/backend/access/nbtree/nbtmergescan.c
+++ b/src/backend/access/nbtree/nbtmergescan.c
@@ -27,6 +27,7 @@
 #include "access/relscan.h"
 #include "lib/pairingheap.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
@@ -169,7 +170,8 @@ bt_merge_init(IndexScanDesc scan,
 		cursor->exhausted = prefix_nulls[i];	/* NULL prefix = exhausted */
 		cursor->sort_key_isnull = true;
 		BTScanPosInvalidate(cursor->pos);
-		cursor->tuples = NULL;
+		/* Allocate tuple workspace for index-only scans */
+		cursor->tuples = palloc(BLCKSZ);
 	}
 
 	/* Initialize the merge heap */
@@ -219,6 +221,15 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir)
 				state->active_cursors++;
 			}
 		}
+
+		/*
+		 * Track internal tuple reads for stats. We read active_cursors tuples
+		 * during initialization. One of these will be returned first and
+		 * counted by index_getnext_tid, so we count (active_cursors - 1) here.
+		 */
+		if (state->active_cursors > 1)
+			pgstat_count_index_tuples(scan->indexRelation,
+									  state->active_cursors - 1);
 	}
 
 	/* Get the cursor with the smallest suffix value */
@@ -228,9 +239,15 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir)
 	node = pairingheap_remove_first(state->merge_heap);
 	cursor = pairingheap_container(BTMergeCursor, ph_node, node);
 
-	/* Set up the heap TID from the current cursor position */
+	/* Set up the heap TID and index tuple from the current cursor position */
 	Assert(BTScanPosIsValid(cursor->pos));
-	scan->xs_heaptid = cursor->pos.items[cursor->pos.itemIndex].heapTid;
+	{
+		BTScanPosItem *currItem = &cursor->pos.items[cursor->pos.itemIndex];
+		scan->xs_heaptid = currItem->heapTid;
+		/* For index-only scans, set the index tuple pointer */
+		if (cursor->tuples)
+			scan->xs_itup = (IndexTuple) (cursor->tuples + currItem->tupleOffset);
+	}
 
 	/* Advance cursor to next tuple */
 	if (bt_merge_cursor_advance(state, scan, cursor))
@@ -255,9 +272,23 @@ bt_merge_getnext(IndexScanDesc scan, ScanDirection dir)
 void
 bt_merge_end(BTMergeScanState *state)
 {
+	int			i;
+
 	if (state == NULL)
 		return;
 
+	/* Release any buffer pins held by cursors */
+	for (i = 0; i < state->num_cursors; i++)
+	{
+		BTMergeCursor *cursor = &state->cursors[i];
+
+		if (BTScanPosIsValid(cursor->pos) && BufferIsValid(cursor->pos.buf))
+		{
+			ReleaseBuffer(cursor->pos.buf);
+			cursor->pos.buf = InvalidBuffer;
+		}
+	}
+
 	/* Free the memory context, which frees all allocations */
 	MemoryContextDelete(state->merge_context);
 }
@@ -302,8 +333,14 @@ bt_merge_cursor_init(BTMergeScanState *state,
 	/* Invalidate current position to force _bt_first */
 	BTScanPosInvalidate(so->currPos);
 
-	/* Disable array key handling for this cursor's scan */
+	/*
+	 * Disable array key handling for this cursor's scan.
+	 * We need to clear both numArrayKeys and needPrimScan to avoid
+	 * assertions in _bt_readfirstpage that expect array keys when
+	 * needPrimScan is set.
+	 */
 	so->numArrayKeys = 0;
+	so->needPrimScan = false;
 
 	/* Position at first matching tuple */
 	found = _bt_first(scan, state->direction);
@@ -313,6 +350,16 @@ bt_merge_cursor_init(BTMergeScanState *state,
 		/* Copy position to cursor */
 		memcpy(&cursor->pos, &so->currPos, sizeof(BTScanPosData));
 
+		/*
+		 * Copy the tuple data for index-only scans.
+		 * The tuple workspace contains copies of index tuples referenced
+		 * by items in currPos.
+		 */
+		if (so->currTuples && so->currPos.nextTupleOffset > 0)
+		{
+			memcpy(cursor->tuples, so->currTuples, so->currPos.nextTupleOffset);
+		}
+
 		/* Extract the sort key for heap ordering */
 		cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor,
 													&cursor->sort_key_isnull);
@@ -390,6 +437,11 @@ bt_merge_cursor_advance(BTMergeScanState *state,
 
 	if (found)
 	{
+		/*
+		 * Don't count here - the advanced-to tuple will be returned later
+		 * and counted by index_getnext_tid at that time.
+		 */
+
 		/* Extract new sort key */
 		cursor->sort_key = bt_merge_extract_sortkey(state, scan, cursor,
 													&cursor->sort_key_isnull);
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3dec1ee657d..0e55c4874b4 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -21,6 +21,8 @@
 #include "access/nbtree.h"
 #include "access/relscan.h"
 #include "access/stratnum.h"
+#include "catalog/pg_amop.h"
+#include "utils/array.h"
 #include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "nodes/execnodes.h"
@@ -34,6 +36,7 @@
 #include "utils/datum.h"
 #include "utils/fmgrprotos.h"
 #include "utils/index_selfuncs.h"
+#include "utils/lsyscache.h"
 #include "utils/memutils.h"
 
 
@@ -98,6 +101,8 @@ static void _bt_parallel_serialize_arrays(Relation rel, BTParallelScanDesc btsca
 										  BTScanOpaque so);
 static void _bt_parallel_restore_arrays(Relation rel, BTParallelScanDesc btscan,
 										BTScanOpaque so);
+static bool bt_init_merge_scan_from_keys(IndexScanDesc scan);
+
 static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 						 IndexBulkDeleteCallback callback, void *callback_state,
 						 BTCycleId cycleid);
@@ -221,6 +226,106 @@ btinsert(Relation rel, Datum *values, bool *isnull,
 	return result;
 }
 
+/*
+ * bt_init_merge_scan_from_keys
+ *		Initialize merge scan state from the preprocessed scan keys.
+ *
+ * Returns true if merge scan was successfully initialized.
+ * Returns false if merge scan cannot be used (e.g., no suitable array key).
+ */
+static bool
+bt_init_merge_scan_from_keys(IndexScanDesc scan)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	Relation	rel = scan->indexRelation;
+	TupleDesc	itupdesc = RelationGetDescr(rel);
+	ScanKey		arrayKey = NULL;
+	ArrayType  *arr;
+	Datum	   *prefix_values;
+	bool	   *prefix_nulls;
+	int			num_prefixes;
+	int			prefix_attno;
+	int			suffix_attno;
+	Oid			suffix_cmp_oid;
+	Oid			suffix_collation;
+	Oid			opfamily;
+	Oid			elemtype;
+	int16		elemlen;
+	bool		elembyval;
+	char		elemalign;
+	int			i;
+
+	/* Look for SK_SEARCHARRAY on first column in the raw scan keys */
+	for (i = 0; i < scan->numberOfKeys; i++)
+	{
+		ScanKey		sk = &scan->keyData[i];
+
+		if ((sk->sk_flags & SK_SEARCHARRAY) &&
+			sk->sk_attno == 1 &&
+			sk->sk_strategy == BTEqualStrategyNumber)
+		{
+			arrayKey = sk;
+			break;
+		}
+	}
+
+	if (arrayKey == NULL)
+		return false;
+
+	/* Extract array values from the scan key */
+	arr = DatumGetArrayTypeP(arrayKey->sk_argument);
+	num_prefixes = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+
+	if (num_prefixes < 2)
+		return false;
+
+	/* Get array element type info */
+	elemtype = ARR_ELEMTYPE(arr);
+	get_typlenbyvalalign(elemtype, &elemlen, &elembyval, &elemalign);
+
+	/* Deconstruct the array into individual elements */
+	deconstruct_array(arr, elemtype, elemlen, elembyval, elemalign,
+					  &prefix_values, &prefix_nulls, &num_prefixes);
+
+	/* Attribute numbers (1-based) */
+	prefix_attno = 1;
+	suffix_attno = 2;
+
+	/* Get the opfamily from the index */
+	opfamily = rel->rd_opfamily[suffix_attno - 1];
+
+	/* Get collation from the suffix column */
+	suffix_collation = TupleDescAttr(itupdesc, suffix_attno - 1)->attcollation;
+
+	/* Get the comparison function OID for the suffix column */
+	suffix_cmp_oid = get_opfamily_proc(opfamily,
+									   TupleDescAttr(itupdesc, suffix_attno - 1)->atttypid,
+									   TupleDescAttr(itupdesc, suffix_attno - 1)->atttypid,
+									   BTORDER_PROC);
+
+	if (!OidIsValid(suffix_cmp_oid))
+	{
+		pfree(prefix_values);
+		pfree(prefix_nulls);
+		return false;
+	}
+
+	/* Initialize the merge scan state */
+	so->mergeState = bt_merge_init(scan,
+								   prefix_values,
+								   prefix_nulls,
+								   num_prefixes,
+								   prefix_attno,
+								   suffix_attno,
+								   suffix_cmp_oid,
+								   suffix_collation);
+
+	pfree(prefix_values);
+	pfree(prefix_nulls);
+
+	return (so->mergeState != NULL);
+}
+
 /*
  *	btgettuple() -- Get the next tuple in the scan.
  */
@@ -235,6 +340,24 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
 	/* btree indexes are never lossy */
 	scan->xs_recheck = false;
 
+	/*
+	 * Check if merge scan optimization should be used.
+	 * Initialize merge scan state on first call if needed.
+	 */
+	if (scan->xs_num_merge_prefixes > 0 && so->mergeState == NULL)
+	{
+		if (!bt_init_merge_scan_from_keys(scan))
+		{
+			/* Merge scan init failed, fall through to regular scan */
+			scan->xs_num_merge_prefixes = 0;
+		}
+	}
+
+	/* Use merge scan if initialized */
+	/* Use merge scan if initialized */
+	if (so->mergeState != NULL)
+		return bt_merge_getnext(scan, dir);
+
 	/* Each loop iteration performs another primitive index scan */
 	do
 	{
@@ -365,6 +488,9 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 	so->killedItems = NULL;		/* until needed */
 	so->numKilled = 0;
 
+	/* Initialize merge scan state to NULL */
+	so->mergeState = NULL;
+
 	/*
 	 * We don't know yet whether the scan will be index-only, so we do not
 	 * allocate the tuple workspace arrays until btrescan.  However, we set up
@@ -486,6 +612,9 @@ btendscan(IndexScanDesc scan)
 		pfree(so->killedItems);
 	if (so->currTuples != NULL)
 		pfree(so->currTuples);
+	/* Clean up merge scan state */
+	if (so->mergeState != NULL)
+		bt_merge_end(so->mergeState);
 	/* so->markTuples should not be pfree'd, see btrescan */
 	pfree(so);
 }
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index c2d09374517..70483c4e767 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -98,6 +98,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 
 		node->ioss_ScanDesc = scandesc;
 
+		scandesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes;
 
 		/* Set it up for index-only scan */
 		node->ioss_ScanDesc->xs_want_itup = true;
@@ -615,7 +616,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	indexstate->ioss_RuntimeKeysReady = false;
 	indexstate->ioss_RuntimeKeys = NULL;
 	indexstate->ioss_NumRuntimeKeys = 0;
-
+	indexstate->ioss_NumMergePrefixes = node->num_merge_prefixes;
 	/*
 	 * build the index scan keys from the index qualification
 	 */
@@ -790,6 +791,7 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
 								 node->ioss_NumOrderByKeys,
 								 piscan);
 	node->ioss_ScanDesc->xs_want_itup = true;
+	node->ioss_ScanDesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes;
 	node->ioss_VMBuffer = InvalidBuffer;
 
 	/*
@@ -856,6 +858,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
 								 node->ioss_NumOrderByKeys,
 								 piscan);
 	node->ioss_ScanDesc->xs_want_itup = true;
+	node->ioss_ScanDesc->xs_num_merge_prefixes = node->ioss_NumMergePrefixes;
 
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index a616abff04c..9e62cacd2d3 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -115,6 +115,7 @@ IndexNext(IndexScanState *node)
 
 		node->iss_ScanDesc = scandesc;
 
+		scandesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes;
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
 		 * pass the scankeys to the index AM.
@@ -211,6 +212,8 @@ IndexNextWithReorder(IndexScanState *node)
 
 		node->iss_ScanDesc = scandesc;
 
+		scandesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes;
+
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
 		 * pass the scankeys to the index AM.
@@ -1086,6 +1089,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 		indexstate->iss_RuntimeContext = NULL;
 	}
 
+	/*
+	 * Initialize merge scan state from plan node
+	 */
+	indexstate->iss_NumMergePrefixes = node->num_merge_prefixes;
+
 	/*
 	 * all done.
 	 */
@@ -1725,6 +1733,8 @@ ExecIndexScanInitializeDSM(IndexScanState *node,
 								 node->iss_NumOrderByKeys,
 								 piscan);
 
+	node->iss_ScanDesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes;
+
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
 	 * the scankeys to the index AM.
@@ -1789,6 +1799,7 @@ ExecIndexScanInitializeWorker(IndexScanState *node,
 								 node->iss_NumOrderByKeys,
 								 piscan);
 
+	node->iss_ScanDesc->xs_num_merge_prefixes = node->iss_NumMergePrefixes;
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
 	 * the scankeys to the index AM.
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 67d9dc35f44..44b79f91335 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -16,6 +16,7 @@
 #include "postgres.h"
 
 #include "access/stratnum.h"
+#include "utils/array.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "catalog/pg_am.h"
@@ -102,6 +103,8 @@ static bool eclass_already_used(EquivalenceClass *parent_ec, Relids oldrelids,
 static void get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 							IndexOptInfo *index, IndexClauseSet *clauses,
 							List **bitindexpaths);
+static void consider_merge_scan_path(PlannerInfo *root, RelOptInfo *rel,
+									 IndexOptInfo *index, IndexClauseSet *clauses);
 static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 							   IndexOptInfo *index, IndexClauseSet *clauses,
 							   bool useful_predicate,
@@ -770,6 +773,191 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									   NULL);
 		*bitindexpaths = list_concat(*bitindexpaths, indexpaths);
 	}
+
+	/*
+	 * Consider merge scan optimization for queries with:
+	 * - ScalarArrayOpExpr (IN clause) on first index column
+	 * - ORDER BY on second column (different from index leading column)
+	 * - Optionally LIMIT
+	 */
+	consider_merge_scan_path(root, rel, index, clauses);
+}
+
+/*
+ * consider_merge_scan_path
+ *	  Check if this index can provide a merge scan path for queries of the form:
+ *	  WHERE prefix IN (...) AND suffix >= b ORDER BY suffix, prefix LIMIT N
+ *
+ *	  Merge scan allows lazily producing output sorted by (suffix, prefix) from
+ *	  an index on (prefix, suffix) by doing a K-way merge of K separate scans.
+ */
+static void
+consider_merge_scan_path(PlannerInfo *root, RelOptInfo *rel,
+						 IndexOptInfo *index, IndexClauseSet *clauses)
+{
+	IndexPath  *ipath;
+	List	   *index_clauses;
+	List	   *index_pathkeys;
+	List	   *merge_pathkeys;
+	ListCell   *lc;
+	int			num_prefixes = 0;
+	int			indexcol;
+	bool		has_saop_on_first = false;
+	bool		has_clause_on_second = false;
+
+	/* Need at least 2 index columns for merge scan */
+	if (index->nkeycolumns < 2)
+		return;
+
+	/* Index must be ordered and support gettuple */
+	if (index->sortopfamily == NULL || !index->amhasgettuple)
+		return;
+
+	/* Must have query pathkeys with at least 2 elements */
+	if (root->query_pathkeys == NIL || list_length(root->query_pathkeys) < 2)
+		return;
+
+	/*
+	 * Check for ScalarArrayOpExpr on first column.
+	 * Count the number of array elements (prefix values).
+	 */
+	foreach(lc, clauses->indexclauses[0])
+	{
+		IndexClause *iclause = (IndexClause *) lfirst(lc);
+		RestrictInfo *rinfo = iclause->rinfo;
+
+		if (IsA(rinfo->clause, ScalarArrayOpExpr))
+		{
+			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
+			Node	   *arrayarg = (Node *) lsecond(saop->args);
+
+			has_saop_on_first = true;
+
+			/* Try to determine the number of array elements */
+			if (IsA(arrayarg, Const))
+			{
+				Const	   *con = (Const *) arrayarg;
+
+				if (!con->constisnull)
+				{
+					ArrayType  *arr = DatumGetArrayTypeP(con->constvalue);
+					num_prefixes = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+				}
+			}
+			else
+			{
+				/* Can't determine size, estimate conservatively */
+				num_prefixes = 10;
+			}
+			break;
+		}
+	}
+
+	if (!has_saop_on_first || num_prefixes < 2)
+		return;
+
+	/* Check if there's any clause on second column */
+	if (clauses->indexclauses[1] != NIL)
+		has_clause_on_second = true;
+
+	if (!has_clause_on_second)
+		return;
+
+	/*
+	 * Get the natural index pathkeys (prefix, suffix order).
+	 * We need at least 2 pathkeys for merge scan to make sense.
+	 */
+	index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection);
+	if (list_length(index_pathkeys) < 2)
+		return;
+
+	/*
+	 * Check if query pathkeys are (suffix, prefix) - the REVERSED order.
+	 * query_pathkeys[0] should match index_pathkeys[1] (suffix)
+	 * query_pathkeys[1] should match index_pathkeys[0] (prefix)
+	 */
+	{
+		PathKey    *qpk0 = (PathKey *) linitial(root->query_pathkeys);
+		PathKey    *qpk1 = (PathKey *) lsecond(root->query_pathkeys);
+		PathKey    *ipk0 = (PathKey *) linitial(index_pathkeys);
+		PathKey    *ipk1 = (PathKey *) lsecond(index_pathkeys);
+
+		/* Query's first pathkey must match index's SECOND pathkey (suffix) */
+		if (qpk0->pk_eclass != ipk1->pk_eclass)
+			return;
+
+		/* Query's second pathkey must match index's FIRST pathkey (prefix) */
+		if (qpk1->pk_eclass != ipk0->pk_eclass)
+			return;
+	}
+
+	/*
+	 * The merge scan can satisfy the query's ORDER BY (suffix, prefix).
+	 * Use the query's pathkeys directly since we've verified they match.
+	 * This is critical: PostgreSQL compares pathkeys by pointer equality.
+	 */
+	merge_pathkeys = root->query_pathkeys;
+
+	/*
+	 * Build the index clause list (same as normal path).
+	 */
+	index_clauses = NIL;
+	for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
+	{
+		foreach(lc, clauses->indexclauses[indexcol])
+		{
+			IndexClause *iclause = (IndexClause *) lfirst(lc);
+			index_clauses = lappend(index_clauses, iclause);
+		}
+	}
+
+	/*
+	 * Create the merge scan path with (suffix, prefix) pathkeys.
+	 */
+	ipath = create_index_path(root, index,
+							  index_clauses,
+							  NIL,		/* no ORDER BY expressions */
+							  NIL,		/* no ORDER BY columns */
+							  merge_pathkeys,
+							  ForwardScanDirection,
+							  check_index_only(rel, index),
+							  NULL,		/* no outer relids */
+							  1.0,		/* loop_count */
+							  false);	/* not parallel */
+
+	/* Enable merge scan with K-way merge */
+	ipath->num_merge_prefixes = num_prefixes;
+
+	/*
+	 * Adjust costs and row estimate for merge scan.
+	 * Merge scan reads exactly (limit + K - 1) tuples instead of all matching.
+	 * The row estimate reflects actual tuple accesses, not total matches.
+	 */
+	if (root->limit_tuples > 0 && root->limit_tuples < ipath->path.rows)
+	{
+		double		merge_rows;
+		double		original_rows = ipath->path.rows;
+
+		/* Merge scan reads exactly (limit + K - 1) tuples */
+		merge_rows = root->limit_tuples + num_prefixes - 1;
+		if (merge_rows < original_rows)
+		{
+			double		ratio = merge_rows / original_rows;
+
+			/* Scale run cost by ratio of tuples accessed */
+			ipath->path.total_cost = ipath->path.startup_cost +
+				(ipath->path.total_cost - ipath->path.startup_cost) * ratio;
+
+			/* Add startup cost for K index descents */
+			ipath->path.startup_cost += num_prefixes * 0.01 * cpu_operator_cost;
+
+			/* Update row estimate to reflect merge scan efficiency */
+			ipath->path.rows = merge_rows;
+		}
+	}
+
+	/* Submit the path for consideration */
+	add_path(rel, (Path *) ipath);
 }
 
 /*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index e5200f4b3ce..485b4b3e54e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -184,12 +184,14 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
 								 Oid indexid, List *indexqual, List *indexqualorig,
 								 List *indexorderby, List *indexorderbyorig,
 								 List *indexorderbyops,
+								 int num_merge_prefixes,
 								 ScanDirection indexscandir);
 static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual,
 										 Index scanrelid, Oid indexid,
 										 List *indexqual, List *recheckqual,
 										 List *indexorderby,
 										 List *indextlist,
+										 int num_merge_prefixes,
 										 ScanDirection indexscandir);
 static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid,
 											  List *indexqual,
@@ -3009,6 +3011,7 @@ create_indexscan_plan(PlannerInfo *root,
 												stripped_indexquals,
 												fixed_indexorderbys,
 												indexinfo->indextlist,
+												best_path->num_merge_prefixes,
 												best_path->indexscandir);
 	else
 		scan_plan = (Scan *) make_indexscan(tlist,
@@ -3020,6 +3023,7 @@ create_indexscan_plan(PlannerInfo *root,
 											fixed_indexorderbys,
 											indexorderbys,
 											indexorderbyops,
+											best_path->num_merge_prefixes,
 											best_path->indexscandir);
 
 	copy_generic_path_info(&scan_plan->plan, &best_path->path);
@@ -5527,6 +5531,7 @@ make_indexscan(List *qptlist,
 			   List *indexorderby,
 			   List *indexorderbyorig,
 			   List *indexorderbyops,
+			   int num_merge_prefixes,
 			   ScanDirection indexscandir)
 {
 	IndexScan  *node = makeNode(IndexScan);
@@ -5543,6 +5548,7 @@ make_indexscan(List *qptlist,
 	node->indexorderby = indexorderby;
 	node->indexorderbyorig = indexorderbyorig;
 	node->indexorderbyops = indexorderbyops;
+	node->num_merge_prefixes = num_merge_prefixes;
 	node->indexorderdir = indexscandir;
 
 	return node;
@@ -5557,6 +5563,7 @@ make_indexonlyscan(List *qptlist,
 				   List *recheckqual,
 				   List *indexorderby,
 				   List *indextlist,
+				   int num_merge_prefixes,
 				   ScanDirection indexscandir)
 {
 	IndexOnlyScan *node = makeNode(IndexOnlyScan);
@@ -5572,6 +5579,7 @@ make_indexonlyscan(List *qptlist,
 	node->recheckqual = recheckqual;
 	node->indexorderby = indexorderby;
 	node->indextlist = indextlist;
+	node->num_merge_prefixes = num_merge_prefixes;
 	node->indexorderdir = indexscandir;
 
 	return node;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 7b6c5d51e5d..21746cd684c 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1075,6 +1075,8 @@ create_index_path(PlannerInfo *root,
 	pathnode->indexorderbycols = indexorderbycols;
 	pathnode->indexscandir = indexscandir;
 
+	pathnode->num_merge_prefixes = 0;
+
 	cost_index(pathnode, root, loop_count, partial_path);
 
 	return pathnode;
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index ce340c076f8..fc55315ee07 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -190,6 +190,9 @@ typedef struct IndexScanDescData
 
 	/* parallel index scan information, in shared memory */
 	struct ParallelIndexScanDescData *parallel_scan;
+
+	/* Merge scan: K-way merge, ordered by an index suffix */
+	int			xs_num_merge_prefixes;
 } IndexScanDescData;
 
 /* Generic structure for parallel scans */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index f8053d9e572..4433d1c2612 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1734,6 +1734,9 @@ typedef struct IndexScanState
 	bool	   *iss_OrderByTypByVals;
 	int16	   *iss_OrderByTypLens;
 	Size		iss_PscanLen;
+
+	/* Merge scan: K-way merge */
+	int			iss_NumMergePrefixes;
 } IndexScanState;
 
 /* ----------------
@@ -1780,6 +1783,8 @@ typedef struct IndexOnlyScanState
 	Size		ioss_PscanLen;
 	AttrNumber *ioss_NameCStringAttNums;
 	int			ioss_NameCStringCount;
+	/* Merge scan: K-way merge */
+	int			ioss_NumMergePrefixes;
 } IndexOnlyScanState;
 
 /* ----------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index fb808823acf..ced7e224a87 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2040,6 +2040,7 @@ typedef struct IndexPath
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
 	Selectivity indexselectivity;
+	int			num_merge_prefixes;
 } IndexPath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4bc6fb5670e..86d8c92e01f 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -597,6 +597,8 @@ typedef struct IndexScan
 	List	   *indexorderbyops;
 	/* forward or backward or don't care */
 	ScanDirection indexorderdir;
+	/* Merge scan: K-way merge */
+	int			num_merge_prefixes;
 } IndexScan;
 
 /* ----------------
@@ -645,6 +647,8 @@ typedef struct IndexOnlyScan
 	List	   *indextlist;
 	/* forward or backward or don't care */
 	ScanDirection indexorderdir;
+	/* Merge scan: K-way merge */
+	int			num_merge_prefixes;
 } IndexOnlyScan;
 
 /* ----------------
diff --git a/src/test/regress/expected/btree_merge.out b/src/test/regress/expected/btree_merge.out
index 441ae1d0657..28509b331d7 100644
--- a/src/test/regress/expected/btree_merge.out
+++ b/src/test/regress/expected/btree_merge.out
@@ -82,6 +82,20 @@ SHOW track_counts;  -- should be 'on'
  on
 (1 row)
 
+-- Verify merge scan is used: no Sort node, rows=10 (N + K - 1 = 3 + 8 - 1)
+EXPLAIN (COSTS OFF)
+SELECT x, y
+FROM btree_merge_test
+WHERE x IN (1,2,5,8,13,21,34,55) AND y >= 19
+ORDER BY y, x
+LIMIT 3;
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Limit
+   ->  Index Only Scan using btree_merge_test_idx on btree_merge_test
+         Index Cond: ((x = ANY ('{1,2,5,8,13,21,34,55}'::integer[])) AND (y >= 19))
+(3 rows)
+
 -- From the limited query proposition this can be computed with 10
 -- tupple accesses.
 SELECT x, y
@@ -107,7 +121,7 @@ FROM pg_stat_user_indexes
 WHERE indexrelname = 'btree_merge_test_idx';
  idx_scan | idx_tup_read | idx_tup_fetch 
 ----------+--------------+---------------
-        5 |           10 |            10
+        8 |            9 |             3
 (1 row)
 
 DROP TABLE btree_merge_test;
diff --git a/src/test/regress/sql/btree_merge.sql b/src/test/regress/sql/btree_merge.sql
index be00c33c2a5..ad9cf03f869 100644
--- a/src/test/regress/sql/btree_merge.sql
+++ b/src/test/regress/sql/btree_merge.sql
@@ -81,6 +81,15 @@ ANALYSE btree_merge_test;
 SET enable_seqscan = OFF;
 SET enable_bitmapscan = OFF;
 SHOW track_counts;  -- should be 'on'
+
+-- Verify merge scan is used: no Sort node, rows=10 (N + K - 1 = 3 + 8 - 1)
+EXPLAIN (COSTS OFF)
+SELECT x, y
+FROM btree_merge_test
+WHERE x IN (1,2,5,8,13,21,34,55) AND y >= 19
+ORDER BY y, x
+LIMIT 3;
+
 -- From the limited query proposition this can be computed with 10
 -- tupple accesses.
 SELECT x, y
-- 
2.40.0