Seems cfbot was not entirely happy about the patch, for two reasons:

1) the enable_insert_prefetching definition was inconsistent (different
boot/default values, missing from postgresql.conf.sample, and so on)

2) a stupid bug in execReplication that inserted index entries twice

The attached v3 should fix all of that, I believe.


As for the path forward, I think the prefetching is demonstrably
beneficial, although there are cases where it can't help or even hurts
performance. I think success depends on three areas:

(a) reducing the cost of the prefetching - For example, right now we
build the index tuples twice (once for the prefetch, once for the actual
insert), but maybe there's a way to do that only once. The same goes for
evaluating predicates of partial indexes, and so on.

(b) being smarter about when to prefetch - For example, if there's only
one "prefetchable" index, prefetching is somewhat pointless (at least for
single-row inserts, where the actual insert descends the tree right
after). And so on.

(c) not prefetching when already cached - This is somewhat related to
the previous point, but perhaps it'd be cheaper to first check whether
the data is already cached. For shared buffers that should not be
difficult, and for the page cache we could use preadv2 with the
RWF_NOWAIT flag (see the sketch below). The question is whether such a
check is cheap enough to beat just issuing posix_fadvise as we do now
(the prefetch already skips pages found in shared buffers, but the
fadvise gets issued even when the page is sitting in the page cache).
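
To illustrate the page cache part of (c), here's a minimal sketch of such
a check, assuming Linux with RWF_NOWAIT support (kernel 4.14+, glibc
2.26+). The function name and the fd/blkno/blcksz plumbing are made up
for the example and would map to the smgr layer in a real patch:

#define _GNU_SOURCE
#include <errno.h>
#include <stdbool.h>
#include <sys/uio.h>

/*
 * Return true if the given block appears to be in the OS page cache.
 * preadv2() with RWF_NOWAIT fails with EAGAIN instead of blocking when
 * satisfying the read would require actual I/O.
 */
static bool
block_in_page_cache(int fd, off_t blkno, size_t blcksz)
{
	char		buf[8192];	/* assumes blcksz <= 8kB (default BLCKSZ) */
	struct iovec iov = {.iov_base = buf, .iov_len = blcksz};
	ssize_t		ret;

	ret = preadv2(fd, &iov, 1, blkno * (off_t) blcksz, RWF_NOWAIT);

	if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
		return false;		/* not cached, the read would have blocked */

	/* data came back without blocking (other errors treated as cached) */
	return true;
}

Whether calling something like this for every leaf page we're about to
touch ends up cheaper than unconditionally issuing posix_fadvise is
exactly the open question - it's one syscall either way, but RWF_NOWAIT
also copies the data out when it is cached.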


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 5b2c18986422f5dda3cd0dfebd16e22443051d92 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Mon, 6 Nov 2023 17:42:48 +0100
Subject: [PATCH] v3

---
 src/backend/access/brin/brin.c                |   1 +
 src/backend/access/gin/ginutil.c              |   1 +
 src/backend/access/gist/gist.c                |   1 +
 src/backend/access/hash/hash.c                |   1 +
 src/backend/access/index/indexam.c            |  22 ++++
 src/backend/access/nbtree/nbtinsert.c         |  43 +++++++
 src/backend/access/nbtree/nbtree.c            |  21 ++++
 src/backend/access/nbtree/nbtsearch.c         |  95 ++++++++++++++++
 src/backend/access/spgist/spgutils.c          |   1 +
 src/backend/commands/copyfrom.c               |  21 ++++
 src/backend/executor/execIndexing.c           | 106 ++++++++++++++++++
 src/backend/executor/execReplication.c        |  12 ++
 src/backend/executor/nodeModifyTable.c        |  22 ++++
 src/backend/utils/misc/guc_tables.c           |  10 ++
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/access/amapi.h                    |   8 ++
 src/include/access/genam.h                    |   5 +
 src/include/access/nbtree.h                   |   8 ++
 src/include/executor/executor.h               |   6 +
 src/include/optimizer/cost.h                  |   1 +
 src/test/regress/expected/sysviews.out        |   3 +-
 21 files changed, 388 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 25338a90e29..3b9c22847a8 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -117,6 +117,7 @@ brinhandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = brinbuild;
 	amroutine->ambuildempty = brinbuildempty;
 	amroutine->aminsert = brininsert;
+	amroutine->amprefetch = NULL;
 	amroutine->ambulkdelete = brinbulkdelete;
 	amroutine->amvacuumcleanup = brinvacuumcleanup;
 	amroutine->amcanreturn = NULL;
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7a4cd93f301..666d58a750f 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -64,6 +64,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = ginbuild;
 	amroutine->ambuildempty = ginbuildempty;
 	amroutine->aminsert = gininsert;
+	amroutine->amprefetch = NULL;
 	amroutine->ambulkdelete = ginbulkdelete;
 	amroutine->amvacuumcleanup = ginvacuumcleanup;
 	amroutine->amcanreturn = NULL;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8ef5fa03290..3ed72cce448 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -86,6 +86,7 @@ gisthandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = gistbuild;
 	amroutine->ambuildempty = gistbuildempty;
 	amroutine->aminsert = gistinsert;
+	amroutine->amprefetch = NULL;
 	amroutine->ambulkdelete = gistbulkdelete;
 	amroutine->amvacuumcleanup = gistvacuumcleanup;
 	amroutine->amcanreturn = gistcanreturn;
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 7a025f33cfe..4f92fb4e115 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -83,6 +83,7 @@ hashhandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = hashbuild;
 	amroutine->ambuildempty = hashbuildempty;
 	amroutine->aminsert = hashinsert;
+	amroutine->amprefetch = NULL;
 	amroutine->ambulkdelete = hashbulkdelete;
 	amroutine->amvacuumcleanup = hashvacuumcleanup;
 	amroutine->amcanreturn = NULL;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index b25b03f7abc..fac126da421 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -63,6 +63,7 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 
+bool              enable_insert_prefetch = false;
 
 /* ----------------------------------------------------------------
  *					macros used in index_ routines
@@ -196,6 +197,27 @@ index_insert(Relation indexRelation,
 											 indexInfo);
 }
 
+/* ----------------
+ *		index_prefetch - prefetch index pages for insert
+ * ----------------
+ */
+void
+index_prefetch(Relation indexRelation,
+			   Datum *values,
+			   bool *isnull,
+			   Relation heapRelation,
+			   IndexInfo *indexInfo)
+{
+	RELATION_CHECKS;
+
+	/* silently skip index AMs that do not support prefetching */
+	if (indexRelation->rd_indam->amprefetch == NULL)
+		return;
+
+	indexRelation->rd_indam->amprefetch(indexRelation, values, isnull,
+										heapRelation, indexInfo);
+}
+
 /*
  * index_beginscan - start a scan of an index with amgettuple
  *
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 9cff4f29313..7eda2258eef 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -275,6 +275,49 @@ search:
 	return is_unique;
 }
 
+/* XXX simplified version of _bt_doinsert */
+void
+_bt_doprefetch(Relation rel, IndexTuple itup, Relation heapRel)
+{
+	BTInsertStateData insertstate;
+	BTScanInsert itup_key;
+
+	/* we need an insertion scan key to do our search, so build one */
+	itup_key = _bt_mkscankey(rel, itup);
+
+	/*
+	 * Fill in the BTInsertState working area, to track the current page and
+	 * position within the page to insert on.
+	 *
+	 * Note that itemsz is passed down to lower level code that deals with
+	 * inserting the item.  It must be MAXALIGN()'d.  This ensures that space
+	 * accounting code consistently considers the alignment overhead that we
+	 * expect PageAddItem() will add later.  (Actually, index_form_tuple() is
+	 * already conservative about alignment, but we don't rely on that from
+	 * this distance.  Besides, preserving the "true" tuple size in index
+	 * tuple headers for the benefit of nbtsplitloc.c might happen someday.
+	 * Note that heapam does not MAXALIGN() each heap tuple's lp_len field.)
+	 */
+	insertstate.itup = itup;
+	insertstate.itemsz = MAXALIGN(IndexTupleSize(itup));
+	insertstate.itup_key = itup_key;
+	insertstate.bounds_valid = false;
+	insertstate.buf = InvalidBuffer;
+	insertstate.postingoff = 0;
+
+	/*
+	 * Find and lock the leaf page that the tuple should be added to by
+	 * searching from the root page.  insertstate.buf will hold a buffer that
+	 * is locked in exclusive mode afterwards.
+	 *
+	 * XXX Same as _bt_search, but just prefetches the leaf page and then
+	 * releases it. We don't need the stack.
+	 */
+	_bt_prefetch(rel, heapRel, &insertstate);
+
+	pfree(itup_key);
+}
+
 /*
  *	_bt_search_insert() -- _bt_search() wrapper for inserts
  *
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index a88b36a589a..2594dc10f90 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -122,6 +122,7 @@ bthandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = btbuild;
 	amroutine->ambuildempty = btbuildempty;
 	amroutine->aminsert = btinsert;
+	amroutine->amprefetch = btprefetch;
 	amroutine->ambulkdelete = btbulkdelete;
 	amroutine->amvacuumcleanup = btvacuumcleanup;
 	amroutine->amcanreturn = btcanreturn;
@@ -207,6 +208,26 @@ btinsert(Relation rel, Datum *values, bool *isnull,
 	return result;
 }
 
+/*
+ *	btprefetch() -- prefetch pages for insert into the index
+ *
+ *		Descend the tree recursively, find the appropriate location for our
+ *		new tuple, and prefetch the page(s).
+ */
+void
+btprefetch(Relation rel, Datum *values, bool *isnull, Relation heapRel,
+		   IndexInfo *indexInfo)
+{
+	IndexTuple	itup;
+
+	/* generate an index tuple */
+	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+
+	_bt_doprefetch(rel, itup, heapRel);
+
+	pfree(itup);
+}
+
 /*
  *	btgettuple() -- Get the next tuple in the scan.
  */
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index efc5284e5b1..eb215c3b246 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -200,6 +200,101 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
 	return stack_in;
 }
 
+void
+_bt_prefetch(Relation rel, Relation heaprel, BTInsertState insertstate)
+{
+	Buffer		buffer;
+	BTStack		stack_in = NULL;
+	int			access = BT_READ;
+	BTScanInsert	key = insertstate->itup_key;
+
+	/* Get the root page to start with */
+	buffer = _bt_getroot(rel, heaprel, access);
+
+	/* If index is empty, no root page is created. */
+	if (!BufferIsValid(buffer))
+		return;
+
+	/* Loop iterates once per level descended in the tree */
+	for (;;)
+	{
+		Page		page;
+		BTPageOpaque opaque;
+		OffsetNumber offnum;
+		ItemId		itemid;
+		IndexTuple	itup;
+		BlockNumber child;
+		BTStack		new_stack;
+
+		/*
+		 * Race -- the page we just grabbed may have split since we read its
+		 * downlink in its parent page (or the metapage).  If it has, we may
+		 * need to move right to its new sibling.  Do that.
+		 *
+		 * In write-mode, allow _bt_moveright to finish any incomplete splits
+		 * along the way.  Strictly speaking, we'd only need to finish an
+		 * incomplete split on the leaf page we're about to insert to, not on
+		 * any of the upper levels (internal pages with incomplete splits are
+		 * also taken care of in _bt_getstackbuf).  But this is a good
+		 * opportunity to finish splits of internal pages too.
+		 */
+		buffer = _bt_moveright(rel, heaprel, key, buffer,
+							   (access == BT_WRITE), stack_in, access);
+
+		page = BufferGetPage(buffer);
+		opaque = BTPageGetOpaque(page);
+
+		/* if this is a leaf page (e.g. a leaf root), it's in cache; done */
+		if (P_ISLEAF(opaque))
+			break;
+
+		/*
+		 * Find the appropriate pivot tuple on this page.  Its downlink points
+		 * to the child page that we're about to descend to.
+		 */
+		offnum = _bt_binsrch(rel, key, buffer);
+		itemid = PageGetItemId(page, offnum);
+		itup = (IndexTuple) PageGetItem(page, itemid);
+		Assert(BTreeTupleIsPivot(itup) || !key->heapkeyspace);
+		child = BTreeTupleGetDownLink(itup);
+
+		/* at this point we must be on an internal (non-leaf) page */
+		Assert(!P_ISLEAF(opaque));
+
+		/*
+		 * Page level 1 is the lowest non-leaf level.  The child we would
+		 * descend to next must therefore be the leaf page the new tuple
+		 * belongs on, so prefetch it and stop descending.
+		 */
+		if (opaque->btpo_level == 1)
+		{
+			PrefetchBuffer(rel, MAIN_FORKNUM, child);
+			break;
+		}
+
+		/*
+		 * Remember the location of the pivot tuple in a new stack entry,
+		 * the same way _bt_search does.  We never split pages here, so the
+		 * stack isn't strictly needed, but keeping it makes the descent
+		 * mirror _bt_search; it's freed once we're done.
+		 */
+		new_stack = (BTStack) palloc(sizeof(BTStackData));
+		new_stack->bts_blkno = BufferGetBlockNumber(buffer);
+		new_stack->bts_offset = offnum;
+		new_stack->bts_parent = stack_in;
+
+		/* drop the read lock on the page, then acquire one on its child */
+		buffer = _bt_relandgetbuf(rel, buffer, child, access);
+
+		/* okay, all set to move down a level */
+		stack_in = new_stack;
+	}
+
+	_bt_relbuf(rel, buffer);
+
+	_bt_freestack(stack_in);
+}
+
 /*
  *	_bt_moveright() -- move right in the btree if necessary.
  *
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index c112e1e5dd4..5dba71c6e42 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -70,6 +70,7 @@ spghandler(PG_FUNCTION_ARGS)
 	amroutine->ambuild = spgbuild;
 	amroutine->ambuildempty = spgbuildempty;
 	amroutine->aminsert = spginsert;
+	amroutine->amprefetch = NULL;
 	amroutine->ambulkdelete = spgbulkdelete;
 	amroutine->amvacuumcleanup = spgvacuumcleanup;
 	amroutine->amcanreturn = spgcanreturn;
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index cec80baceaf..03093778691 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -421,6 +421,16 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
 						   buffer->bistate);
 		MemoryContextSwitchTo(oldcontext);
 
+		for (i = 0; i < nused; i++)
+		{
+			if (resultRelInfo->ri_NumIndices > 0)
+			{
+				ExecInsertPrefetchIndexes(resultRelInfo,
+										  buffer->slots[i], estate, false,
+										  false, NULL, NIL, false);
+			}
+		}
+
 		for (i = 0; i < nused; i++)
 		{
 			/*
@@ -1242,6 +1252,16 @@ CopyFrom(CopyFromState cstate)
 										   myslot, mycid, ti_options, bistate);
 
 						if (resultRelInfo->ri_NumIndices > 0)
+						{
+							ExecInsertPrefetchIndexes(resultRelInfo,
+													  myslot,
+													  estate,
+													  false,
+													  false,
+													  NULL,
+													  NIL,
+													  false);
+
 							recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 																   myslot,
 																   estate,
@@ -1250,6 +1270,7 @@ CopyFrom(CopyFromState cstate)
 																   NULL,
 																   NIL,
 																   false);
+						}
 					}
 
 					/* AFTER ROW INSERT Triggers */
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 384b39839a0..a83450c8dae 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -113,6 +113,7 @@
 #include "catalog/index.h"
 #include "executor/executor.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/cost.h"
 #include "storage/lmgr.h"
 #include "utils/snapmgr.h"
 
@@ -252,6 +253,111 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
 	 */
 }
 
+void
+ExecInsertPrefetchIndexes(ResultRelInfo *resultRelInfo,
+						  TupleTableSlot *slot,
+						  EState *estate,
+						  bool update,
+						  bool noDupErr,
+						  bool *specConflict,
+						  List *arbiterIndexes,
+						  bool onlySummarizing)
+{
+	int			i;
+	int			numIndices;
+	RelationPtr relationDescs;
+	Relation	heapRelation;
+	IndexInfo **indexInfoArray;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+
+	if (!enable_insert_prefetch)
+		return;
+
+	/*
+	 * Get information from the result relation info structure.
+	 */
+	numIndices = resultRelInfo->ri_NumIndices;
+	relationDescs = resultRelInfo->ri_IndexRelationDescs;
+	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	heapRelation = resultRelInfo->ri_RelationDesc;
+
+	/* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+	Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+	/*
+	 * We will use the EState's per-tuple context for evaluating predicates
+	 * and index expressions (creating it if it's not already there).
+	 */
+	econtext = GetPerTupleExprContext(estate);
+
+	/* Arrange for econtext's scan tuple to be the tuple under test */
+	econtext->ecxt_scantuple = slot;
+
+	/*
+	 * for each index, form the index tuple and prefetch the relevant pages
+	 */
+	for (i = 0; i < numIndices; i++)
+	{
+		Relation	indexRelation = relationDescs[i];
+		IndexInfo  *indexInfo;
+
+		if (indexRelation == NULL)
+			continue;
+
+		indexInfo = indexInfoArray[i];
+
+		/* If the index is marked as read-only, ignore it */
+		if (!indexInfo->ii_ReadyForInserts)
+			continue;
+
+		/*
+		 * Skip processing of non-summarizing indexes if we only update
+		 * summarizing indexes
+		 */
+		if (onlySummarizing && !indexInfo->ii_Summarizing)
+			continue;
+
+		/* Check for partial index */
+		if (indexInfo->ii_Predicate != NIL)
+		{
+			ExprState  *predicate;
+
+			/*
+			 * If predicate state not set up yet, create it (in the estate's
+			 * per-query context)
+			 */
+			predicate = indexInfo->ii_PredicateState;
+			if (predicate == NULL)
+			{
+				predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+				indexInfo->ii_PredicateState = predicate;
+			}
+
+			/* Skip this index-update if the predicate isn't satisfied */
+			if (!ExecQual(predicate, econtext))
+				continue;
+		}
+
+		/*
+		 * FormIndexDatum fills in its values and isnull parameters with the
+		 * appropriate values for the column(s) of the index.
+		 */
+		FormIndexDatum(indexInfo,
+					   slot,
+					   estate,
+					   values,
+					   isnull);
+
+		index_prefetch(indexRelation,
+					   values,
+					   isnull,
+					   heapRelation,
+					   indexInfo);
+	}
+}
+
 /* ----------------------------------------------------------------
  *		ExecInsertIndexTuples
  *
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 81f27042bc4..080966b4e90 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -532,9 +532,14 @@ ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo,
 		simple_table_tuple_insert(resultRelInfo->ri_RelationDesc, slot);
 
 		if (resultRelInfo->ri_NumIndices > 0)
+		{
+			ExecInsertPrefetchIndexes(resultRelInfo,
+									  slot, estate, false, false,
+									  NULL, NIL, false);
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 												   slot, estate, false, false,
 												   NULL, NIL, false);
+		}
 
 		/* AFTER ROW INSERT Triggers */
 		ExecARInsertTriggers(estate, resultRelInfo, slot,
@@ -600,10 +605,17 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 								  &update_indexes);
 
 		if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
+		{
+			ExecInsertPrefetchIndexes(resultRelInfo,
+									  slot, estate, true, false,
+									  NULL, NIL,
+									  (update_indexes == TU_Summarizing));
+
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 												   slot, estate, true, false,
 												   NULL, NIL,
 												   (update_indexes == TU_Summarizing));
+		}
 
 		/* AFTER ROW UPDATE Triggers */
 		ExecARUpdateTriggers(estate, resultRelInfo,
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 299c2c75be8..5ac4b788e2f 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1094,6 +1094,13 @@ ExecInsert(ModifyTableContext *context,
 										   NULL,
 										   specToken);
 
+			/* prefetch index leafs before inserting index tuples */
+			ExecInsertPrefetchIndexes(resultRelInfo,
+									  slot, estate, false, true,
+									  &specConflict,
+									  arbiterIndexes,
+									  false);
+
 			/* insert index entries for tuple */
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 												   slot, estate, false, true,
@@ -1136,10 +1143,17 @@ ExecInsert(ModifyTableContext *context,
 
 			/* insert index entries for tuple */
 			if (resultRelInfo->ri_NumIndices > 0)
+			{
+				ExecInsertPrefetchIndexes(resultRelInfo,
+										  slot, estate, false,
+										  false, NULL, NIL,
+										  false);
+
 				recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 													   slot, estate, false,
 													   false, NULL, NIL,
 													   false);
+			}
 		}
 	}
 
@@ -2127,11 +2141,19 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
 
 	/* insert index entries for tuple if necessary */
 	if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
+	{
+		ExecInsertPrefetchIndexes(resultRelInfo,
+								  slot, context->estate,
+								  true, false,
+								  NULL, NIL,
+								  (updateCxt->updateIndexes == TU_Summarizing));
+
 		recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 											   slot, context->estate,
 											   true, false,
 											   NULL, NIL,
 											   (updateCxt->updateIndexes == TU_Summarizing));
+	}
 
 	/* AFTER ROW UPDATE Triggers */
 	ExecARUpdateTriggers(context->estate, resultRelInfo,
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 7605eff9b9d..b58555c8b03 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1037,6 +1037,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_insert_prefetch", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables prefetching of index pages during inserts and updates."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_insert_prefetch,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e48c066a5b1..bef650bb155 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -391,6 +391,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_insert_prefetch = off
 
 # - Planner Cost Constants -
 
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 995725502a6..e3445225f49 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -113,6 +113,13 @@ typedef bool (*aminsert_function) (Relation indexRelation,
 								   bool indexUnchanged,
 								   struct IndexInfo *indexInfo);
 
+/* prefetch pages for new tuple */
+typedef void (*amprefetch_function) (Relation indexRelation,
+									 Datum *values,
+									 bool *isnull,
+									 Relation heapRelation,
+									 struct IndexInfo *indexInfo);
+
 /* bulk delete */
 typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info,
 														 IndexBulkDeleteResult *stats,
@@ -261,6 +268,7 @@ typedef struct IndexAmRoutine
 	ambuild_function ambuild;
 	ambuildempty_function ambuildempty;
 	aminsert_function aminsert;
+	amprefetch_function amprefetch;
 	ambulkdelete_function ambulkdelete;
 	amvacuumcleanup_function amvacuumcleanup;
 	amcanreturn_function amcanreturn;	/* can be NULL */
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index f31dec6ee0f..3e5e377c64a 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -149,6 +149,11 @@ extern bool index_insert(Relation indexRelation,
 						 bool indexUnchanged,
 						 struct IndexInfo *indexInfo);
 
+extern void index_prefetch(Relation indexRelation,
+						   Datum *values, bool *isnull,
+						   Relation heapRelation,
+						   struct IndexInfo *indexInfo);
+
 extern IndexScanDesc index_beginscan(Relation heapRelation,
 									 Relation indexRelation,
 									 Snapshot snapshot,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 7bfbf3086c8..3df531bb168 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1135,6 +1135,10 @@ extern bool btinsert(Relation rel, Datum *values, bool *isnull,
 					 IndexUniqueCheck checkUnique,
 					 bool indexUnchanged,
 					 struct IndexInfo *indexInfo);
+extern void btprefetch(Relation rel,
+					   Datum *values, bool *isnull,
+					   Relation heapRelation,
+					   struct IndexInfo *indexInfo);
 extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
 extern Size btestimateparallelscan(void);
 extern void btinitparallelscan(void *target);
@@ -1185,6 +1189,8 @@ extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting,
 extern bool _bt_doinsert(Relation rel, IndexTuple itup,
 						 IndexUniqueCheck checkUnique, bool indexUnchanged,
 						 Relation heapRel);
+extern void _bt_doprefetch(Relation rel, IndexTuple itup,
+						   Relation heapRel);
 extern void _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf,
 							 BTStack stack);
 extern Buffer _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack,
@@ -1237,6 +1243,8 @@ extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate);
  */
 extern BTStack _bt_search(Relation rel, Relation heaprel, BTScanInsert key,
 						  Buffer *bufP, int access);
+extern void _bt_prefetch(Relation rel, Relation heaprel,
+						 BTInsertState insertstate);
 extern Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
 							Buffer buf, bool forupdate, BTStack stack,
 							int access);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index e1eefb400b0..f82505a86b1 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -632,6 +632,12 @@ extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 								   bool noDupErr,
 								   bool *specConflict, List *arbiterIndexes,
 								   bool onlySummarizing);
+extern void ExecInsertPrefetchIndexes(ResultRelInfo *resultRelInfo,
+									  TupleTableSlot *slot, EState *estate,
+									  bool update,
+									  bool noDupErr,
+									  bool *specConflict, List *arbiterIndexes,
+									  bool onlySummarizing);
 extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
 									  TupleTableSlot *slot,
 									  EState *estate, ItemPointer conflictTid,
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 6d50afbf74c..150bd3affb5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -70,6 +70,7 @@ extern PGDLLIMPORT bool enable_parallel_hash;
 extern PGDLLIMPORT bool enable_partition_pruning;
 extern PGDLLIMPORT bool enable_presorted_aggregate;
 extern PGDLLIMPORT bool enable_async_append;
+extern PGDLLIMPORT bool enable_insert_prefetch;
 extern PGDLLIMPORT int constraint_exclusion;
 
 extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf86..344a0449d99 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -119,6 +119,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_incremental_sort        | on
  enable_indexonlyscan           | on
  enable_indexscan               | on
+ enable_insert_prefetch         | off
  enable_material                | on
  enable_memoize                 | on
  enable_mergejoin               | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
-- 
2.41.0
