Hello hackers,

My colleague, Ashwin, pointed out to me that brininsert's per-tuple init
of the revmap access struct can have non-trivial overhead.

Turns out he is right. We are saving 24 bytes of memory per-call for
the access struct, and a bit on buffer/locking overhead, with the
attached patch.

The implementation ties the revmap cleanup as a MemoryContext callback
to the IndexInfo struct's MemoryContext, as there is no teardown
function provided by the index AM for end-of-insert-command.

Test setup (local Ubuntu workstation):

# Drop caches and restart between each run:
sudo sh -c "sync; echo 3 > /proc/sys/vm/drop_caches;"
pg_ctl -D /usr/local/pgsql/data/ -l /tmp/logfile restart

\timing
DROP TABLE heap;
CREATE TABLE heap(i int);
CREATE INDEX ON heap USING brin(i) WITH (pages_per_range=1);
INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000);

Results:
We see an improvement for 100M tuples and an even bigger improvement for
200M tuples.

Master (29cf61ade3f245aa40f427a1d6345287ef77e622):

test=# INSERT INTO heap SELECT 1 FROM generate_series(1, 100000000);
INSERT 0 100000000
Time: 222762.159 ms (03:42.762)

-- 3 runs
test=# INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000);
INSERT 0 200000000
Time: 471168.181 ms (07:51.168)
Time: 457071.883 ms (07:37.072)
TimeL 486969.205 ms (08:06.969)

Branch:

test2=# INSERT INTO heap SELECT 1 FROM generate_series(1, 100000000);
INSERT 0 100000000
Time: 200046.519 ms (03:20.047)

-- 3 runs
test2=# INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000);
INSERT 0 200000000
Time: 369041.832 ms (06:09.042)
Time: 365483.382 ms (06:05.483)
Time: 375506.144 ms (06:15.506)

# Profiled backend running INSERT of 100000000 rows
sudo perf record -p 11951 --call-graph fp sleep 180

Please see attached perf diff between master and branch. We see that we
save on a bit of overhead from brinRevmapInitialize(),
brinRevmapTerminate() and lock routines.

Regards,
Soumyadeep (VMware)

Attachment: perf_diff.out
Description: Binary data

From 5d11219e413fe6806e00dd738b051c3948dffcab Mon Sep 17 00:00:00 2001
From: Soumyadeep Chakraborty <soumyadeep2...@gmail.com>
Date: Mon, 3 Jul 2023 10:47:48 -0700
Subject: [PATCH v1 1/1] Reuse revmap and brin desc in brininsert

brininsert() used to have code that performed per-tuple initialization
of the revmap. That had some overhead.
---
 src/backend/access/brin/brin.c | 70 ++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 3c6a956eaa3..e27bee980f1 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -58,6 +58,17 @@ typedef struct BrinBuildState
 	BrinMemTuple *bs_dtuple;
 } BrinBuildState;
 
+/*
+ * We use a BrinInsertState to capture running state spanning multiple
+ * brininsert invocations, within the same command.
+ */
+typedef struct BrinInsertState
+{
+	BrinRevmap 	*bs_rmAccess;
+	BrinDesc   	*bs_desc;
+	BlockNumber bs_pages_per_range;
+} BrinInsertState;
+
 /*
  * Struct used as "opaque" during index scans
  */
@@ -72,6 +83,7 @@ typedef struct BrinOpaque
 
 static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
 												  BrinRevmap *revmap, BlockNumber pagesPerRange);
+static BrinInsertState *initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo);
 static void terminate_brin_buildstate(BrinBuildState *state);
 static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
 						  bool include_partial, double *numSummarized, double *numExisting);
@@ -140,6 +152,42 @@ brinhandler(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(amroutine);
 }
 
+static void
+brininsertCleanupCallback(void *arg)
+{
+	BrinInsertState *bistate = (BrinInsertState *) arg;
+	/*
+	 * Clean up the revmap. Note that the brinDesc has already been cleaned up
+	 * as part of its own memory context.
+	 */
+	brinRevmapTerminate(bistate->bs_rmAccess);
+}
+
+static BrinInsertState *
+initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo)
+{
+	BrinInsertState 		*bistate;
+	MemoryContextCallback 	*cb;
+	MemoryContext 			oldcxt;
+
+	oldcxt = MemoryContextSwitchTo(indexInfo->ii_Context);
+	bistate = MemoryContextAllocZero(indexInfo->ii_Context,
+									 sizeof(BrinInsertState));
+
+	bistate->bs_desc = brin_build_desc(idxRel);
+	cb = palloc(sizeof(MemoryContextCallback));
+	cb->arg = bistate;
+	cb->func = brininsertCleanupCallback;
+	MemoryContextRegisterResetCallback(indexInfo->ii_Context, cb);
+	MemoryContextSwitchTo(oldcxt);
+
+	bistate->bs_rmAccess = brinRevmapInitialize(idxRel,
+												&bistate->bs_pages_per_range,
+												NULL);
+
+	return bistate;
+}
+
 /*
  * A tuple in the heap is being inserted.  To keep a brin index up to date,
  * we need to obtain the relevant index tuple and compare its stored values
@@ -162,14 +210,23 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
 	BlockNumber pagesPerRange;
 	BlockNumber origHeapBlk;
 	BlockNumber heapBlk;
-	BrinDesc   *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
+	BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
 	BrinRevmap *revmap;
+	BrinDesc *bdesc;
 	Buffer		buf = InvalidBuffer;
 	MemoryContext tupcxt = NULL;
 	MemoryContext oldcxt = CurrentMemoryContext;
 	bool		autosummarize = BrinGetAutoSummarize(idxRel);
 
-	revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
+	if (!bistate)
+	{
+		/* First time through in this statement? */
+		bistate = initialize_brin_insertstate(idxRel, indexInfo);
+		indexInfo->ii_AmCache = (void *) bistate;
+	}
+	revmap = bistate->bs_rmAccess;
+	bdesc = bistate->bs_desc;
+	pagesPerRange = bistate->bs_pages_per_range;
 
 	/*
 	 * origHeapBlk is the block number where the insertion occurred.  heapBlk
@@ -228,14 +285,6 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
 		if (!brtup)
 			break;
 
-		/* First time through in this statement? */
-		if (bdesc == NULL)
-		{
-			MemoryContextSwitchTo(indexInfo->ii_Context);
-			bdesc = brin_build_desc(idxRel);
-			indexInfo->ii_AmCache = (void *) bdesc;
-			MemoryContextSwitchTo(oldcxt);
-		}
 		/* First time through in this brininsert call? */
 		if (tupcxt == NULL)
 		{
@@ -306,7 +355,6 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
 		break;
 	}
 
-	brinRevmapTerminate(revmap);
 	if (BufferIsValid(buf))
 		ReleaseBuffer(buf);
 	MemoryContextSwitchTo(oldcxt);
-- 
2.34.1

Reply via email to