Hello hackers, My colleague, Ashwin, pointed out to me that brininsert's per-tuple init of the revmap access struct can have non-trivial overhead.
Turns out he is right. We are saving 24 bytes of memory per-call for the access struct, and a bit on buffer/locking overhead, with the attached patch. The implementation ties the revmap cleanup as a MemoryContext callback to the IndexInfo struct's MemoryContext, as there is no teardown function provided by the index AM for end-of-insert-command. Test setup (local Ubuntu workstation): # Drop caches and restart between each run: sudo sh -c "sync; echo 3 > /proc/sys/vm/drop_caches;" pg_ctl -D /usr/local/pgsql/data/ -l /tmp/logfile restart \timing DROP TABLE heap; CREATE TABLE heap(i int); CREATE INDEX ON heap USING brin(i) WITH (pages_per_range=1); INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000); Results: We see an improvement for 100M tuples and an even bigger improvement for 200M tuples. Master (29cf61ade3f245aa40f427a1d6345287ef77e622): test=# INSERT INTO heap SELECT 1 FROM generate_series(1, 100000000); INSERT 0 100000000 Time: 222762.159 ms (03:42.762) -- 3 runs test=# INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000); INSERT 0 200000000 Time: 471168.181 ms (07:51.168) Time: 457071.883 ms (07:37.072) TimeL 486969.205 ms (08:06.969) Branch: test2=# INSERT INTO heap SELECT 1 FROM generate_series(1, 100000000); INSERT 0 100000000 Time: 200046.519 ms (03:20.047) -- 3 runs test2=# INSERT INTO heap SELECT 1 FROM generate_series(1, 200000000); INSERT 0 200000000 Time: 369041.832 ms (06:09.042) Time: 365483.382 ms (06:05.483) Time: 375506.144 ms (06:15.506) # Profiled backend running INSERT of 100000000 rows sudo perf record -p 11951 --call-graph fp sleep 180 Please see attached perf diff between master and branch. We see that we save on a bit of overhead from brinRevmapInitialize(), brinRevmapTerminate() and lock routines. Regards, Soumyadeep (VMware)
perf_diff.out
Description: Binary data
From 5d11219e413fe6806e00dd738b051c3948dffcab Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty <soumyadeep2...@gmail.com> Date: Mon, 3 Jul 2023 10:47:48 -0700 Subject: [PATCH v1 1/1] Reuse revmap and brin desc in brininsert brininsert() used to have code that performed per-tuple initialization of the revmap. That had some overhead. --- src/backend/access/brin/brin.c | 70 ++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 3c6a956eaa3..e27bee980f1 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -58,6 +58,17 @@ typedef struct BrinBuildState BrinMemTuple *bs_dtuple; } BrinBuildState; +/* + * We use a BrinInsertState to capture running state spanning multiple + * brininsert invocations, within the same command. + */ +typedef struct BrinInsertState +{ + BrinRevmap *bs_rmAccess; + BrinDesc *bs_desc; + BlockNumber bs_pages_per_range; +} BrinInsertState; + /* * Struct used as "opaque" during index scans */ @@ -72,6 +83,7 @@ typedef struct BrinOpaque static BrinBuildState *initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange); +static BrinInsertState *initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo); static void terminate_brin_buildstate(BrinBuildState *state); static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting); @@ -140,6 +152,42 @@ brinhandler(PG_FUNCTION_ARGS) PG_RETURN_POINTER(amroutine); } +static void +brininsertCleanupCallback(void *arg) +{ + BrinInsertState *bistate = (BrinInsertState *) arg; + /* + * Clean up the revmap. Note that the brinDesc has already been cleaned up + * as part of its own memory context. + */ + brinRevmapTerminate(bistate->bs_rmAccess); +} + +static BrinInsertState * +initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo) +{ + BrinInsertState *bistate; + MemoryContextCallback *cb; + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(indexInfo->ii_Context); + bistate = MemoryContextAllocZero(indexInfo->ii_Context, + sizeof(BrinInsertState)); + + bistate->bs_desc = brin_build_desc(idxRel); + cb = palloc(sizeof(MemoryContextCallback)); + cb->arg = bistate; + cb->func = brininsertCleanupCallback; + MemoryContextRegisterResetCallback(indexInfo->ii_Context, cb); + MemoryContextSwitchTo(oldcxt); + + bistate->bs_rmAccess = brinRevmapInitialize(idxRel, + &bistate->bs_pages_per_range, + NULL); + + return bistate; +} + /* * A tuple in the heap is being inserted. To keep a brin index up to date, * we need to obtain the relevant index tuple and compare its stored values @@ -162,14 +210,23 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, BlockNumber pagesPerRange; BlockNumber origHeapBlk; BlockNumber heapBlk; - BrinDesc *bdesc = (BrinDesc *) indexInfo->ii_AmCache; + BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache; BrinRevmap *revmap; + BrinDesc *bdesc; Buffer buf = InvalidBuffer; MemoryContext tupcxt = NULL; MemoryContext oldcxt = CurrentMemoryContext; bool autosummarize = BrinGetAutoSummarize(idxRel); - revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); + if (!bistate) + { + /* First time through in this statement? */ + bistate = initialize_brin_insertstate(idxRel, indexInfo); + indexInfo->ii_AmCache = (void *) bistate; + } + revmap = bistate->bs_rmAccess; + bdesc = bistate->bs_desc; + pagesPerRange = bistate->bs_pages_per_range; /* * origHeapBlk is the block number where the insertion occurred. heapBlk @@ -228,14 +285,6 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, if (!brtup) break; - /* First time through in this statement? */ - if (bdesc == NULL) - { - MemoryContextSwitchTo(indexInfo->ii_Context); - bdesc = brin_build_desc(idxRel); - indexInfo->ii_AmCache = (void *) bdesc; - MemoryContextSwitchTo(oldcxt); - } /* First time through in this brininsert call? */ if (tupcxt == NULL) { @@ -306,7 +355,6 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, break; } - brinRevmapTerminate(revmap); if (BufferIsValid(buf)) ReleaseBuffer(buf); MemoryContextSwitchTo(oldcxt); -- 2.34.1