On Wed, 8 Apr 2020 at 16:04, Amit Kapila <[email protected]> wrote:
>
> On Wed, Apr 8, 2020 at 11:53 AM Masahiko Sawada
> <[email protected]> wrote:
> >
> > On Wed, 8 Apr 2020 at 14:44, Amit Kapila <[email protected]> wrote:
> > >
> > >
> > > Thanks for the investigation. I don't see that we can do anything special
> > > about this. In an ideal world, this would be done once rather than for
> > > each worker, but I guess it doesn't matter too much. I am not sure if
> > > it is worth adding a comment for this; what do you think?
> > >
> >
> > I agree with you. If the differences were considerably large, we would
> > probably do something, but I think we don't need to do anything at this
> > time.
> >
>
> Fair enough. Can you check this once in the back-branches, as this needs to
> be backpatched? I will do that once myself as well.

I've done the same test with the HEAD of both REL_12_STABLE and
REL_11_STABLE. I think the patch needs to be backpatched to PG11, where
parallel index creation was introduced. I've attached the patches for
PG12 and PG11 that I used for this test, for reference.
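
For reference, the test itself was roughly of the following shape, using
pg_stat_statements in psql (the table name, row count, and settings below
are illustrative rather than the exact ones I used, and it assumes
pg_stat_statements is in shared_preload_libraries):

  -- illustrative reproduction; adjust table size and settings as needed
  CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
  CREATE TABLE test (c int);
  INSERT INTO test SELECT generate_series(1, 10000000);
  SELECT pg_stat_statements_reset();
  SET max_parallel_maintenance_workers = 4;  -- 0 for the "no worker" case
  CREATE INDEX test_c_idx ON test (c);
  \x
  SELECT shared_blks_hit,
         shared_blks_read,
         shared_blks_hit + shared_blks_read AS total_read_blks,
         shared_blks_dirtied,
         shared_blks_written
    FROM pg_stat_statements
   WHERE query LIKE 'CREATE INDEX%';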
Here are the results:
* PG12
With no worker:
-[ RECORD 1 ]-------+-------------
shared_blks_hit | 119
shared_blks_read | 44283
total_read_blks | 44402
shared_blks_dirtied | 44262
shared_blks_written | 24925
With 4 workers:
-[ RECORD 1 ]-------+------------
shared_blks_hit | 128
shared_blks_read | 8844
total_read_blks | 8972
shared_blks_dirtied | 8822
shared_blks_written | 5393
With 4 workers after patching:
-[ RECORD 1 ]-------+------------
shared_blks_hit | 140
shared_blks_read | 44284
total_read_blks | 44424
shared_blks_dirtied | 44262
shared_blks_written | 26574
* PG11
With no worker:
-[ RECORD 1 ]-------+------------
shared_blks_hit | 124
shared_blks_read | 44284
total_read_blks | 44408
shared_blks_dirtied | 44263
shared_blks_written | 24908
With 4 workers:
-[ RECORD 1 ]-------+-------------
shared_blks_hit | 132
shared_blks_read | 8910
total_read_blks | 9042
shared_blks_dirtied | 8888
shared_blks_written | 5370
With 4 workers after patching:
-[ RECORD 1 ]-------+-------------
shared_blks_hit | 144
shared_blks_read | 44285
total_read_blks | 44429
shared_blks_dirtied | 44263
shared_blks_written | 26861

So in both branches, the unpatched parallel build reports only a fraction of
the blocks that the serial build touches, because the workers' buffer usage
is lost; with the patch applied, the totals are back in line with the serial
case.
Regards,
--
Masahiko Sawada http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index edc4a82b02..da5b39eb02 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -67,6 +67,7 @@
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "commands/progress.h"
+#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
@@ -81,6 +82,7 @@
#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002)
#define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003)
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004)
+#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000005)
/*
* DISABLE_LEADER_PARTICIPATION disables the leader's participation in
@@ -203,6 +205,7 @@ typedef struct BTLeader
Sharedsort *sharedsort;
Sharedsort *sharedsort2;
Snapshot snapshot;
+ BufferUsage *bufferusage;
} BTLeader;
/*
@@ -1336,6 +1339,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Sharedsort *sharedsort2;
BTSpool *btspool = buildstate->spool;
BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
+ BufferUsage *bufferusage;
bool leaderparticipates = true;
char *sharedquery;
int querylen;
@@ -1388,6 +1392,17 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
shm_toc_estimate_keys(&pcxt->estimator, 3);
}
+ /*
+ * Estimate space for BufferUsage -- PARALLEL_KEY_BUFFER_USAGE.
+ *
+ * If there are no extensions loaded that care, we could skip this. We
+ * have no way of knowing whether anyone's looking at pgBufferUsage,
+ * so do it unconditionally.
+ */
+ shm_toc_estimate_chunk(&pcxt->estimator,
+ mul_size(sizeof(BufferUsage), pcxt->nworkers));
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+
/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
querylen = strlen(debug_query_string);
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
@@ -1459,6 +1474,11 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
memcpy(sharedquery, debug_query_string, querylen + 1);
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
+ /* Allocate space for each worker's BufferUsage; no need to initialize */
+ bufferusage = shm_toc_allocate(pcxt->toc,
+ mul_size(sizeof(BufferUsage), pcxt->nworkers));
+ shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
+
/* Launch workers, saving status for leader/caller */
LaunchParallelWorkers(pcxt);
btleader->pcxt = pcxt;
@@ -1469,6 +1489,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
btleader->sharedsort = sharedsort;
btleader->sharedsort2 = sharedsort2;
btleader->snapshot = snapshot;
+ btleader->bufferusage = bufferusage;
/* If no workers were successfully launched, back out (do serial build) */
if (pcxt->nworkers_launched == 0)
@@ -1497,8 +1518,18 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
static void
_bt_end_parallel(BTLeader *btleader)
{
+ int i;
+
/* Shutdown worker processes */
WaitForParallelWorkersToFinish(btleader->pcxt);
+
+ /*
+ * Next, accumulate buffer usage. (This must wait for the workers to
+ * finish, or we might get incomplete data.)
+ */
+ for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
+ InstrAccumParallelQuery(&btleader->bufferusage[i]);
+
/* Free last reference to MVCC snapshot, if one was used */
if (IsMVCCSnapshot(btleader->snapshot))
UnregisterSnapshot(btleader->snapshot);
@@ -1629,6 +1660,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Relation indexRel;
LOCKMODE heapLockmode;
LOCKMODE indexLockmode;
+ BufferUsage *bufferusage;
int sortmem;
#ifdef BTREE_BUILD_STATS
@@ -1690,11 +1722,18 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
tuplesort_attach_shared(sharedsort2, seg);
}
+ /* Prepare to track buffer usage during parallel execution */
+ InstrStartParallelQuery();
+
/* Perform sorting of spool, and possibly a spool2 */
sortmem = maintenance_work_mem / btshared->scantuplesortstates;
_bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
sharedsort2, sortmem, false);
+ /* Report buffer usage during parallel execution */
+ bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
+ InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber]);
+
#ifdef BTREE_BUILD_STATS
if (log_btree_build_stats)
{
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index dab41ea298..54627b786a 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -64,6 +64,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
+#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
@@ -78,6 +79,7 @@
#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002)
#define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003)
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004)
+#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000005)
/*
* DISABLE_LEADER_PARTICIPATION disables the leader's participation in
@@ -192,6 +194,7 @@ typedef struct BTLeader
Sharedsort *sharedsort;
Sharedsort *sharedsort2;
Snapshot snapshot;
+ BufferUsage *bufferusage;
} BTLeader;
/*
@@ -1240,6 +1243,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Sharedsort *sharedsort2;
BTSpool *btspool = buildstate->spool;
BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
+ BufferUsage *bufferusage;
bool leaderparticipates = true;
char *sharedquery;
int querylen;
@@ -1292,6 +1296,17 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
shm_toc_estimate_keys(&pcxt->estimator, 3);
}
+ /*
+ * Estimate space for BufferUsage -- PARALLEL_KEY_BUFFER_USAGE.
+ *
+ * If there are no extensions loaded that care, we could skip this. We
+ * have no way of knowing whether anyone's looking at pgBufferUsage,
+ * so do it unconditionally.
+ */
+ shm_toc_estimate_chunk(&pcxt->estimator,
+ mul_size(sizeof(BufferUsage), pcxt->nworkers));
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+
/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
querylen = strlen(debug_query_string);
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
@@ -1361,6 +1376,11 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
memcpy(sharedquery, debug_query_string, querylen + 1);
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
+ /* Allocate space for each worker's BufferUsage; no need to initialize */
+ bufferusage = shm_toc_allocate(pcxt->toc,
+ mul_size(sizeof(BufferUsage), pcxt->nworkers));
+ shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
+
/* Launch workers, saving status for leader/caller */
LaunchParallelWorkers(pcxt);
btleader->pcxt = pcxt;
@@ -1371,6 +1391,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
btleader->sharedsort = sharedsort;
btleader->sharedsort2 = sharedsort2;
btleader->snapshot = snapshot;
+ btleader->bufferusage = bufferusage;
/* If no workers were successfully launched, back out (do serial build) */
if (pcxt->nworkers_launched == 0)
@@ -1399,8 +1420,18 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
static void
_bt_end_parallel(BTLeader *btleader)
{
+ int i;
+
/* Shutdown worker processes */
WaitForParallelWorkersToFinish(btleader->pcxt);
+
+ /*
+ * Next, accumulate buffer usage. (This must wait for the workers to
+ * finish, or we might get incomplete data.)
+ */
+ for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
+ InstrAccumParallelQuery(&btleader->bufferusage[i]);
+
/* Free last reference to MVCC snapshot, if one was used */
if (IsMVCCSnapshot(btleader->snapshot))
UnregisterSnapshot(btleader->snapshot);
@@ -1537,6 +1568,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Relation indexRel;
LOCKMODE heapLockmode;
LOCKMODE indexLockmode;
+ BufferUsage *bufferusage;
int sortmem;
#ifdef BTREE_BUILD_STATS
@@ -1598,11 +1630,18 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
tuplesort_attach_shared(sharedsort2, seg);
}
+ /* Prepare to track buffer usage during parallel execution */
+ InstrStartParallelQuery();
+
/* Perform sorting of spool, and possibly a spool2 */
sortmem = maintenance_work_mem / btshared->scantuplesortstates;
_bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
sharedsort2, sortmem);
+ /* Report buffer usage during parallel execution */
+ bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
+ InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber]);
+
#ifdef BTREE_BUILD_STATS
if (log_btree_build_stats)
{