Hi, On 1/20/23 09:34, David Geier wrote:
EXPLAIN ANALYZE for parallel Bitmap Heap Scans currently only reports the number of heap blocks processed by the leader. It's missing the per-worker stats. The attached patch adds that functionality in the spirit of e.g. Sort or Memoize. Here is a simple test case and the EXPLAIN ANALYZE output with and without the patch:
Attached is a rebased version of the patch. I would appreciate someone taking a look.
As background: the change doesn't come out of thin air. We repeatedly took wrong conclusions in our query analysis because we assumed that the reported block counts include the workers.
If no one objects I would also register the patch at the commit fest. The patch is passing cleanly on CI.
-- David Geier (ServiceNow)
From de03dfb101810f051b883804947707ecddaffceb Mon Sep 17 00:00:00 2001 From: David Geier <geidav...@gmail.com> Date: Tue, 8 Nov 2022 19:40:31 +0100 Subject: [PATCH v2] Parallel Bitmap Heap Scan reports per-worker stats Similarly to other nodes (e.g. hash join, sort, memoize), Bitmap Heap Scan now reports per-worker stats in the EXPLAIN ANALYZE output. Previously only the heap blocks stats for the leader were reported which was incomplete in parallel scans. Discussion: https://www.postgresql.org/message-id/flat/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com --- src/backend/commands/explain.c | 45 ++++++++++-- src/backend/executor/execParallel.c | 3 + src/backend/executor/nodeBitmapHeapscan.c | 88 +++++++++++++++++++---- src/include/executor/nodeBitmapHeapscan.h | 1 + src/include/nodes/execnodes.h | 23 +++++- 5 files changed, 137 insertions(+), 23 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index e57bda7b62..b5b31a763c 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3396,26 +3396,57 @@ show_hashagg_info(AggState *aggstate, ExplainState *es) static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) { + Assert(es->analyze); + if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyInteger("Exact Heap Blocks", NULL, - planstate->exact_pages, es); + planstate->stats.exact_pages, es); ExplainPropertyInteger("Lossy Heap Blocks", NULL, - planstate->lossy_pages, es); + planstate->stats.lossy_pages, es); } else { - if (planstate->exact_pages > 0 || planstate->lossy_pages > 0) + if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0) { ExplainIndentText(es); appendStringInfoString(es->str, "Heap Blocks:"); - if (planstate->exact_pages > 0) - appendStringInfo(es->str, " exact=%ld", planstate->exact_pages); - if (planstate->lossy_pages > 0) - appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages); + if (planstate->stats.exact_pages > 0) + appendStringInfo(es->str, " exact=%ld", planstate->stats.exact_pages); + if (planstate->stats.lossy_pages > 0) + appendStringInfo(es->str, " lossy=%ld", planstate->stats.lossy_pages); appendStringInfoChar(es->str, '\n'); } } + + if (planstate->shared_info != NULL) + { + for (int n = 0; n < planstate->shared_info->num_workers; n++) + { + BitmapHeapScanInstrumentation *si = &planstate->shared_info->sinstrument[n]; + + if (si->exact_pages == 0 && si->lossy_pages == 0) + continue; + + if (es->workers_state) + ExplainOpenWorker(n, es); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Heap Blocks: exact=%ld lossy=%ld\n", + si->exact_pages, si->lossy_pages); + } + else + { + ExplainPropertyInteger("Exact Heap Blocks", NULL, si->exact_pages, es); + ExplainPropertyInteger("Lossy Heap Blocks", NULL, si->lossy_pages, es); + } + + if (es->workers_state) + ExplainCloseWorker(n, es); + } + } } /* diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index aa3f283453..73ef4f0c0f 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1072,6 +1072,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_MemoizeState: ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate); break; + case T_BitmapHeapScanState: + ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate); + break; default: break; } diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index f35df0b8bf..6d36e7922b 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -239,9 +239,9 @@ BitmapHeapNext(BitmapHeapScanState *node) } if (tbmres->ntuples >= 0) - node->exact_pages++; + node->stats.exact_pages++; else - node->lossy_pages++; + node->stats.lossy_pages++; /* Adjust the prefetch target */ BitmapAdjustPrefetchTarget(node); @@ -641,6 +641,22 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) ExecReScan(outerPlan); } +/* + * The following two functions provide access to the variable length snapshot and stats array stored + * in shared memory. As there are two we cannot use variable length arrays for both of them. + */ +static char * +GetSharedSnapshotData(ParallelBitmapHeapState *node) +{ + return node->snapshot_and_stats; +} + +static SharedBitmapHeapScanInfo * +GetSharedBitmapHeapScanInfo(ParallelBitmapHeapState *node) +{ + return (SharedBitmapHeapScanInfo *)(node->snapshot_and_stats + node->stats_offset); +} + /* ---------------------------------------------------------------- * ExecEndBitmapHeapScan * ---------------------------------------------------------------- @@ -650,6 +666,19 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) { TableScanDesc scanDesc; + /* + * When ending a parallel worker, copy the statistics gathered by the + * worker back into shared memory so that it can be picked up by the main + * process to report in EXPLAIN ANALYZE. + */ + if (node->pstate != NULL && IsParallelWorker()) + { + SharedBitmapHeapScanInfo * shared_info = GetSharedBitmapHeapScanInfo(node->pstate); + + Assert(ParallelWorkerNumber <= shared_info->num_workers); + shared_info->sinstrument[ParallelWorkerNumber] = node->stats; + } + /* * extract information from the node */ @@ -731,8 +760,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->return_empty_tuples = 0; scanstate->vmbuffer = InvalidBuffer; scanstate->pvmbuffer = InvalidBuffer; - scanstate->exact_pages = 0; - scanstate->lossy_pages = 0; + scanstate->stats.exact_pages = 0; + scanstate->stats.lossy_pages = 0; scanstate->prefetch_iterator = NULL; scanstate->prefetch_pages = 0; scanstate->prefetch_target = 0; @@ -856,12 +885,14 @@ void ExecBitmapHeapEstimate(BitmapHeapScanState *node, ParallelContext *pcxt) { - EState *estate = node->ss.ps.state; - - node->pscan_len = add_size(offsetof(ParallelBitmapHeapState, - phs_snapshot_data), - EstimateSnapshotSpace(estate->es_snapshot)); - + Size instr_size = mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)); + EState *estate = node->ss.ps.state; + + node->pscan_len = add_size(offsetof(ParallelBitmapHeapState, snapshot_and_stats), + MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot))); + node->pscan_len = add_size(node->pscan_len, instr_size); + node->pscan_len = add_size(node->pscan_len, offsetof(SharedBitmapHeapScanInfo, sinstrument)); + shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -877,9 +908,10 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt) { ParallelBitmapHeapState *pstate; + SharedBitmapHeapScanInfo * shared_info; EState *estate = node->ss.ps.state; dsa_area *dsa = node->ss.ps.state->es_query_dsa; - + /* If there's no DSA, there are no workers; initialize nothing. */ if (dsa == NULL) return; @@ -896,7 +928,13 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, pstate->state = BM_INITIAL; ConditionVariableInit(&pstate->cv); - SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data); + pstate->stats_offset = MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot)); + SerializeSnapshot(estate->es_snapshot, GetSharedSnapshotData(pstate)); + + /* ensure any unfilled slots will contain zeroes */ + shared_info = GetSharedBitmapHeapScanInfo(pstate); + memset(shared_info->sinstrument, 0, pcxt->nworkers*sizeof(BitmapHeapScanInstrumentation)); + shared_info->num_workers = pcxt->nworkers; shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate); node->pstate = pstate; @@ -949,6 +987,30 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->pstate = pstate; - snapshot = RestoreSnapshot(pstate->phs_snapshot_data); + snapshot = RestoreSnapshot(GetSharedSnapshotData(pstate)); table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot); } + +/* ---------------------------------------------------------------- + * ExecBitmapHeapRetrieveInstrumentation + * + * Transfer bitmap heap scan statistics from DSM to private memory. + * ---------------------------------------------------------------- + */ +void +ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node) +{ + Size size; + SharedBitmapHeapScanInfo *shared_info; + + if (node->pstate == NULL) + return; + + shared_info = GetSharedBitmapHeapScanInfo(node->pstate); + + size = offsetof(SharedBitmapHeapScanInfo, sinstrument) + + shared_info->num_workers * sizeof(BitmapHeapScanInstrumentation); + + node->shared_info = palloc(size); + memcpy(node->shared_info, shared_info, size); +} diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h index 3a267a7fbd..bf12377e28 100644 --- a/src/include/executor/nodeBitmapHeapscan.h +++ b/src/include/executor/nodeBitmapHeapscan.h @@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, ParallelContext *pcxt); extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, ParallelWorkerContext *pwcxt); +extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node); #endif /* NODEBITMAPHEAPSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 20f4c8b35f..efbc3e0418 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1635,6 +1635,22 @@ typedef struct BitmapIndexScanState struct IndexScanDescData *biss_ScanDesc; } BitmapIndexScanState; +typedef struct BitmapHeapScanInstrumentation +{ + long lossy_pages; + long exact_pages; +} BitmapHeapScanInstrumentation; + +/* ---------------- + * Shared memory container for per-worker bitmap heap scan information + * ---------------- + */ +typedef struct SharedBitmapHeapScanInfo +{ + int num_workers; + BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedBitmapHeapScanInfo; + /* ---------------- * SharedBitmapState information * @@ -1677,7 +1693,8 @@ typedef struct ParallelBitmapHeapState int prefetch_target; SharedBitmapState state; ConditionVariable cv; - char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; + size_t stats_offset; + char snapshot_and_stats[FLEXIBLE_ARRAY_MEMBER]; } ParallelBitmapHeapState; /* ---------------- @@ -1715,8 +1732,6 @@ typedef struct BitmapHeapScanState int return_empty_tuples; Buffer vmbuffer; Buffer pvmbuffer; - long exact_pages; - long lossy_pages; TBMIterator *prefetch_iterator; int prefetch_pages; int prefetch_target; @@ -1726,6 +1741,8 @@ typedef struct BitmapHeapScanState TBMSharedIterator *shared_tbmiterator; TBMSharedIterator *shared_prefetch_iterator; ParallelBitmapHeapState *pstate; + BitmapHeapScanInstrumentation stats; + SharedBitmapHeapScanInfo *shared_info; } BitmapHeapScanState; /* ---------------- -- 2.34.1