Hi hackers,

EXPLAIN ANALYZE for parallel Bitmap Heap Scans currently only reports the number of heap blocks processed by the leader. It's missing the per-worker stats. The attached patch adds that functionality in the spirit of e.g. Sort or Memoize. Here is a simple test case and the EXPLAIN ANALYZE output with and without the patch:

create table foo(col0 int, col1 int);
insert into foo select generate_series(1, 1000, 0.001), generate_series(1000, 2000, 0.001);
create index idx0 on foo(col0);
create index idx1 on foo(col1);
set parallel_tuple_cost = 0;
set parallel_setup_cost = 0;
explain (analyze, costs off, timing off) select * from foo where col0 > 900 or col1 = 1;

With the patch:

 Gather (actual rows=99501 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Parallel Bitmap Heap Scan on foo (actual rows=33167 loops=3)
         Recheck Cond: ((col0 > 900) OR (col1 = 1))
         Heap Blocks: exact=98
         Worker 0:  Heap Blocks: exact=171 lossy=0
         Worker 1:  Heap Blocks: exact=172 lossy=0
         ->  BitmapOr (actual rows=0 loops=1)
               ->  Bitmap Index Scan on idx0 (actual rows=99501 loops=1)
                     Index Cond: (col0 > 900)
               ->  Bitmap Index Scan on idx1 (actual rows=0 loops=1)
                     Index Cond: (col1 = 1)

Without the patch:

 Gather (actual rows=99501 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Parallel Bitmap Heap Scan on foo (actual rows=33167 loops=3)
         Recheck Cond: ((col0 > 900) OR (col1 = 1))
         Heap Blocks: exact=91
         ->  BitmapOr (actual rows=0 loops=1)
               ->  Bitmap Index Scan on idx0 (actual rows=99501 loops=1)
                     Index Cond: (col0 > 900)
               ->  Bitmap Index Scan on idx1 (actual rows=0 loops=1)
                     Index Cond: (col1 = 1)

So in total the parallel Bitmap Heap Scan actually processed 441 heap blocks instead of just 91.

Now two variable length arrays (VLA) would be needed, one for the snapshot and one for the stats. As this obviously doesn't work, I now use a single, big VLA and added functions to retrieve pointers to the respective fields. I'm using MAXALIGN() to make sure the latter field is aligned properly. Am I doing this correctly? I'm not entirely sure around alignment conventions and requirements of other platforms.

I couldn't find existing tests that exercise the EXPLAIN ANALYZE output of specific nodes. I could only find a few basic smoke tests for EXPLAIN ANALYZE with parallel nodes in parallel_select.sql. Do we want tests for the changed functionality? If so I could right away also add tests for EXPLAIN ANALYZE including other parallel nodes.

Thank you for your feedback.

--
David Geier
(ServiceNow)
From b2c84fb16e9521d6cfadb0c069e27a213e8e8471 Mon Sep 17 00:00:00 2001
From: David Geier <geidav...@gmail.com>
Date: Tue, 8 Nov 2022 19:40:31 +0100
Subject: [PATCH v1] Parallel Bitmap Heap Scan reports per-worker stats

Similarly to other nodes (e.g. hash join, sort, memoize),
Bitmap Heap Scan now reports per-worker stats in the EXPLAIN
ANALYZE output. Previously only the heap blocks stats for the
leader were reported which was incomplete in parallel scans.
---
 src/backend/commands/explain.c            | 46 ++++++++++--
 src/backend/executor/execParallel.c       |  3 +
 src/backend/executor/nodeBitmapHeapscan.c | 88 +++++++++++++++++++----
 src/include/executor/nodeBitmapHeapscan.h |  1 +
 src/include/nodes/execnodes.h             | 23 +++++-
 5 files changed, 138 insertions(+), 23 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5212a64b1e..d532fc4a87 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3396,26 +3396,58 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
 static void
 show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 {
+	Assert(es->analyze);
+
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		ExplainPropertyInteger("Exact Heap Blocks", NULL,
-							   planstate->exact_pages, es);
+							   planstate->stats.exact_pages, es);
 		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
-							   planstate->lossy_pages, es);
+							   planstate->stats.lossy_pages, es);
 	}
 	else
 	{
-		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+		if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0)
 		{
 			ExplainIndentText(es);
 			appendStringInfoString(es->str, "Heap Blocks:");
-			if (planstate->exact_pages > 0)
-				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
-			if (planstate->lossy_pages > 0)
-				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
+			if (planstate->stats.exact_pages > 0)
+				appendStringInfo(es->str, " exact=%ld", planstate->stats.exact_pages);
+			if (planstate->stats.lossy_pages > 0)
+				appendStringInfo(es->str, " lossy=%ld", planstate->stats.lossy_pages);
 			appendStringInfoChar(es->str, '\n');
 		}
 	}
+
+	if (planstate->shared_info != NULL)
+	{
+		for (int n = 0; n < planstate->shared_info->num_workers; n++)
+		{
+			BitmapHeapScanInstrumentation *si = &planstate->shared_info->sinstrument[n];
+
+			if (si->exact_pages == 0 && si->lossy_pages == 0)
+				continue;
+
+			if (es->workers_state)
+				ExplainOpenWorker(n, es);
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfo(es->str,
+						 "Heap Blocks: exact="UINT64_FORMAT" lossy=" INT64_FORMAT"\n",
+						 si->exact_pages, si->lossy_pages);
+			}
+			else
+			{
+				ExplainPropertyInteger("Exact Heap Blocks", NULL, si->exact_pages, es);
+				ExplainPropertyInteger("Lossy Heap Blocks", NULL, si->lossy_pages, es);
+			}
+
+			if (es->workers_state)
+				ExplainCloseWorker(n, es);
+		}
+	}
 }
 
 /*
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index aa3f283453..73ef4f0c0f 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1072,6 +1072,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
 		case T_MemoizeState:
 			ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate);
 			break;
+		case T_BitmapHeapScanState:
+			ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate);
+			break;
 		default:
 			break;
 	}
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index f35df0b8bf..6d36e7922b 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -239,9 +239,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			}
 
 			if (tbmres->ntuples >= 0)
-				node->exact_pages++;
+				node->stats.exact_pages++;
 			else
-				node->lossy_pages++;
+				node->stats.lossy_pages++;
 
 			/* Adjust the prefetch target */
 			BitmapAdjustPrefetchTarget(node);
@@ -641,6 +641,22 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		ExecReScan(outerPlan);
 }
 
+/*
+ * The following two functions provide access to the variable length snapshot and stats array stored
+ * in shared memory. As there are two we cannot use variable length arrays for both of them.
+ */
+static char *
+GetSharedSnapshotData(ParallelBitmapHeapState *node)
+{
+	return node->snapshot_and_stats;
+}
+
+static SharedBitmapHeapScanInfo *
+GetSharedBitmapHeapScanInfo(ParallelBitmapHeapState *node)
+{
+	return (SharedBitmapHeapScanInfo *)(node->snapshot_and_stats + node->stats_offset);
+}
+
 /* ----------------------------------------------------------------
  *		ExecEndBitmapHeapScan
  * ----------------------------------------------------------------
@@ -650,6 +666,19 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 {
 	TableScanDesc scanDesc;
 
+	/*
+	 * When ending a parallel worker, copy the statistics gathered by the
+	 * worker back into shared memory so that it can be picked up by the main
+	 * process to report in EXPLAIN ANALYZE.
+	 */
+	if (node->pstate != NULL && IsParallelWorker())
+	{
+		SharedBitmapHeapScanInfo * shared_info = GetSharedBitmapHeapScanInfo(node->pstate);
+
+		Assert(ParallelWorkerNumber <= shared_info->num_workers);
+		shared_info->sinstrument[ParallelWorkerNumber] = node->stats;
+	}
+
 	/*
 	 * extract information from the node
 	 */
@@ -731,8 +760,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->return_empty_tuples = 0;
 	scanstate->vmbuffer = InvalidBuffer;
 	scanstate->pvmbuffer = InvalidBuffer;
-	scanstate->exact_pages = 0;
-	scanstate->lossy_pages = 0;
+	scanstate->stats.exact_pages = 0;
+	scanstate->stats.lossy_pages = 0;
 	scanstate->prefetch_iterator = NULL;
 	scanstate->prefetch_pages = 0;
 	scanstate->prefetch_target = 0;
@@ -856,12 +885,14 @@ void
 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
 					   ParallelContext *pcxt)
 {
-	EState	   *estate = node->ss.ps.state;
-
-	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
-										phs_snapshot_data),
-							   EstimateSnapshotSpace(estate->es_snapshot));
-
+	Size   instr_size = mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation));
+	EState *estate = node->ss.ps.state;
+
+	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState, snapshot_and_stats),
+							   MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot)));
+	node->pscan_len = add_size(node->pscan_len, instr_size);
+	node->pscan_len = add_size(node->pscan_len, offsetof(SharedBitmapHeapScanInfo, sinstrument));
+    
 	shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
@@ -877,9 +908,10 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 							ParallelContext *pcxt)
 {
 	ParallelBitmapHeapState *pstate;
+    SharedBitmapHeapScanInfo * shared_info;
 	EState	   *estate = node->ss.ps.state;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
-
+    
 	/* If there's no DSA, there are no workers; initialize nothing. */
 	if (dsa == NULL)
 		return;
@@ -896,7 +928,13 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 	pstate->state = BM_INITIAL;
 
 	ConditionVariableInit(&pstate->cv);
-	SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
+	pstate->stats_offset = MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot));
+	SerializeSnapshot(estate->es_snapshot, GetSharedSnapshotData(pstate));
+
+	/* ensure any unfilled slots will contain zeroes */
+	shared_info = GetSharedBitmapHeapScanInfo(pstate);
+	memset(shared_info->sinstrument, 0, pcxt->nworkers*sizeof(BitmapHeapScanInstrumentation));
+	shared_info->num_workers = pcxt->nworkers;
 
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
 	node->pstate = pstate;
@@ -949,6 +987,30 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 	pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->pstate = pstate;
 
-	snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
+	snapshot = RestoreSnapshot(GetSharedSnapshotData(pstate));
 	table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
 }
+
+/* ----------------------------------------------------------------
+ *		ExecBitmapHeapRetrieveInstrumentation
+ *
+ *		Transfer bitmap heap scan statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
+{   
+	Size size;
+	SharedBitmapHeapScanInfo *shared_info;
+
+	if (node->pstate == NULL)
+		return;
+
+	shared_info = GetSharedBitmapHeapScanInfo(node->pstate);
+
+	size = offsetof(SharedBitmapHeapScanInfo, sinstrument)
+		+ shared_info->num_workers * sizeof(BitmapHeapScanInstrumentation);
+
+	node->shared_info = palloc(size);
+	memcpy(node->shared_info, shared_info, size);
+}
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index 3a267a7fbd..bf12377e28 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 										  ParallelContext *pcxt);
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 										   ParallelWorkerContext *pwcxt);
+extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node);
 
 #endif							/* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 20f4c8b35f..29484166de 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1635,6 +1635,22 @@ typedef struct BitmapIndexScanState
 	struct IndexScanDescData *biss_ScanDesc;
 } BitmapIndexScanState;
 
+typedef struct BitmapHeapScanInstrumentation
+{
+	uint64 lossy_pages;
+	uint64 exact_pages;
+} BitmapHeapScanInstrumentation;
+
+/* ----------------
+ *	 Shared memory container for per-worker bitmap heap scan information
+ * ----------------
+ */
+typedef struct SharedBitmapHeapScanInfo
+{
+	int                           num_workers;
+	BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedBitmapHeapScanInfo;
+
 /* ----------------
  *	 SharedBitmapState information
  *
@@ -1677,7 +1693,8 @@ typedef struct ParallelBitmapHeapState
 	int			prefetch_target;
 	SharedBitmapState state;
 	ConditionVariable cv;
-	char		phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
+	size_t		stats_offset;
+	char		snapshot_and_stats[FLEXIBLE_ARRAY_MEMBER];
 } ParallelBitmapHeapState;
 
 /* ----------------
@@ -1715,8 +1732,6 @@ typedef struct BitmapHeapScanState
 	int			return_empty_tuples;
 	Buffer		vmbuffer;
 	Buffer		pvmbuffer;
-	long		exact_pages;
-	long		lossy_pages;
 	TBMIterator *prefetch_iterator;
 	int			prefetch_pages;
 	int			prefetch_target;
@@ -1726,6 +1741,8 @@ typedef struct BitmapHeapScanState
 	TBMSharedIterator *shared_tbmiterator;
 	TBMSharedIterator *shared_prefetch_iterator;
 	ParallelBitmapHeapState *pstate;
+	BitmapHeapScanInstrumentation stats;
+	SharedBitmapHeapScanInfo *shared_info;
 } BitmapHeapScanState;
 
 /* ----------------
-- 
2.34.1

Reply via email to