Hi,

On 1/20/23 09:34, David Geier wrote:
EXPLAIN ANALYZE for parallel Bitmap Heap Scans currently only reports the number of heap blocks processed by the leader. It's missing the per-worker stats. The attached patch adds that functionality in the spirit of e.g. Sort or Memoize. Here is a simple test case and the EXPLAIN ANALYZE output with and without the patch:

Attached is a rebased version of the patch. I would appreciate someone taking a look.

As background: the change doesn't come out of thin air. We repeatedly took wrong conclusions in our query analysis because we assumed that the reported block counts include the workers.

If no one objects I would also register the patch at the commit fest. The patch is passing cleanly on CI.

--
David Geier
(ServiceNow)
From de03dfb101810f051b883804947707ecddaffceb Mon Sep 17 00:00:00 2001
From: David Geier <geidav...@gmail.com>
Date: Tue, 8 Nov 2022 19:40:31 +0100
Subject: [PATCH v2] Parallel Bitmap Heap Scan reports per-worker stats

Similarly to other nodes (e.g. hash join, sort, memoize),
Bitmap Heap Scan now reports per-worker stats in the EXPLAIN
ANALYZE output. Previously only the heap blocks stats for the
leader were reported which was incomplete in parallel scans.

	Discussion: https://www.postgresql.org/message-id/flat/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com
---
 src/backend/commands/explain.c            | 45 ++++++++++--
 src/backend/executor/execParallel.c       |  3 +
 src/backend/executor/nodeBitmapHeapscan.c | 88 +++++++++++++++++++----
 src/include/executor/nodeBitmapHeapscan.h |  1 +
 src/include/nodes/execnodes.h             | 23 +++++-
 5 files changed, 137 insertions(+), 23 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index e57bda7b62..b5b31a763c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3396,26 +3396,57 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
 static void
 show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 {
+	Assert(es->analyze);
+
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		ExplainPropertyInteger("Exact Heap Blocks", NULL,
-							   planstate->exact_pages, es);
+							   planstate->stats.exact_pages, es);
 		ExplainPropertyInteger("Lossy Heap Blocks", NULL,
-							   planstate->lossy_pages, es);
+							   planstate->stats.lossy_pages, es);
 	}
 	else
 	{
-		if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+		if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0)
 		{
 			ExplainIndentText(es);
 			appendStringInfoString(es->str, "Heap Blocks:");
-			if (planstate->exact_pages > 0)
-				appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
-			if (planstate->lossy_pages > 0)
-				appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
+			if (planstate->stats.exact_pages > 0)
+				appendStringInfo(es->str, " exact=%ld", planstate->stats.exact_pages);
+			if (planstate->stats.lossy_pages > 0)
+				appendStringInfo(es->str, " lossy=%ld", planstate->stats.lossy_pages);
 			appendStringInfoChar(es->str, '\n');
 		}
 	}
+
+	if (planstate->shared_info != NULL)
+	{
+		for (int n = 0; n < planstate->shared_info->num_workers; n++)
+		{
+			BitmapHeapScanInstrumentation *si = &planstate->shared_info->sinstrument[n];
+
+			if (si->exact_pages == 0 && si->lossy_pages == 0)
+				continue;
+
+			if (es->workers_state)
+				ExplainOpenWorker(n, es);
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfo(es->str, "Heap Blocks: exact=%ld lossy=%ld\n",
+						 si->exact_pages, si->lossy_pages);
+			}
+			else
+			{
+				ExplainPropertyInteger("Exact Heap Blocks", NULL, si->exact_pages, es);
+				ExplainPropertyInteger("Lossy Heap Blocks", NULL, si->lossy_pages, es);
+			}
+
+			if (es->workers_state)
+				ExplainCloseWorker(n, es);
+		}
+	}
 }
 
 /*
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index aa3f283453..73ef4f0c0f 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1072,6 +1072,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
 		case T_MemoizeState:
 			ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate);
 			break;
+		case T_BitmapHeapScanState:
+			ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate);
+			break;
 		default:
 			break;
 	}
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index f35df0b8bf..6d36e7922b 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -239,9 +239,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
 			}
 
 			if (tbmres->ntuples >= 0)
-				node->exact_pages++;
+				node->stats.exact_pages++;
 			else
-				node->lossy_pages++;
+				node->stats.lossy_pages++;
 
 			/* Adjust the prefetch target */
 			BitmapAdjustPrefetchTarget(node);
@@ -641,6 +641,22 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		ExecReScan(outerPlan);
 }
 
+/*
+ * The following two functions provide access to the variable length snapshot and stats array stored
+ * in shared memory. As there are two we cannot use variable length arrays for both of them.
+ */
+static char *
+GetSharedSnapshotData(ParallelBitmapHeapState *node)
+{
+	return node->snapshot_and_stats;
+}
+
+static SharedBitmapHeapScanInfo *
+GetSharedBitmapHeapScanInfo(ParallelBitmapHeapState *node)
+{
+	return (SharedBitmapHeapScanInfo *)(node->snapshot_and_stats + node->stats_offset);
+}
+
 /* ----------------------------------------------------------------
  *		ExecEndBitmapHeapScan
  * ----------------------------------------------------------------
@@ -650,6 +666,19 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 {
 	TableScanDesc scanDesc;
 
+	/*
+	 * When ending a parallel worker, copy the statistics gathered by the
+	 * worker back into shared memory so that it can be picked up by the main
+	 * process to report in EXPLAIN ANALYZE.
+	 */
+	if (node->pstate != NULL && IsParallelWorker())
+	{
+		SharedBitmapHeapScanInfo * shared_info = GetSharedBitmapHeapScanInfo(node->pstate);
+
+		Assert(ParallelWorkerNumber <= shared_info->num_workers);
+		shared_info->sinstrument[ParallelWorkerNumber] = node->stats;
+	}
+
 	/*
 	 * extract information from the node
 	 */
@@ -731,8 +760,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->return_empty_tuples = 0;
 	scanstate->vmbuffer = InvalidBuffer;
 	scanstate->pvmbuffer = InvalidBuffer;
-	scanstate->exact_pages = 0;
-	scanstate->lossy_pages = 0;
+	scanstate->stats.exact_pages = 0;
+	scanstate->stats.lossy_pages = 0;
 	scanstate->prefetch_iterator = NULL;
 	scanstate->prefetch_pages = 0;
 	scanstate->prefetch_target = 0;
@@ -856,12 +885,14 @@ void
 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
 					   ParallelContext *pcxt)
 {
-	EState	   *estate = node->ss.ps.state;
-
-	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
-										phs_snapshot_data),
-							   EstimateSnapshotSpace(estate->es_snapshot));
-
+	Size   instr_size = mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation));
+	EState *estate = node->ss.ps.state;
+
+	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState, snapshot_and_stats),
+							   MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot)));
+	node->pscan_len = add_size(node->pscan_len, instr_size);
+	node->pscan_len = add_size(node->pscan_len, offsetof(SharedBitmapHeapScanInfo, sinstrument));
+    
 	shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
@@ -877,9 +908,10 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 							ParallelContext *pcxt)
 {
 	ParallelBitmapHeapState *pstate;
+    SharedBitmapHeapScanInfo * shared_info;
 	EState	   *estate = node->ss.ps.state;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
-
+    
 	/* If there's no DSA, there are no workers; initialize nothing. */
 	if (dsa == NULL)
 		return;
@@ -896,7 +928,13 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 	pstate->state = BM_INITIAL;
 
 	ConditionVariableInit(&pstate->cv);
-	SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
+	pstate->stats_offset = MAXALIGN(EstimateSnapshotSpace(estate->es_snapshot));
+	SerializeSnapshot(estate->es_snapshot, GetSharedSnapshotData(pstate));
+
+	/* ensure any unfilled slots will contain zeroes */
+	shared_info = GetSharedBitmapHeapScanInfo(pstate);
+	memset(shared_info->sinstrument, 0, pcxt->nworkers*sizeof(BitmapHeapScanInstrumentation));
+	shared_info->num_workers = pcxt->nworkers;
 
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
 	node->pstate = pstate;
@@ -949,6 +987,30 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 	pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->pstate = pstate;
 
-	snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
+	snapshot = RestoreSnapshot(GetSharedSnapshotData(pstate));
 	table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
 }
+
+/* ----------------------------------------------------------------
+ *		ExecBitmapHeapRetrieveInstrumentation
+ *
+ *		Transfer bitmap heap scan statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
+{   
+	Size size;
+	SharedBitmapHeapScanInfo *shared_info;
+
+	if (node->pstate == NULL)
+		return;
+
+	shared_info = GetSharedBitmapHeapScanInfo(node->pstate);
+
+	size = offsetof(SharedBitmapHeapScanInfo, sinstrument)
+		+ shared_info->num_workers * sizeof(BitmapHeapScanInstrumentation);
+
+	node->shared_info = palloc(size);
+	memcpy(node->shared_info, shared_info, size);
+}
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index 3a267a7fbd..bf12377e28 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 										  ParallelContext *pcxt);
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 										   ParallelWorkerContext *pwcxt);
+extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node);
 
 #endif							/* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 20f4c8b35f..efbc3e0418 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1635,6 +1635,22 @@ typedef struct BitmapIndexScanState
 	struct IndexScanDescData *biss_ScanDesc;
 } BitmapIndexScanState;
 
+typedef struct BitmapHeapScanInstrumentation
+{
+	long lossy_pages;
+	long exact_pages;
+} BitmapHeapScanInstrumentation;
+
+/* ----------------
+ *	 Shared memory container for per-worker bitmap heap scan information
+ * ----------------
+ */
+typedef struct SharedBitmapHeapScanInfo
+{
+	int                           num_workers;
+	BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedBitmapHeapScanInfo;
+
 /* ----------------
  *	 SharedBitmapState information
  *
@@ -1677,7 +1693,8 @@ typedef struct ParallelBitmapHeapState
 	int			prefetch_target;
 	SharedBitmapState state;
 	ConditionVariable cv;
-	char		phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
+	size_t		stats_offset;
+	char		snapshot_and_stats[FLEXIBLE_ARRAY_MEMBER];
 } ParallelBitmapHeapState;
 
 /* ----------------
@@ -1715,8 +1732,6 @@ typedef struct BitmapHeapScanState
 	int			return_empty_tuples;
 	Buffer		vmbuffer;
 	Buffer		pvmbuffer;
-	long		exact_pages;
-	long		lossy_pages;
 	TBMIterator *prefetch_iterator;
 	int			prefetch_pages;
 	int			prefetch_target;
@@ -1726,6 +1741,8 @@ typedef struct BitmapHeapScanState
 	TBMSharedIterator *shared_tbmiterator;
 	TBMSharedIterator *shared_prefetch_iterator;
 	ParallelBitmapHeapState *pstate;
+	BitmapHeapScanInstrumentation stats;
+	SharedBitmapHeapScanInfo *shared_info;
 } BitmapHeapScanState;
 
 /* ----------------
-- 
2.34.1

Reply via email to