diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 52f1a96db5..ee0f07a81e 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1281,9 +1281,6 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	/* Report workers' query for monitoring purposes */
 	pgstat_report_activity(STATE_RUNNING, debug_query_string);
 
-	/* Prepare to track buffer usage during query execution. */
-	InstrStartParallelQuery();
-
 	/* Attach to the dynamic shared memory area. */
 	area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);
 	area = dsa_attach_in_place(area_space, seg);
@@ -1309,6 +1306,15 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	/* Pass down any tuple bound */
 	ExecSetTupleBound(fpes->tuples_needed, queryDesc->planstate);
 
+	/*
+	 * Prepare to track buffer usage during query execution.
+	 *
+	 * We do this after starting up the executor to match what happens in the
+	 * leader, which also doesn't count buffer accesses that occur during
+	 * executor startup.
+	 */
+	InstrStartParallelQuery();
+
 	/*
 	 * Run the plan.  If we specified a tuple bound, be careful not to demand
 	 * more tuples than that.
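
For context on why the call site matters: the parallel-query instrumentation
entry points work by snapshotting the backend-global pgBufferUsage counters and
later reporting only the delta, so anything counted before
InstrStartParallelQuery() never reaches the leader.  A simplified paraphrase of
the instrument.c mechanism (names from memory, not the verbatim source):

#include "postgres.h"
#include "executor/instrument.h"

static BufferUsage save_pgBufferUsage;

/* Remember the counters as they stand when tracking begins. */
void
InstrStartParallelQuery(void)
{
	save_pgBufferUsage = pgBufferUsage;
}

/* Report only what has accumulated since InstrStartParallelQuery(). */
void
InstrEndParallelQuery(BufferUsage *result)
{
	memset(result, 0, sizeof(BufferUsage));
	BufferUsageAccumDiff(result, &pgBufferUsage, &save_pgBufferUsage);
}

With the call moved below ExecutorStart(), buffer accesses made while the
worker builds its executor state fall outside the measured window, matching the
leader's own accounting.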
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 8b3663b3c9..eaed9fb5a9 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -752,6 +752,19 @@ ExecShutdownNode(PlanState *node)
 
 	planstate_tree_walker(node, ExecShutdownNode, NULL);
 
+	/*
+	 * Treat the node as running while we shut it down, but only if it's run
+	 * at least once already.  We don't expect much CPU consumption during
+	 * node shutdown, but in the case of Gather or Gather Merge, we may shut
+	 * down workers at this stage.  If so, their buffer usage will get
+	 * propagated into pgBufferUsage at this point, and we want to make sure
+	 * that it gets associated with the Gather node.  We skip this if the node
+	 * has never been executed, so as to avoid incorrectly making it appear
+	 * that it has.
+	 */
+	if (node->instrument && node->instrument->running)
+		InstrStartNode(node->instrument);
+
 	switch (nodeTag(node))
 	{
 		case T_GatherState:
@@ -776,6 +789,10 @@ ExecShutdownNode(PlanState *node)
 			break;
 	}
 
+	/* Stop the node if we started it above, reporting 0 tuples. */
+	if (node->instrument && node->instrument->running)
+		InstrStopNode(node->instrument, 0);
+
 	return false;
 }
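
Why the bracket is conditioned on instrument->running: InstrStopNode() is what
marks a node as having run, so wrapping shutdown of a never-executed node in a
start/stop pair would make EXPLAIN ANALYZE report it as executed.  The
buffer-usage side of the bracket works roughly as below; this is a simplified
paraphrase of instrument.c that omits the timing logic:

/* Snapshot the counters when the node (here, its shutdown) begins. */
void
InstrStartNode(Instrumentation *instr)
{
	if (instr->need_bufusage)
		instr->bufusage_start = pgBufferUsage;
}

/* Fold the delta into the node's totals and mark the node as having run. */
void
InstrStopNode(Instrumentation *instr, double nTuples)
{
	instr->tuple_count += nTuples;

	if (instr->need_bufusage)
		BufferUsageAccumDiff(&instr->bufusage,
							 &pgBufferUsage, &instr->bufusage_start);

	if (!instr->running)
		instr->running = true;
}

Any worker buffer usage that ExecShutdownGather() or ExecShutdownGatherMerge()
folds into pgBufferUsage between those two calls is therefore credited to the
Gather or Gather Merge node.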
 
diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index cdc9c51bd1..4a700b7b30 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -324,7 +324,10 @@ gather_readnext(GatherState *gatherstate)
 			Assert(!tup);
 			--gatherstate->nreaders;
 			if (gatherstate->nreaders == 0)
+			{
+				ExecShutdownGatherWorkers(gatherstate);
 				return NULL;
+			}
 			memmove(&gatherstate->reader[gatherstate->nextreader],
 					&gatherstate->reader[gatherstate->nextreader + 1],
 					sizeof(TupleQueueReader *)
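
Shutting the workers down here, as soon as the last tuple queue has been
drained, matters for the accounting above: ExecShutdownGatherWorkers() ends up
in ExecParallelFinish(), which adds each worker's published BufferUsage into
the leader's pgBufferUsage, and with this change that now happens while the
Gather node is still running (and hence still instrumented) rather than at
ExecutorEnd().  A minimal sketch of the final hop, again paraphrasing
instrument.c from memory:

/* Fold one worker's counters into the leader's backend-global totals. */
void
InstrAccumParallelQuery(BufferUsage *result)
{
	BufferUsageAdd(&pgBufferUsage, result);
}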
diff --git a/src/backend/executor/nodeLimit.c b/src/backend/executor/nodeLimit.c
index 56d98b4490..66ea6aa3c3 100644
--- a/src/backend/executor/nodeLimit.c
+++ b/src/backend/executor/nodeLimit.c
@@ -134,6 +134,8 @@ ExecLimit(PlanState *pstate)
 					node->position - node->offset >= node->count)
 				{
 					node->lstate = LIMIT_WINDOWEND;
+					/* Allow nodes to release or shut down resources. */
+					(void) ExecShutdownNode(outerPlan);
 					return NULL;
 				}
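
This mirrors what ExecutePlan() already does once the top-level tuple count is
satisfied; roughly, paraphrasing execMain.c:

	if (numberTuples && numberTuples == current_tuple_count)
	{
		/* Allow nodes to release or shut down resources. */
		(void) ExecShutdownNode(planstate);
		break;
	}

Without the explicit call in ExecLimit(), a Gather node sitting below the Limit
would keep its workers around until ExecutorEnd(), after the Gather node's
instrumentation has been closed out, so their buffer usage would not be
credited to it in EXPLAIN (ANALYZE, BUFFERS).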
 
