---
 src/backend/commands/explain.c             |   6 +-
 src/backend/executor/execAsync.c           |  22 ++++
 src/backend/executor/execParallel.c        | 131 ++++++++++++++++++---
 src/backend/executor/execProcnode.c        |  31 ++++-
 src/backend/executor/instrument.c          |   7 ++
 src/backend/executor/nodeBitmapAnd.c       |   7 ++
 src/backend/executor/nodeBitmapIndexscan.c |   7 ++
 src/backend/executor/nodeBitmapOr.c        |   7 ++
 src/backend/executor/nodeHash.c            |   7 ++
 src/backend/utils/activity/wait_event.c    |  49 +++++++-
 src/include/executor/instrument.h          |   1 +
 src/include/nodes/execnodes.h              |   1 +
 src/include/utils/wait_event.h             |   4 +
 src/test/regress/expected/explain.out      |  52 +++++++-
 src/test/regress/sql/explain.sql           |  29 +++++
 15 files changed, 333 insertions(+), 28 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 9d7372f5415..0575ae8368b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -530,6 +530,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, 
ExplainState *es,
                instrument_option |= INSTRUMENT_WAL;
        if (es->io)
                instrument_option |= INSTRUMENT_IO;
+       if (es->waits)
+               instrument_option |= INSTRUMENT_WAITS;
 
        /*
         * We always collect timing for the entire statement, even when 
node-level
@@ -2332,6 +2334,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
                show_buffer_usage(es, &planstate->instrument->instr.bufusage);
        if (es->wal && planstate->instrument)
                show_wal_usage(es, &planstate->instrument->instr.walusage);
+       if (es->waits)
+               show_wait_event_usage(es, planstate->wait_event_usage);
 
        /* Prepare per-worker buffer/WAL usage */
        if (es->workers_state && (es->buffers || es->wal) && es->verbose)
@@ -4559,7 +4563,7 @@ show_wait_event_usage(ExplainState *es, const 
WaitEventUsage *usage)
        if (usage == NULL)
                return;
 
-       if (es->format == EXPLAIN_FORMAT_TEXT && usage->nentries == 0)
+       if (usage->nentries == 0)
                return;
 
        if (usage->nentries > 0)
diff --git a/src/backend/executor/execAsync.c b/src/backend/executor/execAsync.c
index cf7ddbb01f4..e73ea55cf92 100644
--- a/src/backend/executor/execAsync.c
+++ b/src/backend/executor/execAsync.c
@@ -19,6 +19,7 @@
 #include "executor/instrument.h"
 #include "executor/nodeAppend.h"
 #include "executor/nodeForeignscan.h"
+#include "utils/wait_event.h"
 
 /*
  * Asynchronously request a tuple from a designed async-capable node.
@@ -26,12 +27,17 @@
 void
 ExecAsyncRequest(AsyncRequest *areq)
 {
+       WaitEventUsage *previous_wait_event_usage = NULL;
+
        if (areq->requestee->chgParam != NULL)  /* something changed? */
                ExecReScan(areq->requestee);    /* let ReScan handle this */
 
        /* must provide our own instrumentation support */
        if (areq->requestee->instrument)
                InstrStartNode(areq->requestee->instrument);
+       if (areq->requestee->wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(areq->requestee->wait_event_usage);
 
        switch (nodeTag(areq->requestee))
        {
@@ -47,6 +53,8 @@ ExecAsyncRequest(AsyncRequest *areq)
        ExecAsyncResponse(areq);
 
        /* must provide our own instrumentation support */
+       if (areq->requestee->wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (areq->requestee->instrument)
                InstrStopNode(areq->requestee->instrument,
                                          TupIsNull(areq->result) ? 0.0 : 1.0);
@@ -62,9 +70,14 @@ ExecAsyncRequest(AsyncRequest *areq)
 void
 ExecAsyncConfigureWait(AsyncRequest *areq)
 {
+       WaitEventUsage *previous_wait_event_usage = NULL;
+
        /* must provide our own instrumentation support */
        if (areq->requestee->instrument)
                InstrStartNode(areq->requestee->instrument);
+       if (areq->requestee->wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(areq->requestee->wait_event_usage);
 
        switch (nodeTag(areq->requestee))
        {
@@ -78,6 +91,8 @@ ExecAsyncConfigureWait(AsyncRequest *areq)
        }
 
        /* must provide our own instrumentation support */
+       if (areq->requestee->wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (areq->requestee->instrument)
                InstrStopNode(areq->requestee->instrument, 0.0);
 }
@@ -88,9 +103,14 @@ ExecAsyncConfigureWait(AsyncRequest *areq)
 void
 ExecAsyncNotify(AsyncRequest *areq)
 {
+       WaitEventUsage *previous_wait_event_usage = NULL;
+
        /* must provide our own instrumentation support */
        if (areq->requestee->instrument)
                InstrStartNode(areq->requestee->instrument);
+       if (areq->requestee->wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(areq->requestee->wait_event_usage);
 
        switch (nodeTag(areq->requestee))
        {
@@ -106,6 +126,8 @@ ExecAsyncNotify(AsyncRequest *areq)
        ExecAsyncResponse(areq);
 
        /* must provide our own instrumentation support */
+       if (areq->requestee->wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (areq->requestee->instrument)
                InstrStopNode(areq->requestee->instrument,
                                          TupIsNull(areq->result) ? 0.0 : 1.0);
diff --git a/src/backend/executor/execParallel.c 
b/src/backend/executor/execParallel.c
index 8213565a708..f77f539dd27 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -83,6 +83,12 @@ typedef struct FixedParallelExecutorState
        int                     jit_flags;
 } FixedParallelExecutorState;
 
+typedef struct SharedWaitEventUsageWorker
+{
+       int                     nentries;
+       dsa_pointer entries;
+} SharedWaitEventUsageWorker;
+
 /*
  * DSM structure for accumulating per-PlanState instrumentation.
  *
@@ -92,6 +98,10 @@ typedef struct FixedParallelExecutorState
  * of the first NodeInstrumentation object.  This will depend on the length of
  * the plan_node_id array.
  *
+ * wait_event_usage_offset: Offset, relative to the start of this structure,
+ * of the first SharedWaitEventUsageWorker object, or 0 if wait event usage is
+ * not being collected.
+ *
  * num_workers: Number of workers.
  *
  * num_plan_nodes: Number of plan nodes.
@@ -103,6 +113,7 @@ struct SharedExecutorInstrumentation
 {
        int                     instrument_options;
        int                     instrument_offset;
+       int                     wait_event_usage_offset;
        int                     num_workers;
        int                     num_plan_nodes;
        int                     plan_node_id[FLEXIBLE_ARRAY_MEMBER];
@@ -110,17 +121,17 @@ struct SharedExecutorInstrumentation
        /*
         * Array of num_plan_nodes * num_workers NodeInstrumentation objects
         * follows.
+        *
+        * If wait_event_usage_offset is non-zero, an array of num_plan_nodes *
+        * num_workers SharedWaitEventUsageWorker objects follows.
         */
 };
 #define GetInstrumentationArray(sei) \
        (StaticAssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation 
*), \
         (NodeInstrumentation *) (((char *) sei) + sei->instrument_offset))
-
-typedef struct SharedWaitEventUsageWorker
-{
-       int                     nentries;
-       dsa_pointer entries;
-} SharedWaitEventUsageWorker;
+#define GetInstrumentationWaitEventUsageArray(sei) \
+       (StaticAssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation 
*), \
+        (SharedWaitEventUsageWorker *) (((char *) sei) + 
sei->wait_event_usage_offset))
 
 struct SharedWaitEventUsage
 {
@@ -143,6 +154,12 @@ typedef struct ExecParallelInitializeDSMContext
        int                     nnodes;
 } ExecParallelInitializeDSMContext;
 
+typedef struct ExecParallelRetrieveInstrumentationContext
+{
+       SharedExecutorInstrumentation *instrumentation;
+       dsa_area   *area;
+} ExecParallelRetrieveInstrumentationContext;
+
 /* Helper functions that run in the parallel leader. */
 static char *ExecSerializePlan(Plan *plan, EState *estate);
 static bool ExecParallelEstimate(PlanState *planstate,
@@ -154,8 +171,11 @@ static shm_mq_handle 
**ExecParallelSetupTupleQueues(ParallelContext *pcxt,
 static bool ExecParallelReInitializeDSM(PlanState *planstate,
                                                                                
ParallelContext *pcxt);
 static bool ExecParallelRetrieveInstrumentation(PlanState *planstate,
-                                                                               
                SharedExecutorInstrumentation *instrumentation);
+                                                                               
                ExecParallelRetrieveInstrumentationContext *r);
 static void ExecParallelRetrieveWaitEventUsage(ParallelExecutorInfo *pei);
+static void ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker 
*worker,
+                                                                               
                   dsa_area *area,
+                                                                               
                   const WaitEventUsage *usage);
 static void ExecParallelReportWaitEventUsage(SharedWaitEventUsage *shared,
                                                                                
         dsa_area *area,
                                                                                
         const WaitEventUsage *usage);
@@ -691,6 +711,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
        int                     instrumentation_len = 0;
        int                     jit_instrumentation_len = 0;
        int                     instrument_offset = 0;
+       int                     wait_event_usage_offset = 0;
        Size            dsa_minsize = dsa_minimum_size();
        char       *query_string;
        int                     query_len;
@@ -798,6 +819,14 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
                instrumentation_len +=
                        mul_size(sizeof(NodeInstrumentation),
                                         mul_size(e.nnodes, nworkers));
+               if (estate->es_instrument & INSTRUMENT_WAITS)
+               {
+                       instrumentation_len = MAXALIGN(instrumentation_len);
+                       wait_event_usage_offset = instrumentation_len;
+                       instrumentation_len +=
+                               mul_size(sizeof(SharedWaitEventUsageWorker),
+                                                mul_size(e.nnodes, nworkers));
+               }
                shm_toc_estimate_chunk(&pcxt->estimator, instrumentation_len);
                shm_toc_estimate_keys(&pcxt->estimator, 1);
 
@@ -903,11 +932,23 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
                instrumentation = shm_toc_allocate(pcxt->toc, 
instrumentation_len);
                instrumentation->instrument_options = estate->es_instrument;
                instrumentation->instrument_offset = instrument_offset;
+               instrumentation->wait_event_usage_offset = 
wait_event_usage_offset;
                instrumentation->num_workers = nworkers;
                instrumentation->num_plan_nodes = e.nnodes;
                instrument = GetInstrumentationArray(instrumentation);
                for (i = 0; i < nworkers * e.nnodes; ++i)
                        InstrInitNode(&instrument[i], estate->es_instrument, 
false);
+               if (wait_event_usage_offset != 0)
+               {
+                       SharedWaitEventUsageWorker *worker_usage;
+
+                       worker_usage = 
GetInstrumentationWaitEventUsageArray(instrumentation);
+                       for (i = 0; i < nworkers * e.nnodes; ++i)
+                       {
+                               worker_usage[i].nentries = 0;
+                               worker_usage[i].entries = InvalidDsaPointer;
+                       }
+               }
                shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION,
                                           instrumentation);
                pei->instrumentation = instrumentation;
@@ -1137,9 +1178,11 @@ ExecParallelReInitializeDSM(PlanState *planstate,
  */
 static bool
 ExecParallelRetrieveInstrumentation(PlanState *planstate,
-                                                                       
SharedExecutorInstrumentation *instrumentation)
+                                                                       
ExecParallelRetrieveInstrumentationContext *r)
 {
+       SharedExecutorInstrumentation *instrumentation = r->instrumentation;
        NodeInstrumentation *instrument;
+       SharedWaitEventUsageWorker *wait_event_usage = NULL;
        int                     i;
        int                     n;
        int                     ibytes;
@@ -1159,6 +1202,30 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
        for (n = 0; n < instrumentation->num_workers; ++n)
                InstrAggNode(planstate->instrument, &instrument[n]);
 
+       /* Accumulate the wait event usage from all workers. */
+       if (instrumentation->wait_event_usage_offset != 0 &&
+               planstate->wait_event_usage != NULL)
+       {
+               wait_event_usage = 
GetInstrumentationWaitEventUsageArray(instrumentation);
+               wait_event_usage += i * instrumentation->num_workers;
+               for (n = 0; n < instrumentation->num_workers; ++n)
+               {
+                       SharedWaitEventUsageWorker *worker = 
&wait_event_usage[n];
+                       WaitEventUsageEntry *entries;
+
+                       if (worker->nentries <= 0 || 
!DsaPointerIsValid(worker->entries))
+                               continue;
+
+                       entries = dsa_get_address(r->area, worker->entries);
+                       
pgstat_accumulate_wait_event_usage(planstate->wait_event_usage,
+                                                                               
           entries,
+                                                                               
           worker->nentries);
+                       dsa_free(r->area, worker->entries);
+                       worker->nentries = 0;
+                       worker->entries = InvalidDsaPointer;
+               }
+       }
+
        /*
         * Also store the per-worker detail.
         *
@@ -1216,7 +1283,7 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
        }
 
        return planstate_tree_walker(planstate, 
ExecParallelRetrieveInstrumentation,
-                                                                
instrumentation);
+                                                                r);
 }
 
 /*
@@ -1290,25 +1357,21 @@ ExecParallelRetrieveWaitEventUsage(ParallelExecutorInfo 
*pei)
 }
 
 static void
-ExecParallelReportWaitEventUsage(SharedWaitEventUsage *shared,
-                                                                dsa_area *area,
-                                                                const 
WaitEventUsage *usage)
+ExecParallelReportWaitEventUsageWorker(SharedWaitEventUsageWorker *worker,
+                                                                          
dsa_area *area,
+                                                                          
const WaitEventUsage *usage)
 {
-       SharedWaitEventUsageWorker *worker;
        WaitEventUsageEntry *entries;
        dsa_pointer entries_dsa;
        Size            entries_size;
 
-       Assert(shared != NULL);
+       Assert(worker != NULL);
        Assert(area != NULL);
        Assert(usage != NULL);
-       Assert(IsParallelWorker());
-       Assert(ParallelWorkerNumber < shared->num_workers);
 
        if (usage->nentries <= 0)
                return;
 
-       worker = &shared->worker_usage[ParallelWorkerNumber];
        entries_size = mul_size(sizeof(WaitEventUsageEntry), usage->nentries);
        entries_dsa = dsa_allocate(area, entries_size);
        entries = dsa_get_address(area, entries_dsa);
@@ -1320,6 +1383,20 @@ ExecParallelReportWaitEventUsage(SharedWaitEventUsage 
*shared,
        worker->entries = entries_dsa;
 }
 
+static void
+ExecParallelReportWaitEventUsage(SharedWaitEventUsage *shared,
+                                                                dsa_area *area,
+                                                                const 
WaitEventUsage *usage)
+{
+       Assert(shared != NULL);
+       Assert(IsParallelWorker());
+       Assert(ParallelWorkerNumber < shared->num_workers);
+
+       
ExecParallelReportWaitEventUsageWorker(&shared->worker_usage[ParallelWorkerNumber],
+                                                                               
   area,
+                                                                               
   usage);
+}
+
 /*
  * Finish parallel execution.  We wait for parallel workers to finish, and
  * accumulate their buffer/WAL usage.
@@ -1385,8 +1462,13 @@ ExecParallelCleanup(ParallelExecutorInfo *pei)
 {
        /* Accumulate instrumentation, if any. */
        if (pei->instrumentation)
-               ExecParallelRetrieveInstrumentation(pei->planstate,
-                                                                               
        pei->instrumentation);
+       {
+               ExecParallelRetrieveInstrumentationContext r;
+
+               r.instrumentation = pei->instrumentation;
+               r.area = pei->area;
+               ExecParallelRetrieveInstrumentation(pei->planstate, &r);
+       }
 
        /* Accumulate JIT instrumentation, if any. */
        if (pei->jit_instrumentation)
@@ -1495,6 +1577,17 @@ ExecParallelReportInstrumentation(PlanState *planstate,
        Assert(IsParallelWorker());
        Assert(ParallelWorkerNumber < instrumentation->num_workers);
        InstrAggNode(&instrument[ParallelWorkerNumber], planstate->instrument);
+       if (instrumentation->wait_event_usage_offset != 0 &&
+               planstate->wait_event_usage != NULL)
+       {
+               SharedWaitEventUsageWorker *wait_event_usage;
+
+               wait_event_usage = 
GetInstrumentationWaitEventUsageArray(instrumentation);
+               wait_event_usage += i * instrumentation->num_workers;
+               
ExecParallelReportWaitEventUsageWorker(&wait_event_usage[ParallelWorkerNumber],
+                                                                               
           planstate->state->es_query_dsa,
+                                                                               
           planstate->wait_event_usage);
+       }
 
        return planstate_tree_walker(planstate, 
ExecParallelReportInstrumentation,
                                                                 
instrumentation);
diff --git a/src/backend/executor/execProcnode.c 
b/src/backend/executor/execProcnode.c
index 7c4c66e323f..081855b3fed 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -118,6 +118,7 @@
 #include "executor/nodeWindowAgg.h"
 #include "executor/nodeWorktablescan.h"
 #include "miscadmin.h"
+#include "utils/wait_event.h"
 #include "nodes/nodeFuncs.h"
 
 static TupleTableSlot *ExecProcNodeFirst(PlanState *node);
@@ -415,6 +416,16 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
        if (estate->es_instrument)
                result->instrument = InstrAllocNode(estate->es_instrument,
                                                                                
        result->async_capable);
+       if (estate->es_instrument & INSTRUMENT_WAITS)
+       {
+               MemoryContext oldcontext;
+
+               oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+               result->wait_event_usage = palloc_object(WaitEventUsage);
+               pgstat_init_wait_event_usage(result->wait_event_usage,
+                                                                        
estate->es_query_cxt);
+               MemoryContextSwitchTo(oldcontext);
+       }
 
        return result;
 }
@@ -481,7 +492,8 @@ ExecProcNodeFirst(PlanState *node)
  * This has essentially the same responsibilities as ExecProcNode,
  * but it does not do InstrStartNode/InstrStopNode (mainly because
  * it can't tell how many returned tuples to count).  Each per-node
- * function must provide its own instrumentation support.
+ * function must provide its own instrumentation support, including
+ * wait event attribution when enabled.
  * ----------------------------------------------------------------
  */
 Node *
@@ -758,6 +770,9 @@ ExecShutdownNode(PlanState *node)
 static bool
 ExecShutdownNode_walker(PlanState *node, void *context)
 {
+       bool            node_running;
+       WaitEventUsage *previous_wait_event_usage = NULL;
+
        if (node == NULL)
                return false;
 
@@ -773,8 +788,14 @@ ExecShutdownNode_walker(PlanState *node, void *context)
         * has never been executed, so as to avoid incorrectly making it appear
         * that it has.
         */
-       if (node->instrument && node->instrument->running)
+       node_running = node->instrument && node->instrument->running;
+       if (node_running)
+       {
                InstrStartNode(node->instrument);
+               if (node->wait_event_usage)
+                       previous_wait_event_usage =
+                               
pgstat_enter_wait_event_usage(node->wait_event_usage);
+       }
 
        planstate_tree_walker(node, ExecShutdownNode_walker, context);
 
@@ -803,8 +824,12 @@ ExecShutdownNode_walker(PlanState *node, void *context)
        }
 
        /* Stop the node if we started it above, reporting 0 tuples. */
-       if (node->instrument && node->instrument->running)
+       if (node_running)
+       {
+               if (node->wait_event_usage)
+                       
pgstat_restore_wait_event_usage(previous_wait_event_usage);
                InstrStopNode(node->instrument, 0);
+       }
 
        return false;
 }
diff --git a/src/backend/executor/instrument.c 
b/src/backend/executor/instrument.c
index ffbcd572133..a1f22180f82 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -21,6 +21,7 @@
 #include "nodes/execnodes.h"
 #include "portability/instr_time.h"
 #include "utils/guc_hooks.h"
+#include "utils/wait_event.h"
 
 BufferUsage pgBufferUsage;
 static BufferUsage save_pgBufferUsage;
@@ -181,11 +182,17 @@ TupleTableSlot *
 ExecProcNodeInstr(PlanState *node)
 {
        TupleTableSlot *result;
+       WaitEventUsage *previous_wait_event_usage = NULL;
 
        InstrStartNode(node->instrument);
+       if (node->wait_event_usage)
+               previous_wait_event_usage =
+                       pgstat_enter_wait_event_usage(node->wait_event_usage);
 
        result = node->ExecProcNodeReal(node);
 
+       if (node->wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
 
        return result;
diff --git a/src/backend/executor/nodeBitmapAnd.c 
b/src/backend/executor/nodeBitmapAnd.c
index 9007dda3802..e2b442bd378 100644
--- a/src/backend/executor/nodeBitmapAnd.c
+++ b/src/backend/executor/nodeBitmapAnd.c
@@ -32,6 +32,7 @@
 #include "executor/instrument.h"
 #include "executor/nodeBitmapAnd.h"
 #include "nodes/tidbitmap.h"
+#include "utils/wait_event.h"
 
 
 /* ----------------------------------------------------------------
@@ -115,10 +116,14 @@ MultiExecBitmapAnd(BitmapAndState *node)
        int                     nplans;
        int                     i;
        TIDBitmap  *result = NULL;
+       WaitEventUsage *previous_wait_event_usage = NULL;
 
        /* must provide our own instrumentation support */
        if (node->ps.instrument)
                InstrStartNode(node->ps.instrument);
+       if (node->ps.wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(node->ps.wait_event_usage);
 
        /*
         * get information from the node
@@ -162,6 +167,8 @@ MultiExecBitmapAnd(BitmapAndState *node)
                elog(ERROR, "BitmapAnd doesn't support zero inputs");
 
        /* must provide our own instrumentation support */
+       if (node->ps.wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (node->ps.instrument)
                InstrStopNode(node->ps.instrument, 0 /* XXX */ );
 
diff --git a/src/backend/executor/nodeBitmapIndexscan.c 
b/src/backend/executor/nodeBitmapIndexscan.c
index 7978514e1bc..c2798a81e97 100644
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@@ -28,6 +28,7 @@
 #include "executor/nodeIndexscan.h"
 #include "miscadmin.h"
 #include "nodes/tidbitmap.h"
+#include "utils/wait_event.h"
 
 
 /* ----------------------------------------------------------------
@@ -54,10 +55,14 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
        IndexScanDesc scandesc;
        double          nTuples = 0;
        bool            doscan;
+       WaitEventUsage *previous_wait_event_usage = NULL;
 
        /* must provide our own instrumentation support */
        if (node->ss.ps.instrument)
                InstrStartNode(node->ss.ps.instrument);
+       if (node->ss.ps.wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(node->ss.ps.wait_event_usage);
 
        /*
         * extract necessary information from index scan node
@@ -116,6 +121,8 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
        }
 
        /* must provide our own instrumentation support */
+       if (node->ss.ps.wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (node->ss.ps.instrument)
                InstrStopNode(node->ss.ps.instrument, nTuples);
 
diff --git a/src/backend/executor/nodeBitmapOr.c 
b/src/backend/executor/nodeBitmapOr.c
index 148c80fdae6..e9ee934a6f6 100644
--- a/src/backend/executor/nodeBitmapOr.c
+++ b/src/backend/executor/nodeBitmapOr.c
@@ -33,6 +33,7 @@
 #include "executor/nodeBitmapOr.h"
 #include "nodes/tidbitmap.h"
 #include "miscadmin.h"
+#include "utils/wait_event.h"
 
 
 /* ----------------------------------------------------------------
@@ -116,10 +117,14 @@ MultiExecBitmapOr(BitmapOrState *node)
        int                     nplans;
        int                     i;
        TIDBitmap  *result = NULL;
+       WaitEventUsage *previous_wait_event_usage = NULL;
 
        /* must provide our own instrumentation support */
        if (node->ps.instrument)
                InstrStartNode(node->ps.instrument);
+       if (node->ps.wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(node->ps.wait_event_usage);
 
        /*
         * get information from the node
@@ -180,6 +185,8 @@ MultiExecBitmapOr(BitmapOrState *node)
                elog(ERROR, "BitmapOr doesn't support zero inputs");
 
        /* must provide our own instrumentation support */
+       if (node->ps.wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (node->ps.instrument)
                InstrStopNode(node->ps.instrument, 0 /* XXX */ );
 
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 8825bb6fa23..6e9e4b4218f 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -105,9 +105,14 @@ ExecHash(PlanState *pstate)
 Node *
 MultiExecHash(HashState *node)
 {
+       WaitEventUsage *previous_wait_event_usage = NULL;
+
        /* must provide our own instrumentation support */
        if (node->ps.instrument)
                InstrStartNode(node->ps.instrument);
+       if (node->ps.wait_event_usage)
+               previous_wait_event_usage =
+                       
pgstat_enter_wait_event_usage(node->ps.wait_event_usage);
 
        if (node->parallel_state != NULL)
                MultiExecParallelHash(node);
@@ -115,6 +120,8 @@ MultiExecHash(HashState *node)
                MultiExecPrivateHash(node);
 
        /* must provide our own instrumentation support */
+       if (node->ps.wait_event_usage)
+               pgstat_restore_wait_event_usage(previous_wait_event_usage);
        if (node->ps.instrument)
                InstrStopNode(node->ps.instrument, 
node->hashtable->reportTuples);
 
diff --git a/src/backend/utils/activity/wait_event.c 
b/src/backend/utils/activity/wait_event.c
index eb01bc3d88c..3d5a10b44c4 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -27,6 +27,7 @@
 #include "storage/shmem.h"
 #include "storage/subsystems.h"
 #include "storage/spin.h"
+#include "utils/memutils.h"
 #include "utils/wait_event.h"
 
 
@@ -47,6 +48,7 @@ uint32           *my_wait_event_info = 
&local_my_wait_event_info;
 
 int                    pgstat_wait_event_usage_depth = 0;
 static WaitEventUsage *pgstat_wait_event_usage = NULL;
+static WaitEventUsage *pgstat_wait_event_node_usage = NULL;
 static uint32 pgstat_wait_event_usage_current = 0;
 static instr_time pgstat_wait_event_usage_start;
 
@@ -358,6 +360,28 @@ pgstat_reset_wait_event_storage(void)
        my_wait_event_info = &local_my_wait_event_info;
 }
 
+/*
+ * Initialize a wait event usage accumulator.
+ */
+void
+pgstat_init_wait_event_usage(WaitEventUsage *usage, MemoryContext memcontext)
+{
+       Assert(usage != NULL);
+       Assert(memcontext != NULL);
+
+       memset(usage, 0, sizeof(WaitEventUsage));
+
+       /*
+        * Wait events may end inside critical sections, for example while
+        * performing synchronous I/O.  Keep usage entries in a dedicated 
context
+        * where the memory manager permits that accounting path to grow.
+        */
+       usage->memcontext = AllocSetContextCreate(memcontext,
+                                                                               
          "Wait Event Usage",
+                                                                               
          ALLOCSET_SMALL_SIZES);
+       MemoryContextAllowInCriticalSection(usage->memcontext, true);
+}
+
 /*
  * Start collecting exact wait event timings in this backend.
  *
@@ -375,9 +399,9 @@ pgstat_begin_wait_event_usage(WaitEventUsage *usage, 
MemoryContext memcontext)
 
        if (pgstat_wait_event_usage_depth++ == 0)
        {
-               memset(usage, 0, sizeof(WaitEventUsage));
-               usage->memcontext = memcontext;
+               pgstat_init_wait_event_usage(usage, memcontext);
                pgstat_wait_event_usage = usage;
+               pgstat_wait_event_node_usage = NULL;
                pgstat_wait_event_usage_current = 0;
                INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
        }
@@ -398,11 +422,27 @@ pgstat_end_wait_event_usage(WaitEventUsage *usage)
                        pgstat_count_wait_event_end();
 
                pgstat_wait_event_usage = NULL;
+               pgstat_wait_event_node_usage = NULL;
                pgstat_wait_event_usage_current = 0;
                INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
        }
 }
 
+WaitEventUsage *
+pgstat_enter_wait_event_usage(WaitEventUsage *usage)
+{
+       WaitEventUsage *previous = pgstat_wait_event_node_usage;
+
+       pgstat_wait_event_node_usage = usage;
+       return previous;
+}
+
+void
+pgstat_restore_wait_event_usage(WaitEventUsage *usage)
+{
+       pgstat_wait_event_node_usage = usage;
+}
+
 /*
  * Record the beginning of a wait event for exact EXPLAIN-style accounting.
  */
@@ -444,6 +484,11 @@ pgstat_count_wait_event_end(void)
                                          pgstat_wait_event_usage_current,
                                          1,
                                          &elapsed);
+       if (pgstat_wait_event_node_usage != NULL)
+               WaitEventUsageAdd(pgstat_wait_event_node_usage,
+                                                 
pgstat_wait_event_usage_current,
+                                                 1,
+                                                 &elapsed);
 
        pgstat_wait_event_usage_current = 0;
        INSTR_TIME_SET_ZERO(pgstat_wait_event_usage_start);
diff --git a/src/include/executor/instrument.h 
b/src/include/executor/instrument.h
index f093a52aae0..f46ee03d2c1 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -65,6 +65,7 @@ typedef enum InstrumentOption
        INSTRUMENT_ROWS = 1 << 2,       /* needs row count */
        INSTRUMENT_WAL = 1 << 3,        /* needs WAL usage */
        INSTRUMENT_IO = 1 << 4,         /* needs IO usage */
+       INSTRUMENT_WAITS = 1 << 5,      /* needs wait event usage */
        INSTRUMENT_ALL = PG_INT32_MAX
 } InstrumentOption;
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 781c8c79132..56f9dcbef88 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1212,6 +1212,7 @@ typedef struct PlanState
 
        NodeInstrumentation *instrument;        /* Optional runtime stats for 
this
                                                                                
 * node */
+       WaitEventUsage *wait_event_usage; /* Optional wait event usage */
        WorkerNodeInstrumentation *worker_instrument;   /* per-worker
                                                                                
                         * instrumentation */
 
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 63992137ee7..64411288403 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -36,12 +36,16 @@ static inline void pgstat_report_wait_start(uint32 
wait_event_info);
 static inline void pgstat_report_wait_end(void);
 extern void pgstat_set_wait_event_storage(uint32 *wait_event_info);
 extern void pgstat_reset_wait_event_storage(void);
+extern void pgstat_init_wait_event_usage(WaitEventUsage *usage,
+                                                                               
 MemoryContext memcontext);
 extern void pgstat_begin_wait_event_usage(WaitEventUsage *usage,
                                                                                
  MemoryContext memcontext);
 extern void pgstat_end_wait_event_usage(WaitEventUsage *usage);
 extern void pgstat_accumulate_wait_event_usage(WaitEventUsage *usage,
                                                                                
           const WaitEventUsageEntry *entries,
                                                                                
           int nentries);
+extern WaitEventUsage *pgstat_enter_wait_event_usage(WaitEventUsage *usage);
+extern void pgstat_restore_wait_event_usage(WaitEventUsage *usage);
 extern void pgstat_count_wait_event_start(uint32 wait_event_info);
 extern void pgstat_count_wait_event_end(void);
 
diff --git a/src/test/regress/expected/explain.out 
b/src/test/regress/expected/explain.out
index e3847e222be..c086fa48d80 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -101,12 +101,14 @@ select explain_filter('explain (buffers, format text) 
select * from int8_tbl i8'
 
 -- WAITS option
 select explain_filter('explain (analyze, waits, costs off, summary off, timing 
off, buffers off) select pg_sleep(0.01)');
-            explain_filter             
----------------------------------------
+             explain_filter              
+-----------------------------------------
  Result (actual rows=N.N loops=N)
+   Wait Events:
+     Timeout:PgSleep calls=N time=N.N ms
  Wait Events:
    Timeout:PgSleep calls=N time=N.N ms
-(3 rows)
+(5 rows)
 
 select explain_filter_to_json('explain (analyze, waits, costs off, summary 
off, timing off, buffers off, format json) select pg_sleep(0.01)') #> '{0,Wait 
Events,0}';
                                      ?column?                                  
   
@@ -114,6 +116,12 @@ select explain_filter_to_json('explain (analyze, waits, 
costs off, summary off,
  {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": 
"Timeout"}
 (1 row)
 
+select explain_filter_to_json('explain (analyze, waits, costs off, summary 
off, timing off, buffers off, format json) select pg_sleep(0.01)') #> 
'{0,Plan,Wait Events,0}';
+                                     ?column?                                  
   
+----------------------------------------------------------------------------------
+ {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": 
"Timeout"}
+(1 row)
+
 begin;
 create function pg_temp.parallel_pg_sleep(float8) returns void
   language internal volatile parallel safe as 'pg_sleep';
@@ -130,6 +138,44 @@ select jsonb_path_query_first(
  {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": 
"Timeout"}
 (1 row)
 
+select jsonb_path_query_first(
+  explain_filter_to_json('explain (analyze, waits, costs off, summary off, 
timing off, buffers off, format json)
+                         select pg_temp.parallel_pg_sleep(0.01)
+                         from tenk1 where unique1 = 1') #> '{0,Plan}',
+  '$.**."Wait Events"[*] ? (@."Wait Event" == "PgSleep")'
+);
+                              jsonb_path_query_first                           
   
+----------------------------------------------------------------------------------
+ {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": 
"Timeout"}
+(1 row)
+
+rollback;
+begin;
+-- This test deliberately creates a Bitmap Index Scan runtime-key wait.
+-- The STABLE PL/pgSQL wrapper is test scaffolding: STABLE lets the
+-- expression be used as an index runtime key, while PL/pgSQL prevents SQL
+-- inlining from moving pg_sleep() out of the Bitmap Index Scan boundary.
+-- The planner GUCs below are likewise test-only scaffolding to make the
+-- node shape deterministic.
+create function pg_temp.explain_waits_sleep_int(int) returns int
+  language plpgsql stable as $$begin perform pg_sleep(0.01); return $1; end$$;
+create temp table explain_waits_bitmap (a int);
+insert into explain_waits_bitmap select g from generate_series(1, 10) g;
+create index explain_waits_bitmap_a_idx on explain_waits_bitmap(a);
+analyze explain_waits_bitmap;
+set local enable_seqscan = off;
+set local enable_indexscan = off;
+select jsonb_path_query_first(
+  explain_filter_to_json('explain (analyze, waits, costs off, summary off, 
timing off, buffers off, format json)
+                         select * from explain_waits_bitmap
+                         where a = pg_temp.explain_waits_sleep_int(1)') #> 
'{0,Plan}',
+  '$.** ? (@."Node Type" == "Bitmap Index Scan")."Wait Events"[*] ? (@."Wait 
Event" == "PgSleep")'
+);
+                              jsonb_path_query_first                           
   
+----------------------------------------------------------------------------------
+ {"Time": 0.0, "Calls": 0, "Wait Event": "PgSleep", "Wait Event Type": 
"Timeout"}
+(1 row)
+
 rollback;
 explain (waits) select 1;
 ERROR:  EXPLAIN option WAITS requires ANALYZE
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index 8821250bcef..bd196f248db 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -71,6 +71,7 @@ select explain_filter('explain (buffers, format text) select 
* from int8_tbl i8'
 -- WAITS option
 select explain_filter('explain (analyze, waits, costs off, summary off, timing 
off, buffers off) select pg_sleep(0.01)');
 select explain_filter_to_json('explain (analyze, waits, costs off, summary 
off, timing off, buffers off, format json) select pg_sleep(0.01)') #> '{0,Wait 
Events,0}';
+select explain_filter_to_json('explain (analyze, waits, costs off, summary 
off, timing off, buffers off, format json) select pg_sleep(0.01)') #> 
'{0,Plan,Wait Events,0}';
 begin;
 create function pg_temp.parallel_pg_sleep(float8) returns void
   language internal volatile parallel safe as 'pg_sleep';
@@ -82,6 +83,34 @@ select jsonb_path_query_first(
                          from tenk1 where unique1 = 1') #> '{0,Wait Events}',
   '$[*] ? (@."Wait Event" == "PgSleep")'
 );
+select jsonb_path_query_first(
+  explain_filter_to_json('explain (analyze, waits, costs off, summary off, 
timing off, buffers off, format json)
+                         select pg_temp.parallel_pg_sleep(0.01)
+                         from tenk1 where unique1 = 1') #> '{0,Plan}',
+  '$.**."Wait Events"[*] ? (@."Wait Event" == "PgSleep")'
+);
+rollback;
+begin;
+-- This test deliberately creates a Bitmap Index Scan runtime-key wait.
+-- The STABLE PL/pgSQL wrapper is test scaffolding: STABLE lets the
+-- expression be used as an index runtime key, while PL/pgSQL prevents SQL
+-- inlining from moving pg_sleep() out of the Bitmap Index Scan boundary.
+-- The planner GUCs below are likewise test-only scaffolding to make the
+-- node shape deterministic.
+create function pg_temp.explain_waits_sleep_int(int) returns int
+  language plpgsql stable as $$begin perform pg_sleep(0.01); return $1; end$$;
+create temp table explain_waits_bitmap (a int);
+insert into explain_waits_bitmap select g from generate_series(1, 10) g;
+create index explain_waits_bitmap_a_idx on explain_waits_bitmap(a);
+analyze explain_waits_bitmap;
+set local enable_seqscan = off;
+set local enable_indexscan = off;
+select jsonb_path_query_first(
+  explain_filter_to_json('explain (analyze, waits, costs off, summary off, 
timing off, buffers off, format json)
+                         select * from explain_waits_bitmap
+                         where a = pg_temp.explain_waits_sleep_int(1)') #> 
'{0,Plan}',
+  '$.** ? (@."Node Type" == "Bitmap Index Scan")."Wait Events"[*] ? (@."Wait 
Event" == "PgSleep")'
+);
 rollback;
 explain (waits) select 1;
 
-- 
2.52.0



Reply via email to