From 67ba6949d928db3a1a8f41163f10b71c94571ade Mon Sep 17 00:00:00 2001
From: rafaelthca <rafaelthca@gmail.com>
Date: Tue, 31 Dec 2024 15:14:03 +0000
Subject: [PATCH v2] Proposal for progressive explains

This proposal introduces a feature to print execution plans of active
queries in an in-memory shared hash object so that other sessions can
visualize them via the new view pg_stat_progress_explain.

Plans are only printed if new GUC parameter progressive_explain is
enabled.

For regular queries or queries started with EXPLAIN (without ANALYZE)
the plan is printed only once at the start.

For instrumented runs (started via EXPLAIN ANALYZE or when auto_explain
flag log_analyze is enabled) the plan is printed at a fixed interval
controlled by the new GUC parameter progressive_explain_interval,
including all instrumentation stats computed so far (per-node rows and
execution time).

New view:
- pg_stat_progress_explain
  - pid: PID of the process running the query
  - last_explain: timestamp when plan was last printed
  - explain_count: number of times the plan was printed
  - total_explain_time: accumulated time spent printing plans (in ms)
  - explain: the actual plan (limited read privileges)

New GUCs:
- progressive_explain: whether progressive plans are printed for the
local session.
  - type: bool
  - default: off
  - context: user
- progressive_explain_interval: interval between each explain print.
  - type: int
  - default: 1s
  - min: 10ms
  - context: user
- progressive_explain_sample_rate: fraction of rows processed by the
query at which progressive_explain_interval is checked to decide
whether to print a progressive plan
  - type: floating point
  - default: 0.1
  - range: 0.0 - 1.0
  - context: user
- progressive_explain_output_size: max output size of the plan
printed in the in-memory hash table.
  - type: int
  - default: 4096
  - min: 100
  - context: postmaster
- progressive_explain_format: format used to print the plans.
  - type: enum
  - default: text
  - context: user
- progressive_explain_settings: controls whether information about
modified configuration is added to the printed plan.
  - type: bool
  - default: off
  - context: user
- progressive_explain_verbose: controls whether verbose details are
added to the printed plan.
  - type: bool
  - default: off
  - context: user
---
 doc/src/sgml/config.sgml                      | 109 ++++
 doc/src/sgml/monitoring.sgml                  |  84 +++
 doc/src/sgml/perform.sgml                     | 100 ++++
 src/backend/catalog/system_views.sql          |   5 +
 src/backend/commands/explain.c                | 479 ++++++++++++++++--
 src/backend/executor/execMain.c               |  12 +
 src/backend/executor/execProcnode.c           |  35 +-
 src/backend/executor/instrument.c             |  20 +-
 src/backend/storage/ipc/ipci.c                |   7 +
 .../utils/activity/wait_event_names.txt       |   1 +
 src/backend/utils/misc/guc_tables.c           |  92 ++++
 src/include/catalog/pg_proc.dat               |  10 +
 src/include/commands/explain.h                |  22 +
 src/include/executor/instrument.h             |   1 +
 src/include/nodes/execnodes.h                 |   5 +
 src/include/storage/lwlocklist.h              |   1 +
 src/include/utils/guc.h                       |   7 +
 17 files changed, 938 insertions(+), 52 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a84e60c09b..bde7631268 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8462,6 +8462,115 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-progressive-explain" xreflabel="progressive_explain">
+      <term><varname>progressive_explain</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>progressive_explain</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Determines whether progressive explains are enabled; see
+        <xref linkend="using-explain-progressive"/>. The default is
+        <literal>off</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-verbose" xreflabel="progressive_explain_verbose">
+      <term><varname>progressive_explain_verbose</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_verbose</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls whether verbose details are added to progressive explains.
+        Equivalent to the VERBOSE option of EXPLAIN. The default is
+        <literal>off</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-settings" xreflabel="progressive_explain_settings">
+      <term><varname>progressive_explain_settings</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_settings</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls whether information about modified configuration is added to
+        progressive explains. Equivalent to the SETTINGS option of EXPLAIN.
+        The default is <literal>off</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-interval" xreflabel="progressive_explain_interval">
+      <term><varname>progressive_explain_interval</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_interval</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the interval (in milliseconds) between each instrumented
+        progressive explain. The default is <literal>1s</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-format" xreflabel="progressive_explain_format">
+      <term><varname>progressive_explain_format</varname> (<type>enum</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_format</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Selects the EXPLAIN output format to be used with progressive
+        explains. Equivalent to the FORMAT option of EXPLAIN. The default
+        is <literal>text</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-output-size" xreflabel="progressive_explain_output_size">
+      <term><varname>progressive_explain_output_size</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_output_size</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+       Specifies the amount of memory reserved to store the text of the
+       progressive explain for each client backend or parallel worker, for the
+       <structname>pg_stat_progress_explain</structname>.<structfield>explain</structfield>
+       field. If this value is specified without units, it is taken as bytes.
+       The default value is <literal>4096 bytes</literal>.
+       This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-progressive-explain-sample-rate" xreflabel="progressive_explain_sample_rate">
+      <term><varname>progressive_explain_sample_rate</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>progressive_explain_sample_rate</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Fraction of rows processed by the query at which
+        <xref linkend="guc-progressive-explain-interval"/> is checked
+        to decide whether to print a progressive explain. The default is
+        <literal>0.01</literal>, resulting in one check for every 100
+        processed rows.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </sect2>
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 840d7f8161..d2beb91893 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6688,6 +6688,90 @@ FROM pg_stat_get_backend_idset() AS backendid;
 
  </sect2>
 
+<sect2 id="explain-progress-reporting">
+  <title>EXPLAIN Progress Reporting</title>
+
+  <indexterm>
+   <primary>pg_stat_progress_explain</primary>
+  </indexterm>
+
+  <para>
+   Whenever a client backend or parallel worker is running a query with
+   <xref linkend="guc-progressive-explain"/> enabled, the
+   <structname>pg_stat_progress_explain</structname> view will contain a
+   corresponding row with query plan details; see
+   <xref linkend="using-explain-progressive"/>. The table below describes the
+   information that will be reported.
+  </para>
+
+  <table id="pg-stat-progress-explain-view" xreflabel="pg_stat_progress_explain">
+   <title><structname>pg_stat_progress_explain</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>pid</structfield> <type>integer</type>
+      </para>
+      <para>
+       Process ID of a client backend or parallel worker.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>last_explain</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Timestamp when plan was last printed.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>explain_count</structfield> <type>integer</type>
+      </para>
+      <para>
+       Number of times the plan was printed.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>total_explain_time</structfield> <type>floating point</type>
+      </para>
+      <para>
+       Accumulated time spent printing plans (in ms).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>explain</structfield> <type>text</type>
+      </para>
+      <para>
+       The progressive explain text. By default the explain text is
+       truncated at 4096 bytes; this value can be changed via the
+       parameter <xref linkend="guc-progressive-explain-output-size"/>.
+      </para></entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+
+ </sect2>
+
  </sect1>
 
  <sect1 id="dynamic-trace">
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index cd12b9ce48..dd2d21edb3 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1091,6 +1091,106 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 &lt; 100 AND unique2 &gt; 9000
    </para>
   </sect2>
 
+  <sect2 id="using-explain-progressive">
+   <title>Progressive <command>EXPLAIN</command></title>
+
+   <para>
+    The query plan created by the planner for any given active query, or
+    the detailed plan with row counts and accumulated run time when
+    <command>EXPLAIN ANALYZE</command> is used, can be visualized by any
+    session via the <xref linkend="pg-stat-progress-explain-view"/> view when
+    <xref linkend="guc-progressive-explain"/> is enabled in the client
+    backend or parallel worker executing the query. Settings
+    <xref linkend="guc-progressive-explain-format"/>,
+    <xref linkend="guc-progressive-explain-verbose"/> and
+    <xref linkend="guc-progressive-explain-settings"/> can be adjusted
+    to customize the printed plan, whose length is limited by
+    <xref linkend="guc-progressive-explain-output-size"/>.
+   </para>
+
+   <para>
+    For queries executed without <command>EXPLAIN ANALYZE</command> the
+    plan is printed only once at the beginning of query execution:
+   </para>
+
+   <para>
+<screen>
+SET progressive_explain = on;
+SET
+
+SELECT *
+FROM test t1
+INNER JOIN test t2 ON (t1.c1 = t2.c1);
+</screen>
+   </para>
+   <para>
+<screen>
+SELECT pid, explain_count, explain FROM pg_stat_progress_explain;
+ pid | explain_count |                                    explain
+-----+---------------+--------------------------------------------------------------------------------
+ 159 |             1 | Hash Join  (cost=1159375.00..3912500.00 rows=30000000 width=74)
+     |               |   Hash Cond: (t1.c1 = t2.c1)
+     |               |   ->  Seq Scan on test t1  (cost=0.00..550000.00 rows=30000000 width=37)
+     |               |   ->  Hash  (cost=550000.00..550000.00 rows=30000000 width=37)
+     |               |         ->  Seq Scan on test t2  (cost=0.00..550000.00 rows=30000000 width=37)
+     |               |
+</screen>
+   </para>
+
+   <para>
+    When <command>EXPLAIN ANALYZE</command> is used the detailed plan is
+    printed progressively based on
+    <xref linkend="guc-progressive-explain-interval"/> and
+    <xref linkend="guc-progressive-explain-sample-rate"/> settings, including
+    per node accumulated row count and run time statistics computed so far. This
+    makes progressive explain a powerful ally when investigating bottlenecks in
+    expensive queries without having to wait for <command>EXPLAIN ANALYZE</command>
+    to finish.
+   </para>
+
+   <para>
+    Progressive explains include additional information per node to help analyze
+    execution progress:
+
+    <itemizedlist>
+     <listitem>
+      <para>
+       current: the plan node currently being processed.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       never executed: a plan node not processed yet.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </para>
+   <para>
+<screen>
+SET progressive_explain = on;
+SET
+
+EXPLAIN ANALYZE SELECT *
+FROM test t1
+INNER JOIN test t2 ON (t1.c1 = t2.c1);
+</screen>
+   </para>
+   <para>
+<screen>
+SELECT pid, explain_count, explain FROM pg_stat_progress_explain;
+ pid | explain_count |                                                                   explain
+-----+---------------+----------------------------------------------------------------------------------------------------------------------------------------------
+ 159 |             7 | Hash Join  (cost=1159375.00..3912500.00 rows=30000000 width=74) (never executed)
+     |               |   Hash Cond: (t1.c1 = t2.c1)
+     |               |   ->  Seq Scan on test t1  (cost=0.00..550000.00 rows=30000000 width=37) (actual time=0.009..0.009 rows=1 loops=1)
+     |               |   ->  Hash  (cost=550000.00..550000.00 rows=30000000 width=37) (never executed)
+     |               |         ->  Seq Scan on test t2  (cost=0.00..550000.00 rows=30000000 width=37) (actual time=0.004..2165.201 rows=27925599 loops=1) (current)
+     |               |
+</screen>
+   </para>
+
+  </sect2>
+
  </sect1>
 
  <sect1 id="planner-stats">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index da9a8fe99f..4021b1ee6b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1325,6 +1325,11 @@ CREATE VIEW pg_stat_progress_copy AS
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
+CREATE VIEW pg_stat_progress_explain AS
+    SELECT
+            *
+    FROM pg_stat_progress_explain(true);
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 7c0fd63b2f..1f37ec755d 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -13,6 +13,8 @@
  */
 #include "postgres.h"
 
+#include <time.h>
+
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/createas.h"
@@ -22,6 +24,8 @@
 #include "jit/jit.h"
 #include "libpq/pqformat.h"
 #include "libpq/protocol.h"
+#include "miscadmin.h"
+#include "funcapi.h"
 #include "nodes/extensible.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -40,6 +44,12 @@
 #include "utils/tuplesort.h"
 #include "utils/typcache.h"
 #include "utils/xml.h"
+#include "utils/backend_status.h"
+#include "storage/procarray.h"
+#include "executor/spi.h"
+#include "utils/guc.h"
+
+
 
 
 /* Hook for plugins to get control in ExplainOneQuery() */
@@ -48,6 +58,8 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL;
 /* Hook for plugins to get control in explain_get_index_name() */
 explain_get_index_name_hook_type explain_get_index_name_hook = NULL;
 
+/* Shared hash to store progressive explains */
+static HTAB *explainArray = NULL;
 
 /* Instrumentation data for SERIALIZE option */
 typedef struct SerializeMetrics
@@ -140,7 +152,7 @@ static void show_hashagg_info(AggState *aggstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
 static void show_instrumentation_count(const char *qlabel, int which,
-									   PlanState *planstate, ExplainState *es);
+									   Instrumentation *instr, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
@@ -180,6 +192,8 @@ static void ExplainJSONLineEnding(ExplainState *es);
 static void ExplainYAMLLineStarting(ExplainState *es);
 static void escape_yaml(StringInfo buf, const char *str);
 static SerializeMetrics GetSerializationMetrics(DestReceiver *dest);
+void		ProgressiveExplainPrint(QueryDesc *queryDesc);
+static void ExplainTrackQueryReleaseFunc(void *);
 
 
 
@@ -385,6 +399,8 @@ NewExplainState(void)
 	es->costs = true;
 	/* Prepare output buffer. */
 	es->str = makeStringInfo();
+	/* An explain state is not progressive by default */
+	es->progressive = false;
 
 	return es;
 }
@@ -1497,6 +1513,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	const char *partialmode = NULL;
 	const char *operation = NULL;
 	const char *custom_name = NULL;
+	Instrumentation *local_instr = NULL;
 	ExplainWorkersState *save_workers_state = es->workers_state;
 	int			save_indent = es->indent;
 	bool		haschildren;
@@ -1960,24 +1977,56 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	 * instrumentation results the user didn't ask for.  But we do the
 	 * InstrEndLoop call anyway, if possible, to reduce the number of cases
 	 * auto_explain has to contend with.
+	 *
+	 * For regular explains, instrumentation cleanup is called directly on the
+	 * main instrumentation objects. Progressive explains need to clone the
+	 * instrumentation object and forcibly end the loop in nodes that may be
+	 * running.
 	 */
 	if (planstate->instrument)
-		InstrEndLoop(planstate->instrument);
+	{
+		/* Progressive explain. Clone instrumentation */
+		if (es->progressive)
+		{
+			local_instr = palloc0(sizeof(*local_instr));
+			*local_instr = *planstate->instrument;
+			/* Force end loop even if node is in progress */
+			InstrEndLoopForce(local_instr);
+		}
+		/* Use main instrumentation */
+		else
+		{
+			local_instr = planstate->instrument;
+			InstrEndLoop(local_instr);
+		}
+	}
 
 	if (es->analyze &&
-		planstate->instrument && planstate->instrument->nloops > 0)
+		local_instr && local_instr->nloops > 0)
 	{
-		double		nloops = planstate->instrument->nloops;
-		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
-		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
-		double		rows = planstate->instrument->ntuples / nloops;
+		double		nloops = local_instr->nloops;
+		double		startup_ms = 1000.0 * local_instr->startup / nloops;
+		double		total_ms = 1000.0 * local_instr->total / nloops;
+		double		rows = local_instr->ntuples / nloops;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
 			if (es->timing)
+				/* Node in progress */
+				if (es->progressive && planstate == planstate->state->progressive_explain_current_node)
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f) (current)",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+									 startup_ms, total_ms, rows, nloops);
+			else
+				/* Node in progress */
+			if (es->progressive && planstate == planstate->state->progressive_explain_current_node)
 				appendStringInfo(es->str,
-								 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
-								 startup_ms, total_ms, rows, nloops);
+								 " (actual rows=%.0f loops=%.0f) (current)",
+								 rows, nloops);
 			else
 				appendStringInfo(es->str,
 								 " (actual rows=%.0f loops=%.0f)",
@@ -1992,6 +2041,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				ExplainPropertyFloat("Actual Total Time", "ms", total_ms,
 									 3, es);
 			}
+			/* Progressive explain. Add current node flag if applicable */
+			if (es->progressive && planstate == planstate->state->progressive_explain_current_node)
+			{
+				ExplainPropertyBool("Current", true, es);
+			}
 			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
@@ -2100,29 +2154,29 @@ ExplainNode(PlanState *planstate, List *ancestors,
 						   "Index Cond", planstate, ancestors, es);
 			if (((IndexScan *) plan)->indexqualorig)
 				show_instrumentation_count("Rows Removed by Index Recheck", 2,
-										   planstate, es);
+										   local_instr, es);
 			show_scan_qual(((IndexScan *) plan)->indexorderbyorig,
 						   "Order By", planstate, ancestors, es);
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_IndexOnlyScan:
 			show_scan_qual(((IndexOnlyScan *) plan)->indexqual,
 						   "Index Cond", planstate, ancestors, es);
 			if (((IndexOnlyScan *) plan)->recheckqual)
 				show_instrumentation_count("Rows Removed by Index Recheck", 2,
-										   planstate, es);
+										   local_instr, es);
 			show_scan_qual(((IndexOnlyScan *) plan)->indexorderby,
 						   "Order By", planstate, ancestors, es);
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			if (es->analyze)
 				ExplainPropertyFloat("Heap Fetches", NULL,
-									 planstate->instrument->ntuples2, 0, es);
+									 local_instr->ntuples2, 0, es);
 			break;
 		case T_BitmapIndexScan:
 			show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig,
@@ -2133,11 +2187,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 						   "Recheck Cond", planstate, ancestors, es);
 			if (((BitmapHeapScan *) plan)->bitmapqualorig)
 				show_instrumentation_count("Rows Removed by Index Recheck", 2,
-										   planstate, es);
+										   local_instr, es);
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
 			break;
 		case T_SampleScan:
@@ -2154,7 +2208,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			if (IsA(plan, CteScan))
 				show_ctescan_info(castNode(CteScanState, planstate), es);
 			break;
@@ -2165,7 +2219,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
-											   planstate, es);
+											   local_instr, es);
 				ExplainPropertyInteger("Workers Planned", NULL,
 									   gather->num_workers, es);
 
@@ -2189,7 +2243,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
-											   planstate, es);
+											   local_instr, es);
 				ExplainPropertyInteger("Workers Planned", NULL,
 									   gm->num_workers, es);
 
@@ -2223,7 +2277,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_TableFuncScan:
 			if (es->verbose)
@@ -2237,7 +2291,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_table_func_scan_info(castNode(TableFuncScanState,
 											   planstate), es);
 			break;
@@ -2255,7 +2309,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
-											   planstate, es);
+											   local_instr, es);
 			}
 			break;
 		case T_TidRangeScan:
@@ -2272,14 +2326,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
-											   planstate, es);
+											   local_instr, es);
 			}
 			break;
 		case T_ForeignScan:
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_foreignscan_info((ForeignScanState *) planstate, es);
 			break;
 		case T_CustomScan:
@@ -2289,7 +2343,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
-											   planstate, es);
+											   local_instr, es);
 				if (css->methods->ExplainCustomScan)
 					css->methods->ExplainCustomScan(css, ancestors, es);
 			}
@@ -2299,11 +2353,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 							"Join Filter", planstate, ancestors, es);
 			if (((NestLoop *) plan)->join.joinqual)
 				show_instrumentation_count("Rows Removed by Join Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 2,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_MergeJoin:
 			show_upper_qual(((MergeJoin *) plan)->mergeclauses,
@@ -2312,11 +2366,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 							"Join Filter", planstate, ancestors, es);
 			if (((MergeJoin *) plan)->join.joinqual)
 				show_instrumentation_count("Rows Removed by Join Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 2,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_HashJoin:
 			show_upper_qual(((HashJoin *) plan)->hashclauses,
@@ -2325,11 +2379,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 							"Join Filter", planstate, ancestors, es);
 			if (((HashJoin *) plan)->join.joinqual)
 				show_instrumentation_count("Rows Removed by Join Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 2,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_Agg:
 			show_agg_keys(castNode(AggState, planstate), ancestors, es);
@@ -2337,13 +2391,13 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_hashagg_info((AggState *) planstate, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_WindowAgg:
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			show_upper_qual(((WindowAgg *) plan)->runConditionOrig,
 							"Run Condition", planstate, ancestors, es);
 			show_windowagg_info(castNode(WindowAggState, planstate), es);
@@ -2353,7 +2407,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_Sort:
 			show_sort_keys(castNode(SortState, planstate), ancestors, es);
@@ -2375,7 +2429,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
-										   planstate, es);
+										   local_instr, es);
 			break;
 		case T_ModifyTable:
 			show_modifytable_info(castNode(ModifyTableState, planstate), ancestors,
@@ -2420,10 +2474,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	}
 
 	/* Show buffer/WAL usage */
-	if (es->buffers && planstate->instrument)
-		show_buffer_usage(es, &planstate->instrument->bufusage);
-	if (es->wal && planstate->instrument)
-		show_wal_usage(es, &planstate->instrument->walusage);
+	if (es->buffers && local_instr)
+		show_buffer_usage(es, &local_instr->bufusage);
+	if (es->wal && local_instr)
+		show_wal_usage(es, &local_instr->walusage);
 
 	/* Prepare per-worker buffer/WAL usage */
 	if (es->workers_state && (es->buffers || es->wal) && es->verbose)
@@ -2562,6 +2616,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	ExplainCloseGroup("Plan",
 					  relationship ? NULL : "Plan",
 					  true, es);
+
+	/* Progressive explain. Free cloned instrumentation object */
+	if (es->progressive && local_instr)
+	{
+		pfree(local_instr);
+	}
 }
 
 /*
@@ -3940,19 +4000,19 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
  */
 static void
 show_instrumentation_count(const char *qlabel, int which,
-						   PlanState *planstate, ExplainState *es)
+						   Instrumentation *instr, ExplainState *es)
 {
 	double		nfiltered;
 	double		nloops;
 
-	if (!es->analyze || !planstate->instrument)
+	if (!es->analyze || !instr)
 		return;
 
 	if (which == 2)
-		nfiltered = planstate->instrument->nfiltered2;
+		nfiltered = instr->nfiltered2;
 	else
-		nfiltered = planstate->instrument->nfiltered1;
-	nloops = planstate->instrument->nloops;
+		nfiltered = instr->nfiltered1;
+	nloops = instr->nloops;
 
 	/* In text mode, suppress zero counts; they're not interesting enough */
 	if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT)
@@ -4618,7 +4678,7 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 		{
 			show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
 							&mtstate->ps, ancestors, es);
-			show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es);
+			show_instrumentation_count("Rows Removed by Conflict Filter", 1, (&mtstate->ps)->instrument, es);
 		}
 
 		/* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */
@@ -4627,11 +4687,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 			double		total;
 			double		insert_path;
 			double		other_path;
+			Instrumentation *local_instr;
 
-			InstrEndLoop(outerPlanState(mtstate)->instrument);
+			/* Progressive explain. Clone instrumentation */
+			if (es->progressive)
+			{
+				local_instr = palloc0(sizeof(*local_instr));
+				*local_instr = *outerPlanState(mtstate)->instrument;
+				/* Force end loop even if node is in progress */
+				InstrEndLoopForce(local_instr);
+			}
+			else
+			{
+				local_instr = outerPlanState(mtstate)->instrument;
+				InstrEndLoop(local_instr);
+			}
 
 			/* count the number of source rows */
-			total = outerPlanState(mtstate)->instrument->ntuples;
+			total = local_instr->ntuples;
 			other_path = mtstate->ps.instrument->ntuples2;
 			insert_path = total - other_path;
 
@@ -4639,6 +4712,10 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 								 insert_path, 0, es);
 			ExplainPropertyFloat("Conflicting Tuples", NULL,
 								 other_path, 0, es);
+
+			/* Progressive explain. Free cloned instrumentation object */
+			if (es->progressive)
+				pfree(local_instr);
 		}
 	}
 	else if (node->operation == CMD_MERGE)
@@ -4651,11 +4728,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 			double		update_path;
 			double		delete_path;
 			double		skipped_path;
+			Instrumentation *local_instr;
 
-			InstrEndLoop(outerPlanState(mtstate)->instrument);
+			/* Progressive explain. Clone instrumentation */
+			if (es->progressive)
+			{
+				local_instr = palloc0(sizeof(*local_instr));
+				*local_instr = *outerPlanState(mtstate)->instrument;
+				/* Force end loop even if node is in progress */
+				InstrEndLoopForce(local_instr);
+			}
+			else
+			{
+				local_instr = outerPlanState(mtstate)->instrument;
+				InstrEndLoop(local_instr);
+			}
 
 			/* count the number of source rows */
-			total = outerPlanState(mtstate)->instrument->ntuples;
+			total = local_instr->ntuples;
 			insert_path = mtstate->mt_merge_inserted;
 			update_path = mtstate->mt_merge_updated;
 			delete_path = mtstate->mt_merge_deleted;
@@ -4686,6 +4776,10 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
 				ExplainPropertyFloat("Tuples Deleted", NULL, delete_path, 0, es);
 				ExplainPropertyFloat("Tuples Skipped", NULL, skipped_path, 0, es);
 			}
+
+			/* Progressive explain. Free cloned instrumentation object */
+			if (es->progressive)
+				pfree(local_instr);
 		}
 	}
 
@@ -5910,3 +6004,290 @@ GetSerializationMetrics(DestReceiver *dest)
 
 	return empty;
 }
+
+
+/*
+ * ProgressiveExplainPrint
+ * Prints the plan of queryDesc into this backend's explain hash entry.
+ *
+ * This operation needs to be done in a dedicated memory context
+ * as plans for instrumented runs will be printed multiple times
+ * and instrumentation objects need to be cloned so that stats
+ * can get updated without interfering with original objects.
+ * Nothing is printed if queryDesc is NULL or has no planstate.
+ */
+void
+ProgressiveExplainPrint(QueryDesc *queryDesc)
+{
+	MemoryContext tmpCxt;
+	MemoryContext oldCxt;
+	instr_time	starttime;
+
+	INSTR_TIME_SET_CURRENT(starttime);
+
+	/* Dedicated memory context for the current plan being printed */
+	tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+								   "Progressive Explain Temporary Context",
+								   ALLOCSET_DEFAULT_SIZES);
+	oldCxt = MemoryContextSwitchTo(tmpCxt);
+
+	/* Produce a plan only if descriptor is being tracked */
+	if (queryDesc &&
+		queryDesc->planstate)
+	{
+		explainHashKey key;
+		explainHashEntry *entry;
+		ExplainState *es;
+
+		es = NewExplainState();
+
+		/*
+		 * Mark ExplainState as progressive so that the plan printing function
+		 * adjusts logic accordingly.
+		 */
+		es->progressive = true;
+		/* Instrumentation details come from the active QueryDesc */
+		es->analyze = (queryDesc->instrument_options != 0);
+		es->buffers = (queryDesc->instrument_options &
+					   INSTRUMENT_BUFFERS) != 0;
+		es->wal = (queryDesc->instrument_options &
+				   INSTRUMENT_WAL) != 0;
+		es->timing = (queryDesc->instrument_options &
+					  INSTRUMENT_TIMER) != 0;
+		es->summary = (es->analyze);
+
+		/* Additional options come from progressive GUC settings */
+		es->format = progressive_explain_format;
+		es->verbose = progressive_explain_verbose;
+		es->settings = progressive_explain_settings;
+
+		ExplainBeginOutput(es);
+		ExplainPrintPlan(es, queryDesc);
+		ExplainEndOutput(es);
+
+		key.pid = MyProcPid;
+
+		/* Exclusive access is needed to update the hash */
+		LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE);
+		entry = (explainHashEntry *) hash_search(explainArray, &key, HASH_FIND, NULL);
+
+		/* Only refresh an existing entry; none is created here */
+		if (entry)
+		{
+			entry->explain_count++;
+			/* strlcpy always NUL-terminates; plans that don't fit are truncated */
+			strlcpy(entry->plan, es->str->data, progressive_explain_output_size);
+			entry->explain_duration += elapsed_time(&starttime);
+			entry->last_explain = GetCurrentTimestamp();
+		}
+
+		LWLockRelease(ExplainHashLock);
+
+		/*
+		 * Free local explain state before exiting as this function may be
+		 * called multiple times in the same memory context.
+		 */
+		pfree(es->str);
+		pfree(es);
+	}
+
+	/* Clean up temp context */
+	MemoryContextSwitchTo(oldCxt);
+	MemoryContextDelete(tmpCxt);
+}
+
+/*
+ * ProgressiveExplainBegin
+ * Starts progressive explain tracking for a query in the local backend.
+ *
+ * A progressive explain is printed at the beginning of every query if
+ * progressive_explain is enabled.
+ *
+ * For instrumented runs started with EXPLAIN ANALYZE the progressive plan
+ * is printed again via ExecProcNodeInstrExplain at a regular interval
+ * controlled by progressive_explain_interval.
+ *
+ * Plans are stored in shared memory object explainArray that needs to be
+ * properly cleared when the query finishes or gets canceled. This is
+ * achieved with a reset callback registered on the memory context that is
+ * current when this function runs (expected to be the one the query
+ * descriptor was created in), so explainArray gets cleaned even when the
+ * query doesn't finish gracefully.
+ */
+void
+ProgressiveExplainBegin(QueryDesc *queryDesc)
+{
+	MemoryContextCallback *cleanup;
+	explainHashEntry *slot;
+	explainHashKey lookup;
+	bool		found;
+
+	/* Arrange for the hash entry to be dropped along with this context */
+	cleanup = (MemoryContextCallback *) palloc0(sizeof(MemoryContextCallback));
+	cleanup->func = ExplainTrackQueryReleaseFunc;
+	cleanup->arg = NULL;
+	MemoryContextRegisterResetCallback(CurrentMemoryContext, cleanup);
+
+	/* Start the clock used to space out subsequent prints */
+	INSTR_TIME_SET_CURRENT(queryDesc->estate->progressive_explain_interval_time);
+
+	lookup.pid = MyProcPid;
+
+	/* Exclusive access is needed to update the hash */
+	LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE);
+
+	/* Find or create this backend's entry and reset all of its fields */
+	slot = (explainHashEntry *) hash_search(explainArray, &lookup, HASH_ENTER, &found);
+	slot->pid = MyProcPid;
+	slot->explain_count = 0;
+	slot->explain_duration = 0.0f;
+	slot->last_explain = 0;
+	slot->plan[0] = '\0';
+
+	LWLockRelease(ExplainHashLock);
+
+	/* Printing progressive plan for the first time */
+	ProgressiveExplainPrint(queryDesc);
+}
+
+/*
+ * ProgressiveExplainUpdate
+ * Reprints the progressive explain of an instrumented run when it is due.
+ */
+void
+ProgressiveExplainUpdate(PlanState *node)
+{
+	EState	   *estate = node->state;
+
+	/* Nothing to do until progressive_explain_interval (in ms) has elapsed */
+	if (elapsed_time(&estate->progressive_explain_interval_time) * 1000.0 <= progressive_explain_interval)
+		return;
+
+	estate->progressive_explain_current_node = node;
+	ProgressiveExplainPrint(estate->query_desc);
+	estate->progressive_explain_current_node = NULL;
+	INSTR_TIME_SET_CURRENT(estate->progressive_explain_interval_time);
+}
+
+/*
+ * ExplainTrackQueryReleaseFunc
+ * Memory context reset callback that removes this backend's plan
+ * from the explain hash.  The callback argument is unused.
+ */
+static void
+ExplainTrackQueryReleaseFunc(void *arg)
+{
+	explainHashKey key;
+
+	/* Entries are keyed by PID, so drop the row of the current process */
+	key.pid = MyProcPid;
+	LWLockAcquire(ExplainHashLock, LW_EXCLUSIVE);
+	hash_search(explainArray, &key, HASH_REMOVE, NULL);
+	LWLockRelease(ExplainHashLock);
+}
+
+/*
+ * ExplainHashShmemSize
+ * Report the shared memory needed by the progressive explain hash.
+ */
+Size
+ExplainHashShmemSize(void)
+{
+	long		nentries;
+	Size		entrysize;
+
+	/* Each entry is the fixed header followed by the plan text buffer */
+	entrysize = add_size(sizeof(explainHashEntry), progressive_explain_output_size);
+	nentries = add_size(MaxBackends, max_parallel_workers);
+
+	return hash_estimate_size(nentries, entrysize);
+}
+
+/*
+ * InitExplainHash
+ * Initialize hash used to store data of progressive explains.
+ */
+void
+InitExplainHash(void)
+{
+	HASHCTL		info;
+	long		max_table_size = add_size(MaxBackends, max_parallel_workers);
+
+	info.keysize = sizeof(explainHashKey);
+	info.entrysize = sizeof(explainHashEntry) + progressive_explain_output_size;
+	/* Use the same entry count ExplainHashShmemSize() reserves space for */
+	explainArray = ShmemInitHash("explain hash",
+								 max_table_size, max_table_size,
+								 &info, HASH_ELEM | HASH_BLOBS);
+}
+
+/*
+ * pg_stat_progress_explain
+ * Return the progress of progressive explains.  Plan text is only shown to
+ * superusers and to the user whose backend produced it.
+ */
+Datum
+pg_stat_progress_explain(PG_FUNCTION_ARGS)
+{
+	char		duration[1024];
+#define EXPLAIN_ACTIVITY_COLS	5
+	HASH_SEQ_STATUS hash_seq;
+	explainHashEntry *entry;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	LWLockAcquire(ExplainHashLock, LW_SHARED);
+
+	hash_seq_init(&hash_seq, explainArray);
+	while ((entry = hash_seq_search(&hash_seq)) != NULL)
+	{
+		Datum		values[EXPLAIN_ACTIVITY_COLS] = {0};
+		bool		nulls[EXPLAIN_ACTIVITY_COLS] = {0};
+
+		values[0] = Int32GetDatum(entry->pid);
+		values[1] = TimestampTzGetDatum(entry->last_explain);
+		values[2] = Int32GetDatum(entry->explain_count);
+		snprintf(duration, sizeof(duration), "%.3f", 1000.0 * entry->explain_duration);
+		values[3] = CStringGetTextDatum(duration);
+
+		if (superuser())
+			values[4] = CStringGetTextDatum(entry->plan);
+		else
+		{
+			int			num_backends = pgstat_fetch_stat_numbackends();
+			int			curr_backend;
+			bool		found = false;
+
+			/* 1-based index */
+			for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+			{
+				LocalPgBackendStatus *local_beentry;
+				PgBackendStatus *beentry;
+
+				/* Get the next one in the list */
+				local_beentry = pgstat_get_local_beentry_by_index(curr_backend);
+				beentry = &local_beentry->backendStatus;
+
+				if (beentry->st_procpid == entry->pid)
+				{
+					found = true;
+					if (beentry->st_userid == GetUserId())
+						values[4] = CStringGetTextDatum(entry->plan);
+					else
+						values[4] = CStringGetTextDatum("<insufficient privilege>");
+					break;
+				}
+			}
+
+			/* No matching backend status entry: do not expose the plan */
+			if (!found)
+				values[4] = CStringGetTextDatum("<insufficient privilege>");
+		}
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+	}
+	LWLockRelease(ExplainHashLock);
+
+	return (Datum) 0;
+}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 5ca856fd27..e34c8f03fe 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -61,6 +61,7 @@
 #include "utils/partcache.h"
 #include "utils/rls.h"
 #include "utils/snapmgr.h"
+#include "commands/explain.h"
 
 
 /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
@@ -174,6 +175,11 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 	estate = CreateExecutorState();
 	queryDesc->estate = estate;
 
+	/*
+	 * Adding back reference to QueryDesc
+	 */
+	estate->query_desc = queryDesc;
+
 	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
 	/*
@@ -260,6 +266,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
 	InitPlan(queryDesc, eflags);
 
 	MemoryContextSwitchTo(oldcontext);
+
+	/*
+	 * Start progressive explain if enabled.
+	 */
+	if (progressive_explain)
+		ProgressiveExplainBegin(queryDesc);
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 34f28dfece..16f1407633 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -118,9 +118,13 @@
 #include "executor/nodeWorktablescan.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
+#include "commands/explain.h"
+#include "utils/guc.h"
+#include "common/pg_prng.h"
 
 static TupleTableSlot *ExecProcNodeFirst(PlanState *node);
 static TupleTableSlot *ExecProcNodeInstr(PlanState *node);
+static TupleTableSlot *ExecProcNodeInstrExplain(PlanState *node);
 static bool ExecShutdownNode_walker(PlanState *node, void *context);
 
 
@@ -461,8 +465,12 @@ ExecProcNodeFirst(PlanState *node)
 	 * does instrumentation.  Otherwise we can dispense with all wrappers and
 	 * have ExecProcNode() directly call the relevant function from now on.
 	 */
+
 	if (node->instrument)
-		node->ExecProcNode = ExecProcNodeInstr;
+		if (progressive_explain)
+			node->ExecProcNode = ExecProcNodeInstrExplain;
+		else
+			node->ExecProcNode = ExecProcNodeInstr;
 	else
 		node->ExecProcNode = node->ExecProcNodeReal;
 
@@ -489,6 +497,31 @@ ExecProcNodeInstr(PlanState *node)
 	return result;
 }
 
+/*
+ * Variant of ExecProcNodeInstr that may also refresh the progressive
+ * explain output.  ExecProcNodeFirst installs it only when
+ * progressive_explain is on, keeping the sampling cost off other queries.
+ */
+static TupleTableSlot *
+ExecProcNodeInstrExplain(PlanState *node)
+{
+	TupleTableSlot *slot;
+
+	InstrStartNode(node->instrument);
+
+	/*
+	 * Sample calls at progressive_explain_sample_rate; only sampled calls
+	 * go on to check whether a new plan needs to be printed.
+	 */
+	if (pg_prng_double(&pg_global_prng_state) < progressive_explain_sample_rate)
+		ProgressiveExplainUpdate(node);
+
+	slot = node->ExecProcNodeReal(node);
+	InstrStopNode(node->instrument, TupIsNull(slot) ? 0.0 : 1.0);
+
+	return slot;
+}
+
 
 /* ----------------------------------------------------------------
  *		MultiExecProcNode
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 268ae8a945..244c3591a1 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -25,6 +25,8 @@ static WalUsage save_pgWalUsage;
 static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add);
 static void WalUsageAdd(WalUsage *dst, WalUsage *add);
 
+static void InstrEndLoopInternal(Instrumentation *instr, bool force);
+
 
 /* Allocate new instrumentation structure(s) */
 Instrumentation *
@@ -137,7 +139,7 @@ InstrUpdateTupleCount(Instrumentation *instr, double nTuples)
 
 /* Finish a run cycle for a plan node */
 void
-InstrEndLoop(Instrumentation *instr)
+InstrEndLoopInternal(Instrumentation *instr, bool force)
 {
 	double		totaltime;
 
@@ -145,7 +147,7 @@ InstrEndLoop(Instrumentation *instr)
 	if (!instr->running)
 		return;
 
-	if (!INSTR_TIME_IS_ZERO(instr->starttime))
+	if (!INSTR_TIME_IS_ZERO(instr->starttime) && !force)
 		elog(ERROR, "InstrEndLoop called on running node");
 
 	/* Accumulate per-cycle statistics into totals */
@@ -164,6 +166,20 @@ InstrEndLoop(Instrumentation *instr)
 	instr->tuplecount = 0;
 }
 
+/* Finish a run cycle for a plan node; errors out if it is still running */
+void
+InstrEndLoop(Instrumentation *instr)
+{
+	InstrEndLoopInternal(instr, false);
+}
+
+/* Finish a run cycle even if the node is still running (check skipped) */
+void
+InstrEndLoopForce(Instrumentation *instr)
+{
+	InstrEndLoopInternal(instr, true);
+}
+
 /* aggregate instrumentation information */
 void
 InstrAggNode(Instrumentation *dst, Instrumentation *add)
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 7783ba854f..25e70c63d5 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -50,6 +50,7 @@
 #include "storage/sinvaladt.h"
 #include "utils/guc.h"
 #include "utils/injection_point.h"
+#include "commands/explain.h"
 
 /* GUCs */
 int			shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -148,6 +149,7 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, WaitEventCustomShmemSize());
 	size = add_size(size, InjectionPointShmemSize());
 	size = add_size(size, SlotSyncShmemSize());
+	size = add_size(size, ExplainHashShmemSize());
 
 	/* include additional requested shmem from preload libraries */
 	size = add_size(size, total_addin_request);
@@ -300,6 +302,11 @@ CreateOrAttachShmemStructs(void)
 	 */
 	PredicateLockShmemInit();
 
+	/*
+	 * Set up instrumented explain hash table
+	 */
+	InitExplainHash();
+
 	/*
 	 * Set up process table
 	 */
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 16144c2b72..da08e7f3c9 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -345,6 +345,7 @@ WALSummarizer	"Waiting to read or update WAL summarization state."
 DSMRegistry	"Waiting to read or update the dynamic shared memory registry."
 InjectionPoint	"Waiting to read or update information related to injection points."
 SerialControl	"Waiting to read or update shared <filename>pg_serial</filename> state."
+ExplainHash	"Waiting to access backend explain shared hash table."
 
 #
 # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE)
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 8a67f01200..b276955603 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -46,6 +46,7 @@
 #include "commands/vacuum.h"
 #include "common/file_utils.h"
 #include "common/scram-common.h"
+#include "commands/explain.h"
 #include "jit/jit.h"
 #include "libpq/auth.h"
 #include "libpq/libpq.h"
@@ -474,6 +475,14 @@ static const struct config_enum_entry wal_compression_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry explain_format_options[] = {
+	{"text", EXPLAIN_FORMAT_TEXT, false},
+	{"xml", EXPLAIN_FORMAT_XML, false},
+	{"json", EXPLAIN_FORMAT_JSON, false},
+	{"yaml", EXPLAIN_FORMAT_YAML, false},
+	{NULL, 0, false}
+};
+
 /*
  * Options for enum values stored in other modules
  */
@@ -528,6 +537,13 @@ int			log_parameter_max_length_on_error = 0;
 int			log_temp_files = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
+bool		progressive_explain = false;
+bool		progressive_explain_verbose = false;
+bool		progressive_explain_settings = false;
+int			progressive_explain_interval = 1000;
+int			progressive_explain_format = EXPLAIN_FORMAT_TEXT;
+int			progressive_explain_output_size = 4096;
+double		progressive_explain_sample_rate = 0.01;
 char	   *backtrace_functions;
 
 int			temp_file_limit = -1;
@@ -2076,6 +2092,39 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"progressive_explain", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables progressive explains."),
+			gettext_noop("Explain output is visible via pg_stat_progress_explain."),
+			GUC_EXPLAIN
+		},
+		&progressive_explain,
+		false,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"progressive_explain_verbose", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Controls whether verbose details are added to progressive explains."),
+			gettext_noop("Equivalent to the VERBOSE option of EXPLAIN."),
+			GUC_EXPLAIN
+		},
+		&progressive_explain_verbose,
+		false,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"progressive_explain_settings", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Controls whether information about modified configuration is added to progressive explains."),
+			gettext_noop("Equivalent to the SETTINGS option of EXPLAIN."),
+			GUC_EXPLAIN
+		},
+		&progressive_explain_settings,
+		false,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
@@ -3714,6 +3763,28 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"progressive_explain_interval", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Sets the interval between instrumented progressive "
+						 "explains."),
+			NULL,
+			GUC_UNIT_MS
+		},
+		&progressive_explain_interval,
+		1000, 10, INT_MAX,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"progressive_explain_output_size", PGC_POSTMASTER, QUERY_TUNING_METHOD,
+			gettext_noop("Sets the size reserved for pg_stat_progress_explain.explain, in bytes."),
+			NULL
+		},
+		&progressive_explain_output_size,
+		4096, 100, 1048576,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -3995,6 +4066,17 @@ struct config_real ConfigureNamesReal[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"progressive_explain_sample_rate", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Fraction of rows processed by the query until progressive_explain_interval is evaluated "
+						 "to print a progressive plan."),
+			gettext_noop("Use a value between 0.0 (never) and 1.0 (always).")
+		},
+		&progressive_explain_sample_rate,
+		0.01, 0.0, 1.0,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL
@@ -5207,6 +5289,16 @@ struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"progressive_explain_format", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Selects the EXPLAIN output format to be used with progressive explains."),
+			gettext_noop("Equivalent to the FORMAT option of EXPLAIN.")
+		},
+		&progressive_explain_format,
+		EXPLAIN_FORMAT_TEXT, explain_format_options,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cbbe8acd38..dbc1185cfc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12402,4 +12402,14 @@
   proargtypes => 'int2',
   prosrc => 'gist_stratnum_identity' },
 
+{ oid => '8770',
+  descr => 'statistics: information about progress of backends running statements',
+  proname => 'pg_stat_progress_explain', prorows => '100', proisstrict => 'f',
+  proretset => 't', provolatile => 's', proparallel => 'r',
+  prorettype => 'record', proargtypes => 'bool',
+  proallargtypes => '{bool,int4,timestamptz,int4,text,text}',
+  proargmodes => '{i,o,o,o,o,o}',
+  proargnames => '{mode,pid,last_explain,explain_count,total_explain_time,explain}',
+  prosrc => 'pg_stat_progress_explain' },
+
 ]
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index aa5872bc15..127777b174 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -16,6 +16,7 @@
 #include "executor/executor.h"
 #include "lib/stringinfo.h"
 #include "parser/parse_node.h"
+#include "datatype/timestamp.h"
 
 typedef enum ExplainSerializeOption
 {
@@ -67,12 +68,28 @@ typedef struct ExplainState
 	List	   *deparse_cxt;	/* context list for deparsing expressions */
 	Bitmapset  *printed_subplans;	/* ids of SubPlans we've printed */
 	bool		hide_workers;	/* set if we find an invisible Gather */
+	bool		progressive;	/* set if tracking a progressive explain */
 	int			rtable_size;	/* length of rtable excluding the RTE_GROUP
 								 * entry */
 	/* state related to the current plan node */
 	ExplainWorkersState *workers_state; /* needed if parallel plan */
 } ExplainState;
 
+typedef struct explainHashKey
+{
+	int			pid;			/* PID of the backend owning the entry */
+}			explainHashKey;
+
+typedef struct explainHashEntry
+{
+	explainHashKey key;			/* hash key of entry - MUST BE FIRST */
+	int			pid;			/* same PID as in key, as a plain column */
+	TimestampTz last_explain;	/* when the plan was last printed */
+	int			explain_count;	/* number of times the plan was printed */
+	float		explain_duration;	/* accumulated time spent printing */
+	char		plan[];			/* plan text; space set by hash entrysize */
+}			explainHashEntry;
+
 /* Hook for plugins to get control in ExplainOneQuery() */
 typedef void (*ExplainOneQuery_hook_type) (Query *query,
 										   int cursorOptions,
@@ -144,4 +161,9 @@ extern void ExplainCloseGroup(const char *objtype, const char *labelname,
 
 extern DestReceiver *CreateExplainSerializeDestReceiver(ExplainState *es);
 
+extern void ProgressiveExplainBegin(QueryDesc *queryDesc);
+extern void ProgressiveExplainUpdate(PlanState *node);
+extern Size ExplainHashShmemSize(void);
+extern void InitExplainHash(void);
+
 #endif							/* EXPLAIN_H */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index bfd7b6d844..2963a70e41 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -108,6 +108,7 @@ extern void InstrStartNode(Instrumentation *instr);
 extern void InstrStopNode(Instrumentation *instr, double nTuples);
 extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples);
 extern void InstrEndLoop(Instrumentation *instr);
+extern void InstrEndLoopForce(Instrumentation *instr);
 extern void InstrAggNode(Instrumentation *dst, Instrumentation *add);
 extern void InstrStartParallelQuery(void);
 extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 182a6956bb..c57b4c28d2 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -56,6 +56,7 @@ struct ExprState;
 struct ExprContext;
 struct RangeTblEntry;			/* avoid including parsenodes.h here */
 struct ExprEvalStep;			/* avoid including execExpr.h everywhere */
+struct QueryDesc;				/* avoid including execdesc.h here */
 struct CopyMultiInsertBuffer;
 struct LogicalTapeSet;
 
@@ -735,6 +736,10 @@ typedef struct EState
 	 */
 	List	   *es_insert_pending_result_relations;
 	List	   *es_insert_pending_modifytables;
+
+	struct QueryDesc *query_desc;
+	instr_time	progressive_explain_interval_time;
+	struct PlanState *progressive_explain_current_node;
 } EState;
 
 
diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h
index 6a2f64c54f..b6ab577370 100644
--- a/src/include/storage/lwlocklist.h
+++ b/src/include/storage/lwlocklist.h
@@ -83,3 +83,4 @@ PG_LWLOCK(49, WALSummarizer)
 PG_LWLOCK(50, DSMRegistry)
 PG_LWLOCK(51, InjectionPoint)
 PG_LWLOCK(52, SerialControl)
+PG_LWLOCK(53, ExplainHash)
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 840b0fe57f..faa5118c58 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -274,6 +274,13 @@ extern PGDLLIMPORT int log_min_duration_statement;
 extern PGDLLIMPORT int log_temp_files;
 extern PGDLLIMPORT double log_statement_sample_rate;
 extern PGDLLIMPORT double log_xact_sample_rate;
+extern PGDLLIMPORT bool progressive_explain;
+extern PGDLLIMPORT bool progressive_explain_verbose;
+extern PGDLLIMPORT bool progressive_explain_settings;
+extern PGDLLIMPORT int progressive_explain_interval;
+extern PGDLLIMPORT int progressive_explain_output_size;
+extern PGDLLIMPORT int progressive_explain_format;
+extern PGDLLIMPORT double progressive_explain_sample_rate;
 extern PGDLLIMPORT char *backtrace_functions;
 
 extern PGDLLIMPORT int temp_file_limit;
-- 
2.39.5 (Apple Git-154)

