On Thu, Oct 24, 2019 at 6:48 PM Andres Freund <[email protected]> wrote:
> Unfortunately I think the fix isn't all that trivial, due to the way we
> output the per-worker information at the end of ExplainNode(), by just
> dumping things into a string. It seems to me that a step in the right
> direction would be for ExplainNode() to create
> planstate->worker_instrument StringInfos, which can be handed to
> routines like show_sort_info(), which would print the per-node
> information into that, rather than directly dumping into
> es->output. Most of the current "Show worker detail" would be done
> earlier in ExplainNode(), at the place where we current display the
> "actual rows" bit.
>
> ISTM that should include removing the duplication fo the the contents of
> show_sort_info(), and probably also for the Gather, GatherMerge blocks
> (I've apparently skipped adding the JIT information to the latter, not
> sure if we ought to fix that in the stable branches).
>
> Any chance you want to take a stab at that?
It took me a while, but I did take a stab at it (thanks for your
off-list help). Attached is my patch that changes the structured
formats to merge sort worker output in with costs/timing/buffers
worker output. I have not touched any other worker output yet, since
it's not under a Workers group as far as I can tell (so it does not
exhibit the problem I originally reported). I can try to make further
changes here if the approach is deemed sound. I also have not touched
text output; above you had proposed
> Sort Method: external merge Disk: 4920kB
> Buffers: shared hit=682 read=10188, temp read=1415 written=2101
> Worker 0: actual time=130.058..130.324 rows=1324 loops=1
> Sort Method: external merge Disk: 5880kB
> Buffers: shared hit=337 read=3489, temp read=505 written=739
> Worker 1: actual time=130.273..130.512 rows=1297 loops=1
> Buffers: shared hit=345 read=3507, temp read=505 written=744
> Sort Method: external merge Disk: 5920kB
which makes sense to me, but I'd like to confirm this is the approach
we want before I add it to the patch.
This is my first C in close to a decade (and I was never much of a C
programmer to begin with), so please be gentle.
As Andres suggested off-list, I also changed the worker output to
order fields that also occur in the parent node in the same way as the
parent node.
I've also added a test for the patch, and because this is really an
EXPLAIN issue rather than a query feature issue, I added a
src/test/regress/sql/explain.sql for the test. I added a couple of
utility functions for munging json-formatted EXPLAIN plans into
something we can repeatably verify in regression tests (the functions
use json rather than jsonb to preserve field order). I have not added
this for YAML or XML (even though they should behave the same way),
since I'm not familiar with the the functions to manipulate those data
types in a similar way (if they exist). My hunch is due to the
similarity of structured formats, just testing JSON is enough, but I
can expand/adjust tests as necessary.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 62fb3434a3..8f898fc20c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -100,7 +100,7 @@ static void show_sortorder_options(StringInfo buf, Node *sortexpr,
Oid sortOperator, Oid collation, bool nullsFirst);
static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
List *ancestors, ExplainState *es);
-static void show_sort_info(SortState *sortstate, ExplainState *es);
+static void show_sort_info(SortState *sortstate, StringInfo* worker_strs, ExplainState *es);
static void show_hash_info(HashState *hashstate, ExplainState *es);
static void show_tidbitmap_info(BitmapHeapScanState *planstate,
ExplainState *es);
@@ -290,6 +290,7 @@ NewExplainState(void)
es->costs = true;
/* Prepare output buffer. */
es->str = makeStringInfo();
+ es->root_str = es->str;
return es;
}
@@ -1073,9 +1074,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
const char *partialmode = NULL;
const char *operation = NULL;
const char *custom_name = NULL;
+ StringInfo *worker_strs = NULL;
int save_indent = es->indent;
bool haschildren;
+ /* Prepare per-worker output */
+ if (es->analyze && planstate->worker_instrument) {
+ int num_workers = planstate->worker_instrument->num_workers;
+ int n;
+ worker_strs = (StringInfo *) palloc0(num_workers * sizeof(StringInfo));
+ for (n = 0; n < num_workers; n++) {
+ worker_strs[n] = makeStringInfo();
+ }
+ }
+
switch (nodeTag(plan))
{
case T_Result:
@@ -1340,6 +1352,64 @@ ExplainNode(PlanState *planstate, List *ancestors,
ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es);
}
+ /* Prepare worker general execution details */
+ if (es->analyze && es->verbose && planstate->worker_instrument)
+ {
+ WorkerInstrumentation *w = planstate->worker_instrument;
+ int n;
+
+ for (n = 0; n < w->num_workers; ++n)
+ {
+ Instrumentation *instrument = &w->instrument[n];
+ double nloops = instrument->nloops;
+ double startup_ms;
+ double total_ms;
+ double rows;
+
+ if (nloops <= 0)
+ continue;
+ startup_ms = 1000.0 * instrument->startup / nloops;
+ total_ms = 1000.0 * instrument->total / nloops;
+ rows = instrument->ntuples / nloops;
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ appendStringInfoSpaces(es->str, es->indent * 2);
+ appendStringInfo(es->str, "Worker %d: ", n);
+ if (es->timing)
+ appendStringInfo(es->str,
+ "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+ startup_ms, total_ms, rows, nloops);
+ else
+ appendStringInfo(es->str,
+ "actual rows=%.0f loops=%.0f\n",
+ rows, nloops);
+ es->indent++;
+ if (es->buffers)
+ show_buffer_usage(es, &instrument->bufusage);
+ es->indent--;
+ }
+ else
+ {
+ ExplainOpenWorker(worker_strs[n], es);
+
+ if (es->timing)
+ {
+ ExplainPropertyFloat("Actual Startup Time", "ms",
+ startup_ms, 3, es);
+ ExplainPropertyFloat("Actual Total Time", "ms",
+ total_ms, 3, es);
+ }
+ ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+ ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+ }
+ }
+
+ if (es->format != EXPLAIN_FORMAT_TEXT) {
+ ExplainCloseWorker(es);
+ }
+ }
+
switch (nodeTag(plan))
{
case T_SeqScan:
@@ -1839,7 +1909,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
break;
case T_Sort:
show_sort_keys(castNode(SortState, planstate), ancestors, es);
- show_sort_info(castNode(SortState, planstate), es);
+ show_sort_info(castNode(SortState, planstate), worker_strs, es);
break;
case T_MergeAppend:
show_merge_append_keys(castNode(MergeAppendState, planstate),
@@ -1868,73 +1938,30 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (es->buffers && planstate->instrument)
show_buffer_usage(es, &planstate->instrument->bufusage);
- /* Show worker detail */
- if (es->analyze && es->verbose && planstate->worker_instrument)
+ /* Prepare worker buffer usage */
+ if (es->buffers && es->analyze && es->verbose && planstate->worker_instrument
+ && es->format != EXPLAIN_FORMAT_TEXT)
{
WorkerInstrumentation *w = planstate->worker_instrument;
- bool opened_group = false;
int n;
for (n = 0; n < w->num_workers; ++n)
{
Instrumentation *instrument = &w->instrument[n];
double nloops = instrument->nloops;
- double startup_ms;
- double total_ms;
- double rows;
if (nloops <= 0)
continue;
- startup_ms = 1000.0 * instrument->startup / nloops;
- total_ms = 1000.0 * instrument->total / nloops;
- rows = instrument->ntuples / nloops;
- if (es->format == EXPLAIN_FORMAT_TEXT)
- {
- appendStringInfoSpaces(es->str, es->indent * 2);
- appendStringInfo(es->str, "Worker %d: ", n);
- if (es->timing)
- appendStringInfo(es->str,
- "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
- startup_ms, total_ms, rows, nloops);
- else
- appendStringInfo(es->str,
- "actual rows=%.0f loops=%.0f\n",
- rows, nloops);
- es->indent++;
- if (es->buffers)
- show_buffer_usage(es, &instrument->bufusage);
- es->indent--;
- }
- else
- {
- if (!opened_group)
- {
- ExplainOpenGroup("Workers", "Workers", false, es);
- opened_group = true;
- }
- ExplainOpenGroup("Worker", NULL, true, es);
- ExplainPropertyInteger("Worker Number", NULL, n, es);
-
- if (es->timing)
- {
- ExplainPropertyFloat("Actual Startup Time", "ms",
- startup_ms, 3, es);
- ExplainPropertyFloat("Actual Total Time", "ms",
- total_ms, 3, es);
- }
- ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
- ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-
- if (es->buffers)
- show_buffer_usage(es, &instrument->bufusage);
-
- ExplainCloseGroup("Worker", NULL, true, es);
- }
+ ExplainOpenWorker(worker_strs[n], es);
+ show_buffer_usage(es, &instrument->bufusage);
}
+ ExplainCloseWorker(es);
+ }
- if (opened_group)
- ExplainCloseGroup("Workers", "Workers", false, es);
+ /* Show worker detail */
+ if (planstate->worker_instrument) {
+ ExplainFlushWorkers(worker_strs, planstate->worker_instrument->num_workers, es);
}
/* Get ready to display the child plans */
@@ -2534,7 +2561,7 @@ show_tablesample(TableSampleClause *tsc, PlanState *planstate,
* If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
*/
static void
-show_sort_info(SortState *sortstate, ExplainState *es)
+show_sort_info(SortState *sortstate, StringInfo *worker_strs, ExplainState *es)
{
if (!es->analyze)
return;
@@ -2569,7 +2596,6 @@ show_sort_info(SortState *sortstate, ExplainState *es)
if (sortstate->shared_info != NULL)
{
int n;
- bool opened_group = false;
for (n = 0; n < sortstate->shared_info->num_workers; n++)
{
@@ -2594,21 +2620,15 @@ show_sort_info(SortState *sortstate, ExplainState *es)
}
else
{
- if (!opened_group)
- {
- ExplainOpenGroup("Workers", "Workers", false, es);
- opened_group = true;
- }
- ExplainOpenGroup("Worker", NULL, true, es);
- ExplainPropertyInteger("Worker Number", NULL, n, es);
+ ExplainOpenWorker(worker_strs[n], es);
ExplainPropertyText("Sort Method", sortMethod, es);
ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
ExplainPropertyText("Sort Space Type", spaceType, es);
- ExplainCloseGroup("Worker", NULL, true, es);
}
}
- if (opened_group)
- ExplainCloseGroup("Workers", "Workers", false, es);
+ if (es->format != EXPLAIN_FORMAT_TEXT) {
+ ExplainCloseWorker(es);
+ }
}
}
@@ -3716,6 +3736,62 @@ ExplainCloseGroup(const char *objtype, const char *labelname,
}
}
+/*
+ * Begin output for a specific worker. If we are currently producing output
+ * for another worker, close that worker automatically.
+ */
+void
+ExplainOpenWorker(StringInfo worker_str, ExplainState *es)
+{
+ /*
+ * We indent twice--once for the "Worker" group and once for the "Workers"
+ * group--but only need to adjust indentation once for each string of
+ * ExplainOpenWorker calls.
+ */
+ if (es->str == es->root_str) {
+ es->indent += 2;
+ }
+
+ es->print_workers = true;
+ es->str = worker_str;
+}
+
+/*
+ * Explicitly end output for the current worker and resume output
+ * for the root of the node.
+ */
+void
+ExplainCloseWorker(ExplainState *es)
+{
+ es->indent -= 2;
+ es->str = es->root_str;
+}
+
+/*
+ * Flush output registered so far to the output buffer and deallocate
+ * mechanism for writing to workers.
+ */
+void
+ExplainFlushWorkers(StringInfo* worker_strs, int num_workers, ExplainState *es)
+{
+ int i;
+
+ if (!es->print_workers) {
+ return;
+ }
+
+ ExplainOpenGroup("Workers", "Workers", false, es);
+ for (i = 0; i < num_workers; i++) {
+ ExplainOpenGroup("Worker", NULL, true, es);
+ ExplainPropertyInteger("Worker Number", NULL, i, es);
+ appendStringInfoString(es->str, worker_strs[i]->data);
+ ExplainCloseGroup("Worker", NULL, true, es);
+ }
+ ExplainCloseGroup("Workers", "Workers", false, es);
+ // do we have any other cleanup to do?
+ es->print_workers = false;
+}
+
/*
* Emit a "dummy" group that never has any members.
*
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 8639891c16..e16f6872d8 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -27,7 +27,7 @@ typedef enum ExplainFormat
typedef struct ExplainState
{
- StringInfo str; /* output buffer */
+ StringInfo str; /* current output buffer */
/* options */
bool verbose; /* be verbose */
bool analyze; /* print actual times */
@@ -40,6 +40,8 @@ typedef struct ExplainState
/* state for output formatting --- not reset for each new plan tree */
int indent; /* current indentation level */
List *grouping_stack; /* format-specific grouping state */
+ bool print_workers; /* whether current node has worker metadata */
+ StringInfo root_str; /* root output buffer */
/* state related to the current plan tree (filled by ExplainPrintPlan) */
PlannedStmt *pstmt; /* top of plan */
List *rtable; /* range table */
@@ -110,4 +112,9 @@ extern void ExplainOpenGroup(const char *objtype, const char *labelname,
extern void ExplainCloseGroup(const char *objtype, const char *labelname,
bool labeled, ExplainState *es);
+extern void ExplainOpenWorker(StringInfo worker_str, ExplainState *es);
+extern void ExplainCloseWorker(ExplainState *es);
+extern void ExplainFlushWorkers(StringInfo *worker_strs, int num_workers, ExplainState *es);
+
+
#endif /* EXPLAIN_H */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
new file mode 100644
index 0000000000..db9914db7b
--- /dev/null
+++ b/src/test/regress/expected/explain.out
@@ -0,0 +1,75 @@
+--
+-- EXPLAIN
+--
+-- helper functions for examining json explain output
+-- return a json explain of given query
+CREATE OR REPLACE FUNCTION json_explain(query text, variadic opts text[])
+ RETURNS json LANGUAGE PLPGSQL AS $$
+DECLARE
+ plan json;
+BEGIN
+ EXECUTE 'EXPLAIN (' || array_to_string(array_append(opts, 'FORMAT JSON) '), ', ') || query INTO STRICT plan;
+ RETURN plan;
+END;
+$$;
+/*
+ * Takes a json object and processes its keys and values. For every
+ * matching key whose value is a number, set it to zero to facilitate
+ * comparisons for values that can vary across executions. If not a number,
+ * set the value to -1 to indicate we matched an unexpected key. Return
+ * non-matching keys and values unmodified.
+ */
+CREATE OR REPLACE FUNCTION json_normalize_numeric_keys_matching(obj json, regexp text)
+ RETURNS json LANGUAGE SQL AS $$
+SELECT
+ coalesce(json_object_agg(
+ key,
+ CASE
+ WHEN key ~ regexp THEN
+ CASE json_typeof(value)
+ WHEN 'number' THEN 0::text::json
+ ELSE (-1)::text::json
+ END
+ ELSE value
+ END
+ ), '{}')
+FROM
+ json_each(obj)
+$$;
+-- explain only worker information, normalizing any values that could vary across executions
+CREATE OR REPLACE FUNCTION explain_workers(plan_node json)
+ RETURNS json LANGUAGE SQL AS $$
+SELECT
+ coalesce(json_object_agg(key, value), '{}')
+FROM (
+ SELECT
+ key,
+ CASE key
+ WHEN 'Workers' THEN
+ (SELECT json_agg(
+ json_normalize_numeric_keys_matching(worker, '(Blocks|Time)$|Sort Space Used')
+ )
+ FROM json_array_elements(value) AS workers(worker))
+ WHEN 'Plans' THEN
+ (SELECT json_agg(
+ explain_workers(child)
+ )
+ FROM json_array_elements(value) AS children(child))
+ END AS value
+ FROM
+ json_each(plan_node) AS entries(key, value)
+ WHERE
+ key IN ('Workers', 'Plans')
+) AS plan_fields(key, value);
+$$;
+-- test that per-worker sort and buffers output is combined correctly in EXPLAIN
+set force_parallel_mode = true;
+SELECT explain_workers(json_explain($$
+ SELECT * FROM (VALUES(1),(2)) x ORDER BY x;
+$$, 'verbose', 'analyze', 'buffers') -> 0 -> 'Plan');
+ explain_workers
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ { "Plans" : [{ "Workers" : [{ "Worker Number" : 0, "Actual Startup Time" : 0, "Actual Total Time" : 0, "Actual Rows" : 2, "Actual Loops" : 1, "Sort Method" : "quicksort", "Sort Space Used" : 0, "Sort Space Type" : "Memory", "Shared Hit Blocks" : 0, "Shared Read Blocks" : 0, "Shared Dirtied Blocks" : 0, "Shared Written Blocks" : 0, "Local Hit Blocks" : 0, "Local Read Blocks" : 0, "Local Dirtied Blocks" : 0, "Local Written Blocks" : 0, "Temp Read Blocks" : 0, "Temp Written Blocks" : 0 }], "Plans" : [{ "Workers" : [{ "Worker Number" : 0, "Actual Startup Time" : 0, "Actual Total Time" : 0, "Actual Rows" : 2, "Actual Loops" : 1, "Shared Hit Blocks" : 0, "Shared Read Blocks" : 0, "Shared Dirtied Blocks" : 0, "Shared Written Blocks" : 0, "Local Hit Blocks" : 0, "Local Read Blocks" : 0, "Local Dirtied Blocks" : 0, "Local Written Blocks" : 0, "Temp Read Blocks" : 0, "Temp Written Blocks" : 0 }] }] }] }
+(1 row)
+
+reset force_parallel_mode;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index d33a4e143d..d2b17dd3ea 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -112,7 +112,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
# Another group of parallel tests
# ----------
-test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort
+test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain
# event triggers cannot run concurrently with any test that runs DDL
test: event_trigger
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index f86f5c5682..acba391332 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -193,6 +193,7 @@ test: indexing
test: partition_aggregate
test: partition_info
test: tuplesort
+test: explain
test: event_trigger
test: fast_default
test: stats
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
new file mode 100644
index 0000000000..5fb6e7d2e1
--- /dev/null
+++ b/src/test/regress/sql/explain.sql
@@ -0,0 +1,77 @@
+--
+-- EXPLAIN
+--
+
+-- helper functions for examining json explain output
+
+-- return a json explain of given query
+CREATE OR REPLACE FUNCTION json_explain(query text, variadic opts text[])
+ RETURNS json LANGUAGE PLPGSQL AS $$
+DECLARE
+ plan json;
+BEGIN
+ EXECUTE 'EXPLAIN (' || array_to_string(array_append(opts, 'FORMAT JSON) '), ', ') || query INTO STRICT plan;
+ RETURN plan;
+END;
+$$;
+
+/*
+ * Takes a json object and processes its keys and values. For every
+ * matching key whose value is a number, set it to zero to facilitate
+ * comparisons for values that can vary across executions. If not a number,
+ * set the value to -1 to indicate we matched an unexpected key. Return
+ * non-matching keys and values unmodified.
+ */
+CREATE OR REPLACE FUNCTION json_normalize_numeric_keys_matching(obj json, regexp text)
+ RETURNS json LANGUAGE SQL AS $$
+SELECT
+ coalesce(json_object_agg(
+ key,
+ CASE
+ WHEN key ~ regexp THEN
+ CASE json_typeof(value)
+ WHEN 'number' THEN 0::text::json
+ ELSE (-1)::text::json
+ END
+ ELSE value
+ END
+ ), '{}')
+FROM
+ json_each(obj)
+$$;
+
+-- explain only worker information, normalizing any values that could vary across executions
+CREATE OR REPLACE FUNCTION explain_workers(plan_node json)
+ RETURNS json LANGUAGE SQL AS $$
+SELECT
+ coalesce(json_object_agg(key, value), '{}')
+FROM (
+ SELECT
+ key,
+ CASE key
+ WHEN 'Workers' THEN
+ (SELECT json_agg(
+ json_normalize_numeric_keys_matching(worker, '(Blocks|Time)$|Sort Space Used')
+ )
+ FROM json_array_elements(value) AS workers(worker))
+ WHEN 'Plans' THEN
+ (SELECT json_agg(
+ explain_workers(child)
+ )
+ FROM json_array_elements(value) AS children(child))
+ END AS value
+ FROM
+ json_each(plan_node) AS entries(key, value)
+ WHERE
+ key IN ('Workers', 'Plans')
+) AS plan_fields(key, value);
+$$;
+
+-- test that per-worker sort and buffers output is combined correctly in EXPLAIN
+set force_parallel_mode = true;
+
+SELECT explain_workers(json_explain($$
+ SELECT * FROM (VALUES(1),(2)) x ORDER BY x;
+$$, 'verbose', 'analyze', 'buffers') -> 0 -> 'Plan');
+
+reset force_parallel_mode;