Hello,

The other day I've rediscovered the exciting idea of using signals to dump
a backend's currently executed query plan, which, it turns out, was already
proposed by Pavel and Simon in these threads:

http://www.postgresql.org/message-id/flat/cafj8pra-duzkmdtu52ciugb0p7tvri_b8ltjmjfwcnr1lpt...@mail.gmail.com

http://www.postgresql.org/message-id/flat/CAFj8pRDEo24joEg4UFRDYeFADFTw-jw_=t=kPwOyDW=v=g1...@mail.gmail.com#

Unfortunately, the latest one is missing an actual patch (or the attachment
was scrubbed) and I'm really surprised that the idea didn't take off then.


While evaluating it myself I've decided to simply patch auto_explain module
which is now in contrib, so presumably should be available to a broad
audience.  Proof-of-concept patch against master is attached (a patch for
an older branch like 9.0 requires trivial manual effort to adapt).

While I can see the value of in-core support for this, realistically this
is unlikely to be included in 9.5, but an ad hoc patch could be useful long
before that happens.

What this patch does is simply dump the plan of the query being run by the
executor to the log when signaled with SIGUSR1.  The auto_explain module
must be loaded to the backend beforehand of course,
session_preload_libraries seems like the place to do that cluster-wide.

Probably using SIGUSR2 would be more appropriate, but I'm not sure if there
are other extensions out there that might be already using it for some
other reason (well, I do not know that for SIGUSR1 either).  Looking at the
current state of affairs in procsignal_sigusr1_handler() makes me believe
it should be pretty safe to catch the signal like I do.  Or is that not the
case?

The current_query_desc probably needs to be a stack-like structure in order
to keep track of the nested queries correctly, but it works in the simplest
cases.


What would be even more useful is including stats from the running query in
the explain output, if you're a willing to pay for a (hopefully small)
overhead.  Unfortunately, that doesn't work out of the box: if you enable
the auto_explain.log_analyze and friends in the .conf-file, you either get
all zero counts, or if you're really unlucky, an error from InstrEndLoop():

ERROR:  InstrEndLoop called on running node

The latest patch in this area I could found is this one:
http://www.postgresql.org/message-id/87wsn82lda....@oxford.xeocode.com

>From what I can see, there's no way around this problem short of hacking
InstrEndLoop...  Did anything change in this area since 2008 possibly?  I
would really love to have a way to make this work with existing un-patched
servers.

Cheers!
--
Alex
diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c
index 2a184ed..a64b3fe 100644
--- a/contrib/auto_explain/auto_explain.c
+++ b/contrib/auto_explain/auto_explain.c
@@ -13,6 +13,7 @@
 #include "postgres.h"
 
 #include <limits.h>
+#include <signal.h>
 
 #include "commands/explain.h"
 #include "executor/instrument.h"
@@ -46,6 +47,9 @@ static ExecutorStart_hook_type prev_ExecutorStart = NULL;
 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
+static pqsigfunc prev_sigusr1 = NULL;
+
+static QueryDesc *current_query_desc = NULL;
 
 #define auto_explain_enabled() \
 	(auto_explain_log_min_duration >= 0 && \
@@ -54,6 +58,8 @@ static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 void		_PG_init(void);
 void		_PG_fini(void);
 
+static void sigusr1_handler(int signum);
+
 static void explain_ExecutorStart(QueryDesc *queryDesc, int eflags);
 static void explain_ExecutorRun(QueryDesc *queryDesc,
 					ScanDirection direction,
@@ -61,6 +67,7 @@ static void explain_ExecutorRun(QueryDesc *queryDesc,
 static void explain_ExecutorFinish(QueryDesc *queryDesc);
 static void explain_ExecutorEnd(QueryDesc *queryDesc);
 
+static void explain_output_plain(QueryDesc *queryDesc, double time_msec, int format);
 
 /*
  * Module load callback
@@ -170,6 +177,8 @@ _PG_init(void)
 	ExecutorFinish_hook = explain_ExecutorFinish;
 	prev_ExecutorEnd = ExecutorEnd_hook;
 	ExecutorEnd_hook = explain_ExecutorEnd;
+
+	prev_sigusr1 = pqsignal(SIGUSR1, sigusr1_handler);
 }
 
 /*
@@ -185,12 +194,34 @@ _PG_fini(void)
 	ExecutorEnd_hook = prev_ExecutorEnd;
 }
 
+static void
+sigusr1_handler(int signum)
+{
+	int			save_errno = errno;
+
+	if (current_query_desc != NULL)
+	{
+		explain_output_plain(current_query_desc, 0.0, EXPLAIN_FORMAT_TEXT);
+	}
+
+	if (prev_sigusr1 != NULL &&
+		prev_sigusr1 != SIG_IGN &&
+		prev_sigusr1 != SIG_DFL)
+	{
+		prev_sigusr1(signum);
+	}
+
+	errno = save_errno;
+}
+
 /*
  * ExecutorStart hook: start up logging if needed
  */
 static void
 explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
+	current_query_desc = queryDesc;
+
 	if (auto_explain_enabled())
 	{
 		/* Enable per-node instrumentation iff log_analyze is required. */
@@ -294,45 +325,7 @@ explain_ExecutorEnd(QueryDesc *queryDesc)
 		msec = queryDesc->totaltime->total * 1000.0;
 		if (msec >= auto_explain_log_min_duration)
 		{
-			ExplainState *es = NewExplainState();
-
-			es->analyze = (queryDesc->instrument_options && auto_explain_log_analyze);
-			es->verbose = auto_explain_log_verbose;
-			es->buffers = (es->analyze && auto_explain_log_buffers);
-			es->timing = (es->analyze && auto_explain_log_timing);
-			es->summary = es->analyze;
-			es->format = auto_explain_log_format;
-
-			ExplainBeginOutput(es);
-			ExplainQueryText(es, queryDesc);
-			ExplainPrintPlan(es, queryDesc);
-			if (es->analyze && auto_explain_log_triggers)
-				ExplainPrintTriggers(es, queryDesc);
-			ExplainEndOutput(es);
-
-			/* Remove last line break */
-			if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n')
-				es->str->data[--es->str->len] = '\0';
-
-			/* Fix JSON to output an object */
-			if (auto_explain_log_format == EXPLAIN_FORMAT_JSON)
-			{
-				es->str->data[0] = '{';
-				es->str->data[es->str->len - 1] = '}';
-			}
-
-			/*
-			 * Note: we rely on the existing logging of context or
-			 * debug_query_string to identify just which statement is being
-			 * reported.  This isn't ideal but trying to do it here would
-			 * often result in duplication.
-			 */
-			ereport(LOG,
-					(errmsg("duration: %.3f ms  plan:\n%s",
-							msec, es->str->data),
-					 errhidestmt(true)));
-
-			pfree(es->str->data);
+			explain_output_plain(queryDesc, msec, auto_explain_log_format);
 		}
 	}
 
@@ -340,4 +333,50 @@ explain_ExecutorEnd(QueryDesc *queryDesc)
 		prev_ExecutorEnd(queryDesc);
 	else
 		standard_ExecutorEnd(queryDesc);
+
+	current_query_desc = NULL;
+}
+
+static void
+explain_output_plain(QueryDesc *queryDesc, double time_msec, int format)
+{
+	ExplainState *es = NewExplainState();
+
+	es->analyze = (queryDesc->instrument_options && auto_explain_log_analyze);
+	es->verbose = auto_explain_log_verbose;
+	es->buffers = (es->analyze && auto_explain_log_buffers);
+	es->timing = (es->analyze && auto_explain_log_timing);
+	es->summary = es->analyze;
+	es->format = format;
+
+	ExplainBeginOutput(es);
+	ExplainQueryText(es, queryDesc);
+	ExplainPrintPlan(es, queryDesc);
+	if (es->analyze && auto_explain_log_triggers)
+		ExplainPrintTriggers(es, queryDesc);
+	ExplainEndOutput(es);
+
+	/* Remove last line break */
+	if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n')
+		es->str->data[--es->str->len] = '\0';
+
+	/* Fix JSON to output an object */
+	if (format == EXPLAIN_FORMAT_JSON)
+	{
+		es->str->data[0] = '{';
+		es->str->data[es->str->len - 1] = '}';
+	}
+
+	/*
+	 * Note: we rely on the existing logging of context or
+	 * debug_query_string to identify just which statement is being
+	 * reported.  This isn't ideal but trying to do it here would
+	 * often result in duplication.
+	 */
+	ereport(LOG,
+			(errmsg("duration: %.3f ms  plan:\n%s",
+					time_msec, es->str->data),
+			 errhidestmt(true)));
+
+	pfree(es->str->data);
 }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to