Re: [HACKERS] auto_explain sample rate

Craig Ringer Wed, 03 Jun 2015 03:55:18 -0700

On 2 June 2015 at 15:07, Craig Ringer <cr...@2ndquadrant.com> wrote:

> On 29 May 2015 at 11:35, Tom Lane <t...@sss.pgh.pa.us> wrote:
>
>> Craig Ringer <cr...@2ndquadrant.com> writes:
>> > It's sometimes desirable to collect auto_explain data with ANALYZE in order
>> > to track down hard-to-reproduce issues, but the performance impacts can be
>> > pretty hefty on the DB.
>>
>> > I'm inclined to add a sample rate to auto_explain so that it can trigger
>> > only on x percent of queries,
>>
>> That sounds reasonable ...
>>
>
> Cool, I'll cook that up then. Thanks for the sanity check.
>


OK, here we go.

To make sure it doesn't trigger on all backends at once, and to ensure it
doesn't rely on a shared point of contention in shmem, this sets up a
counter with a random value on each backend start.

Because it needs to either always run both the Start and End hooks, or run
neither, this doesn't count nested statements for sampling purposes. So if
you run my_huge_plpgsql_function() then either all its statements will be
explained or none of them will. This only applies if nested statement
explain is enabled. It's possible to get around this by adding a separate
nested statement counter that's reset at each top level End hook, but it
doesn't seem worthwhile.

The sample rate has no effect on ANALYZE, which remains enabled or disabled
for all queries. I don't see any point in adding a separate sample rate
control that would ANALYZE only some sub-proportion of EXPLAINed statements.

-- 
 Craig Ringer                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services

From 1f395e11bad41a77881970c7179f65e33c523cdd Mon Sep 17 00:00:00 2001
From: Craig Ringer <cr...@2ndquadrant.com>
Date: Wed, 3 Jun 2015 17:48:02 +0800
Subject: [PATCH] Allow sampling of only some queries by auto_explain

The new parameter auto_explain.sample_ratio specifies the proportion
of queries that are to be explained.  The starting offset is randomized
so that somewhere between the 1st and n'th statement gets explained,
then every n'th statement. No statement classification is performed.

If nested statement auto_explain is enabled, then for any given
top level statement either all sub-statements or none will be explained,
i.e. sampling doesn't apply to nested statements.
---
 contrib/auto_explain/auto_explain.c | 56 ++++++++++++++++++++++++++++++++++++-
 doc/src/sgml/auto-explain.sgml      | 18 ++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c
index 2a184ed..161e24e 100644
--- a/contrib/auto_explain/auto_explain.c
+++ b/contrib/auto_explain/auto_explain.c
@@ -29,6 +29,7 @@ static bool auto_explain_log_triggers = false;
 static bool auto_explain_log_timing = true;
 static int	auto_explain_log_format = EXPLAIN_FORMAT_TEXT;
 static bool auto_explain_log_nested_statements = false;
+static int  auto_explain_sample_ratio = 1;
 
 static const struct config_enum_entry format_options[] = {
 	{"text", EXPLAIN_FORMAT_TEXT, false},
@@ -47,9 +48,14 @@ static ExecutorRun_hook_type prev_ExecutorRun = NULL;
 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 
+/* per-backend counter used for ratio sampling */
+static int  auto_explain_sample_counter = 0;
+
 #define auto_explain_enabled() \
 	(auto_explain_log_min_duration >= 0 && \
-	 (nesting_level == 0 || auto_explain_log_nested_statements))
+	 (nesting_level == 0 || auto_explain_log_nested_statements)) && \
+	 (auto_explain_sample_ratio == 1 || auto_explain_sample_counter == 0)
+
 
 void		_PG_init(void);
 void		_PG_fini(void);
@@ -61,6 +67,19 @@ static void explain_ExecutorRun(QueryDesc *queryDesc,
 static void explain_ExecutorFinish(QueryDesc *queryDesc);
 static void explain_ExecutorEnd(QueryDesc *queryDesc);
 
+static void
+auto_explain_sample_ratio_assign_hook(int newval, void *extra)
+{
+	if (auto_explain_sample_ratio != newval)
+	{
+		/* Schedule a counter reset when the sample ratio changed */
+		auto_explain_sample_counter = -1;
+	}
+
+	auto_explain_sample_ratio = newval;
+}
+
+
 
 /*
  * Module load callback
@@ -159,6 +178,18 @@ _PG_init(void)
 							 NULL,
 							 NULL);
 
+	DefineCustomIntVariable("auto_explain.sample_ratio",
+		"Only explain one in approx. every sample_ratio queries, or 1 for all",
+							NULL,
+							&auto_explain_sample_ratio,
+							1,
+							1, INT_MAX - 1,
+							PGC_SUSET,
+							0,
+							NULL,
+							auto_explain_sample_ratio_assign_hook,
+							NULL);
+
 	EmitWarningsOnPlaceholders("auto_explain");
 
 	/* Install hooks. */
@@ -191,6 +222,29 @@ _PG_fini(void)
 static void
 explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
+	/*
+	 * For ratio sampling, only increment the counter for top-level
+	 * statements. Either all nested statements will be explained
+	 * or none will, because we need to know at ExecutorEnd hook time
+	 * whether or not we explained any given statement.
+	 */
+	if (nesting_level == 0 && auto_explain_sample_ratio > 1)
+	{
+		if (auto_explain_sample_counter == -1)
+		{
+			/*
+			 * A counter reset was scheduled because the sample ratio changed, so
+			 * re-seed it. The seed might be zero and cause this statement to be explained.
+			 */
+			auto_explain_sample_counter = pg_lrand48() % auto_explain_sample_ratio;
+		}
+		else if ((++auto_explain_sample_counter) == auto_explain_sample_ratio)
+		{
+			/* Wrap the counter and explain this statement */
+			auto_explain_sample_counter = 0;
+		}
+	}
+
 	if (auto_explain_enabled())
 	{
 		/* Enable per-node instrumentation iff log_analyze is required. */
diff --git a/doc/src/sgml/auto-explain.sgml b/doc/src/sgml/auto-explain.sgml
index d527208..fbb1220 100644
--- a/doc/src/sgml/auto-explain.sgml
+++ b/doc/src/sgml/auto-explain.sgml
@@ -203,6 +203,24 @@ LOAD 'auto_explain';
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+     <varname>auto_explain.sample_ratio</varname> (<type>integer</type>)
+     <indexterm>
+      <primary><varname>auto_explain.sample_ratio</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      <varname>auto_explain.sample_ratio</varname> causes auto_explain to only
+      explain every n'th statement in each session.  The number of statements
+      before the first is explained is between 1 and n, randomized to ensure
+      that short sessions still sometimes get explained. Only superusers can
+      change this setting.
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
 
   <para>
-- 
2.1.0

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] auto_explain sample rate

Reply via email to