Hi,

My apologies for the last grumpy message I wrote; I sent it out of
surprise that my patch was marked "closed" on the current commitfest.
I hope I haven't discouraged you from reviewing this updated version.

I think you intended to use the "Waiting on Author" status -- that
leaves the commitfest entry open. I will re-open the commitfest entry
myself, I hope that's OK.

Here is version 3 of the patch.

On Wed, Oct 5, 2011 at 02:36, gabrielle <gor...@gmail.com> wrote:
> - The doc comment 'pgstat_get_backend_current_activity' doesn't match
> the function name 'pgstat_get_crashed_backend_activity'.

This is now fixed in the patch.

> Project coding guidelines:
> - There are some formatting problems, such as all newlines at the same
> indent level need to line up.
> - Wayward tabs, line 2725 in pgstat.c specifically

I wasn't entirely sure which line you're referring to (what's on line
2725 in the current git master wasn't touched by me)
But I presume it's this statement:
    if (activity < BackendActivityBuffer || ...

I have rearranged this if statement using a temporary variable so that
there's no need to right-align long expressions anymore.

I also tweaked the wording of comments here and there.

For the points not addressed in this version, see my response here:
http://archives.postgresql.org/pgsql-hackers/2011-10/msg00202.php

Regards,
Marti
From 748c5fcaf1dc7518a06da276574a565098cde628 Mon Sep 17 00:00:00 2001
From: Marti Raudsepp <ma...@juffo.org>
Date: Wed, 28 Sep 2011 00:46:48 +0300
Subject: [PATCH] Log crashed backend's query (activity string)

The crashing query is often a good starting point in debugging the
cause, and much more easily accessible than core dumps.

We're extra-paranoid in reading the activity buffer since it might be
corrupt. All non-ASCII characters are replaced with '?'

Example output:
LOG:  server process (PID 31451) was terminated by signal 9: Killed
DETAIL:  Running query: DO LANGUAGE plpythonu 'import os;os.kill(os.getpid(),9)'
---
 src/backend/postmaster/pgstat.c     |   66 +++++++++++++++++++++++++++++++++++
 src/backend/postmaster/postmaster.c |   17 ++++++---
 src/backend/utils/adt/ascii.c       |   31 ++++++++++++++++
 src/include/pgstat.h                |    1 +
 src/include/utils/ascii.h           |    1 +
 5 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 9132db7..220507f 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -58,6 +58,7 @@
 #include "storage/pg_shmem.h"
 #include "storage/pmsignal.h"
 #include "storage/procsignal.h"
+#include "utils/ascii.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -2751,6 +2752,71 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
 	return "<backend information not available>";
 }
 
+/* ----------
+ * pgstat_get_crashed_backend_activity() -
+ *
+ *	Return a string representing the current activity of the backend with
+ *	the specified PID.	Like the function above, but reads shared memory with
+ *	the expectation that it may be corrupt.
+ *
+ *	This function is only intended to be used by postmaster to report the
+ *	query that crashed the backend. In particular, no attempt is made to
+ *	follow the correct concurrency protocol when accessing the
+ *	BackendStatusArray. But that's OK, in the worst case we get a corrupted
+ *	message. We also must take care not to trip on ereport(ERROR).
+ *
+ *	Note: return strings for special cases match pg_stat_get_backend_activity.
+ * ----------
+ */
+const char *
+pgstat_get_crashed_backend_activity(int pid)
+{
+	volatile PgBackendStatus *beentry;
+	int			i;
+
+	beentry = BackendStatusArray;
+	for (i = 1; i <= MaxBackends; i++)
+	{
+		if (beentry->st_procpid == pid)
+		{
+			/* Read pointer just once, so it can't change after validation */
+			const char *activity = beentry->st_activity;
+			char	   *buffer;
+			const char *activity_last;
+
+			/*
+			 * We can't access activity pointer before we verify that it
+			 * falls into BackendActivityBuffer. To make sure that the entire
+			 * string including its ending is contained within the buffer,
+			 * the pointer can start at offset (MaxBackends - 1) at most.
+			 */
+			activity_last = BackendActivityBuffer +
+				((MaxBackends - 1) * pgstat_track_activity_query_size);
+
+			if (activity < BackendActivityBuffer ||
+				activity > activity_last)
+				return "<command string corrupt>";
+
+			if (*(activity) == '\0')
+				return "<command string not enabled>";
+
+			buffer = (char *) palloc(pgstat_track_activity_query_size);
+			/*
+			 * Copy only ASCII-safe characters so we don't run into encoding
+			 * problems when reporting the message.
+			 */
+			ascii_safe_strncpy(buffer, activity,
+							   pgstat_track_activity_query_size);
+
+			return buffer;
+		}
+
+		beentry++;
+	}
+
+	/* PID not found */
+	return "<backend information not available>";
+}
 
 /* ------------------------------------------------------------
  * Local support functions follow
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 0a84d97..1137aa1 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2764,6 +2764,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 static void
 LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 {
+	const char   *activity = pgstat_get_crashed_backend_activity(pid);
+
 	if (WIFEXITED(exitstatus))
 		ereport(lev,
 
@@ -2771,7 +2773,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 		  translator: %s is a noun phrase describing a child process, such as
 		  "server process" */
 				(errmsg("%s (PID %d) exited with exit code %d",
-						procname, pid, WEXITSTATUS(exitstatus))));
+						procname, pid, WEXITSTATUS(exitstatus)),
+				 errdetail("Running query: %s", activity)));
 	else if (WIFSIGNALED(exitstatus))
 #if defined(WIN32)
 		ereport(lev,
@@ -2781,7 +2784,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 		  "server process" */
 				(errmsg("%s (PID %d) was terminated by exception 0x%X",
 						procname, pid, WTERMSIG(exitstatus)),
-				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value.")));
+				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
+				 errdetail("Running query: %s", activity)));
 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
 	ereport(lev,
 
@@ -2791,7 +2795,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 			(errmsg("%s (PID %d) was terminated by signal %d: %s",
 					procname, pid, WTERMSIG(exitstatus),
 					WTERMSIG(exitstatus) < NSIG ?
-					sys_siglist[WTERMSIG(exitstatus)] : "(unknown)")));
+					sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
+			 errdetail("Running query: %s", activity)));
 #else
 		ereport(lev,
 
@@ -2799,7 +2804,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 		  translator: %s is a noun phrase describing a child process, such as
 		  "server process" */
 				(errmsg("%s (PID %d) was terminated by signal %d",
-						procname, pid, WTERMSIG(exitstatus))));
+						procname, pid, WTERMSIG(exitstatus)),
+				 errdetail("Running query: %s", activity)));
 #endif
 	else
 		ereport(lev,
@@ -2808,7 +2814,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
 		  translator: %s is a noun phrase describing a child process, such as
 		  "server process" */
 				(errmsg("%s (PID %d) exited with unrecognized status %d",
-						procname, pid, exitstatus)));
+						procname, pid, exitstatus),
+				 errdetail("Running query: %s", activity)));
 }
 
 /*
diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c
index 7ecf494..f82aeb4 100644
--- a/src/backend/utils/adt/ascii.c
+++ b/src/backend/utils/adt/ascii.c
@@ -158,3 +158,34 @@ to_ascii_default(PG_FUNCTION_ARGS)
 
 	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
 }
+
+/* ----------
+ * "Escape" a string in unknown encoding to a valid ASCII string.
+ * Replace non-ASCII bytes with '?'
+ * This must not trigger ereport(ERROR), as it is called from postmaster.
+ * ----------
+ */
+void
+ascii_safe_strncpy(char *dest, const char *src, int len)
+{
+	int			i;
+
+	for (i = 0; i < (len - 1); i++)
+	{
+		char ch = src[i];
+
+		if (ch == '\0')
+			break;
+		/* Keep printable ASCII characters */
+		if (32 <= ch && ch <= 127)
+			dest[i] = ch;
+		/* White-space is also OK */
+		else if (ch == '\n' || ch == '\r' || ch == '\t')
+			dest[i] = ch;
+		/* Everything else is replaced with '?' */
+		else
+			dest[i] = '?';
+	}
+
+	dest[i] = '\0';
+}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 20c4d43..9694896 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -720,6 +720,7 @@ extern void pgstat_report_appname(const char *appname);
 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
 extern void pgstat_report_waiting(bool waiting);
 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
+extern const char *pgstat_get_crashed_backend_activity(int pid);
 
 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
 extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
diff --git a/src/include/utils/ascii.h b/src/include/utils/ascii.h
index ad70e03..adeddb5 100644
--- a/src/include/utils/ascii.h
+++ b/src/include/utils/ascii.h
@@ -16,5 +16,6 @@
 extern Datum to_ascii_encname(PG_FUNCTION_ARGS);
 extern Datum to_ascii_enc(PG_FUNCTION_ARGS);
 extern Datum to_ascii_default(PG_FUNCTION_ARGS);
+extern void ascii_safe_strncpy(char *dest, const char *src, int len);
 
 #endif   /* _ASCII_H_ */
-- 
1.7.7

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to