Hi, My apologies for the last grumpy message I wrote; I sent it out of surprise that my patch was marked "closed" on the current commitfest. I hope I haven't discouraged you from reviewing this updated version.
I think you intended to use the "Waiting on Author" status -- that leaves the commitfest entry open. I will re-open the commitfest entry myself, I hope that's OK. Here is version 3 of the patch. On Wed, Oct 5, 2011 at 02:36, gabrielle <gor...@gmail.com> wrote: > - The doc comment 'pgstat_get_backend_current_activity' doesn't match > the function name 'pgstat_get_crashed_backend_activity'. This is now fixed in the patch. > Project coding guidelines: > - There are some formatting problems, such as all newlines at the same > indent level need to line up. > - Wayward tabs, line 2725 in pgstat.c specifically I wasn't entirely sure which line you're referring to (what's on line 2725 in the current git master wasn't touched by me) But I presume it's this statement: if (activity < BackendActivityBuffer || ... I have rearranged this if statement using a temporary variable so that there's no need to right-align long expressions anymore. I also tweaked the wording of comments here and there. For the points not addressed in this version, see my response here: http://archives.postgresql.org/pgsql-hackers/2011-10/msg00202.php Regards, Marti
From 748c5fcaf1dc7518a06da276574a565098cde628 Mon Sep 17 00:00:00 2001 From: Marti Raudsepp <ma...@juffo.org> Date: Wed, 28 Sep 2011 00:46:48 +0300 Subject: [PATCH] Log crashed backend's query (activity string) The crashing query is often a good starting point in debugging the cause, and much more easily accessible than core dumps. We're extra-paranoid in reading the activity buffer since it might be corrupt. All non-ASCII characters are replaced with '?' Example output: LOG: server process (PID 31451) was terminated by signal 9: Killed DETAIL: Running query: DO LANGUAGE plpythonu 'import os;os.kill(os.getpid(),9)' --- src/backend/postmaster/pgstat.c | 66 +++++++++++++++++++++++++++++++++++ src/backend/postmaster/postmaster.c | 17 ++++++--- src/backend/utils/adt/ascii.c | 31 ++++++++++++++++ src/include/pgstat.h | 1 + src/include/utils/ascii.h | 1 + 5 files changed, 111 insertions(+), 5 deletions(-) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 9132db7..220507f 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -58,6 +58,7 @@ #include "storage/pg_shmem.h" #include "storage/pmsignal.h" #include "storage/procsignal.h" +#include "utils/ascii.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -2751,6 +2752,71 @@ pgstat_get_backend_current_activity(int pid, bool checkUser) return "<backend information not available>"; } +/* ---------- + * pgstat_get_crashed_backend_activity() - + * + * Return a string representing the current activity of the backend with + * the specified PID. Like the function above, but reads shared memory with + * the expectation that it may be corrupt. + * + * This function is only intended to be used by postmaster to report the + * query that crashed the backend. In particular, no attempt is made to + * follow the correct concurrency protocol when accessing the + * BackendStatusArray. But that's OK, in the worst case we get a corrupted + * message. We also must take care not to trip on ereport(ERROR). + * + * Note: return strings for special cases match pg_stat_get_backend_activity. + * ---------- + */ +const char * +pgstat_get_crashed_backend_activity(int pid) +{ + volatile PgBackendStatus *beentry; + int i; + + beentry = BackendStatusArray; + for (i = 1; i <= MaxBackends; i++) + { + if (beentry->st_procpid == pid) + { + /* Read pointer just once, so it can't change after validation */ + const char *activity = beentry->st_activity; + char *buffer; + const char *activity_last; + + /* + * We can't access activity pointer before we verify that it + * falls into BackendActivityBuffer. To make sure that the entire + * string including its ending is contained within the buffer, + * the pointer can start at offset (MaxBackends - 1) at most. + */ + activity_last = BackendActivityBuffer + + ((MaxBackends - 1) * pgstat_track_activity_query_size); + + if (activity < BackendActivityBuffer || + activity > activity_last) + return "<command string corrupt>"; + + if (*(activity) == '\0') + return "<command string not enabled>"; + + buffer = (char *) palloc(pgstat_track_activity_query_size); + /* + * Copy only ASCII-safe characters so we don't run into encoding + * problems when reporting the message. + */ + ascii_safe_strncpy(buffer, activity, + pgstat_track_activity_query_size); + + return buffer; + } + + beentry++; + } + + /* PID not found */ + return "<backend information not available>"; +} /* ------------------------------------------------------------ * Local support functions follow diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0a84d97..1137aa1 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2764,6 +2764,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) static void LogChildExit(int lev, const char *procname, int pid, int exitstatus) { + const char *activity = pgstat_get_crashed_backend_activity(pid); + if (WIFEXITED(exitstatus)) ereport(lev, @@ -2771,7 +2773,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) translator: %s is a noun phrase describing a child process, such as "server process" */ (errmsg("%s (PID %d) exited with exit code %d", - procname, pid, WEXITSTATUS(exitstatus)))); + procname, pid, WEXITSTATUS(exitstatus)), + errdetail("Running query: %s", activity))); else if (WIFSIGNALED(exitstatus)) #if defined(WIN32) ereport(lev, @@ -2781,7 +2784,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) "server process" */ (errmsg("%s (PID %d) was terminated by exception 0x%X", procname, pid, WTERMSIG(exitstatus)), - errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."))); + errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), + errdetail("Running query: %s", activity))); #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST ereport(lev, @@ -2791,7 +2795,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) (errmsg("%s (PID %d) was terminated by signal %d: %s", procname, pid, WTERMSIG(exitstatus), WTERMSIG(exitstatus) < NSIG ? - sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"))); + sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"), + errdetail("Running query: %s", activity))); #else ereport(lev, @@ -2799,7 +2804,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) translator: %s is a noun phrase describing a child process, such as "server process" */ (errmsg("%s (PID %d) was terminated by signal %d", - procname, pid, WTERMSIG(exitstatus)))); + procname, pid, WTERMSIG(exitstatus)), + errdetail("Running query: %s", activity))); #endif else ereport(lev, @@ -2808,7 +2814,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) translator: %s is a noun phrase describing a child process, such as "server process" */ (errmsg("%s (PID %d) exited with unrecognized status %d", - procname, pid, exitstatus))); + procname, pid, exitstatus), + errdetail("Running query: %s", activity))); } /* diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c index 7ecf494..f82aeb4 100644 --- a/src/backend/utils/adt/ascii.c +++ b/src/backend/utils/adt/ascii.c @@ -158,3 +158,34 @@ to_ascii_default(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); } + +/* ---------- + * "Escape" a string in unknown encoding to a valid ASCII string. + * Replace non-ASCII bytes with '?' + * This must not trigger ereport(ERROR), as it is called from postmaster. + * ---------- + */ +void +ascii_safe_strncpy(char *dest, const char *src, int len) +{ + int i; + + for (i = 0; i < (len - 1); i++) + { + char ch = src[i]; + + if (ch == '\0') + break; + /* Keep printable ASCII characters */ + if (32 <= ch && ch <= 127) + dest[i] = ch; + /* White-space is also OK */ + else if (ch == '\n' || ch == '\r' || ch == '\t') + dest[i] = ch; + /* Everything else is replaced with '?' */ + else + dest[i] = '?'; + } + + dest[i] = '\0'; +} diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 20c4d43..9694896 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -720,6 +720,7 @@ extern void pgstat_report_appname(const char *appname); extern void pgstat_report_xact_timestamp(TimestampTz tstamp); extern void pgstat_report_waiting(bool waiting); extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser); +extern const char *pgstat_get_crashed_backend_activity(int pid); extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id); extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id); diff --git a/src/include/utils/ascii.h b/src/include/utils/ascii.h index ad70e03..adeddb5 100644 --- a/src/include/utils/ascii.h +++ b/src/include/utils/ascii.h @@ -16,5 +16,6 @@ extern Datum to_ascii_encname(PG_FUNCTION_ARGS); extern Datum to_ascii_enc(PG_FUNCTION_ARGS); extern Datum to_ascii_default(PG_FUNCTION_ARGS); +extern void ascii_safe_strncpy(char *dest, const char *src, int len); #endif /* _ASCII_H_ */ -- 1.7.7
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers