On Wed, Dec 31, 2014 at 01:56:08PM -0500, Noah Misch wrote:
> On Wed, Dec 31, 2014 at 12:32:37AM -0500, Robert Haas wrote:
> > On Sun, Dec 28, 2014 at 4:58 PM, Noah Misch <n...@leadboat.com> wrote:
> > > I wondered whether to downgrade FATAL to LOG in back branches.  
> > > Introducing a
> > > new reason to block startup is disruptive for a minor release, but having 
> > > the
> > > postmaster deadlock at an unpredictable later time is even more 
> > > disruptive.  I
> > > am inclined to halt startup that way in all branches.
> > 
> > Jeepers.  I'd rather not do that.  From your report, this problem has
> > been around for years.  Yet, as far as I know, it's bothering very few
> > real users, some of whom might be far more bothered by the postmaster
> > suddenly failing to start.  I'm fine with a FATAL in master, but I'd
> > vote against doing anything that might prevent startup in the
> > back-branches without more compelling justification.
> 
> Clusters hosted on OS X fall into these categories:
> 
> 1) Unaffected configuration.  This includes everyone setting a valid messages
>    locale via LANG, LC_ALL or LC_MESSAGES.
> 2) Affected configuration.  Through luck and light use, the cluster would not
>    experience the crashes/hangs.
> 3) Cluster would experience the crashes/hangs.
> 
> DBAs in (3) want the FATAL at startup, but those in (2) want a LOG message
> instead.  DBAs in (1) don't care.  Since intermittent postmaster hangs are far
> worse than startup failure, if (2) and (3) have similar population, FATAL is
> the better bet.  If (2) is sufficiently more populous than (3), then the many
> small pricks from startup failure do add up to hurt more than the occasional
> postmaster hang.  Who knows how that calculation plays out.

The first attached patch, for all branches, adds LOG-level messages and an
assertion.  So cassert builds will fail hard, while others won't.  The second
patch, for master only, changes the startup-time message to FATAL.  If we
decide to use FATAL in all branches, I would just squash them into one.
diff --git a/configure b/configure
index 7594401..f7dedc0 100755
--- a/configure
+++ b/configure
@@ -11366,7 +11366,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask 
symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle 
setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes 
wcstombs wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.in b/configure.in
index 0dc3f18..76c6405 100644
--- a/configure.in
+++ b/configure.in
@@ -1265,7 +1265,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask 
symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle 
setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes 
wcstombs wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
diff --git a/src/backend/postmaster/postmaster.c 
b/src/backend/postmaster/postmaster.c
index 5106f52..6b0190c 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -87,6 +87,10 @@
 #include <dns_sd.h>
 #endif
 
+#ifdef HAVE_PTHREAD_IS_THREADED_NP
+#include <pthread.h>
+#endif
+
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "bootstrap/bootstrap.h"
@@ -1200,6 +1204,24 @@ PostmasterMain(int argc, char *argv[])
         */
        RemovePgTempFiles();
 
+#ifdef HAVE_PTHREAD_IS_THREADED_NP
+
+       /*
+        * On Darwin, libintl replaces setlocale() with a version that calls
+        * CFLocaleCopyCurrent() when its second argument is "" and every 
relevant
+        * environment variable is unset or empty.  CFLocaleCopyCurrent() makes
+        * the process multithreaded.  The postmaster calls sigprocmask() and
+        * calls fork() without an immediate exec(), both of which have 
undefined
+        * behavior in a multithreaded program.  A multithreaded postmaster is 
the
+        * normal case on Windows, which offers neither fork() nor 
sigprocmask().
+        */
+       if (pthread_is_threaded_np() != 0)
+               ereport(LOG,
+                               
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("postmaster became multithreaded during 
startup"),
+                errhint("Set the LC_ALL environment variable to a valid 
locale.")));
+#endif
+
        /*
         * Remember postmaster startup time
         */
@@ -1657,6 +1679,15 @@ ServerLoop(void)
                        last_touch_time = now;
                }
 
+#ifdef HAVE_PTHREAD_IS_THREADED_NP
+
+               /*
+                * With assertions enabled, check regularly for appearance of
+                * additional threads.  All builds check at start and exit.
+                */
+               Assert(pthread_is_threaded_np() == 0);
+#endif
+
                /*
                 * If we already sent SIGQUIT to children and they are slow to 
shut
                 * down, it's time to send them SIGKILL.  This doesn't happen
@@ -4745,6 +4776,18 @@ SubPostmasterMain(int argc, char *argv[])
 static void
 ExitPostmaster(int status)
 {
+#ifdef HAVE_PTHREAD_IS_THREADED_NP
+
+       /*
+        * There is no known cause for a postmaster to become multithreaded 
after
+        * startup.  Recheck to account for the possibility of unknown causes.
+        */
+       if (pthread_is_threaded_np() != 0)
+               ereport(LOG,
+                               
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("postmaster became multithreaded")));
+#endif
+
        /* should cleanup shared memory and kill all backends */
 
        /*
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 465281c..83f04e9 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -394,6 +394,9 @@
 /* Define to 1 if the PS_STRINGS thing exists. */
 #undef HAVE_PS_STRINGS
 
+/* Define to 1 if you have the `pthread_is_threaded_np' function. */
+#undef HAVE_PTHREAD_IS_THREADED_NP
+
 /* Define to 1 if you have the <pwd.h> header file. */
 #undef HAVE_PWD_H
 
diff --git a/src/backend/postmaster/postmaster.c 
b/src/backend/postmaster/postmaster.c
index 6b0190c..96d54e8 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1216,7 +1216,7 @@ PostmasterMain(int argc, char *argv[])
         * normal case on Windows, which offers neither fork() nor 
sigprocmask().
         */
        if (pthread_is_threaded_np() != 0)
-               ereport(LOG,
+               ereport(FATAL,
                                
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                                 errmsg("postmaster became multithreaded during 
startup"),
                 errhint("Set the LC_ALL environment variable to a valid 
locale.")));
@@ -4781,11 +4781,14 @@ ExitPostmaster(int status)
        /*
         * There is no known cause for a postmaster to become multithreaded 
after
         * startup.  Recheck to account for the possibility of unknown causes.
+        * This message uses LOG level, because an unclean shutdown at this 
point
+        * would usually not look much different from a clean shutdown.
         */
        if (pthread_is_threaded_np() != 0)
                ereport(LOG,
-                               
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("postmaster became multithreaded")));
+                               (errcode(ERRCODE_INTERNAL_ERROR),
+                                errmsg_internal("postmaster became 
multithreaded"),
+                  errdetail("Please report this to 
<pgsql-b...@postgresql.org>.")));
 #endif
 
        /* should cleanup shared memory and kill all backends */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to