Hi,

Open MPI's signal handler (show_stackframe function defined in
opal/util/stacktrace.c) calls non-async-signal-safe functions
and it causes a problem.

See attached mpisigabrt.c. Passing corrupted memory to realloc(3)
will cause SIGABRT and show_stackframe function will be invoked.
But invoked show_stackframe function deadlocks in backtrace_symbols(3)
on some systems because backtrace_symbols(3) calls malloc(3)
internally and a deadlock of realloc/malloc mutex occurs.

Attached mpisigabrt.gstack.txt shows the stacktrace gotten
by gdb in this deadlock situation on Ubuntu 12.04 LTS (precise)
x86_64. Though I could not reproduce this behavior on RHEL 5/6,
I can reproduce it also on K computer and its successor PRIMEHPC FX10.
Passing non-heap memory to free(3) and double-free also cause
this deadlock.

malloc (and backtrace_symbols) is not marked as async-signal-safe
in POSIX and current glibc, though it seems to have been marked
in old glibc. So we should not call it in the signal handler now.

  
http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04
  http://cygwin.com/ml/libc-help/2013-06/msg00005.html

I wrote a patch to address this issue. See the attached
async-signal-safe-stacktrace.patch.

This patch calls backtrace_symbols_fd(3) instead of backtrace_symbols(3).
Though backtrace_symbols_fd is not declared as async-signal-safe,
it is described not to call malloc internally in its man. So it
should be rather safer.

Output format of show_stackframe function is not changed by
this patch. But the opal_backtrace_print function (backtrace
framework) interface is changed for the output format compatibility.
This requires changes in some additional files (ompi_mpi_abort.c
etc.).

This patch also removes unnecessary fflush(3) calls, which are
meaningless for write(2) system call but might cause a similar
problem.

What do you think about this patch?

Takahiro Kawashima,
MPI development team,
Fujitsu
Index: opal/mca/backtrace/backtrace.h
===================================================================
--- opal/mca/backtrace/backtrace.h	(revision 29841)
+++ opal/mca/backtrace/backtrace.h	(working copy)
@@ -34,11 +34,12 @@
 
 
 /*
- * print back trace to FILE file
+ * Print back trace to FILE file with a prefix for each line.
+ * First strip lines are not printed.
  *
  * \note some attempts made to be signal safe.
  */
-OPAL_DECLSPEC void opal_backtrace_print(FILE *file);
+OPAL_DECLSPEC int opal_backtrace_print(FILE *file, char *prefix, int strip);
 
 /*
  * Return back trace in buffer.  buffer will be allocated by the
Index: opal/mca/backtrace/execinfo/backtrace_execinfo.c
===================================================================
--- opal/mca/backtrace/execinfo/backtrace_execinfo.c	(revision 29841)
+++ opal/mca/backtrace/execinfo/backtrace_execinfo.c	(working copy)
@@ -20,6 +20,10 @@
 #include "opal_config.h"
 
 #include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #ifdef HAVE_EXECINFO_H
 #include <execinfo.h>
 #endif
@@ -27,23 +31,31 @@
 #include "opal/constants.h"
 #include "opal/mca/backtrace/backtrace.h"
 
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
 {
-    int i;
+    int i, fd, len;
     int trace_size;
     void * trace[32];
-    char ** messages = (char **)NULL;
+    char buf[6];
 
+    fd = fileno (file);
+    if (-1 == fd) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
     trace_size = backtrace (trace, 32);
-    messages = backtrace_symbols (trace, trace_size);
 
-    for (i = 0; i < trace_size; i++) {
-        fprintf(file, "[%d] func:%s\n", i, messages[i]);
-        fflush(file);
+    for (i = strip; i < trace_size; i++) {
+        if (NULL != prefix) {
+            write (fd, prefix, strlen (prefix));
+        }
+        len = snprintf (buf, sizeof(buf), "[%2d] ", i - strip);
+        write (fd, buf, len);
+        backtrace_symbols_fd (&trace[i], 1, fd);
     }
 
-    free(messages);
+    return OPAL_SUCCESS;
 }
 
 
Index: opal/mca/backtrace/printstack/backtrace_printstack.c
===================================================================
--- opal/mca/backtrace/printstack/backtrace_printstack.c	(revision 29841)
+++ opal/mca/backtrace/printstack/backtrace_printstack.c	(working copy)
@@ -24,10 +24,12 @@
 #include "opal/constants.h"
 #include "opal/mca/backtrace/backtrace.h"
 
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
 {
     printstack(fileno(file));
+
+    return OPAL_SUCCESS;
 }
 
 
Index: opal/mca/backtrace/none/backtrace_none.c
===================================================================
--- opal/mca/backtrace/none/backtrace_none.c	(revision 29841)
+++ opal/mca/backtrace/none/backtrace_none.c	(working copy)
@@ -23,9 +23,10 @@
 #include "opal/constants.h"
 #include "opal/mca/backtrace/backtrace.h"
 
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
 {
+    return OPAL_ERR_NOT_IMPLEMENTED;
 }
 
 
Index: opal/util/stacktrace.c
===================================================================
--- opal/util/stacktrace.c	(revision 29841)
+++ opal/util/stacktrace.c	(working copy)
@@ -53,12 +53,10 @@
 /**
  * This function is being called as a signal-handler in response
  * to a user-specified signal (e.g. SIGFPE or SIGSEGV).
- * For Linux/Glibc, it then uses backtrace and backtrace_symbols
- * to figure the current stack and then prints that out to stdout.
+ * For Linux/Glibc, it then uses backtrace and backtrace_symbols_fd
+ * to figure the current stack and print that out to stderr.
  * Where available, the BSD libexecinfo is used to provide Linux/Glibc
- * compatible backtrace and backtrace_symbols functions.
- * Yes, printf and malloc are not signal-safe per se, but should be 
- * on Linux?
+ * compatible backtrace and backtrace_symbols_fd functions.
  *
  *  @param signo with the signal number raised 
  *  @param info with information regarding the reason/send of the signal
@@ -72,9 +70,8 @@
     char print_buffer[1024];
     char * tmp = print_buffer;
     int size = sizeof (print_buffer);
-    int ret, traces_size;
+    int ret;
     char *si_code_str = "";
-    char **traces;
 
     /* write out the footer information */
     memset (print_buffer, 0, sizeof (print_buffer));
@@ -82,18 +79,8 @@
                    HOSTFORMAT "*** Process received signal ***\n",
                    stacktrace_hostname, getpid());
     write(fileno(stderr), print_buffer, ret);
-    fflush(stderr);
 
 
-    /*
-     * Yes, we are doing printf inside a signal-handler.
-     * However, backtrace itself calls malloc (which may not be signal-safe,
-     * under linux, printf and malloc are)
-     *
-     * We could use backtrace_symbols_fd and write directly into an
-     * filedescriptor, however, without formatting -- also this fd 
-     * should be opened in a sensible way...
-     */
     memset (print_buffer, 0, sizeof (print_buffer));
 
 #ifdef HAVE_STRSIGNAL
@@ -342,28 +329,14 @@
 
     /* write out the signal information generated above */
     write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
-    fflush(stderr);
 
     /* print out the stack trace */
-    ret = opal_backtrace_buffer(&traces, &traces_size);
-    if (OPAL_SUCCESS == ret) {
-        int i;
-        /* since we have the opportunity, strip off the bottom two
-           function calls, which will be this function and
-           opal_backtrace_buffer(). */
-        for (i = 2 ; i < traces_size ; ++i) {
-            ret = snprintf(print_buffer, sizeof(print_buffer),
-                           HOSTFORMAT "[%2d] %s\n",
-                           stacktrace_hostname, getpid(), i - 2, traces[i]);
-            if (ret > 0) {
-                write(fileno(stderr), print_buffer, ret);
-            } else {
-                write(fileno(stderr), unable_to_print_msg, 
-                      strlen(unable_to_print_msg));
-            }
-        }
-    } else {
-        opal_backtrace_print(stderr);
+    snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
+             stacktrace_hostname, getpid());
+    print_buffer[sizeof(print_buffer) - 1] = '\0';
+    ret = opal_backtrace_print(stderr, print_buffer, 2);
+    if (OPAL_SUCCESS != ret) {
+        write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
     }
 
     /* write out the footer information */
@@ -376,7 +349,6 @@
     } else {
         write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
     }
-    fflush(stderr);
 }
 
 #endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
@@ -393,12 +365,12 @@
         int i;
         /* since we have the opportunity, strip off the bottom two
            function calls, which will be this function and
-           opa_backtrace_buffer(). */
+           opal_backtrace_buffer(). */
         for (i = 2; i < traces_size; ++i) {
             opal_output(stream, "%s", traces[i]);
         }
     } else {
-        opal_backtrace_print(stderr);
+        opal_backtrace_print(stderr, NULL, 2);
     }
 }
 
Index: ompi/runtime/ompi_mpi_abort.c
===================================================================
--- ompi/runtime/ompi_mpi_abort.c	(revision 29841)
+++ ompi/runtime/ompi_mpi_abort.c	(working copy)
@@ -87,7 +87,7 @@
             /* This will print an message if it's unable to print the
                backtrace, so we don't need an additional "else" clause
                if opal_backtrace_print() is not supported. */
-            opal_backtrace_print(stderr);
+            opal_backtrace_print(stderr, NULL, 1);
         }
     }
 
Index: orte/mca/oob/tcp/oob_tcp_common.c
===================================================================
--- orte/mca/oob/tcp/oob_tcp_common.c	(revision 29841)
+++ orte/mca/oob/tcp/oob_tcp_common.c	(working copy)
@@ -79,7 +79,7 @@
     int optval;
     optval = 1;
     if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
-        opal_backtrace_print(stderr);
+        opal_backtrace_print(stderr, NULL, 1);
         opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", 
                     __FILE__, __LINE__, 
                     strerror(opal_socket_errno),
#include <stdlib.h>
#include <execinfo.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
  void *p;

  {
    /* This code is not interest of us. Just avoiding hang in backtrace(3)
       which is called in realloc(3) failure message.
       https://sourceware.org/bugzilla/show_bug.cgi?id=956
       https://sourceware.org/bugzilla/show_bug.cgi?id=16159
     */
    void *b;
    backtrace(&b, 0);
  }

  MPI_Init(NULL, NULL);

  p = malloc(100);
  *((size_t *)p - 1) = 0x10;  /* simulate buffer overrun */
  p = realloc(p, 100);

  MPI_Finalize();

  return 0;
}
Thread 2 (Thread 0x7f2a74605700 (LWP 16294)):
#0  0x00007f2a7585ea43 in __GI___poll (fds=<optimized out>, 
#1  0x00007f2a7529fa3a in poll_dispatch ()
#2  0x00007f2a75294336 in opal_libevent2021_event_base_loop ()
#3  0x00007f2a7551711e in orte_progress_thread_engine ()
#4  0x00007f2a75b3de9a in start_thread (arg=0x7f2a74605700)
#5  0x00007f2a7586a3fd in clone ()
#6  0x0000000000000000 in ?? ()
Thread 1 (Thread 0x7f2a76240700 (LWP 16293)):
#0  __lll_lock_wait_private ()
#1  0x00007f2a757fb231 in _L_lock_10574 () at malloc.c:5241
#2  0x00007f2a757f8f87 in __GI___libc_malloc (bytes=139820340016928)
#3  0x00007f2a7588131a in __backtrace_symbols (array=0x7fff4a14a970, size=12)
#4  0x00007f2a7528e165 in opal_backtrace_buffer ()
#5  0x00007f2a7528b8e6 in show_stackframe ()
#6  <signal handler called>
#7  0x00007f2a757ac425 in __GI_raise (sig=<optimized out>)
#8  0x00007f2a757afb8b in __GI_abort () at abort.c:91
#9  0x00007f2a757ea39e in __libc_message (do_abort=2, 
#10 0x00007f2a757f4b96 in malloc_printerr (action=3, 
#11 0x00007f2a757f7ee7 in _int_realloc (av=0x7f2a75b2f720, oldp=0x172d050, 
#12 0x00007f2a757f96fe in __GI___libc_realloc (oldmem=0x172d060, bytes=100)
#13 0x00000000004007b1 in main (argc=1, argv=0x7fff4a14bfb8) at mpisigabrt.c:23

Reply via email to