It looks like we hit a similar issue here: opal_process_name_t is 64-bit aligned, whereas orte_process_name_t is only 32-bit aligned. If you run on an alignment-sensitive CPU such as SPARC and you are not lucky (so to speak), you can run into this issue. I will make a patch for this shortly.
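
For what it is worth, here is a minimal stand-alone sketch of the kind of access I mean. The type and field names below are made up for illustration and are *not* the real Open MPI definitions (and it deliberately ignores strict-aliasing rules): a 64-bit store through a pointer that is only guaranteed 4-byte alignment passes silently on x86 but traps with SIGBUS ("invalid address alignment") on SPARC, which matches the failure inside opal_proc_set_name() in your backtrace below.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t fake_opal_name_t;          /* requires 8-byte alignment            */

typedef struct {
    uint32_t jobid;                         /* the struct only requires 4-byte      */
    uint32_t vpid;                          /* alignment, even though it is 8 bytes */
} fake_orte_name_t;

int main(void)
{
    /* Force an address that is 4-byte but not 8-byte aligned. */
    _Alignas(8) uint32_t buf[4];
    fake_orte_name_t *orte = (fake_orte_name_t *)&buf[1];

    orte->jobid = 1;                        /* fine everywhere: 32-bit accesses     */
    orte->vpid  = 2;

    /* The problematic pattern: a 64-bit store through a pointer that is only
     * 4-byte aligned.  x86 tolerates it; SPARC raises SIGBUS. */
    *(fake_opal_name_t *)orte = UINT64_C(0x0000000100000002);

    printf("no trap on this CPU: %p\n", (void *)orte);
    return 0;
}

The fix will be to make sure the orte name is never written through a pointer type with stricter alignment than the storage actually has.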
Ralph Castain <r...@open-mpi.org> wrote: >Afraid this must be something about the Sparc - just ran on a Solaris 11 x86 >box and everything works fine. > > >> On Oct 26, 2014, at 8:22 AM, Siegmar Gross >> <siegmar.gr...@informatik.hs-fulda.de> wrote: >> >> Hi Gilles, >> >> I wanted to explore which function is called, when I call MPI_Init >> in a C program, because this function should be called from a Java >> program as well. Unfortunately C programs break with a Bus Error >> once more for openmpi-dev-124-g91e9686 on Solaris. I assume that's >> the reason why I get no useful backtrace for my Java program. >> >> tyr small_prog 117 mpicc -o init_finalize init_finalize.c >> tyr small_prog 118 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec >> ... >> (gdb) run -np 1 init_finalize >> Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 1 >> init_finalize >> [Thread debugging using libthread_db enabled] >> [New Thread 1 (LWP 1)] >> [New LWP 2 ] >> [tyr:19240] *** Process received signal *** >> [tyr:19240] Signal: Bus Error (10) >> [tyr:19240] Signal code: Invalid address alignment (1) >> [tyr:19240] Failing at address: ffffffff7bd1c10c >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdcc04 >> /lib/sparcv9/libc.so.1:0xd8b98 >> /lib/sparcv9/libc.so.1:0xcc70c >> /lib/sparcv9/libc.so.1:0xcc918 >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_proc_set_name+0x1c >> [ Signal 10 (BUS)] >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x103e8 >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374 >> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8 >> /home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/init_finalize:main+0x20 >> /home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/init_finalize:_start+0x7c >> [tyr:19240] *** End of error message *** >> -------------------------------------------------------------------------- >> mpiexec noticed that process rank 0 with PID 0 on node tyr exited on signal >> 10 (Bus Error). 
>> -------------------------------------------------------------------------- >> [LWP 2 exited] >> [New Thread 2 ] >> [Switching to Thread 1 (LWP 1)] >> sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to >> satisfy query >> (gdb) bt >> #0 0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1 >> #1 0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1 >> #2 0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1 >> #3 0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1 >> #4 0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1 >> #5 0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1 >> #6 0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1 >> #7 0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1 >> #8 0xffffffff7ec87f60 in vm_close (loader_data=0x0, >> module=0xffffffff7c901fe0) >> at ../../../openmpi-dev-124-g91e9686/opal/libltdl/loaders/dlopen.c:212 >> #9 0xffffffff7ec85534 in lt_dlclose (handle=0x100189b50) >> at ../../../openmpi-dev-124-g91e9686/opal/libltdl/ltdl.c:1982 >> #10 0xffffffff7ecaabd4 in ri_destructor (obj=0x1001893a0) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_component_repository.c:382 >> #11 0xffffffff7eca9504 in opal_obj_run_destructors (object=0x1001893a0) >> at ../../../../openmpi-dev-124-g91e9686/opal/class/opal_object.h:446 >> #12 0xffffffff7ecaa474 in mca_base_component_repository_release ( >> component=0xffffffff7b1236f0 <mca_oob_tcp_component>) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_component_repository.c:240 >> #13 0xffffffff7ecac774 in mca_base_component_unload ( >> component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_components_close.c:47 >> #14 0xffffffff7ecac808 in mca_base_component_close ( >> component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_components_close.c:60 >> #15 0xffffffff7ecac8dc in mca_base_components_close (output_id=-1, >> components=0xffffffff7f14ba58 <orte_oob_base_framework+80>, skip=0x0) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_components_close.c:86 >> #16 0xffffffff7ecac844 in mca_base_framework_components_close ( >> framework=0xffffffff7f14ba08 <orte_oob_base_framework>, skip=0x0) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_components_close.c:66 >> #17 0xffffffff7efcaf58 in orte_oob_base_close () >> at >> ../../../../openmpi-dev-124-g91e9686/orte/mca/oob/base/oob_base_frame.c:112 >> #18 0xffffffff7ecc136c in mca_base_framework_close ( >> framework=0xffffffff7f14ba08 <orte_oob_base_framework>) >> at >> ../../../../openmpi-dev-124-g91e9686/opal/mca/base/mca_base_framework.c:187 >> #19 0xffffffff7be07858 in rte_finalize () >> at >> ../../../../../openmpi-dev-124-g91e9686/orte/mca/ess/hnp/ess_hnp_module.c:857 >> #20 0xffffffff7ef338a4 in orte_finalize () >> at ../../openmpi-dev-124-g91e9686/orte/runtime/orte_finalize.c:66 >> #21 0x000000010000723c in orterun (argc=4, argv=0xffffffff7fffe0b8) >> at ../../../../openmpi-dev-124-g91e9686/orte/tools/orterun/orterun.c:1103 >> #22 0x0000000100003e80 in main (argc=4, argv=0xffffffff7fffe0b8) >> at ../../../../openmpi-dev-124-g91e9686/orte/tools/orterun/main.c:13 >> (gdb) >> >> Kind regards >> >> Siegmar >> >> >> >>> thank you very much for the quick tutorial. 
Unfortunately I still >>> can't get a backtrace. >>> >>>> You might need to configure with --enable-debug and add -g -O0 >>>> to your CFLAGS and LDFLAGS >>>> >>>> Then once you attach with gdb, you have to find the thread that is polling >>>> : >>>> thread 1 >>>> bt >>>> thread 2 >>>> bt >>>> and so on until you find the good thread >>>> If _dbg is a local variable, you need to select the right frame >>>> before you can change the value : >>>> get the frame number from bt (generally 1 under linux) >>>> f <frame number> >>>> set _dbg=0 >>>> >>>> I hope this helps >>> >>> "--enable-debug" is one of my default options. Now I used the >>> following command to configure Open MPI. I always start the >>> build process in an empty directory and I always remove >>> /usr/local/openmpi-1.9.0_64_gcc, before I install a new version. >>> >>> tyr openmpi-dev-124-g91e9686-SunOS.sparc.64_gcc 112 head config.log \ >>> | grep openmpi >>> $ ../openmpi-dev-124-g91e9686/configure >>> --prefix=/usr/local/openmpi-1.9.0_64_gcc >>> --libdir=/usr/local/openmpi-1.9.0_64_gcc/lib64 >>> --with-jdk-bindir=/usr/local/jdk1.8.0/bin >>> --with-jdk-headers=/usr/local/jdk1.8.0/include >>> JAVA_HOME=/usr/local/jdk1.8.0 >>> LDFLAGS=-m64 -g -O0 CC=gcc CXX=g++ FC=gfortran >>> CFLAGS=-m64 -D_REENTRANT -g -O0 >>> CXXFLAGS=-m64 FCFLAGS=-m64 CPP=cpp CXXCPP=cpp >>> CPPFLAGS=-D_REENTRANT CXXCPPFLAGS= >>> --enable-mpi-cxx --enable-cxx-exceptions --enable-mpi-java >>> --enable-heterogeneous --enable-mpi-thread-multiple >>> --with-threads=posix --with-hwloc=internal --without-verbs >>> --with-wrapper-cflags=-std=c11 -m64 --enable-debug >>> tyr openmpi-dev-124-g91e9686-SunOS.sparc.64_gcc 113 >>> >>> >>> "gbd" doesn't allow any backtrace for any thread. >>> >>> tyr java 124 /usr/local/gdb-7.6.1_64_gcc/bin/gdb >>> GNU gdb (GDB) 7.6.1 >>> ... >>> (gdb) attach 18876 >>> Attaching to process 18876 >>> [New process 18876] >>> Retry #1: >>> Retry #2: >>> Retry #3: >>> Retry #4: >>> 0x7eadcb04 in ?? () >>> (gdb) info threads >>> [New LWP 12] >>> [New LWP 11] >>> [New LWP 10] >>> [New LWP 9] >>> [New LWP 8] >>> [New LWP 7] >>> [New LWP 6] >>> [New LWP 5] >>> [New LWP 4] >>> [New LWP 3] >>> [New LWP 2] >>> Id Target Id Frame >>> 12 LWP 2 0x7eadc6b0 in ?? () >>> 11 LWP 3 0x7eadcbb8 in ?? () >>> 10 LWP 4 0x7eadcbb8 in ?? () >>> 9 LWP 5 0x7eadcbb8 in ?? () >>> 8 LWP 6 0x7eadcbb8 in ?? () >>> 7 LWP 7 0x7eadcbb8 in ?? () >>> 6 LWP 8 0x7ead8b0c in ?? () >>> 5 LWP 9 0x7eadcbb8 in ?? () >>> 4 LWP 10 0x7eadcbb8 in ?? () >>> 3 LWP 11 0x7eadcbb8 in ?? () >>> 2 LWP 12 0x7eadcbb8 in ?? () >>> * 1 LWP 1 0x7eadcb04 in ?? () >>> (gdb) thread 1 >>> [Switching to thread 1 (LWP 1)] >>> #0 0x7eadcb04 in ?? () >>> (gdb) bt >>> #0 0x7eadcb04 in ?? () >>> #1 0x7eaca12c in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 2 >>> [Switching to thread 2 (LWP 12)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac2638 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 3 >>> [Switching to thread 3 (LWP 11)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac25a8 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 4 >>> [Switching to thread 4 (LWP 10)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac2638 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) 
>>> (gdb) thread 5 >>> [Switching to thread 5 (LWP 9)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac2638 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 6 >>> [Switching to thread 6 (LWP 8)] >>> #0 0x7ead8b0c in ?? () >>> (gdb) bt >>> #0 0x7ead8b0c in ?? () >>> #1 0x7eacbcb0 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 7 >>> [Switching to thread 7 (LWP 7)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac25a8 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 8 >>> [Switching to thread 8 (LWP 6)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac25a8 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 9 >>> [Switching to thread 9 (LWP 5)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac2638 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 10 >>> [Switching to thread 10 (LWP 4)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac25a8 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 11 >>> [Switching to thread 11 (LWP 3)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) bt >>> #0 0x7eadcbb8 in ?? () >>> #1 0x7eac25a8 in ?? () >>> Backtrace stopped: previous frame identical to this frame (corrupt stack?) >>> (gdb) thread 12 >>> [Switching to thread 12 (LWP 2)] >>> #0 0x7eadc6b0 in ?? () >>> (gdb) >>> >>> >>> >>> I also tried to set _dbg in all available frames without success. >>> >>> (gdb) f 1 >>> #1 0x7eacb46c in ?? () >>> (gdb) set _dbg=0 >>> No symbol table is loaded. Use the "file" command. >>> (gdb) symbol-file /usr/local/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so >>> Reading symbols from >>> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so.0.0.0...done. >>> (gdb) f 1 >>> #1 0x7eacb46c in ?? () >>> (gdb) set _dbg=0 >>> No symbol "_dbg" in current context. >>> (gdb) f 2 >>> #0 0x00000000 in ?? () >>> (gdb) set _dbg=0 >>> No symbol "_dbg" in current context. >>> (gdb) >>> ... >>> >>> >>> With "list" I get source code from mpi_CartComm.c and not from mpi_MPI.c. >>> If a switch threads, "list" continues in the old file. >>> >>> (gdb) thread 1 >>> [Switching to thread 1 (LWP 1)] >>> #0 0x7eadcb04 in ?? () >>> (gdb) list 36 >>> 31 distributed under the License is distributed on an "AS IS" >>> BASIS, >>> 32 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or >>> implied. >>> 33 See the License for the specific language governing permissions >>> and >>> 34 limitations under the License. >>> 35 */ >>> 36 /* >>> 37 * File : mpi_CartComm.c >>> 38 * Headerfile : mpi_CartComm.h >>> 39 * Author : Sung-Hoon Ko, Xinying Li >>> 40 * Created : Thu Apr 9 12:22:15 1998 >>> (gdb) thread 2 >>> [Switching to thread 2 (LWP 12)] >>> #0 0x7eadcbb8 in ?? () >>> (gdb) list >>> 41 * Revision : $Revision: 1.6 $ >>> 42 * Updated : $Date: 2003/01/16 16:39:34 $ >>> 43 * Copyright: Northeast Parallel Architectures Center >>> 44 * at Syracuse University 1998 >>> 45 */ >>> 46 #include "ompi_config.h" >>> 47 >>> 48 #include <stdlib.h> >>> 49 #ifdef HAVE_TARGETCONDITIONALS_H >>> 50 #include <TargetConditionals.h> >>> (gdb) >>> >>> >>> Do you have any ideas, what's going wrong or if I must use a different >>> symbol table? 
>>> >>> >>> Kind regards >>> >>> Siegmar >>> >>> >>> >>> >>>> >>>> Gilles >>>> >>>> >>>> Siegmar Gross <siegmar.gr...@informatik.hs-fulda.de> wrote: >>>>> Hi Gilles, >>>>> >>>>> I changed _dbg to a static variable, so that it is visible in the >>>>> library, but unfortunately still not in the symbol table. >>>>> >>>>> >>>>> tyr java 419 nm /usr/local/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so | >>>>> grep -i _dbg >>>>> [271] | 1249644| 4|OBJT |LOCL |0 |18 |_dbg.14258 >>>>> tyr java 420 /usr/local/gdb-7.6.1_64_gcc/bin/gdb >>>>> GNU gdb (GDB) 7.6.1 >>>>> ... >>>>> (gdb) attach 13019 >>>>> Attaching to process 13019 >>>>> [New process 13019] >>>>> Retry #1: >>>>> Retry #2: >>>>> Retry #3: >>>>> Retry #4: >>>>> 0x7eadcb04 in ?? () >>>>> (gdb) symbol-file /usr/local/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so >>>>> Reading symbols from >>> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so.0.0.0...done. >>>>> (gdb) set var _dbg.14258=0 >>>>> No symbol "_dbg" in current context. >>>>> (gdb) >>>>> >>>>> >>>>> Kind regards >>>>> >>>>> Siegmar >>>>> >>>>> >>>>> >>>>> >>>>>> unfortunately I didn't get anything useful. It's probably my fault, >>>>>> because I'm still not very familiar with gdb or any other debugger. >>>>>> I did the following things. >>>>>> >>>>>> >>>>>> 1st window: >>>>>> ----------- >>>>>> >>>>>> tyr java 174 setenv OMPI_ATTACH 1 >>>>>> tyr java 175 mpijavac InitFinalizeMain.java >>>>>> warning: [path] bad path element >>>>>> "/usr/local/openmpi-1.9.0_64_gcc/lib64/shmem.jar": >>>>>> no such file or directory >>>>>> 1 warning >>>>>> tyr java 176 mpiexec -np 1 java InitFinalizeMain >>>>>> >>>>>> >>>>>> >>>>>> 2nd window: >>>>>> ----------- >>>>>> >>>>>> tyr java 379 ps -aef | grep java >>>>>> noaccess 1345 1 0 May 22 ? 113:23 /usr/java/bin/java >>>>>> -server -Xmx128m >>> -XX:+UseParallelGC >>>>> -XX:ParallelGCThreads=4 >>>>>> fd1026 3661 10753 0 14:09:12 pts/14 0:00 mpiexec -np 1 java >>>>>> InitFinalizeMain >>>>>> fd1026 3677 13371 0 14:16:55 pts/2 0:00 grep java >>>>>> fd1026 3663 3661 0 14:09:12 pts/14 0:01 java -cp >>>>> /home/fd1026/work/skripte/master/parallel/prog/mpi/java:/usr/local/jun >>>>>> tyr java 380 /usr/local/gdb-7.6.1_64_gcc/bin/gdb >>>>>> GNU gdb (GDB) 7.6.1 >>>>>> ... >>>>>> (gdb) attach 3663 >>>>>> Attaching to process 3663 >>>>>> [New process 3663] >>>>>> Retry #1: >>>>>> Retry #2: >>>>>> Retry #3: >>>>>> Retry #4: >>>>>> 0x7eadcb04 in ?? () >>>>>> (gdb) symbol-file /usr/local/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so >>>>>> Reading symbols from >>> /export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so.0.0.0...done. >>>>>> (gdb) set var _dbg=0 >>>>>> No symbol "_dbg" in current context. >>>>>> (gdb) set var JNI_OnLoad::_dbg=0 >>>>>> No symbol "_dbg" in specified context. >>>>>> (gdb) set JNI_OnLoad::_dbg=0 >>>>>> No symbol "_dbg" in specified context. >>>>>> (gdb) info threads >>>>>> [New LWP 12] >>>>>> [New LWP 11] >>>>>> [New LWP 10] >>>>>> [New LWP 9] >>>>>> [New LWP 8] >>>>>> [New LWP 7] >>>>>> [New LWP 6] >>>>>> [New LWP 5] >>>>>> [New LWP 4] >>>>>> [New LWP 3] >>>>>> [New LWP 2] >>>>>> Id Target Id Frame >>>>>> 12 LWP 2 0x7eadc6b0 in ?? () >>>>>> 11 LWP 3 0x7eadcbb8 in ?? () >>>>>> 10 LWP 4 0x7eadcbb8 in ?? () >>>>>> 9 LWP 5 0x7eadcbb8 in ?? () >>>>>> 8 LWP 6 0x7eadcbb8 in ?? () >>>>>> 7 LWP 7 0x7eadcbb8 in ?? () >>>>>> 6 LWP 8 0x7ead8b0c in ?? () >>>>>> 5 LWP 9 0x7eadcbb8 in ?? () >>>>>> 4 LWP 10 0x7eadcbb8 in ?? () >>>>>> 3 LWP 11 0x7eadcbb8 in ?? () >>>>>> 2 LWP 12 0x7eadcbb8 in ?? () >>>>>> * 1 LWP 1 0x7eadcb04 in ?? 
() >>>>>> (gdb) >>>>>> >>>>>> >>>>>> >>>>>> It seems that "_dbg" is unknown and unavailable. >>>>>> >>>>>> tyr java 399 grep _dbg >>>>>> /export2/src/openmpi-1.9/openmpi-dev-124-g91e9686/ompi/mpi/java/c/* >>>>>> /export2/src/openmpi-1.9/openmpi-dev-124-g91e9686/ompi/mpi/java/c/mpi_MPI.c: >>>>>> volatile >>> int _dbg = 1; >>>>>> /export2/src/openmpi-1.9/openmpi-dev-124-g91e9686/ompi/mpi/java/c/mpi_MPI.c: >>>>>> while >>> (_dbg) poll(NULL, 0, 1); >>>>>> tyr java 400 nm /usr/local/openmpi-1.9.0_64_gcc/lib64/*.so | grep -i _dbg >>>>>> tyr java 401 nm /usr/local/openmpi-1.9.0_64_gcc/lib64/*.so | grep -i >>>>>> JNI_OnLoad >>>>>> [1057] | 139688| 444|FUNC |GLOB |0 |11 >>>>>> |JNI_OnLoad >>>>>> tyr java 402 >>>>>> >>>>>> >>>>>> >>>>>> How can I set _dbg to zero to continue mpiexec? I also tried to >>>>>> set a breakpoint for function JNI_OnLoad, but it seems, that the >>>>>> function isn't called before SIGSEGV. >>>>>> >>>>>> >>>>>> tyr java 177 unsetenv OMPI_ATTACH >>>>>> tyr java 178 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec >>>>>> GNU gdb (GDB) 7.6.1 >>>>>> ... >>>>>> (gdb) b mpi_MPI.c:JNI_OnLoad >>>>>> No source file named mpi_MPI.c. >>>>>> Make breakpoint pending on future shared library load? (y or [n]) y >>>>>> >>>>>> Breakpoint 1 (mpi_MPI.c:JNI_OnLoad) pending. >>>>>> (gdb) run -np 1 java InitFinalizeMain >>>>>> Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 1 java >>>>>> InitFinalizeMain >>>>>> [Thread debugging using libthread_db enabled] >>>>>> [New Thread 1 (LWP 1)] >>>>>> [New LWP 2 ] >>>>>> # >>>>>> # A fatal error has been detected by the Java Runtime Environment: >>>>>> # >>>>>> # SIGSEGV (0xb) at pc=0xffffffff7ea3c7f0, pid=3518, tid=2 >>>>>> ... >>>>>> >>>>>> >>>>>> >>>>>> tyr java 381 cat InitFinalizeMain.java >>>>>> import mpi.*; >>>>>> >>>>>> public class InitFinalizeMain >>>>>> { >>>>>> public static void main (String args[]) throws MPIException >>>>>> { >>>>>> MPI.Init (args); >>>>>> System.out.print ("Hello!\n"); >>>>>> MPI.Finalize (); >>>>>> } >>>>>> } >>>>>> >>>>>> >>>>>> SIGSEGV happens in MPI.Init(args), because I can print a message >>>>>> before I call the method. >>>>>> >>>>>> tyr java 192 unsetenv OMPI_ATTACH >>>>>> tyr java 193 mpijavac InitFinalizeMain.java >>>>>> tyr java 194 mpiexec -np 1 java InitFinalizeMain >>>>>> Before MPI.Init() >>>>>> # >>>>>> # A fatal error has been detected by the Java Runtime Environment: >>>>>> # >>>>>> # SIGSEGV (0xb) at pc=0xffffffff7ea3c7f0, pid=3697, tid=2 >>>>>> ... >>>>>> >>>>>> >>>>>> >>>>>> Any ideas, how I can continue? I couldn't find a C function for >>>>>> MPI.Init() in a C file. Do you know, which function is called first, >>>>>> so that I can set a breakpoint? By the way, I get the same error >>>>>> for Solaris 10 x86_64. >>>>>> >>>>>> tyr java 388 ssh sunpc1 >>>>>> ... >>>>>> sunpc1 java 106 mpijavac InitFinalizeMain.java >>>>>> sunpc1 java 107 uname -a >>>>>> SunOS sunpc1 5.10 Generic_147441-21 i86pc i386 i86pc Solaris >>>>>> sunpc1 java 108 isainfo -k >>>>>> amd64 >>>>>> sunpc1 java 109 mpiexec -np 1 java InitFinalizeMain >>>>>> # >>>>>> # A fatal error has been detected by the Java Runtime Environment: >>>>>> # >>>>>> # SIGSEGV (0xb) at pc=0xfffffd7fff1d77f0, pid=20256, tid=2 >>>>>> >>>>>> >>>>>> Thank you very much for any help in advance. >>>>>> >>>>>> Kind regards >>>>>> >>>>>> Siegmar >>>>>> >>>>>> >>>>>> >>>>>>> thank you very much for your help. >>>>>>> >>>>>>>> how did you configure openmpi ? which java version did you use ? 
>>>>>>>> >>>>>>>> i just found a regression and you currently have to explicitly add >>>>>>>> CFLAGS=-D_REENTRANT CPPFLAGS=-D_REENTRANT >>>>>>>> to your configure command line >>>>>>> >>>>>>> I added "-D_REENTRANT" to my command. >>>>>>> >>>>>>> ../openmpi-dev-124-g91e9686/configure >>>>>>> --prefix=/usr/local/openmpi-1.9.0_64_gcc \ >>>>>>> --libdir=/usr/local/openmpi-1.9.0_64_gcc/lib64 \ >>>>>>> --with-jdk-bindir=/usr/local/jdk1.8.0/bin \ >>>>>>> --with-jdk-headers=/usr/local/jdk1.8.0/include \ >>>>>>> JAVA_HOME=/usr/local/jdk1.8.0 \ >>>>>>> LDFLAGS="-m64" CC="gcc" CXX="g++" FC="gfortran" \ >>>>>>> CFLAGS="-m64 -D_REENTRANT" CXXFLAGS="-m64" FCFLAGS="-m64" \ >>>>>>> CPP="cpp" CXXCPP="cpp" \ >>>>>>> CPPFLAGS="-D_REENTRANT" CXXCPPFLAGS="" \ >>>>>>> --enable-mpi-cxx \ >>>>>>> --enable-cxx-exceptions \ >>>>>>> --enable-mpi-java \ >>>>>>> --enable-heterogeneous \ >>>>>>> --enable-mpi-thread-multiple \ >>>>>>> --with-threads=posix \ >>>>>>> --with-hwloc=internal \ >>>>>>> --without-verbs \ >>>>>>> --with-wrapper-cflags="-std=c11 -m64" \ >>>>>>> --enable-debug \ >>>>>>> |& tee log.configure.$SYSTEM_ENV.$MACHINE_ENV.64_gcc >>>>>>> >>>>>>> I use Java 8. >>>>>>> >>>>>>> tyr openmpi-1.9 112 java -version >>>>>>> java version "1.8.0" >>>>>>> Java(TM) SE Runtime Environment (build 1.8.0-b132) >>>>>>> Java HotSpot(TM) 64-Bit Server VM (build 25.0-b70, mixed mode) >>>>>>> tyr openmpi-1.9 113 >>>>>>> >>>>>>> Unfortunately I still get a SIGSEGV with openmpi-dev-124-g91e9686. >>>>>>> I have applied your patch and will try to debug my small Java >>>>>>> program tomorrow or next week and then let you know the result. >>>>>> >>>>>> _______________________________________________ >>>>>> users mailing list >>>>>> us...@open-mpi.org >>>>>> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users >>>>>> Link to this post: >>>>>> http://www.open-mpi.org/community/lists/users/2014/10/25581.php >>>>> >>>>> _______________________________________________ >>>>> users mailing list >>>>> us...@open-mpi.org >>>>> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users >>>>> Link to this post: >>>>> http://www.open-mpi.org/community/lists/users/2014/10/25582.php >>>> >>> >>> _______________________________________________ >>> users mailing list >>> us...@open-mpi.org >>> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users >>> Link to this post: >>> http://www.open-mpi.org/community/lists/users/2014/10/25584.php >> >> _______________________________________________ >> users mailing list >> us...@open-mpi.org >> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users >> Link to this post: >> http://www.open-mpi.org/community/lists/users/2014/10/25585.php > >_______________________________________________ >users mailing list >us...@open-mpi.org >Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users >Link to this post: >http://www.open-mpi.org/community/lists/users/2014/10/25586.php
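
P.S. on the attach-and-wait trick discussed further up in the thread: the pattern quoted from ompi/mpi/java/c/mpi_MPI.c boils down to the sketch below. Everything around the loop (the OMPI_ATTACH guard, the messages) is an illustrative assumption, not the exact upstream code. The two points that matter are that _dbg must be volatile, and that as a *local* variable it lives only in the DWARF debug info (build with -g -O0), not in the shared library's symbol table; that is why nm does not list it until it is made static, and why in gdb you must select the frame where it is in scope before "set var _dbg=0".

#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void wait_for_debugger(void)
{
    if (getenv("OMPI_ATTACH") == NULL) {
        return;
    }
    /* volatile so the compiler cannot optimize the loop away and gdb's
     * "set var _dbg=0" actually takes effect */
    volatile int _dbg = 1;
    fprintf(stderr, "pid %d waiting; attach gdb, select this frame, "
            "then: set var _dbg=0\n", (int)getpid());
    while (_dbg) {
        poll(NULL, 0, 1);   /* sleep ~1 ms per iteration instead of spinning */
    }
}

int main(void)
{
    wait_for_debugger();
    /* ... the code you actually want to debug (e.g. MPI initialization) ... */
    puts("continuing");
    return 0;
}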