Hi Gilles,

> From the JVM logs, there is an alignment error in native_get_attr,
> but I could not find it by reading the source code.
> 
> Could you please do
> ulimit -c unlimited
> mpiexec ...
> and then
> gdb <your path to java>/bin/java core
> And run bt on all threads until you get a line number in native_get_attr
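
(For the archives: those steps correspond to the following commands; the
java path is the one from my installation, and "thread apply all bt" runs
bt on every thread in one step.)

ulimit -c unlimited
mpiexec -np 2 java BcastIntMain
gdb /usr/local/jdk1.8.0/bin/java core
(gdb) thread apply all bt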

I found pmix_native.c:1131 in native_get_attr. Rather than going through
a core file, I attached gdb to the running Java process and set a
breakpoint at that line. From there I single-stepped until I got the
SIGSEGV, so you can see what happened.
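
For completeness: the JNI library spins in a small wait loop so that a
debugger can be attached before MPI_Init proceeds; clearing the flag with
"set _dbg=0" (as below) lets the thread continue. A simplified sketch of
that pattern (the function name is mine, this is not the exact Open MPI
code):

#include <poll.h>

static volatile int _dbg = 1;   /* cleared from gdb with "set _dbg=0" */

static void wait_for_debugger(void)
{
    /* poll() with no file descriptors just sleeps 1 ms per iteration,
     * so the wait does not burn a full CPU. */
    while (_dbg)
        poll(NULL, 0, 1);
}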


(gdb) b pmix_native.c:1131
No source file named pmix_native.c.
Make breakpoint pending on future shared library load? (y or [n]) y

Breakpoint 1 (pmix_native.c:1131) pending.
(gdb) thread 14
[Switching to thread 14 (Thread 2 (LWP 2))]
#0  0xffffffff7eadc6b0 in __pollsys () from /lib/sparcv9/libc.so.1
(gdb) f 3
#3  0xfffffffee5122230 in JNI_OnLoad (vm=0xffffffff7e57e9d8 <main_vm>, 
    reserved=0x0)
    at ../../../../../openmpi-dev-178-ga16c1e4/ompi/mpi/java/c/mpi_MPI.c:128
128             while (_dbg) poll(NULL, 0, 1);
(gdb) set _dbg=0
(gdb) c
Continuing.
[New LWP    13        ]

Breakpoint 1, native_get_attr (attr=0xfffffffee2e05db0 "pmix.jobid", 
    kv=0xffffffff7b4ff028)
    at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131
1131            OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
(gdb) s
opal_proc_local_get () at ../../../openmpi-dev-178-ga16c1e4/opal/util/proc.c:80
80          return opal_proc_my_name;
(gdb) 
81      }
(gdb) 
_process_name_print_for_opal (procname=14259803799433510912)
    at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:64
64          orte_process_name_t* rte_name = (orte_process_name_t*)&procname;
(gdb) 
65          return ORTE_NAME_PRINT(rte_name);
(gdb) 
orte_util_print_name_args (name=0xffffffff7b4feb90)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:122
122         if (NULL == name) {
(gdb) 
142         job = orte_util_print_jobids(name->jobid);
(gdb) 
orte_util_print_jobids (job=3320119297)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:170
170         ptr = get_print_name_buffer();
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92
92          if (!fns_init) {
(gdb) 
101         ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr);
(gdb) 
opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe8a0)
    at ../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163
163         *valuep = pthread_getspecific(key);
(gdb) 
164         return OPAL_SUCCESS;
(gdb) 
165     }
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102
102         if (OPAL_SUCCESS != ret) return NULL;
(gdb) 
104         if (NULL == ptr) {
(gdb) 
113         return (orte_print_args_buffers_t*) ptr;
(gdb) 
114     }
(gdb) 
orte_util_print_jobids (job=3320119297)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:172
172         if (NULL == ptr) {
(gdb) 
178         if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
(gdb) 
179             ptr->cntr = 0;
(gdb) 
182         if (ORTE_JOBID_INVALID == job) {
(gdb) 
184         } else if (ORTE_JOBID_WILDCARD == job) {
(gdb) 
187             tmp1 = ORTE_JOB_FAMILY((unsigned long)job);
(gdb) 
188             tmp2 = ORTE_LOCAL_JOBID((unsigned long)job);
(gdb) 
189             snprintf(ptr->buffers[ptr->cntr++], 
(gdb) 
193         return ptr->buffers[ptr->cntr-1];
(gdb) 
194     }
(gdb) 
orte_util_print_name_args (name=0xffffffff7b4feb90)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:143
143         vpid = orte_util_print_vpids(name->vpid);
(gdb) 
orte_util_print_vpids (vpid=0)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:260
260         ptr = get_print_name_buffer();
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92
92          if (!fns_init) {
(gdb) 
101         ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr);
(gdb) 
opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe8b0)
    at ../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163
163         *valuep = pthread_getspecific(key);
(gdb) 
164         return OPAL_SUCCESS;
(gdb) 
165     }
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102
102         if (OPAL_SUCCESS != ret) return NULL;
(gdb) 
104         if (NULL == ptr) {
(gdb) 
113         return (orte_print_args_buffers_t*) ptr;
(gdb) 
114     }
(gdb) 
orte_util_print_vpids (vpid=0)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:262
262         if (NULL == ptr) {
(gdb) 
268         if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
(gdb) 
272         if (ORTE_VPID_INVALID == vpid) {
(gdb) 
274         } else if (ORTE_VPID_WILDCARD == vpid) {
(gdb) 
277             snprintf(ptr->buffers[ptr->cntr++], 
(gdb) 
281         return ptr->buffers[ptr->cntr-1];
(gdb) 
282     }
(gdb) 
orte_util_print_name_args (name=0xffffffff7b4feb90)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:146
146         ptr = get_print_name_buffer();
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92
92          if (!fns_init) {
(gdb) 
101         ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr);
(gdb) 
opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe970)
    at ../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163
163         *valuep = pthread_getspecific(key);
(gdb) 
164         return OPAL_SUCCESS;
(gdb) 
165     }
(gdb) 
get_print_name_buffer ()
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102
102         if (OPAL_SUCCESS != ret) return NULL;
(gdb) 
104         if (NULL == ptr) {
(gdb) 
113         return (orte_print_args_buffers_t*) ptr;
(gdb) 
114     }
(gdb) 
orte_util_print_name_args (name=0xffffffff7b4feb90)
    at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:148
148         if (NULL == ptr) {
(gdb) 
154         if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
(gdb) 
158         snprintf(ptr->buffers[ptr->cntr++], 
(gdb) 
162         return ptr->buffers[ptr->cntr-1];
(gdb) 
163     }
(gdb) 
_process_name_print_for_opal (procname=14259803799433510912)
    at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:66
66      }
(gdb) 

Program received signal SIGSEGV, Segmentation fault.
0xfffffffee3210bfc in native_get_attr (attr=0xfffffffee2e05db0 "pmix.jobid", 
    kv=0xffffffff7b4ff028)
    at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131
1131            OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
(gdb) bt
#0  0xfffffffee3210bfc in native_get_attr (
    attr=0xfffffffee2e05db0 "pmix.jobid", kv=0xffffffff7b4ff028)
    at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131
#1  0xfffffffee2e033e4 in rte_init ()
    at ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/pmi/ess_pmi_module.c:170
#2  0xfffffffee4a340c0 in orte_init (pargc=0x0, pargv=0x0, flags=32)
    at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:239
#3  0xfffffffee4d9a164 in ompi_mpi_init (argc=0, argv=0x1003f5850, 
    requested=0, provided=0xffffffff7b4ff44c)
    at ../../openmpi-dev-178-ga16c1e4/ompi/runtime/ompi_mpi_init.c:480
#4  0xfffffffee4dfbb30 in PMPI_Init (argc=0xffffffff7b4ff554, 
    argv=0xffffffff7b4ff548) at pinit.c:84
#5  0xfffffffee5122f6c in Java_mpi_MPI_Init_1jni (env=0x10010e9e0, 
    clazz=0xffffffff7b4ff760, argv=0xffffffff7b4ff858)
    at ../../../../../openmpi-dev-178-ga16c1e4/ompi/mpi/java/c/mpi_MPI.c:271
#6  0xffffffff6b810738 in ?? ()
#7  0xffffffff6b810738 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
(gdb) 
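
In case it helps with the analysis: on SPARC, loading a 64-bit value
through a pointer that is not 8-byte aligned raises SIGBUS ("invalid
address alignment"), while x86 silently tolerates such loads. A minimal
illustration of the failure mode and the usual memcpy workaround
(hypothetical code for illustration, not the pmix_native.c source):

#include <stdint.h>
#include <string.h>

/* Reading a uint64_t from an arbitrary offset in a byte buffer, as can
 * happen when a packed message is walked field by field. */
uint64_t read_u64_bad(const char *buf, size_t off)
{
    /* Traps with SIGBUS on SPARC if (buf + off) is not 8-byte aligned;
     * works (merely slower) on x86. */
    return *(const uint64_t *)(buf + off);
}

uint64_t read_u64_ok(const char *buf, size_t off)
{
    /* memcpy lets the compiler emit alignment-safe loads. */
    uint64_t v;
    memcpy(&v, buf + off, sizeof(v));
    return v;
}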



Hopefully the above output is helpful. Please let me know if you
need anything else.

Kind regards

Siegmar



> Siegmar Gross <siegmar.gr...@informatik.hs-fulda.de> wrote:
> >Hi,
> >
> >today I installed openmpi-dev-178-ga16c1e4 on Solaris 10 Sparc
> >with gcc-4.9.1 and Java 8. Now a very simple Java program works
> >as expected, but other Java programs still break. I removed the
> >warnings about "shmem.jar" and used the following configure
> >command.
> >
> >tyr openmpi-dev-178-ga16c1e4-SunOS.sparc.64_gcc 406 head config.log \
> >  | grep openmpi
> >$ ../openmpi-dev-178-ga16c1e4/configure
> >  --prefix=/usr/local/openmpi-1.9.0_64_gcc
> >  --libdir=/usr/local/openmpi-1.9.0_64_gcc/lib64
> >  --with-jdk-bindir=/usr/local/jdk1.8.0/bin
> >  --with-jdk-headers=/usr/local/jdk1.8.0/include
> >  JAVA_HOME=/usr/local/jdk1.8.0
> >  LDFLAGS=-m64 CC=gcc CXX=g++ FC=gfortran CFLAGS=-m64 -D_REENTRANT
> >  CXXFLAGS=-m64 FCFLAGS=-m64 CPP=cpp CXXCPP=cpp
> >  CPPFLAGS= -D_REENTRANT CXXCPPFLAGS=
> >  --enable-mpi-cxx --enable-cxx-exceptions --enable-mpi-java
> >  --enable-mpi-thread-multiple --with-threads=posix
> >  --with-hwloc=internal
> >  --without-verbs --with-wrapper-cflags=-std=c11 -m64
> >  --with-wrapper-cxxflags=-m64 --enable-debug
> >
> >
> >tyr java 290 ompi_info | grep -e "Open MPI repo revision:" -e "C compiler version:"
> >  Open MPI repo revision: dev-178-ga16c1e4
> >      C compiler version: 4.9.1
> >
> >
> >
> >> > regarding the BUS error reported by Siegmar, I also committed
> >> > 62bde1fcb554079143030bb305512c236672386f
> >> > in order to fix it (this is based on code review only; I have no sparc64
> >> > hardware to test whether it is enough)
> >> 
> >> I'll test it, when a new nightly snapshot is available for the trunk.
> >
> >
> >tyr java 291 mpijavac InitFinalizeMain.java 
> >tyr java 292 mpiexec -np 1 java InitFinalizeMain
> >Hello!
> >
> >tyr java 293 mpijavac BcastIntMain.java 
> >tyr java 294 mpiexec -np 2 java BcastIntMain
> >#
> ># A fatal error has been detected by the Java Runtime Environment:
> >#
> >#  SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24792, tid=2
> >...
> >
> >
> >
> >tyr java 296 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec
> >...
> >(gdb) run -np 2 java BcastIntMain
> >Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 2 java BcastIntMain
> >[Thread debugging using libthread_db enabled]
> >[New Thread 1 (LWP 1)]
> >[New LWP    2        ]
> >#
> ># A fatal error has been detected by the Java Runtime Environment:
> >#
> >#  SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24814, tid=2
> >#
> ># JRE version: Java(TM) SE Runtime Environment (8.0-b132) (build 1.8.0-b132)
> ># Java VM: Java HotSpot(TM) 64-Bit Server VM (25.0-b70 mixed mode solaris-sparc compressed oops)
> ># Problematic frame:
> ># C  [mca_pmix_native.so+0x10bfc]  native_get_attr+0x3000
> >#
> ># Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
> >#
> ># An error report file with more information is saved as:
> ># /home/fd1026/work/skripte/master/parallel/prog/mpi/java/hs_err_pid24814.log
> >#
> ># A fatal error has been detected by the Java Runtime Environment:
> >#
> >#  SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24812, tid=2
> >#
> ># JRE version: Java(TM) SE Runtime Environment (8.0-b132) (build 1.8.0-b132)
> ># Java VM: Java HotSpot(TM) 64-Bit Server VM (25.0-b70 mixed mode solaris-sparc compressed oops)
> ># Problematic frame:
> ># C  [mca_pmix_native.so+0x10bfc]  native_get_attr+0x3000
> >#
> ># Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
> >#
> ># An error report file with more information is saved as:
> ># /home/fd1026/work/skripte/master/parallel/prog/mpi/java/hs_err_pid24812.log
> >#
> ># If you would like to submit a bug report, please visit:
> >#   http://bugreport.sun.com/bugreport/crash.jsp
> ># The crash happened outside the Java Virtual Machine in native code.
> ># See problematic frame for where to report the bug.
> >#
> >[tyr:24814] *** Process received signal ***
> >[tyr:24814] Signal: Abort (6)
> >[tyr:24814] Signal code:  (-1)
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4
> >/lib/sparcv9/libc.so.1:0xd8b98
> >/lib/sparcv9/libc.so.1:0xcc70c
> >/lib/sparcv9/libc.so.1:0xcc918
> >/lib/sparcv9/libc.so.1:0xdd2d0 [ Signal 6 (ABRT)]
> >/lib/sparcv9/libc.so.1:_thr_sigsetmask+0x1c4
> >/lib/sparcv9/libc.so.1:sigprocmask+0x28
> >/lib/sparcv9/libc.so.1:_sigrelse+0x5c
> >/lib/sparcv9/libc.so.1:abort+0xc0
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xb3cb90
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xd97a04
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:JVM_handle_solaris_signal+0xc0c
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xb44e84
> >/lib/sparcv9/libc.so.1:0xd8b98
> >/lib/sparcv9/libc.so.1:0xcc70c
> >/lib/sparcv9/libc.so.1:0xcc918
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10bfc [ Signal 10 (BUS)]
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so.0.0.0:Java_mpi_MPI_Init_1jni+0x1a0
> >0xffffffff6b810730
> >0xffffffff6b8106d4
> >0xffffffff6b8078a8
> >0xffffffff6b8078a8
> >0xffffffff6b80024c
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0x6fd4e8
> >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0x79331c
> >/export2/prog/SunOS_sparc/jdk1.8.0/lib/sparcv9/jli/libjli.so:0x7290
> >/lib/sparcv9/libc.so.1:0xd8a6c
> >[tyr:24814] *** End of error message ***
> >--------------------------------------------------------------------------
> >mpiexec noticed that process rank 1 with PID 0 on node tyr exited on signal 6 (Abort).
> >--------------------------------------------------------------------------
> >[LWP    2         exited]
> >[New Thread 2        ]
> >[Switching to Thread 1 (LWP 1)]
> >sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to satisfy query
> >(gdb) bt
> >#0  0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1
> >#1  0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1
> >#2  0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1
> >#3  0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1
> >#4  0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1
> >#5  0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1
> >#6  0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1
> >#7  0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1
> >#8  0xffffffff7ec87ca0 in vm_close ()
> >   from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0
> >#9  0xffffffff7ec85274 in lt_dlclose ()
> >   from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0
> >#10 0xffffffff7ecaa5dc in ri_destructor (obj=0x100187b70)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:382
> >#11 0xffffffff7eca8fd8 in opal_obj_run_destructors (object=0x100187b70)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/class/opal_object.h:446
> >#12 0xffffffff7eca9eac in mca_base_component_repository_release (
> >    component=0xffffffff7b1236f0 <mca_oob_tcp_component>)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:240
> >#13 0xffffffff7ecac17c in mca_base_component_unload (
> >    component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:47
> >#14 0xffffffff7ecac210 in mca_base_component_close (
> >    component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:60
> >#15 0xffffffff7ecac2e4 in mca_base_components_close (output_id=-1, 
> >    components=0xffffffff7f14bc58 <orte_oob_base_framework+80>, skip=0x0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:86
> >#16 0xffffffff7ecac24c in mca_base_framework_components_close (
> >    framework=0xffffffff7f14bc08 <orte_oob_base_framework>, skip=0x0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:66
> >#17 0xffffffff7efcaf80 in orte_oob_base_close ()
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/mca/oob/base/oob_base_frame.c:112
> >#18 0xffffffff7ecc0d74 in mca_base_framework_close (
> >    framework=0xffffffff7f14bc08 <orte_oob_base_framework>)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_framework.c:187
> >#19 0xffffffff7be07858 in rte_finalize ()
> >    at ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/hnp/ess_hnp_module.c:857
> >#20 0xffffffff7ef338bc in orte_finalize ()
> >    at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_finalize.c:66
> >#21 0x000000010000723c in orterun (argc=5, argv=0xffffffff7fffe0d8)
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/orterun.c:1103
> >#22 0x0000000100003e80 in main (argc=5, argv=0xffffffff7fffe0d8)
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/main.c:13
> >(gdb) 
> >
> >
> >
> >
> >I get the same error for C programs if they use more than
> >MPI_Init and MPI_Finalize.
> >
> >tyr small_prog 301 mpicc init_finalize.c 
> >tyr small_prog 302 mpiexec -np 1 a.out
> >Hello!
> >tyr small_prog 303 mpicc column_int.c 
> >tyr small_prog 306 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec
> >...
> >(gdb) run -np 4 a.out
> >Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 4 a.out
> >[Thread debugging using libthread_db enabled]
> >[New Thread 1 (LWP 1)]
> >[New LWP    2        ]
> >[tyr:24880] *** Process received signal ***
> >[tyr:24880] Signal: Bus Error (10)
> >[tyr:24880] Signal code: Invalid address alignment (1)
> >[tyr:24880] Failing at address: ffffffff7bd1c10c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4
> >/lib/sparcv9/libc.so.1:0xd8b98
> >/lib/sparcv9/libc.so.1:0xcc70c
> >/lib/sparcv9/libc.so.1:0xcc918
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10684 [ Signal 10 (BUS)]
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8
> >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:main+0x20
> >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:_start+0x7c
> >[tyr:24880] *** End of error message ***
> >[tyr:24876] *** Process received signal ***
> >[tyr:24876] Signal: Bus Error (10)
> >[tyr:24876] Signal code: Invalid address alignment (1)
> >[tyr:24876] Failing at address: ffffffff7bd1c10c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4
> >/lib/sparcv9/libc.so.1:0xd8b98
> >/lib/sparcv9/libc.so.1:0xcc70c
> >/lib/sparcv9/libc.so.1:0xcc918
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10684 [ Signal 10 (BUS)]
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374
> >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8
> >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:main+0x20
> >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:_start+0x7c
> >[tyr:24876] *** End of error message ***
> >--------------------------------------------------------------------------
> >mpiexec noticed that process rank 2 with PID 0 on node tyr exited on signal 10 (Bus Error).
> >--------------------------------------------------------------------------
> >[LWP    2         exited]
> >[New Thread 2        ]
> >[Switching to Thread 1 (LWP 1)]
> >sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to satisfy query
> >(gdb) bt
> >#0  0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1
> >#1  0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1
> >#2  0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1
> >#3  0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1
> >#4  0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1
> >#5  0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1
> >#6  0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1
> >#7  0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1
> >#8  0xffffffff7ec87ca0 in vm_close ()
> >   from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0
> >#9  0xffffffff7ec85274 in lt_dlclose ()
> >   from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0
> >#10 0xffffffff7ecaa5dc in ri_destructor (obj=0x100187ae0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:382
> >#11 0xffffffff7eca8fd8 in opal_obj_run_destructors (object=0x100187ae0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/class/opal_object.h:446
> >#12 0xffffffff7eca9eac in mca_base_component_repository_release (
> >    component=0xffffffff7b0236f0 <mca_oob_tcp_component>)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:240
> >#13 0xffffffff7ecac17c in mca_base_component_unload (
> >    component=0xffffffff7b0236f0 <mca_oob_tcp_component>, output_id=-1)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:47
> >#14 0xffffffff7ecac210 in mca_base_component_close (
> >    component=0xffffffff7b0236f0 <mca_oob_tcp_component>, output_id=-1)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:60
> >#15 0xffffffff7ecac2e4 in mca_base_components_close (output_id=-1, 
> >    components=0xffffffff7f14bc58 <orte_oob_base_framework+80>, skip=0x0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:86
> >#16 0xffffffff7ecac24c in mca_base_framework_components_close (
> >    framework=0xffffffff7f14bc08 <orte_oob_base_framework>, skip=0x0)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:66
> >#17 0xffffffff7efcaf80 in orte_oob_base_close ()
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/mca/oob/base/oob_base_frame.c:112
> >#18 0xffffffff7ecc0d74 in mca_base_framework_close (
> >    framework=0xffffffff7f14bc08 <orte_oob_base_framework>)
> >    at ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_framework.c:187
> >#19 0xffffffff7bd07858 in rte_finalize ()
> >    at ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/hnp/ess_hnp_module.c:857
> >#20 0xffffffff7ef338bc in orte_finalize ()
> >    at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_finalize.c:66
> >#21 0x000000010000723c in orterun (argc=4, argv=0xffffffff7fffe0e8)
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/orterun.c:1103
> >#22 0x0000000100003e80 in main (argc=4, argv=0xffffffff7fffe0e8)
> >    at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/main.c:13
> >(gdb) 
> >
> >
> >
> >Do you need any other information?
> >
> >
> >Kind regards
> >
> >Siegmar
> 
