Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
Hi Gilles, The correction was to explicitly make gcc and g++ the compiler on the NERSC system. export CC=gcc export CXX=g++ The build now works correctly. Cheers, Henry From: users on behalf of Lovelace III, Henry Sent: Friday, July 27, 2018 2:20:18 PM To: Open MPI Users Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO Hi Gilles, If I understand the instruction correctly by invoking the -showme command to give addition information. I did use the -showme command, however the files that are being compiled are C/C++ files and the header files. cmake_file~~ set (EXENAME ansga2) set (SRC_FILES apisa/nsga2/nsga2.cpp apisa/nsga2/nsga2_functions.cpp apisa/nsga2/nsga2.hpp apisa/nsga2/nsga2_io.cpp ) ~~~ ~SHOWME~~ 10:42 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2.cpp gcc nsga2.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 10:42 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2_functions.cpp gcc nsga2_functions.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 10:43 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2.hpp gcc nsga2.hpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 10:44 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2_io.cpp gcc nsga2_io.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 11:14 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2.cpp gfortran nsga2.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2_functions.cpp gfortran nsga2_functions.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2.hpp gfortran nsga2.hpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2_io.cpp gfortran nsga2_io.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi cmake_file~~ set (EXENAME aspea2) set (SRC_FILES apisa/spea2/spea2.cpp apisa/spea2/spea2_functions.cpp apisa/spea2/spea2.hpp apisa/spea2/spea2_io.cpp ) ~~~ ~SHOWME~~ 10:50 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme spea2.cpp gcc spea2.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 10:50 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme spea2_functions.cpp gcc spea2_functions.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_07
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 10:51 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme spea2_io.cpp gcc spea2_io.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi 11:16 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2.cpp gfortran spea2.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2_functions.cpp gfortran spea2_functions.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2.hpp gfortran spea2.hpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi 11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2_io.cpp 
gfortran spea2_io.cpp -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi Does the -showme show errors? The distributions was updated that is the reason behind the directory change. Locally I have no problem with the build. Thanks, Henry From: users on behalf of gil...@rist.or.jp Sent: Tuesday, July 24, 2018 10:00:51 PM To: Open MPI Users Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO Henry, First, you could/should use mpicc instead of the cc cray compiler I also noted gfortran Linker Flags : -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11 It should be '-lmpi -lX11' instead of '-lmpi;-lX11' The linker flags suggest Open MPI is installed in /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib, but your LD_LIBRARY_PATH suggests it is in $HOME/BMAD/bmad_dist_2018_0717/production/lib (note 0724 vs 0717) Also, keep in mind LD_LIBRARY_PATH is only used at runtime in order to resolve dependencies. The linker does *not* use LD_LIBRARY_PATH. IIRC, it uses LIBRARY_PATH, but the preferred way is to use the -L argument. If your problem persists, I suggest you get the full command line that is failing. (It should invoke mpifort instead gfortran or cc). Then you can copy/paste the mpifort command, add the -showme parameter, and run it manually so we can understand what is really hapenning under the(cmake) hood. 
Cheers, Gilles - Original Message - Hi, I am receiving these errors when building with OpenMPI on the NERSC system. Building directory: util_programs -- The C compiler identification is GNU 7.1.0 -- The CXX compiler identification is GNU 7.1.0 -- Cray Programming Environment 2.5.12 C -- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc -- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc -- works -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Detecting C compile features -- Detecting C compile features - done -- Cray Programming Environment 2.5.12 CXX -- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- works -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Detecting CXX compile features -- Detecting CXX
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
Henry, First, you could/should use mpicc instead of the cc Cray compiler. I also noted gfortran Linker Flags : -pthread -I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11 It should be '-lmpi -lX11' instead of '-lmpi;-lX11' The linker flags suggest Open MPI is installed in /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib, but your LD_LIBRARY_PATH suggests it is in $HOME/BMAD/bmad_dist_2018_0717/production/lib (note 0724 vs 0717) Also, keep in mind LD_LIBRARY_PATH is only used at runtime in order to resolve dependencies. The linker does *not* use LD_LIBRARY_PATH. IIRC, it uses LIBRARY_PATH, but the preferred way is to use the -L argument. If your problem persists, I suggest you get the full command line that is failing. (It should invoke mpifort instead of gfortran or cc). Then you can copy/paste the mpifort command, add the -showme parameter, and run it manually so we can understand what is really happening under the (cmake) hood. Cheers, Gilles - Original Message - Hi, I am receiving these errors when building with OpenMPI on the NERSC system. 
Building directory: util_programs -- The C compiler identification is GNU 7.1.0 -- The CXX compiler identification is GNU 7.1.0 -- Cray Programming Environment 2.5.12 C -- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc -- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc - - works -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Detecting C compile features -- Detecting C compile features - done -- Cray Programming Environment 2.5.12 CXX -- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- works -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Detecting CXX compile features -- Detecting CXX compile features - done -- The Fortran compiler identification is GNU 7.1.0 -- Check for working Fortran compiler: /global/homes/h/hlovelac/BMAD /bmad_dist_2018_0724/production/bin/mpifort -- Check for working Fortran compiler: /global/homes/h/hlovelac/BMAD /bmad_dist_2018_0724/production/bin/mpifort -- works -- Detecting Fortran compiler ABI info -- Detecting Fortran compiler ABI info - done -- Checking whether /global/homes/h/hlovelac/BMAD/bmad_dist_2018_ 0724/production/bin/mpifort supports Fortran 90 -- Checking whether /global/homes/h/hlovelac/BMAD/bmad_dist_2018_ 0724/production/bin/mpifort supports Fortran 90 -- yes Build type : Production Linking with release : /global/homes/h/hlovelac/BMAD/bmad_dist_2018_ 0724 (Off-site Distribution) C Compiler : /opt/cray/pe/craype/2.5.12/bin/cc Fortran Compiler : /global/homes/h/hlovelac/BMAD/bmad_dist_2018_ 0724/production/bin/mpifort Plotting Libraries : pgplot OpenMP Support : Not Enabled MPI Support : Enabled FFLAGS : gfortran Compiler Flags : -Df2cFortran -DCESR_UNIX -DCESR_LINUX -u - traceback -cpp -fno-range-check -fdollar-ok -fbacktrace -Bstatic -ffree- line-length-none -DCESR_PGPLOT -I/global/homes/h/hlovelac/BMAD/bmad_dist 
_2018_0724/production/include -pthread -I/global/homes/h/hlovelac/BMAD/ bmad_dist_2018_0724/production/lib -fPIC -O2 gfortran Linker Flags : -pthread -I/global/homes/h/hlovelac/BMAD/ bmad_dist_2018_0724/production/lib -Wl,-rpath -Wl,/global/homes/h/ hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,--enable-new-dtags -L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -lmpi _usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11 SHARED DEPS : -- Configuring done -- Generating done -- Build files have been written to: /global/homes/h/hlovelac/BMAD/ bmad_dist_2018_0724/util_programs/production Scanning dependencies of target compare_tracking_methods_text-exe Scanning dependencies of target compare_tracking_methods_plot-exe Scanning dependencies of target f77_to_f90-exe Scanning dependencies of target util_programs Scanning dependencies of target lattice_cleaner-exe Scanning dependencies of target bmad_to_gpt-exe Scanning dependencies of target bmad_to_mad_sad_and_xsif-exe Scanning dependencies of target sad_to_bmad_postprocess-exe Scanning dependencies of target aspea2-exe Scanning dependencies of target bmad_to_csrtrack-exe Scanning dependencies of target ansga2-exe Scanning dependencies of target bmad_to_blender-exe Scanning dependencies of target bmad_to_autocad-exe Scanning dependencies of target element_attributes-exe
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
%] Built target bmad_to_csrtrack-exe [ 77%] Linking Fortran executable /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/envelope_ibs [ 77%] Built target sad_to_bmad_postprocess-exe [ 77%] Built target compare_tracking_methods_plot-exe [ 79%] Building Fortran object CMakeFiles/util_programs.dir/modules/custom_dynamic_aperture_mod.f90.o [ 79%] Built target envelope_ibs-exe [ 84%] Building Fortran object CMakeFiles/util_programs.dir/modules/namelist_da.f90.o [ 84%] Building Fortran object CMakeFiles/util_programs.dir/modules/namelist_general.f90.o [ 84%] Building Fortran object CMakeFiles/util_programs.dir/modules/linear_aperture_mod.f90.o [ 86%] Building Fortran object CMakeFiles/util_programs.dir/modules/namelist_moga.f90.o [ 88%] Linking Fortran static library /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib/libutil_programs.a [ 88%] Built target util_programs Makefile:83: recipe for target 'all' failed gmake: *** [all] Error 2 Error in distribution build. The build is not seeing the mpi libraries even though before the build I defined export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HOME/BMAD/bmad_dist_2018_0717/production/lib Is this error from the MPIfort? Thanks, Henry From: users on behalf of Brian Smith Sent: Monday, July 23, 2018 2:06:14 PM To: Open MPI Users Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO On Sat, Jul 21, 2018 at 9:13 PM, Gilles Gouaillardet wrote: > Brian, > > As Ralph already stated, this is likely a hwloc API issue. > From debian9, you can > lstopo --of xml | ssh debian8 lstopo --if xml -i - > > that will likely confirm the API error. > > If you are willing to get a bit more details, you can add some printf > in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to > figure out where exactly the failure occurs. > > Meanwhile, you can move forward by using the embedded hwloc on both > distros (--with-hwloc=internal or no --with-hwloc option at all). 
> > > Note we strongly discourage you configure --with-FOO=/usr > (it explicitly add /usr/include and /usr/lib[64] in the search path, > and might hide some other external libraries installed in a non > standard location). In order to force the external hwloc lib installed > in the default location, --with-hwloc=external is what you need (same > thing applies to libevent and pmix) Thank you for the advice. Removing --with-hwloc from the configure statement corrected the problem. > > > Cheers, > > Gilles > On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org wrote: >> >> More than likely the problem is the difference in hwloc versions - sounds >> like the topology to/from xml is different between the two versions, and the >> older one doesn’t understand the new one. >> >> > On Jul 21, 2018, at 12:04 PM, Brian Smith >> > wrote: >> > >> > Greetings, >> > >> > I'm having trouble getting openmpi 2.1.2 to work when launching a >> > process from debian 8 on a remote debian 9 host. To keep things simple >> > in this example, I'm just launching date on the remote host. >> > >> > deb8host$ mpirun -H deb9host date >> > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file >> > base/plm_base_launch_support.c at line 954 >> > >> > It works fine when executed from debian 9: >> > deb9host$ mpirun -H deb8host date >> > Sat Jul 21 13:40:43 CDT 2018 >> > >> > Also works when executed from debian 8 against debian 8: >> > deb8host:~$ mpirun -H deb8host2 date >> > Sat Jul 21 13:55:57 CDT 2018 >> > >> > The failure results from an error code returned by: >> > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO) >> > >> > openmpi was built with the same configure flags on both hosts. 
>> > >> >--prefix=$(PREFIX) \ >> >--with-verbs \ >> >--with-libfabric \ >> >--disable-silent-rules \ >> >--with-hwloc=/usr \ >> >--with-libltdl=/usr \ >> >--with-devel-headers \ >> >--with-slurm \ >> >--with-sge \ >> >--without-tm \ >> >--disable-heterogeneous \ >> >--with-contrib-vt-flags=--disable-iotrace \ >> >--sysconfdir=$(PREFIX)/etc \ >> >--libdir=$(PREFIX)/lib\ >> >--includedir=$(PREFIX)/include >> > >> > >> > deb9host libhwloc and libhwloc-plugins is 1.11.5-1 >> > deb8host libhwloc and libhwloc-plugins is 1.10.0-3 >> > >> > I've been trying to debug this for the past few days and would &g
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
On Sat, Jul 21, 2018 at 9:13 PM, Gilles Gouaillardet wrote: > Brian, > > As Ralph already stated, this is likely a hwloc API issue. > From debian9, you can > lstopo --of xml | ssh debian8 lstopo --if xml -i - > > that will likely confirm the API error. > > If you are willing to get a bit more details, you can add some printf > in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to > figure out where exactly the failure occurs. > > Meanwhile, you can move forward by using the embedded hwloc on both > distros (--with-hwloc=internal or no --with-hwloc option at all). > > > Note we strongly discourage you configure --with-FOO=/usr > (it explicitly add /usr/include and /usr/lib[64] in the search path, > and might hide some other external libraries installed in a non > standard location). In order to force the external hwloc lib installed > in the default location, --with-hwloc=external is what you need (same > thing applies to libevent and pmix) Thank you for the advice. Removing --with-hwloc from the configure statement corrected the problem. > > > Cheers, > > Gilles > On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org wrote: >> >> More than likely the problem is the difference in hwloc versions - sounds >> like the topology to/from xml is different between the two versions, and the >> older one doesn’t understand the new one. >> >> > On Jul 21, 2018, at 12:04 PM, Brian Smith >> > wrote: >> > >> > Greetings, >> > >> > I'm having trouble getting openmpi 2.1.2 to work when launching a >> > process from debian 8 on a remote debian 9 host. To keep things simple >> > in this example, I'm just launching date on the remote host. 
>> > >> > deb8host$ mpirun -H deb9host date >> > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file >> > base/plm_base_launch_support.c at line 954 >> > >> > It works fine when executed from debian 9: >> > deb9host$ mpirun -H deb8host date >> > Sat Jul 21 13:40:43 CDT 2018 >> > >> > Also works when executed from debian 8 against debian 8: >> > deb8host:~$ mpirun -H deb8host2 date >> > Sat Jul 21 13:55:57 CDT 2018 >> > >> > The failure results from an error code returned by: >> > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO) >> > >> > openmpi was built with the same configure flags on both hosts. >> > >> >--prefix=$(PREFIX) \ >> >--with-verbs \ >> >--with-libfabric \ >> >--disable-silent-rules \ >> >--with-hwloc=/usr \ >> >--with-libltdl=/usr \ >> >--with-devel-headers \ >> >--with-slurm \ >> >--with-sge \ >> >--without-tm \ >> >--disable-heterogeneous \ >> >--with-contrib-vt-flags=--disable-iotrace \ >> >--sysconfdir=$(PREFIX)/etc \ >> >--libdir=$(PREFIX)/lib\ >> >--includedir=$(PREFIX)/include >> > >> > >> > deb9host libhwloc and libhwloc-plugins is 1.11.5-1 >> > deb8host libhwloc and libhwloc-plugins is 1.10.0-3 >> > >> > I've been trying to debug this for the past few days and would >> > appreciate any help on determining why this failure is occurring >> > and/or resolving the problem. >> > >> > -- >> > Brian T. Smith >> > System Fabric Works >> > Senior Technical Staff >> > bsm...@systemfabricworks.com >> > GPG Key: B3C2C7B73BA3CD7F >> > ___ >> > users mailing list >> > users@lists.open-mpi.org >> > https://lists.open-mpi.org/mailman/listinfo/users >> >> ___ >> users mailing list >> users@lists.open-mpi.org >> https://lists.open-mpi.org/mailman/listinfo/users > ___ > users mailing list > users@lists.open-mpi.org > https://lists.open-mpi.org/mailman/listinfo/users -- Brian T. 
Smith System Fabric Works Senior Technical Staff bsm...@systemfabricworks.com GPG Key: B3C2C7B73BA3CD7F ___ users mailing list users@lists.open-mpi.org https://lists.open-mpi.org/mailman/listinfo/users
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
Brian, As Ralph already stated, this is likely a hwloc API issue. From debian9, you can lstopo --of xml | ssh debian8 lstopo --if xml -i - that will likely confirm the API error. If you are willing to get a bit more details, you can add some printf in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to figure out where exactly the failure occurs. Meanwhile, you can move forward by using the embedded hwloc on both distros (--with-hwloc=internal or no --with-hwloc option at all). Note we strongly discourage you from configuring --with-FOO=/usr (it explicitly adds /usr/include and /usr/lib[64] to the search path, and might hide some other external libraries installed in a non-standard location). In order to force the external hwloc lib installed in the default location, --with-hwloc=external is what you need (same thing applies to libevent and pmix) Cheers, Gilles On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org wrote: > > More than likely the problem is the difference in hwloc versions - sounds > like the topology to/from xml is different between the two versions, and the > older one doesn’t understand the new one. > > > On Jul 21, 2018, at 12:04 PM, Brian Smith > > wrote: > > > > Greetings, > > > > I'm having trouble getting openmpi 2.1.2 to work when launching a > > process from debian 8 on a remote debian 9 host. To keep things simple > > in this example, I'm just launching date on the remote host. 
> > > > deb8host$ mpirun -H deb9host date > > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file > > base/plm_base_launch_support.c at line 954 > > > > It works fine when executed from debian 9: > > deb9host$ mpirun -H deb8host date > > Sat Jul 21 13:40:43 CDT 2018 > > > > Also works when executed from debian 8 against debian 8: > > deb8host:~$ mpirun -H deb8host2 date > > Sat Jul 21 13:55:57 CDT 2018 > > > > The failure results from an error code returned by: > > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO) > > > > openmpi was built with the same configure flags on both hosts. > > > >--prefix=$(PREFIX) \ > >--with-verbs \ > >--with-libfabric \ > >--disable-silent-rules \ > >--with-hwloc=/usr \ > >--with-libltdl=/usr \ > >--with-devel-headers \ > >--with-slurm \ > >--with-sge \ > >--without-tm \ > >--disable-heterogeneous \ > >--with-contrib-vt-flags=--disable-iotrace \ > >--sysconfdir=$(PREFIX)/etc \ > >--libdir=$(PREFIX)/lib\ > >--includedir=$(PREFIX)/include > > > > > > deb9host libhwloc and libhwloc-plugins is 1.11.5-1 > > deb8host libhwloc and libhwloc-plugins is 1.10.0-3 > > > > I've been trying to debug this for the past few days and would > > appreciate any help on determining why this failure is occurring > > and/or resolving the problem. > > > > -- > > Brian T. Smith > > System Fabric Works > > Senior Technical Staff > > bsm...@systemfabricworks.com > > GPG Key: B3C2C7B73BA3CD7F > > ___ > > users mailing list > > users@lists.open-mpi.org > > https://lists.open-mpi.org/mailman/listinfo/users > > ___ > users mailing list > users@lists.open-mpi.org > https://lists.open-mpi.org/mailman/listinfo/users ___ users mailing list users@lists.open-mpi.org https://lists.open-mpi.org/mailman/listinfo/users
Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
More than likely the problem is the difference in hwloc versions - sounds like the topology to/from xml is different between the two versions, and the older one doesn’t understand the new one. > On Jul 21, 2018, at 12:04 PM, Brian Smith > wrote: > > Greetings, > > I'm having trouble getting openmpi 2.1.2 to work when launching a > process from debian 8 on a remote debian 9 host. To keep things simple > in this example, I'm just launching date on the remote host. > > deb8host$ mpirun -H deb9host date > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file > base/plm_base_launch_support.c at line 954 > > It works fine when executed from debian 9: > deb9host$ mpirun -H deb8host date > Sat Jul 21 13:40:43 CDT 2018 > > Also works when executed from debian 8 against debian 8: > deb8host:~$ mpirun -H deb8host2 date > Sat Jul 21 13:55:57 CDT 2018 > > The failure results from an error code returned by: > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO) > > openmpi was built with the same configure flags on both hosts. > >--prefix=$(PREFIX) \ >--with-verbs \ >--with-libfabric \ >--disable-silent-rules \ >--with-hwloc=/usr \ >--with-libltdl=/usr \ >--with-devel-headers \ >--with-slurm \ >--with-sge \ >--without-tm \ >--disable-heterogeneous \ >--with-contrib-vt-flags=--disable-iotrace \ >--sysconfdir=$(PREFIX)/etc \ >--libdir=$(PREFIX)/lib\ >--includedir=$(PREFIX)/include > > > deb9host libhwloc and libhwloc-plugins is 1.11.5-1 > deb8host libhwloc and libhwloc-plugins is 1.10.0-3 > > I've been trying to debug this for the past few days and would > appreciate any help on determining why this failure is occurring > and/or resolving the problem. > > -- > Brian T. Smith > System Fabric Works > Senior Technical Staff > bsm...@systemfabricworks.com > GPG Key: B3C2C7B73BA3CD7F > ___ > users mailing list > users@lists.open-mpi.org > https://lists.open-mpi.org/mailman/listinfo/users ___ users mailing list users@lists.open-mpi.org https://lists.open-mpi.org/mailman/listinfo/users
[OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO
Greetings, I'm having trouble getting openmpi 2.1.2 to work when launching a process from debian 8 on a remote debian 9 host. To keep things simple in this example, I'm just launching date on the remote host. deb8host$ mpirun -H deb9host date [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file base/plm_base_launch_support.c at line 954 It works fine when executed from debian 9: deb9host$ mpirun -H deb8host date Sat Jul 21 13:40:43 CDT 2018 Also works when executed from debian 8 against debian 8: deb8host:~$ mpirun -H deb8host2 date Sat Jul 21 13:55:57 CDT 2018 The failure results from an error code returned by: opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO) openmpi was built with the same configure flags on both hosts. --prefix=$(PREFIX) \ --with-verbs \ --with-libfabric \ --disable-silent-rules \ --with-hwloc=/usr \ --with-libltdl=/usr \ --with-devel-headers \ --with-slurm \ --with-sge \ --without-tm \ --disable-heterogeneous \ --with-contrib-vt-flags=--disable-iotrace \ --sysconfdir=$(PREFIX)/etc \ --libdir=$(PREFIX)/lib\ --includedir=$(PREFIX)/include deb9host libhwloc and libhwloc-plugins is 1.11.5-1 deb8host libhwloc and libhwloc-plugins is 1.10.0-3 I've been trying to debug this for the past few days and would appreciate any help on determining why this failure is occurring and/or resolving the problem. -- Brian T. Smith System Fabric Works Senior Technical Staff bsm...@systemfabricworks.com GPG Key: B3C2C7B73BA3CD7F ___ users mailing list users@lists.open-mpi.org https://lists.open-mpi.org/mailman/listinfo/users