Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-28 Thread Lovelace III, Henry
Hi Gilles,

The correction was to explicitly set gcc and g++ as the C and C++ compilers on the
NERSC system.


export CC=gcc

export CXX=g++


The build now works correctly.
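
In case it helps anyone else, this is roughly how I apply it (a sketch only: the
cache path and the final command are placeholders for whatever drives the
distribution build, and CMake only reads CC/CXX when it first creates its cache,
so any stale cache has to be removed first):

    export CC=gcc                                   # use GNU gcc instead of the Cray cc wrapper
    export CXX=g++                                  # use GNU g++ instead of the Cray CC wrapper
    rm -f util_programs/production/CMakeCache.txt   # placeholder path: wipe the stale CMake cache
    ./your_distribution_build_script                # placeholder for the actual build command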

Cheers,

Henry


From: users  on behalf of Lovelace III, Henry 

Sent: Friday, July 27, 2018 2:20:18 PM
To: Open MPI Users
Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: 
OPAL_HWLOC_TOPO


Hi Gilles,

If I understand the instruction correctly, I should invoke the -showme option to
get additional information. I did use -showme; however, the files being compiled
are C/C++ source files and header files.

cmake_file~~
set (EXENAME ansga2)

set (SRC_FILES
  apisa/nsga2/nsga2.cpp
  apisa/nsga2/nsga2_functions.cpp
  apisa/nsga2/nsga2.hpp
  apisa/nsga2/nsga2_io.cpp
)

~~~

~SHOWME~~


10:42 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2.cpp
gcc nsga2.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi
10:42 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme 
nsga2_functions.cpp
gcc nsga2_functions.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi
10:43 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2.hpp
gcc nsga2.hpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi
10:44 AM->hlovelac:nsga2 ../../../production/bin/mpicc -showme nsga2_io.cpp
gcc nsga2_io.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi



11:14 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2.cpp
gfortran nsga2.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme 
nsga2_functions.cpp
gfortran nsga2_functions.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2.hpp
gfortran nsga2.hpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:15 AM->hlovelac:nsga2 ../../../production/bin/mpifort -showme nsga2_io.cpp
gfortran nsga2_io.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi



cmake_file~~


set (EXENAME aspea2)

set (SRC_FILES
  apisa/spea2/spea2.cpp
  apisa/spea2/spea2_functions.cpp
  apisa/spea2/spea2.hpp
  apisa/spea2/spea2_io.cpp
)

~~~

~SHOWME~~


10:50 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme spea2.cpp
gcc spea2.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi
10:50 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme 
spea2_functions.cpp
gcc spea2_functions.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_07

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-27 Thread Lovelace III, Henry
hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi
10:51 AM->hlovelac:spea2 ../../../production/bin/mpicc -showme spea2_io.cpp
gcc spea2_io.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-Wl,-rpath -Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -lmpi

11:16 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2.cpp
gfortran spea2.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme 
spea2_functions.cpp
gfortran spea2_functions.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2.hpp
gfortran spea2.hpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi
11:17 AM->hlovelac:spea2 ../../../production/bin/mpifort -showme spea2_io.cpp
gfortran spea2_io.cpp 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0727/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi




Does -showme show any errors? The distribution was updated; that is the reason 
behind the directory change. Locally I have no problem with the build.

Thanks,

Henry


From: users  on behalf of gil...@rist.or.jp 

Sent: Tuesday, July 24, 2018 10:00:51 PM
To: Open MPI Users
Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: 
OPAL_HWLOC_TOPO


 Henry,

First, you could/should use mpicc instead of the cc Cray compiler.
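
For example, something like this (just a sketch; I am assuming your CMake build
honours the usual CMAKE_<LANG>_COMPILER cache variables and that the wrappers
live under production/bin as in your logs):

    # point CMake at the Open MPI wrapper compilers instead of the Cray cc/CC
    cmake \
      -DCMAKE_C_COMPILER=$HOME/BMAD/bmad_dist_2018_0724/production/bin/mpicc \
      -DCMAKE_CXX_COMPILER=$HOME/BMAD/bmad_dist_2018_0724/production/bin/mpicxx \
      -DCMAKE_Fortran_COMPILER=$HOME/BMAD/bmad_dist_2018_0724/production/bin/mpifort \
      <path-to-source>      # placeholder for your source directory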



I also noted



gfortran Linker Flags   : -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11



It should be '-lmpi -lX11' instead of '-lmpi;-lX11'
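
That ';' is typically a CMake list separator leaking into the link flags. As a
quick check (a sketch: the target directory name is a guess based on the targets
in your log, and it assumes the Unix Makefiles generator, which stores each
target's link line in a link.txt file), you could fix the generated link line by
hand and re-run the link:

    # replace the stray CMake list separator with a space in the generated link line
    sed -i 's/-lmpi;-lX11/-lmpi -lX11/' CMakeFiles/ansga2-exe.dir/link.txt
    make ansga2-exe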



The linker flags suggest Open MPI is installed in 
/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib, but your 
LD_LIBRARY_PATH suggests it is in
$HOME/BMAD/bmad_dist_2018_0717/production/lib

(note 0724 vs 0717)



Also, keep in mind LD_LIBRARY_PATH is only used at runtime in order to resolve 
dependencies.

The linker does *not* use LD_LIBRARY_PATH.

IIRC, it uses LIBRARY_PATH, but the preferred way is to use the -L 
argument.
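
To make the split concrete (a minimal sketch with made-up object/executable
names; the -L path is the one from your linker flags):

    # link time: the linker finds libmpi through -L (or LIBRARY_PATH), never LD_LIBRARY_PATH
    gfortran main.o -L$HOME/BMAD/bmad_dist_2018_0724/production/lib -lmpi_mpifh -lmpi -o prog
    # run time: the dynamic loader uses the embedded rpath and LD_LIBRARY_PATH
    export LD_LIBRARY_PATH=$HOME/BMAD/bmad_dist_2018_0724/production/lib:$LD_LIBRARY_PATH
    ldd ./prog | grep libmpi    # shows which libmpi will actually be loaded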



If your problem persists, I suggest you get the full command line that is 
failing.

(It should invoke mpifort instead of gfortran or cc.) Then you can copy/paste the
mpifort command, add the -showme parameter, and run it manually so we can
understand what is really happening under the (cmake) hood.
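
With a CMake-generated Makefile, VERBOSE=1 usually prints every compile and link
command (a sketch: adjust the build directory, and treat the trailing arguments
as a placeholder for whatever the failing command actually was):

    cd util_programs/production
    gmake VERBOSE=1 2>&1 | tee build.log    # logs the full compile/link command lines
    # then re-run the failing command by hand with -showme appended, e.g.
    $HOME/BMAD/bmad_dist_2018_0724/production/bin/mpifort -showme <failing-arguments>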



Cheers,



Gilles

- Original Message -

Hi,

   I am receiving these errors when building with OpenMPI on the NERSC system.

Building directory: util_programs

-- The C compiler identification is GNU 7.1.0
-- The CXX compiler identification is GNU 7.1.0
-- Cray Programming Environment 2.5.12 C
-- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc
-- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Cray Programming Environment 2.5.12 CXX
-- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC
-- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-24 Thread gilles
 Henry,

First, you could/should use mpicc instead of the cc Cray compiler.


I also noted


gfortran Linker Flags   : -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11


It should be '-lmpi -lX11' instead of '-lmpi;-lX11'


The linker flags suggest Open MPI is installed in
/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib, but your
LD_LIBRARY_PATH suggests it is in
$HOME/BMAD/bmad_dist_2018_0717/production/lib

(note 0724 vs 0717)

Also, keep in mind LD_LIBRARY_PATH is only used at runtime in order to 
resolve dependencies.

The linker does *not* use LD_LIBRARY_PATH.

IIRC, it uses LIBRARY_PATH, but the preferred way is to use the -L 
argument.

If your problem persists, I suggest you get the full command line that 
is failing.

(It should invoke mpifort instead of gfortran or cc.) Then you can copy/paste
the mpifort command, add the -showme parameter, and run it manually so we can
understand what is really happening under the (cmake) hood.

Cheers,

Gilles

- Original Message -

Hi,

   I am receiving these errors when building with OpenMPI on the 
NERSC system.
Building directory: util_programs

-- The C compiler identification is GNU 7.1.0
-- The CXX compiler identification is GNU 7.1.0
-- Cray Programming Environment 2.5.12 C
-- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc
-- Check for working C compiler: /opt/cray/pe/craype/2.5.12/bin/cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Cray Programming Environment 2.5.12 CXX
-- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC
-- Check for working CXX compiler: /opt/cray/pe/craype/2.5.12/bin/CC -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- The Fortran compiler identification is GNU 7.1.0
-- Check for working Fortran compiler: /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/mpifort
-- Check for working Fortran compiler: /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/mpifort  -- works
-- Detecting Fortran compiler ABI info
-- Detecting Fortran compiler ABI info - done
-- Checking whether /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/mpifort supports Fortran 90
-- Checking whether /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/mpifort supports Fortran 90 -- yes

Build type   : Production
Linking with release : /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724 (Off-site Distribution)
C Compiler   : /opt/cray/pe/craype/2.5.12/bin/cc
Fortran Compiler : /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/mpifort
Plotting Libraries   : pgplot
OpenMP Support   : Not Enabled
MPI Support  : Enabled
FFLAGS   :
gfortran Compiler Flags : -Df2cFortran -DCESR_UNIX -DCESR_LINUX -u -traceback -cpp 
-fno-range-check -fdollar-ok -fbacktrace -Bstatic -ffree-line-length-none -DCESR_PGPLOT 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/include -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -fPIC -O2
gfortran Linker Flags   : -pthread 
-I/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib -Wl,-rpath 
-Wl,/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-Wl,--enable-new-dtags 
-L/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib 
-lmpi_usempif08 -lmpi_usempi_ignore_tkr -lmpi_mpifh -lmpi;-lX11

SHARED DEPS  :

-- Configuring done
-- Generating done
-- Build files have been written to: /global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/util_programs/production
Scanning dependencies of target compare_tracking_methods_text-exe
Scanning dependencies of target compare_tracking_methods_plot-exe
Scanning dependencies of target f77_to_f90-exe
Scanning dependencies of target util_programs
Scanning dependencies of target lattice_cleaner-exe
Scanning dependencies of target bmad_to_gpt-exe
Scanning dependencies of target bmad_to_mad_sad_and_xsif-exe
Scanning dependencies of target sad_to_bmad_postprocess-exe
Scanning dependencies of target aspea2-exe
Scanning dependencies of target bmad_to_csrtrack-exe
Scanning dependencies of target ansga2-exe
Scanning dependencies of target bmad_to_blender-exe
Scanning dependencies of target bmad_to_autocad-exe
Scanning dependencies of target element_attributes-exe

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-24 Thread Lovelace III, Henry
%] Built target bmad_to_csrtrack-exe
[ 77%] Linking Fortran executable 
/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/bin/envelope_ibs
[ 77%] Built target sad_to_bmad_postprocess-exe
[ 77%] Built target compare_tracking_methods_plot-exe
[ 79%] Building Fortran object 
CMakeFiles/util_programs.dir/modules/custom_dynamic_aperture_mod.f90.o
[ 79%] Built target envelope_ibs-exe
[ 84%] Building Fortran object 
CMakeFiles/util_programs.dir/modules/namelist_da.f90.o
[ 84%] Building Fortran object 
CMakeFiles/util_programs.dir/modules/namelist_general.f90.o
[ 84%] Building Fortran object 
CMakeFiles/util_programs.dir/modules/linear_aperture_mod.f90.o
[ 86%] Building Fortran object 
CMakeFiles/util_programs.dir/modules/namelist_moga.f90.o
[ 88%] Linking Fortran static library 
/global/homes/h/hlovelac/BMAD/bmad_dist_2018_0724/production/lib/libutil_programs.a
[ 88%] Built target util_programs
Makefile:83: recipe for target 'all' failed
gmake: *** [all] Error 2

Error in distribution build.

The build is not seeing the MPI libraries, even though before the build I defined
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$HOME/BMAD/bmad_dist_2018_0717/production/lib

Is this error from mpifort?

Thanks,
Henry

From: users  on behalf of Brian Smith 

Sent: Monday, July 23, 2018 2:06:14 PM
To: Open MPI Users
Subject: Re: [OMPI users] Error in file base/plm_base_launch_support.c: 
OPAL_HWLOC_TOPO

On Sat, Jul 21, 2018 at 9:13 PM, Gilles Gouaillardet
 wrote:
> Brian,
>
> As Ralph already stated, this is likely a hwloc API issue.
> From debian9, you can
> lstopo --of xml | ssh debian8 lstopo --if xml -i -
>
> that will likely confirm the API error.
>
> If you are willing to get a bit more details, you can add some printf
> in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to
> figure out where exactly the failure occurs.
>
> Meanwhile, you can move forward by using the embedded hwloc on both
> distros (--with-hwloc=internal or no --with-hwloc option at all).
>
>
> Note we strongly discourage you from configuring --with-FOO=/usr
> (it explicitly adds /usr/include and /usr/lib[64] to the search path,
> and might hide some other external libraries installed in a non-standard
> location). In order to force the external hwloc lib installed in the
> default location, --with-hwloc=external is what you need (the same
> applies to libevent and pmix)


Thank you for the advice. Removing --with-hwloc from the configure
statement corrected the problem.


>
>
> Cheers,
>
> Gilles
> On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org  wrote:
>>
>> More than likely the problem is the difference in hwloc versions - sounds 
>> like the topology to/from xml is different between the two versions, and the 
>> older one doesn’t understand the new one.
>>
>> > On Jul 21, 2018, at 12:04 PM, Brian Smith  
>> > wrote:
>> >
>> > Greetings,
>> >
>> > I'm having trouble getting openmpi 2.1.2 to work when launching a
>> > process from debian 8 on a remote debian 9 host. To keep things simple
>> > in this example, I'm just launching date on the remote host.
>> >
>> > deb8host$ mpirun -H deb9host date
>> > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file
>> > base/plm_base_launch_support.c at line 954
>> >
>> > It works fine when executed from debian 9:
>> > deb9host$ mpirun -H deb8host date
>> > Sat Jul 21 13:40:43 CDT 2018
>> >
>> > Also works when executed from debian 8 against debian 8:
>> > deb8host:~$ mpirun -H deb8host2 date
>> > Sat Jul 21 13:55:57 CDT 2018
>> >
>> > The failure results from an error code returned by:
>> > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO)
>> >
>> > openmpi was built with the same configure flags on both hosts.
>> >
>> >--prefix=$(PREFIX) \
>> >--with-verbs \
>> >--with-libfabric \
>> >--disable-silent-rules \
>> >--with-hwloc=/usr \
>> >--with-libltdl=/usr \
>> >--with-devel-headers \
>> >--with-slurm \
>> >--with-sge \
>> >--without-tm \
>> >--disable-heterogeneous \
>> >--with-contrib-vt-flags=--disable-iotrace \
>> >--sysconfdir=$(PREFIX)/etc \
>> >--libdir=$(PREFIX)/lib\
>> >--includedir=$(PREFIX)/include
>> >
>> >
>> > deb9host libhwloc and libhwloc-plugins is 1.11.5-1
>> > deb8host libhwloc and libhwloc-plugins is 1.10.0-3
>> >
>> > I've been trying to debug this for the past few days and would

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-23 Thread Brian Smith
On Sat, Jul 21, 2018 at 9:13 PM, Gilles Gouaillardet
 wrote:
> Brian,
>
> As Ralph already stated, this is likely a hwloc API issue.
> From debian9, you can
> lstopo --of xml | ssh debian8 lstopo --if xml -i -
>
> that will likely confirm the API error.
>
> If you are willing to get a bit more details, you can add some printf
> in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to
> figure out where exactly the failure occurs.
>
> Meanwhile, you can move forward by using the embedded hwloc on both
> distros (--with-hwloc=internal or no --with-hwloc option at all).
>
>
> Note we strongly discourage you from configuring --with-FOO=/usr
> (it explicitly adds /usr/include and /usr/lib[64] to the search path,
> and might hide some other external libraries installed in a non-standard
> location). In order to force the external hwloc lib installed in the
> default location, --with-hwloc=external is what you need (the same
> applies to libevent and pmix)


Thank you for the advice. Removing --with-hwloc from the configure
statement corrected the problem.


>
>
> Cheers,
>
> Gilles
> On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org  wrote:
>>
>> More than likely the problem is the difference in hwloc versions - sounds 
>> like the topology to/from xml is different between the two versions, and the 
>> older one doesn’t understand the new one.
>>
>> > On Jul 21, 2018, at 12:04 PM, Brian Smith  
>> > wrote:
>> >
>> > Greetings,
>> >
>> > I'm having trouble getting openmpi 2.1.2 to work when launching a
>> > process from debian 8 on a remote debian 9 host. To keep things simple
>> > in this example, I'm just launching date on the remote host.
>> >
>> > deb8host$ mpirun -H deb9host date
>> > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file
>> > base/plm_base_launch_support.c at line 954
>> >
>> > It works fine when executed from debian 9:
>> > deb9host$ mpirun -H deb8host date
>> > Sat Jul 21 13:40:43 CDT 2018
>> >
>> > Also works when executed from debian 8 against debian 8:
>> > deb8host:~$ mpirun -H deb8host2 date
>> > Sat Jul 21 13:55:57 CDT 2018
>> >
>> > The failure results from an error code returned by:
>> > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO)
>> >
>> > openmpi was built with the same configure flags on both hosts.
>> >
>> >--prefix=$(PREFIX) \
>> >--with-verbs \
>> >--with-libfabric \
>> >--disable-silent-rules \
>> >--with-hwloc=/usr \
>> >--with-libltdl=/usr \
>> >--with-devel-headers \
>> >--with-slurm \
>> >--with-sge \
>> >--without-tm \
>> >--disable-heterogeneous \
>> >--with-contrib-vt-flags=--disable-iotrace \
>> >--sysconfdir=$(PREFIX)/etc \
>> >--libdir=$(PREFIX)/lib\
>> >--includedir=$(PREFIX)/include
>> >
>> >
>> > deb9host libhwloc and libhwloc-plugins is 1.11.5-1
>> > deb8host libhwloc and libhwloc-plugins is 1.10.0-3
>> >
>> > I've been trying to debug this for the past few days and would
>> > appreciate any help on determining why this failure is occurring
>> > and/or resolving the problem.
>> >
>> > --
>> > Brian T. Smith
>> > System Fabric Works
>> > Senior Technical Staff
>> > bsm...@systemfabricworks.com
>> > GPG Key: B3C2C7B73BA3CD7F
>> > ___
>> > users mailing list
>> > users@lists.open-mpi.org
>> > https://lists.open-mpi.org/mailman/listinfo/users
>>
>> ___
>> users mailing list
>> users@lists.open-mpi.org
>> https://lists.open-mpi.org/mailman/listinfo/users
> ___
> users mailing list
> users@lists.open-mpi.org
> https://lists.open-mpi.org/mailman/listinfo/users



-- 
Brian T. Smith
System Fabric Works
Senior Technical Staff
bsm...@systemfabricworks.com
GPG Key: B3C2C7B73BA3CD7F
___
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-21 Thread Gilles Gouaillardet
Brian,

As Ralph already stated, this is likely a hwloc API issue.
From debian9, you can
lstopo --of xml | ssh debian8 lstopo --if xml -i -

that will likely confirm the API error.

If you are willing to get a bit more details, you can add some printf
in opal_hwloc_unpack (from opal/mca/hwloc/base/hwloc_base_dt.c) to
figure out where exactly the failure occurs.

Meanwhile, you can move forward by using the embedded hwloc on both
distros (--with-hwloc=internal or no --with-hwloc option at all).


Note we strongly discourage you from configuring --with-FOO=/usr
(it explicitly adds /usr/include and /usr/lib[64] to the search path,
and might hide some other external libraries installed in a non-standard
location). In order to force the external hwloc lib installed in the
default location, --with-hwloc=external is what you need (the same
applies to libevent and pmix)
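
Concretely, something along these lines on both hosts (a sketch based on the
flags you posted, with only --with-hwloc=/usr removed so the embedded hwloc is
used; substitute --with-hwloc=external if you really want the distro hwloc, and
the same reasoning would apply to --with-libltdl=/usr; PREFIX stands for your
install prefix):

    ./configure --prefix=$PREFIX \
        --with-verbs \
        --with-libfabric \
        --disable-silent-rules \
        --with-libltdl=/usr \
        --with-devel-headers \
        --with-slurm \
        --with-sge \
        --without-tm \
        --disable-heterogeneous \
        --with-contrib-vt-flags=--disable-iotrace \
        --sysconfdir=$PREFIX/etc \
        --libdir=$PREFIX/lib \
        --includedir=$PREFIX/include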


Cheers,

Gilles
On Sun, Jul 22, 2018 at 7:52 AM r...@open-mpi.org  wrote:
>
> More than likely the problem is the difference in hwloc versions - sounds 
> like the topology to/from xml is different between the two versions, and the 
> older one doesn’t understand the new one.
>
> > On Jul 21, 2018, at 12:04 PM, Brian Smith  
> > wrote:
> >
> > Greetings,
> >
> > I'm having trouble getting openmpi 2.1.2 to work when launching a
> > process from debian 8 on a remote debian 9 host. To keep things simple
> > in this example, I'm just launching date on the remote host.
> >
> > deb8host$ mpirun -H deb9host date
> > [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file
> > base/plm_base_launch_support.c at line 954
> >
> > It works fine when executed from debian 9:
> > deb9host$ mpirun -H deb8host date
> > Sat Jul 21 13:40:43 CDT 2018
> >
> > Also works when executed from debian 8 against debian 8:
> > deb8host:~$ mpirun -H deb8host2 date
> > Sat Jul 21 13:55:57 CDT 2018
> >
> > The failure results from an error code returned by:
> > opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO)
> >
> > openmpi was built with the same configure flags on both hosts.
> >
> >--prefix=$(PREFIX) \
> >--with-verbs \
> >--with-libfabric \
> >--disable-silent-rules \
> >--with-hwloc=/usr \
> >--with-libltdl=/usr \
> >--with-devel-headers \
> >--with-slurm \
> >--with-sge \
> >--without-tm \
> >--disable-heterogeneous \
> >--with-contrib-vt-flags=--disable-iotrace \
> >--sysconfdir=$(PREFIX)/etc \
> >--libdir=$(PREFIX)/lib\
> >--includedir=$(PREFIX)/include
> >
> >
> > deb9host libhwloc and libhwloc-plugins is 1.11.5-1
> > deb8host libhwloc and libhwloc-plugins is 1.10.0-3
> >
> > I've been trying to debug this for the past few days and would
> > appreciate any help on determining why this failure is occurring
> > and/or resolving the problem.
> >
> > --
> > Brian T. Smith
> > System Fabric Works
> > Senior Technical Staff
> > bsm...@systemfabricworks.com
> > GPG Key: B3C2C7B73BA3CD7F
> > ___
> > users mailing list
> > users@lists.open-mpi.org
> > https://lists.open-mpi.org/mailman/listinfo/users
>
> ___
> users mailing list
> users@lists.open-mpi.org
> https://lists.open-mpi.org/mailman/listinfo/users
___
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users

Re: [OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-21 Thread r...@open-mpi.org
More than likely the problem is the difference in hwloc versions - sounds like 
the topology to/from xml is different between the two versions, and the older 
one doesn’t understand the new one.

> On Jul 21, 2018, at 12:04 PM, Brian Smith  
> wrote:
> 
> Greetings,
> 
> I'm having trouble getting openmpi 2.1.2 to work when launching a
> process from debian 8 on a remote debian 9 host. To keep things simple
> in this example, I'm just launching date on the remote host.
> 
> deb8host$ mpirun -H deb9host date
> [deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file
> base/plm_base_launch_support.c at line 954
> 
> It works fine when executed from debian 9:
> deb9host$ mpirun -H deb8host date
> Sat Jul 21 13:40:43 CDT 2018
> 
> Also works when executed from debian 8 against debian 8:
> deb8host:~$ mpirun -H deb8host2 date
> Sat Jul 21 13:55:57 CDT 2018
> 
> The failure results from an error code returned by:
> opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO)
> 
> openmpi was built with the same configure flags on both hosts.
> 
>--prefix=$(PREFIX) \
>--with-verbs \
>--with-libfabric \
>--disable-silent-rules \
>--with-hwloc=/usr \
>--with-libltdl=/usr \
>--with-devel-headers \
>--with-slurm \
>--with-sge \
>--without-tm \
>--disable-heterogeneous \
>--with-contrib-vt-flags=--disable-iotrace \
>--sysconfdir=$(PREFIX)/etc \
>--libdir=$(PREFIX)/lib\
>--includedir=$(PREFIX)/include
> 
> 
> deb9host libhwloc and libhwloc-plugins is 1.11.5-1
> deb8host libhwloc and libhwloc-plugins is 1.10.0-3
> 
> I've been trying to debug this for the past few days and would
> appreciate any help on determining why this failure is occurring
> and/or resolving the problem.
> 
> -- 
> Brian T. Smith
> System Fabric Works
> Senior Technical Staff
> bsm...@systemfabricworks.com
> GPG Key: B3C2C7B73BA3CD7F
> ___
> users mailing list
> users@lists.open-mpi.org
> https://lists.open-mpi.org/mailman/listinfo/users

___
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users

[OMPI users] Error in file base/plm_base_launch_support.c: OPAL_HWLOC_TOPO

2018-07-21 Thread Brian Smith
Greetings,

I'm having trouble getting openmpi 2.1.2 to work when launching a
process from debian 8 on a remote debian 9 host. To keep things simple
in this example, I'm just launching date on the remote host.

deb8host$ mpirun -H deb9host date
[deb8host:01552] [[32763,0],0] ORTE_ERROR_LOG: Error in file
base/plm_base_launch_support.c at line 954

It works fine when executed from debian 9:
deb9host$ mpirun -H deb8host date
Sat Jul 21 13:40:43 CDT 2018

Also works when executed from debian 8 against debian 8:
deb8host:~$ mpirun -H deb8host2 date
Sat Jul 21 13:55:57 CDT 2018

The failure results from an error code returned by:
opal_dss.unpack(buffer, , , OPAL_HWLOC_TOPO)

openmpi was built with the same configure flags on both hosts.

--prefix=$(PREFIX) \
--with-verbs \
--with-libfabric \
--disable-silent-rules \
--with-hwloc=/usr \
--with-libltdl=/usr \
--with-devel-headers \
--with-slurm \
--with-sge \
--without-tm \
--disable-heterogeneous \
--with-contrib-vt-flags=--disable-iotrace \
--sysconfdir=$(PREFIX)/etc \
--libdir=$(PREFIX)/lib\
--includedir=$(PREFIX)/include


deb9host libhwloc and libhwloc-plugins is 1.11.5-1
deb8host libhwloc and libhwloc-plugins is 1.10.0-3

I've been trying to debug this for the past few days and would
appreciate any help on determining why this failure is occurring
and/or resolving the problem.

-- 
Brian T. Smith
System Fabric Works
Senior Technical Staff
bsm...@systemfabricworks.com
GPG Key: B3C2C7B73BA3CD7F
___
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users