Dear hdf-forum members,
I have a problem I am hoping someone can help me with. I have a program
that writes a 2D array (contiguous, indexed linearly) using parallel
HDF5. When I choose a number of processes that is not a power of 2
(i.e. not 1, 2, 4, 8, ...), H5Fclose() hangs, inexplicably. I'm using
HDF5 1.8.14 and OpenMPI 1.7.2, built with GCC 4.8 on Linux.
Can someone help me pinpoint my mistake?
I have searched the forum for "h5fclose hangs": the first hit turned out
to be a user mistake that, to the best of my knowledge, I am not making;
the second thread never got beyond the initial problem description and
offered no solution.
Attached is a (maybe insufficiently bare-boned, apologies) demonstrator
program. Strangely, the hang only happens if nx >= 32. The code is
adapted from an HDF5 example program.
The demonstrator is compiled with
h5pcc test.hangs.cpp -DVERBOSE -lstdc++
(On my system, for some strange reason, MPI was built with the
deprecated C++ bindings, so I also need to add -lmpi_cxx; that shouldn't
be necessary for anyone else, and I hope it isn't the reason for the
hangs.)
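The hang can then be reproduced by running with any non-power-of-2
process count that passes the size check in main(), for example
(assuming the compiler's default a.out output name):
mpirun -np 3 ./a.out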
Thanks in advance for your help!
Wolf Dapp
--
#include <stdlib.h>   // calloc(), free(), exit()
#include <stdio.h>    // sprintf()
#include <stddef.h>   // ptrdiff_t
#include <iostream>   // std::cout, std::cerr
#include <string>
#include <cassert>    // assert()
#include <mpi.h>
#include <hdf5.h>

int myID = 0, iNumOfProc = 1, root = 0;
const int nx = 32, ny = nx;
int nxLocal, ixStart;

inline int getLinearIndex(int ix, int iy) { return ix*ny + iy; }

// helper structs to detect whether T is double (used to pick the HDF5 datatype)
template<typename T> struct TypeIsDouble { static const bool value = false; };
template<> struct TypeIsDouble< double > { static const bool value = true; };
template<typename T>
void writeH5(const char *name, double *field)
{
    // nxLocal == how many cols (of length ny each) this process owns [i.e. the block is nxLocal x ny in size]
    // ixStart == (global) index of the beginning of the (local) block
    // myID    == index of the local process
    // field   == local data array, of size nxLocal*ny, contiguous
    // ===============================================================
    // =================== write output to HDF5 ======================
    // ===============================================================
#ifdef VERBOSE
    if (myID == root) { std::cout << " # ...writing to file " << name << "...\n"; }
#endif // VERBOSE

    /* HDF5 API definitions */
    hid_t    file_id, dset_id;    /* file and dataset identifiers */
    hid_t    filespace, memspace; /* file and memory dataspace identifiers */
    hsize_t  dimsf[2];            /* dataset dimensions */
    hsize_t  chunk_dims[2];       /* chunk dimensions */
    hsize_t  count[2];            /* hyperslab selection parameters */
    hsize_t  stride[2];
    hsize_t  block[2];
    hsize_t  offset[2];
    hid_t    plist_id;            /* property list identifier */
    herr_t   status_h5;
    hid_t    datatype = (TypeIsDouble< T >::value) ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT;
    T       *data = NULL;
    MPI_Info info = MPI_INFO_NULL;
    /* Set up file access property list with parallel I/O access */
    plist_id = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, info);

    /* Create a new file collectively and release the property list identifier. */
    std::string filename;
    filename.append(name);
    file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); // filename.c_str() is the file name
    status_h5 = H5Pclose(plist_id); assert(status_h5 >= 0);

    /* Create the dataspace for the dataset. */
    dimsf[0] = nx;
    dimsf[1] = ny;
    chunk_dims[0] = nxLocal;
    chunk_dims[1] = ny;
    filespace = H5Screate_simple(2, dimsf, NULL);
    memspace  = H5Screate_simple(2, chunk_dims, NULL);

    /* Create chunked dataset. */
    plist_id = H5Pcreate(H5P_DATASET_CREATE);
    status_h5 = H5Pset_layout(plist_id, H5D_CHUNKED); assert(status_h5 >= 0);
    status_h5 = H5Pset_chunk(plist_id, 2, chunk_dims); assert(status_h5 >= 0);
    std::string fname = name;
    std::size_t pos = fname.find("."); // position of the first occurrence of "." in fname
    std::string dataset = fname.substr(0, pos);
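    // e.g. for name == "_test.np03.h5" the dataset ends up being called "_test"
    // (everything before the first '.')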
    dset_id = H5Dcreate(file_id, dataset.c_str(), datatype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); // dataset.c_str() is the data set name
    status_h5 = H5Pclose(plist_id); assert(status_h5 >= 0);
    status_h5 = H5Sclose(filespace); assert(status_h5 >= 0);
    /* Each process defines the dataset in memory and writes it to the hyperslab in the file. */
    count[0]  = 1;             // one block in x-direction
    count[1]  = 1;             // one block in y-direction
    stride[0] = 1;             // the blocks have no gaps between them in x-direction
    stride[1] = 1;             // the blocks have no gaps between them in y-direction
    block[0]  = chunk_dims[0]; // contiguous x-size of block is chunk_dims[0]
    block[1]  = chunk_dims[1]; // contiguous y-size of block is chunk_dims[1]
    offset[0] = ixStart;       // x-index [in data file] of (0,0) entry [in memory] of block
    offset[1] = 0;             // y-index [in data file] of (0,0) entry [in memory] of block

    /* Select hyperslab in the file. */
    filespace = H5Dget_space(dset_id);
    status_h5 = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, block); assert(status_h5 >= 0);

    /* Create property list for collective dataset write. */
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);

    /* Initialize data buffer; memory layout *may be* different from file layout, due to padding */
    data = (T *) calloc(chunk_dims[0]*chunk_dims[1], sizeof(T));
    for (ptrdiff_t ix = 0; ix < nxLocal; ix++)
    {
        for (ptrdiff_t iy = 0; iy < ny; iy++)
        {
            ptrdiff_t kmem  = getLinearIndex(ix, iy);
            ptrdiff_t kfile = ix*ny + iy;
            data[kfile] = field[kmem];
        }
    }
    /* Write to file */
    status_h5 = H5Dwrite(dset_id, datatype, memspace, filespace, plist_id, data); assert(status_h5 >= 0);
    free(data);

    /* Close/release resources. */
    status_h5 = H5Dclose(dset_id);   assert(status_h5 >= 0);
    status_h5 = H5Sclose(filespace); assert(status_h5 >= 0);
    status_h5 = H5Sclose(memspace);  assert(status_h5 >= 0);
    status_h5 = H5Pclose(plist_id);  assert(status_h5 >= 0);
#ifdef VERBOSE
    std::cout << "proc" << myID << " I CAN REACH THIS\n";
#endif // VERBOSE
    status_h5 = H5Fclose(file_id); assert(status_h5 >= 0); // WARNING number of procs must be power of 2, else this call deadlocks; no clue why
#ifdef VERBOSE
    std::cout << "proc" << myID << " BUT NOT THIS\n";
#endif // VERBOSE
}
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &iNumOfProc);
    MPI_Comm_rank(MPI_COMM_WORLD, &myID);

    // bail out if each process would own fewer than 4 columns
    if (1.0*nx/iNumOfProc < 4)
    {
        if (myID == root) std::cerr << " ##### np " << iNumOfProc << ": System too small.\n\n";
        MPI_Finalize();
        exit(0);
    }

    // simple 1D domain decomposition along x
    nxLocal = nx/iNumOfProc;
    ixStart = myID*nxLocal;
    if ((nx%iNumOfProc) != 0)
    {
        nxLocal += 1;
        ixStart  = myID*nxLocal;
        if (myID == iNumOfProc-1) nxLocal -= (nxLocal*iNumOfProc - nx); // the last proc gets fewer columns
    }
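    // e.g. for nx = 32 and iNumOfProc = 3 (a case that hangs for me), this yields
    //   proc0: nxLocal = 11, ixStart =  0
    //   proc1: nxLocal = 11, ixStart = 11
    //   proc2: nxLocal = 10, ixStart = 22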
    std::cout << " proc" << myID << " " << nxLocal << " " << ixStart << " " << nxLocal*ny << std::endl;

    // fill the local block with its global linear indices
    double *data = (double *) calloc(nxLocal*ny, sizeof(double));
    for (int ix = 0; ix < nxLocal; ++ix) {
        for (int iy = 0; iy < ny; ++iy) {
            data[ix*ny+iy] = double((ix+ixStart)*ny + iy);
        }
    }

    // file name encodes the process count, e.g. "_test.np03.h5"
    std::string filename = "_test.", fname;
    char buffer[50];
    sprintf(buffer, "np%02d", (int)iNumOfProc);
    filename.append(buffer);
    fname = filename; fname.append(".h5");

    writeH5<float>(fname.c_str(), data);
    free(data);

    if (myID == root) std::cout << " ### done.\n\n";
    MPI_Finalize();
    return 0;
}