Dear hdf-forum members,

I have a problem I am hoping someone can help me with. I have a program
that outputs a 2D array (contiguous, indexed linearly) using parallel
HDF5. Whenever I choose a number of processes that is not a power of 2
(i.e., anything other than 1, 2, 4, 8, ...), H5Fclose() hangs,
inexplicably. I'm using HDF5 v1.8.14 and OpenMPI 1.7.2, on top of
GCC 4.8 on Linux.

Can someone help me pinpoint my mistake?

I have searched the forum for "h5fclose hangs". The first hit was a
user mistake that, to the best of my knowledge, I am not making. The
second thread never went beyond the initial problem description and
offered no solution.

Attached is a demonstrator program (perhaps not as bare-boned as it
could be; apologies). Strangely, the hang only happens for nx >= 32.
The code is adapted from an HDF5 example program.

The demonstrator is compiled with
h5pcc test.hangs.cpp -DVERBOSE -lstdc++
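
and run with OpenMPI's launcher (no -o is given, so the binary is a.out), e.g.

mpirun -np 3 ./a.out

With 3 (or any other non-power-of-2 number of) processes it hangs for me; with 4 it completes.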

( On my system, for some strange reason, MPI was built with the
deprecated C++ bindings, so I also need to link with -lmpi_cxx; that
shouldn't be necessary for anyone else. I hope that's not the cause of
the hangs. )

Thanks in advance for your help!

Wolf Dapp


#include <stdlib.h> // calloc(), free()
#include <stdio.h>  // sprintf()
#include <stddef.h> // ptrdiff_t
#include <iostream> // std::cout, std::cerr
#include <string>
#include <cassert>  // assert()

#include <mpi.h>
#include <hdf5.h>

int myID = 0, iNumOfProc = 1, root = 0; // MPI rank, number of processes, root rank
const int nx = 32, ny = nx;             // global 2D-array dimensions
int nxLocal, ixStart;                   // per-process block size and global start index (along x)

inline int getLinearIndex(int ix, int iy) { return ix*ny+iy; } // row-major linear index

template<typename T> struct TypeIsDouble           { static const bool value = false; }; // helper traits to detect double
template<>           struct TypeIsDouble< double > { static const bool value = true;  };

template<typename T>
void writeH5( const char *name, double * field)
{ 
  // nxLocal     == how many columns (of length ny each) this process owns [i.e., the local block is nxLocal x ny]
  // ixStart     == (global) index of the beginning of (local) block
  // myID        == index of local process
  // field       == local data array, of size nxLocal*ny, contiguous

  // ===============================================================
  // =================== write output to HDF5 ======================
  // ===============================================================
#ifdef VERBOSE
  if (myID == root) { std::cout << " # ...writing to file " << name << "...\n"; }
#endif // VERBOSE

  /* HDF5 APIs definitions */
  hid_t	file_id, dset_id;	/* file and dataset identifiers */
  hid_t	filespace, memspace;	/* file and memory dataspace identifiers */
  hsize_t	dimsf[2];	/* dataset dimensions */
  hsize_t	chunk_dims[2];	/* chunk dimensions */
  hsize_t	count[2];	/* hyperslab selection parameters */
  hsize_t	stride[2];
  hsize_t	block[2];
  hsize_t	offset[2];
  hid_t	plist_id;		/* property list identifier */
  herr_t	status_h5;
  hid_t datatype = (TypeIsDouble< T >::value) ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT;
  T * data = NULL;
  MPI_Info info  = MPI_INFO_NULL;

  /* Set up file access property list with parallel I/O access */
  plist_id  = H5Pcreate(H5P_FILE_ACCESS);
  status_h5 = H5Pset_fapl_mpio(plist_id, MPI_COMM_WORLD, info); assert(status_h5 >= 0);
  
  /* Create a new file collectively and release property list identifier. */
  std::string filename(name);
  file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); assert(file_id >= 0);
  status_h5 = H5Pclose(plist_id); assert(status_h5 >= 0);

  /* Create the dataspace for the dataset. */
  dimsf[0] = nx;
  dimsf[1] = ny;
  chunk_dims[0] = nxLocal;
  chunk_dims[1] = ny;
  filespace = H5Screate_simple(2, dimsf, NULL); 
  memspace  = H5Screate_simple(2, chunk_dims, NULL);    

  /* Create chunked dataset. */
  plist_id  = H5Pcreate(H5P_DATASET_CREATE);
  status_h5 = H5Pset_layout( plist_id, H5D_CHUNKED ); assert(status_h5 >= 0);
  status_h5 = H5Pset_chunk(plist_id, 2, chunk_dims);  assert(status_h5 >= 0);
  
  std::string fname = name;
  std::size_t pos = fname.find(".");      // position of the first occurrence of "." in fname
  std::string dataset = fname.substr(0, pos);
  
  dset_id = H5Dcreate(file_id, dataset.c_str(), datatype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); // dataset.c_str() is the data set name
  status_h5 = H5Pclose(plist_id);  assert(status_h5 >= 0);
  status_h5 = H5Sclose(filespace); assert(status_h5 >= 0);

  /* Each process defines dataset in memory and writes it to the hyperslab in the file. */
  count[0] = 1;               // one block in x-direction
  count[1] = 1;               // one block in y-direction
  stride[0] = 1;              // the blocks have no gaps between them in x-direction
  stride[1] = 1;              // the blocks have no gaps between them in y-direction
  block[0] = chunk_dims[0];   // contiguous x-size of block is chunk_dims[0]
  block[1] = chunk_dims[1];   // contiguous y-size of block is chunk_dims[1]
  offset[0] = ixStart;        // x-index [in data file] of (0,0) entry [in memory] of block
  offset[1] = 0;              // y-index [in data file] of (0,0) entry [in memory] of block
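  /* i.e., this rank writes the single block [ixStart, ixStart+nxLocal) x [0, ny) of the file */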

  /* Select hyperslab in the file. */
  filespace = H5Dget_space(dset_id);
  status_h5 = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, block); assert(status_h5 >= 0);
  
  /* Create property list for collective dataset write. */
  plist_id  = H5Pcreate(H5P_DATASET_XFER);
  status_h5 = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); assert(status_h5 >= 0);

  /* Initialize the write buffer (this also converts from double to T); memory layout *may be* different from file layout, due to padding */
  data = (T *) calloc(chunk_dims[0]*chunk_dims[1],sizeof(T));
  for(ptrdiff_t ix = 0; ix < nxLocal; ix++)
  {
    for(ptrdiff_t iy = 0; iy < ny; iy++)
    {
      ptrdiff_t kmem  = getLinearIndex(ix,iy); // index into the local memory array
      ptrdiff_t kfile = ix*ny + iy;            // index into the write buffer (identical here, but kept separate for generality)
      data[kfile] = field[kmem];
    }
  }

  /* Write to file */   
  status_h5 = H5Dwrite(dset_id, datatype, memspace, filespace, plist_id, data); assert(status_h5 >= 0);
  free(data);

  /* Close/release resources. */
  status_h5 = H5Dclose(dset_id);   assert(status_h5 >= 0);
  status_h5 = H5Sclose(filespace); assert(status_h5 >= 0);
  status_h5 = H5Sclose(memspace);  assert(status_h5 >= 0);
  status_h5 = H5Pclose(plist_id);  assert(status_h5 >= 0);
#ifdef VERBOSE
  std::cout << "proc"<<myID << " I CAN REACH THIS\n";
#endif // VERBOSE
  status_h5 = H5Fclose(file_id);   assert(status_h5 >= 0); // WARNING: the number of procs must be a power of 2, else this call deadlocks; no clue why
#ifdef VERBOSE
  std::cout << "proc"<<myID << " BUT NOT THIS\n";
#endif // VERBOSE
}

int main(int argc, char **argv)
{
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &iNumOfProc);
  MPI_Comm_rank (MPI_COMM_WORLD, &myID);
  
  if (1.0*nx/iNumOfProc < 4)
  {
    if (myID == root) std::cerr << " ##### np " << iNumOfProc << ": System too small.\n\n";
    MPI_Finalize();
    exit(0);
  }
  
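  /* Decompose the domain along x: each rank owns nxLocal contiguous columns
     (of length ny) starting at global column ixStart. If nx is not divisible
     by the number of processes, every rank takes ceil(nx/iNumOfProc) columns
     and the last rank takes whatever remains. */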
  nxLocal = nx/iNumOfProc;
  ixStart = myID*nxLocal;
  if ((nx%iNumOfProc) != 0) 
  { 
    nxLocal += 1; 
    ixStart = myID*nxLocal;
    if (myID == iNumOfProc-1) nxLocal -= (nxLocal*iNumOfProc-nx); // the last proc gets fewer elements
  }
  std::cout << " proc"<<myID << " " << nxLocal << " " << ixStart << " " << nxLocal*ny << std::endl;
  
  double * data = (double *) calloc(nxLocal*ny, sizeof(double));
  for (int ix = 0; ix < nxLocal; ++ix) {
    for (int iy = 0; iy < ny; ++iy) {
      data[ix*ny+iy] = double((ix+ixStart)*ny+iy); // value == global linear index
    }
  }
  
  std::string filename = "_test.", fname;
  char buffer[50];
  sprintf(buffer,"np%02d",(int)iNumOfProc);
  filename.append(buffer);
  fname = filename; fname.append(".h5");
  writeH5<float>(fname.c_str(), data); // data is double, but is written as float (datatype chosen via the template parameter)
  
  free(data);
  if (myID == root) std::cout <<  " ### done.\n\n";

  MPI_Finalize();
  return 0;
}