Hello,

I have an HDF5 file with a 129x128x128 dataset that is chunked into
32x32x32 blocks.  When I try to read the dataset in parallel from a
piece of Fortran 90 code, with 8 MPI ranks, it seems that some values
get skipped.  Attached is a small program demonstrating the problem,
as well as the HDF5 file in question.  I've tested on Linux, with GCC
5.4.0 and HDF5 1.10.0-patch1, built against OpenMPI 2.0.1.  To build
the program:
   mpifort -o foo foo.f90 -lhdf5_fortran -lhdf5
Then, unpack the archive with the HDF5 file:
   tar xfv foo.tgz
and then run the program:
   mpirun -np 8 ./foo

All values in the HDF5 file are set to 0.  The array in the program
is initialized to very large values, and then the dataset is read in
parallel.  Note that there is a small overlap in the reads between
the MPI ranks; the actual problem occurred in a finite difference
code, and the overlap mimics its halo regions.  After reading the
values from the HDF5 file, the program checks whether all array
elements are set to 0, and reports any that still hold the very
large initial value.  Quite a few elements get reported with wrong
values when I run the code.  However, if I repack the HDF5 file so
that the chunking is removed:
   h5repack -l CONTI foo.h5 bar.h5 && mv bar.h5 foo.h5
then all values are read properly.
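
For reference, the chunked layout of the dataset (named 'w', as in
the program below) can be confirmed with:
   h5dump -p -H -d /w foo.h5
which should report a CHUNKED storage layout with 32x32x32 chunks.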

I tried to find out whether there are any limitations on selections
when the dataset is chunked, but was not able to confirm that this
could be the issue here.  Otherwise, I really have no idea what's
wrong...  So, any suggestions?
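
One more data point I can offer, though I have not tried it yet:
switching the transfer property list in the program below from
collective to independent I/O, to see whether the problem is
specific to the collective read path:
   call h5pset_dxpl_mpio_f(transfer_properties, H5FD_MPIO_INDEPENDENT_F, error)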

Thanks.
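
! foo.f90 - reads the chunked dataset with 8 MPI ranks using
! collective I/O and reports every element the read left untouched.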
program main
  use hdf5
  use mpi

  implicit none

  integer, parameter :: NP = 8  ! expected number of MPI ranks
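  ! Per-rank subdomain bounds (1-based, inclusive).  Neighbouring
  ! blocks overlap by two planes at internal boundaries, mimicking
  ! the halo regions of the original finite difference code.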
  integer, parameter, dimension(0 : NP - 1) :: X1 = (/ 1, 64, 1, 64, 1, 64, &
       1, 64 /)
  integer, parameter, dimension(0 : NP - 1) :: Y1 = (/ 1, 1, 64, 64, 1, 1, &
       64, 64 /)
  integer, parameter, dimension(0 : NP - 1) :: Z1 = (/ 1, 1, 1, 1, 64, 64, &
       64, 64 /)
  integer, parameter, dimension(0 : NP - 1) :: X2 = (/ 65, 128, 65, 128, 65, &
       128, 65, 128 /)
  integer, parameter, dimension(0 : NP - 1) :: Y2 = (/ 65, 65, 128, 128, 65, &
       65, 128, 128 /)
  integer, parameter, dimension(0 : NP - 1) :: Z2 = (/ 65, 65, 65, 65, 129, &
       129, 129, 129 /)
  double precision, parameter :: INVALID_VALUE = huge(0.0d0)
  character(*), parameter :: FILE_NAME = 'foo.h5'
  character(*), parameter :: DATASET_NAME = 'w'

  integer :: status
  integer :: error
  integer :: size, rank
  double precision, dimension(:, :, :), allocatable :: w
  integer(hsize_t), dimension(3) :: dims
  integer(hsize_t), dimension(3) :: offset, count
  integer(hid_t) :: access_properties, transfer_properties
  integer(hid_t) :: file
  integer(hid_t) :: dataset
  integer(hid_t) :: file_dataspace, memory_dataspace
  integer :: i, j, k

  call mpi_init(error)
  if (error /= MPI_SUCCESS) stop 'MPI error'

  call mpi_comm_size(MPI_COMM_WORLD, size, error)
  if (error /= MPI_SUCCESS) stop 'MPI error'
  if (size /= NP) stop 'invalid number of ranks'

  call mpi_comm_rank(MPI_COMM_WORLD, rank, error)
  if (error /= MPI_SUCCESS) stop 'MPI error'

  allocate(w(X1(rank) : X2(rank), Y1(rank) : Y2(rank), Z1(rank) : Z2(rank)), &
       stat = status)
  if (status /= 0) stop 'memory allocation error'
  w = INVALID_VALUE

  dims = shape(w)
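  ! HDF5 offsets are zero-based; offset and count are given in
  ! Fortran index order (the Fortran wrappers reverse them for the
  ! C library).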
  offset = (/ X1(rank) - 1, Y1(rank) - 1, Z1(rank) - 1 /)
  count = (/ X2(rank) - X1(rank) + 1, Y2(rank) - Y1(rank) + 1, &
       Z2(rank) - Z1(rank) + 1 /)

  call h5open_f(error)
  if (error /= 0) stop 'HDF5 error'

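  ! File access property list: open the file through the MPI-IO driver.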
  call h5pcreate_f(H5P_FILE_ACCESS_F, access_properties, error)
  if (error /= 0) stop 'HDF5 error'
  call h5pset_fapl_mpio_f(access_properties, MPI_COMM_WORLD, MPI_INFO_NULL, &
       error)
  if (error /= 0) stop 'HDF5 error'

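  ! Transfer property list: request collective reads.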
  call h5pcreate_f(H5P_DATASET_XFER_F, transfer_properties, error)
  if (error /= 0) stop 'HDF5 error'
  call h5pset_dxpl_mpio_f(transfer_properties, H5FD_MPIO_COLLECTIVE_F, error)
  if (error /= 0) stop 'HDF5 error'

  call h5fopen_f(FILE_NAME, H5F_ACC_RDONLY_F, file, error, &
       access_prp = access_properties)
  if (error /= 0) stop 'HDF5 error'

  call h5dopen_f(file, DATASET_NAME, dataset, error)
  if (error /= 0) stop 'HDF5 error'

  call h5dget_space_f(dataset, file_dataspace, error)
  if (error /= 0) stop 'HDF5 error'

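  ! Select this rank's (overlapping) block in the file dataspace.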
  call h5sselect_hyperslab_f(file_dataspace, H5S_SELECT_SET_F, offset, &
       count, error)
  if (error /= 0) stop 'HDF5 error'

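  ! Memory dataspace matching the local array; the h5sselect_all_f
  ! call below is redundant (a newly created dataspace already
  ! selects all elements) but harmless.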
  call h5screate_simple_f(3, count, memory_dataspace, error)
  if (error /= 0) stop 'HDF5 error'
       
  call h5sselect_all_f(memory_dataspace, error)
  if (error /= 0) stop 'HDF5 error'

  call h5dread_f(dataset, H5T_NATIVE_DOUBLE, w, dims, error, &
            file_space_id = file_dataspace, mem_space_id = memory_dataspace, &
            xfer_prp = transfer_properties)
  if (error /= 0) stop 'HDF5 error'

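  ! Any element still holding INVALID_VALUE was not filled by the read.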
  do i = X1(rank), X2(rank)
     do j = Y1(rank), Y2(rank)
        do k = Z1(rank), Z2(rank)
           if (w(i, j, k) == INVALID_VALUE) then
              write (*, *) 'Error: ', rank, i, j, k
           end if
        end do
     end do
  end do

  call h5sclose_f(file_dataspace, error)
  if (error /= 0) stop 'HDF5 error'

  call h5sclose_f(memory_dataspace, error)
  if (error /= 0) stop 'HDF5 error'

  call h5dclose_f(dataset, error)
  if (error /= 0) stop 'HDF5 error'

  call h5fclose_f(file, error)
  if (error /= 0) stop 'HDF5 error'

  call h5pclose_f(access_properties, error)
  if (error /= 0) stop 'HDF5 error'

  call h5pclose_f(transfer_properties, error)
  if (error /= 0) stop 'HDF5 error'

  call h5close_f(error)
  if (error /= 0) stop 'HDF5 error'

  deallocate(w, stat = status)
  if (status /= 0) stop 'memory deallocation error'

  call mpi_finalize(error)
  if (error /= MPI_SUCCESS) stop 'MPI error'
end program main

Attachment: foo.tgz
