I haven’t checked the performance yet, but the error you are running into is a
known issue that you can fix in either of two ways:
1) Create the file on process 0 using the MPI-IO VFD with MPI_COMM_SELF (a fuller sketch follows this list):
hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio( file_access, MPI_COMM_SELF, MPI_INFO_NULL );
2) Or create the dataset on process 0 with space allocation set to H5D_ALLOC_TIME_EARLY:
hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
H5Pset_layout(plist, H5D_CHUNKED);
H5Pset_alloc_time(plist, H5D_ALLOC_TIME_EARLY);
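As a rough illustration of option 1 (an untested sketch adapted from the reproducer below; the name create_access is just illustrative), the rank-0 creation block would open the file through the MPI-IO VFD, but over a communicator that contains only rank 0:

    // Sketch only: rank 0 creates the file through the MPI-IO VFD,
    // using a communicator that contains only itself.
    if (rk == 0) {
        hid_t create_access = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio( create_access, MPI_COMM_SELF, MPI_INFO_NULL );
        hid_t fid = H5Fcreate( "test.h5", H5F_ACC_TRUNC, H5P_DEFAULT, create_access );
        // ... create the chunked dataset exactly as before ...
        H5Fclose( fid );
        H5Pclose( create_access );
    }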
Either of these should fix the error you are seeing. The problem is that when you
create the file on process 0 with the sec2 driver, the space allocation for the
dataset defaults to LATE; parallel access, however, requires it to be set to
EARLY. We were debating whether this is a bug or just a documentation issue that
we should make clearer for dataset creation. I believe we decided on the latter,
but I couldn’t find where this is documented, so I’ll follow up on that
internally as well.
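Concretely, option 2 only adds one call to the dataset-creation property list in the rank-0 block of the reproducer below (sketch only, untested):

    // Sketch only: request EARLY space allocation before creating the dataset.
    hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_layout(plist, H5D_CHUNKED);
    hsize_t chunk_dims[2] = {1, global_nElements};
    H5Pset_chunk(plist, 2, chunk_dims);
    H5Pset_alloc_time(plist, H5D_ALLOC_TIME_EARLY);  // the added line
    hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);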
I’ll look into performance once I get some time.
Thanks
Mohamad
From: Hdf-forum [mailto:[email protected]] On Behalf Of Frederic Perez
Sent: Friday, September 04, 2015 9:33 AM
To: HDF Users Discussion List
Subject: Re: [Hdf-forum] Slow or buggy using H5Sselect_elements
I finally found a way to reproduce the errors I sent in the first post (code
pasted below).
The problem is that the collective data transfer does not work after using
H5Dset_extent. This behaviour does not even depend on H5Sselect_elements, as I
initially thought.
Does anybody know if there is something wrong with my use of H5Dset_extent?
Does anybody know why I do not get any performance improvement when using
H5FD_MPIO_COLLECTIVE?
Thank you for your help
Fred
#include <mpi.h>
#include <hdf5.h>
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <ctime>

using namespace std;

int main (int argc, char* argv[])
{
    int mpi_provided, sz, rk;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_FUNNELED, &mpi_provided );
    MPI_Comm_size( MPI_COMM_WORLD, &sz );
    MPI_Comm_rank( MPI_COMM_WORLD, &rk );

    hsize_t local_nElements = 10000;
    hsize_t global_nElements = sz*local_nElements;

    vector<int> vec;
    vec.resize(1);

    hsize_t dims[2] = {0, global_nElements};

    if (rk==0) {
        // create file
        hid_t fid = H5Fcreate( "test.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
        // prepare file space
        hsize_t max_dims[2] = {H5S_UNLIMITED, global_nElements}; // not really needed but for future use
        hid_t file_space = H5Screate_simple(2, dims, max_dims);
        // prepare dataset
        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        hsize_t chunk_dims[2] = {1, global_nElements};
        H5Pset_chunk(plist, 2, chunk_dims);
        // create dataset
        hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
        H5Dclose(did);
        H5Pclose(plist);
        H5Sclose(file_space);
        H5Fclose( fid );
        // make a randomized vector
        vec.resize(global_nElements);
        for(int i=0; i<global_nElements; i++) vec[i]=i;
        random_shuffle(vec.begin(), vec.end());
    }

    // Scatter the randomized vector
    int * v = &vec[0];
    int * data = new int[local_nElements];
    MPI_Scatter( v, local_nElements, MPI_INT, data, local_nElements, MPI_INT, 0, MPI_COMM_WORLD);

    hsize_t * coords = new hsize_t[local_nElements*2];
    for(int i=0; i<local_nElements; i++) {
        coords[i*2  ] = 0;
        coords[i*2+1] = data[i];
    }

    clock_t start = clock();

    // define MPI file access
    hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio( file_access, MPI_COMM_WORLD, MPI_INFO_NULL );
    // define MPI transfer mode
    hid_t transfer = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio( transfer, H5FD_MPIO_COLLECTIVE);

    // Open the file
    hid_t fid = H5Fopen( "test.h5", H5F_ACC_RDWR, file_access);
    // Open the existing dataset
    hid_t did = H5Dopen( fid, "Id", H5P_DEFAULT );

    dims[0]++;
    H5Dset_extent(did, dims);

    // Get the file space
    hid_t file_space = H5Dget_space(did);
    // Define the memory space for this proc
    hsize_t count[2] = {1, (hsize_t) local_nElements};
    hid_t mem_space = H5Screate_simple(2, count, NULL);
    // Select the elements for this particular proc (the `coords` array has been created before)
    H5Sselect_elements( file_space, H5S_SELECT_SET, local_nElements, coords );
    // Write the previously generated `data` array
    H5Dwrite( did, H5T_NATIVE_INT, mem_space, file_space, transfer, data );

    H5D_mpio_actual_io_mode_t actual_io_mode;
    H5Pget_mpio_actual_io_mode(transfer, &actual_io_mode);
    cout << "rank " << rk << " - actual_io_mode " << actual_io_mode << endl;

    // Close stuff
    H5Sclose(file_space);
    H5Dclose(did);
    H5Fclose( fid );

    double duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
    cout << "rank " << rk << ", duration = " << duration << endl;

    MPI_Finalize();
}
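For completeness, the actual_io_mode check above can be extended to report why a collective request fell back to independent I/O, and the timing can use wall-clock time instead of CPU time. A rough sketch, assuming HDF5 1.8.10 or newer for H5Pget_mpio_no_collective_cause (and <cstdint> for uint32_t):

    // Sketch only: time the write with MPI_Wtime (wall clock) and ask HDF5
    // why, if at all, the collective request was broken.
    double t0 = MPI_Wtime();
    H5Dwrite( did, H5T_NATIVE_INT, mem_space, file_space, transfer, data );
    double t1 = MPI_Wtime();

    uint32_t local_cause = 0, global_cause = 0;
    H5Pget_mpio_no_collective_cause( transfer, &local_cause, &global_cause );
    cout << "rank " << rk << " write time " << (t1 - t0) << " s"
         << ", no-collective cause (local/global) = "
         << local_cause << "/" << global_cause << endl;

If both causes come back as 0 (H5D_MPIO_COLLECTIVE), the write really was performed collectively and the lack of speedup has some other explanation.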
2015-09-04 0:21 GMT+02:00 Frederic Perez <[email protected]>:
In response to Mohamad Chaarawi, here is a complete code that compiles, but it
surprisingly does not raise the errors that I obtained with my full code. I will
look into that. In the meantime, I found that the performance of the short
version below does not change when I remove the line that sets
H5FD_MPIO_COLLECTIVE.
Cheers
Fred
#include <mpi.h>
#include <hdf5.h>
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <ctime>

using namespace std;

int main (int argc, char* argv[])
{
    int mpi_provided, sz, rk;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_FUNNELED, &mpi_provided );
    MPI_Comm_size( MPI_COMM_WORLD, &sz );
    MPI_Comm_rank( MPI_COMM_WORLD, &rk );

    hsize_t local_nElements = 10000;
    hsize_t global_nElements = sz*local_nElements;

    vector<int> vec;
    vec.resize(1);

    if (rk==0) {
        // create file
        hid_t fid = H5Fcreate( "test.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
        // prepare file space
        hsize_t dims[2]     = {1, global_nElements};
        hsize_t max_dims[2] = {H5S_UNLIMITED, global_nElements}; // not really needed but for future use
        hid_t file_space = H5Screate_simple(2, dims, max_dims);
        // prepare dataset
        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        hsize_t chunk_dims[2] = {1, global_nElements};
        H5Pset_chunk(plist, 2, chunk_dims);
        // create dataset
        hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
        H5Dclose(did);
        H5Pclose(plist);
        H5Sclose(file_space);
        H5Fclose( fid );
        // make a randomized vector
        vec.resize(global_nElements);
        for(int i=0; i<global_nElements; i++) vec[i]=i;
        random_shuffle(vec.begin(), vec.end());
    }

    // Scatter the randomized vector
    int * v = &vec[0];
    int * data = new int[local_nElements];
    MPI_Scatter( v, local_nElements, MPI_INT, data, local_nElements, MPI_INT, 0, MPI_COMM_WORLD);

    hsize_t * coords = new hsize_t[local_nElements*2];
    for(int i=0; i<local_nElements; i++) {
        coords[i*2  ] = 0;
        coords[i*2+1] = data[i];
    }

    clock_t start = clock();

    // define MPI file access
    hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio( file_access, MPI_COMM_WORLD, MPI_INFO_NULL );
    // define MPI transfer mode
    hid_t transfer = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio( transfer, H5FD_MPIO_COLLECTIVE);

    // Open the file
    hid_t fid = H5Fopen( "test.h5", H5F_ACC_RDWR, file_access);
    // Open the existing dataset
    hid_t did = H5Dopen( fid, "Id", H5P_DEFAULT );

    // Get the file space
    hid_t file_space = H5Dget_space(did);
    // Define the memory space for this proc
    hsize_t count[2] = {1, (hsize_t) local_nElements};
    hid_t mem_space = H5Screate_simple(2, count, NULL);
    // Select the elements for this particular proc (the `coords` array has been created before)
    H5Sselect_elements( file_space, H5S_SELECT_SET, local_nElements, coords );
    // Write the previously generated `data` array
    H5Dwrite( did, H5T_NATIVE_INT, mem_space, file_space, transfer, data );

    // Close stuff
    H5Sclose(file_space);
    H5Dclose(did);
    H5Fclose( fid );

    double duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
    cout << rk << " " << duration << endl;

    MPI_Finalize();
}