I haven’t checked the performance yet, but the error you are running into is a
known issue that you can fix in one of two ways:

1) Create the file on process 0 using the MPI-IO VFD with MPI_COMM_SELF:

        hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio( file_access, MPI_COMM_SELF, MPI_INFO_NULL );

2) Or create the dataset on process 0 with space allocation set to H5D_ALLOC_TIME_EARLY:

        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        H5Pset_alloc_time(plist, H5D_ALLOC_TIME_EARLY);

This should fix the error you are seeing. The problem is that when you create
the file on process 0 with the sec2 driver, the space allocation for the
dataset defaults to LATE, but parallel access requires it to be set to EARLY.
We were debating whether this is a bug or just a documentation issue that we
should make clearer for dataset creation. I believe we decided on the latter,
but I couldn’t find where this is documented, so I’ll follow up on that
internally as well.
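
For reference, here is a minimal sketch of option 2 applied to the rank-0 creation block of your reproducer; it reuses the fid, file_space and global_nElements identifiers from your code below and is only meant to show where the H5Pset_alloc_time call goes:

        // Process 0 only: create the chunked dataset with EARLY space allocation,
        // so the file can later be opened and written in parallel with the MPI-IO driver.
        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        hsize_t chunk_dims[2] = {1, global_nElements};
        H5Pset_chunk(plist, 2, chunk_dims);
        H5Pset_alloc_time(plist, H5D_ALLOC_TIME_EARLY);  // allocate chunk space at creation time
        hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);
        H5Dclose(did);
        H5Pclose(plist);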

I’ll look into performance once I get some time.
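
In the meantime, in addition to the H5Pget_mpio_actual_io_mode value you already print, you could query the transfer property list with H5Pget_mpio_no_collective_cause right after H5Dwrite to see whether the write really went collective (that call is available in HDF5 1.8.10 and later). A rough sketch, using the transfer and rk identifiers from your code:

    // Right after the H5Dwrite(..., transfer, data) call:
    H5D_mpio_actual_io_mode_t io_mode;
    H5Pget_mpio_actual_io_mode(transfer, &io_mode);   // what kind of I/O was actually performed
    uint32_t local_cause = 0, global_cause = 0;
    H5Pget_mpio_no_collective_cause(transfer, &local_cause, &global_cause);
    // A global cause of 0 (H5D_MPIO_COLLECTIVE) means collective I/O really happened;
    // any other bits tell you why the library fell back to independent I/O.
    cout << "rank " << rk << " io_mode " << io_mode
         << " no_collective_cause " << global_cause << endl;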

Thanks
Mohamad

From: Hdf-forum [mailto:[email protected]] On Behalf Of 
Frederic Perez
Sent: Friday, September 04, 2015 9:33 AM
To: HDF Users Discussion List
Subject: Re: [Hdf-forum] Slow or buggy using H5Sselect_elements

I finally found a way to reproduce the errors I sent in the first post (code
pasted below).
The problem is that the collective data transfer does not work after using
H5Dset_extent. This behaviour does not even depend on H5Sselect_elements, as I
initially thought.
Does anybody know if there is something wrong with my use of H5Dset_extent?
Does anybody know why I do not get any performance improvement when using
H5FD_MPIO_COLLECTIVE?
Thank you for your help
Fred



#include <mpi.h>
#include <hdf5.h>
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <ctime>

using namespace std;

int main (int argc, char* argv[])
{
    int mpi_provided, sz, rk;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_FUNNELED, &mpi_provided );
    MPI_Comm_size( MPI_COMM_WORLD, &sz );
    MPI_Comm_rank( MPI_COMM_WORLD, &rk );

    hsize_t local_nElements = 10000;
    hsize_t global_nElements = sz*local_nElements;

    vector<int> vec;
    vec.resize(1);

    hsize_t dims[2] = {0, global_nElements};

    if (rk==0) {
        // create file
        hid_t fid = H5Fcreate( "test.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
        // prepare file space
        hsize_t max_dims[2] = {H5S_UNLIMITED, global_nElements}; // not really needed but for future use
        hid_t file_space = H5Screate_simple(2, dims, max_dims);
        // prepare dataset
        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        hsize_t chunk_dims[2] = {1, global_nElements};
        H5Pset_chunk(plist, 2, chunk_dims);
        // create dataset
        hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);

        H5Dclose(did);
        H5Pclose(plist);
        H5Sclose(file_space);
        H5Fclose( fid );


        // make a randomized vector
        vec.resize(global_nElements);
        for(int i=0; i<global_nElements; i++) vec[i]=i;
        random_shuffle(vec.begin(), vec.end());
    }

    // Scatter the randomized vector
    int * v = &vec[0];
    int * data = new int[local_nElements];
    MPI_Scatter( v, local_nElements, MPI_INT, data, local_nElements, MPI_INT, 0, MPI_COMM_WORLD);

    hsize_t * coords = new hsize_t[local_nElements*2];
    for(int i=0; i<local_nElements; i++) {
        coords[i*2  ] = 0;
        coords[i*2+1] = data[i];
    }

    clock_t start = clock();

    // define MPI file access
    hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio( file_access, MPI_COMM_WORLD, MPI_INFO_NULL );
    // define MPI transfer mode
    hid_t transfer = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio( transfer, H5FD_MPIO_COLLECTIVE);
    // Open the file
    hid_t fid = H5Fopen( "test.h5", H5F_ACC_RDWR, file_access);
    // Open the existing dataset
    hid_t did = H5Dopen( fid, "Id", H5P_DEFAULT );
    dims[0] ++;
    H5Dset_extent(did, dims);
    // Get the file space
    hid_t file_space = H5Dget_space(did);
    // Define the memory space for this proc
    hsize_t count[2] = {1, (hsize_t) local_nElements};
    hid_t mem_space = H5Screate_simple(2, count, NULL);
    // Select the elements for this particular proc (the `coords` array has been created before)
    H5Sselect_elements( file_space, H5S_SELECT_SET, local_nElements, coords );
    // Write the previously generated `data` array
    H5Dwrite( did, H5T_NATIVE_INT, mem_space , file_space , transfer, data );
    H5D_mpio_actual_io_mode_t actual_io_mode;
    H5Pget_mpio_actual_io_mode(transfer, &actual_io_mode);
    cout << "rank " << rk << " - actual_io_mode " << actual_io_mode << endl;
    // Close stuff
    H5Sclose(mem_space);
    H5Sclose(file_space);
    H5Dclose(did);
    H5Pclose(transfer);
    H5Pclose(file_access);
    H5Fclose( fid );

    double duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
    cout << "rank " << rk << ", duration = " << duration << endl;

    MPI_Finalize();
}

2015-09-04 0:21 GMT+02:00 Frederic Perez <[email protected]>:
In response to Mohamad Chaarawi, here is a complete code that compiles, but
surprisingly it does not raise the errors that I obtained with my full code. I
will investigate that. In the meantime, I found that the performance of the
short version below does not change when I remove the line that sets
H5FD_MPIO_COLLECTIVE.
Cheers
Fred

#include <mpi.h>
#include <hdf5.h>
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <ctime>

using namespace std;

int main (int argc, char* argv[])
{
    int mpi_provided, sz, rk;
    MPI_Init_thread( &argc, &argv, MPI_THREAD_FUNNELED, &mpi_provided );
    MPI_Comm_size( MPI_COMM_WORLD, &sz );
    MPI_Comm_rank( MPI_COMM_WORLD, &rk );

    hsize_t local_nElements = 10000;
    hsize_t global_nElements = sz*local_nElements;

    vector<int> vec;
    vec.resize(1);

    if (rk==0) {
        // create file
        hid_t fid = H5Fcreate( "test.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
        // prepare file space
        hsize_t dims[2] = {1, global_nElements};
        hsize_t max_dims[2] = {H5S_UNLIMITED, global_nElements}; // not really needed but for future use
        hid_t file_space = H5Screate_simple(2, dims, max_dims);
        // prepare dataset
        hid_t plist = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_layout(plist, H5D_CHUNKED);
        hsize_t chunk_dims[2] = {1, global_nElements};
        H5Pset_chunk(plist, 2, chunk_dims);
        // create dataset
        hid_t did = H5Dcreate(fid, "Id", H5T_NATIVE_INT, file_space, H5P_DEFAULT, plist, H5P_DEFAULT);

        H5Dclose(did);
        H5Pclose(plist);
        H5Sclose(file_space);
        H5Fclose( fid );


        // make a randomized vector
        vec.resize(global_nElements);
        for(int i=0; i<global_nElements; i++) vec[i]=i;
        random_shuffle(vec.begin(), vec.end());
    }

    // Scatter the randomized vector
    int * v = &vec[0];
    int * data = new int[local_nElements];
    MPI_Scatter( v, local_nElements, MPI_INT, data, local_nElements, MPI_INT, 0, MPI_COMM_WORLD);

    hsize_t * coords = new hsize_t[local_nElements*2];
    for(int i=0; i<local_nElements; i++) {
        coords[i*2  ] = 0;
        coords[i*2+1] = data[i];
    }

    clock_t start = clock();

    // define MPI file access
    hid_t file_access = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio( file_access, MPI_COMM_WORLD, MPI_INFO_NULL );
    // define MPI transfer mode
    hid_t transfer = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio( transfer, H5FD_MPIO_COLLECTIVE);
    // Open the file
    hid_t fid = H5Fopen( "test.h5", H5F_ACC_RDWR, file_access);
    // Open the existing dataset
    hid_t did = H5Dopen( fid, "Id", H5P_DEFAULT );
    // Get the file space
    hid_t file_space = H5Dget_space(did);
    // Define the memory space for this proc
    hsize_t count[2] = {1, (hsize_t) local_nElements};
    hid_t mem_space = H5Screate_simple(2, count, NULL);
    // Select the elements for this particular proc (the `coords` array has been created before)
    H5Sselect_elements( file_space, H5S_SELECT_SET, local_nElements, coords );
    // Write the previously generated `data` array
    H5Dwrite( did, H5T_NATIVE_INT, mem_space , file_space , transfer, data );
    // Close stuff
    H5Sclose(mem_space);
    H5Sclose(file_space);
    H5Dclose(did);
    H5Pclose(transfer);
    H5Pclose(file_access);
    H5Fclose( fid );

    double duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
    cout << rk << " " << duration << endl;

    MPI_Finalize();
}

_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5
