Hi,

We just got hit by a problem with the sharedfp/lockedfile component under
v2.0.1 (the code should be identical in v2.0.2). We had 2 instances of an
MPI program running concurrently on the same input file, both using the
MPI_File_read_shared() function ...

If the shared file pointer is maintained by the lockedfile component, an
"XXX.lockedfile" file is created next to the data file. Unfortunately,
this fixed name collides as soon as several independent instances of a
tool work on the same input file ;)
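
For context, here is a rough sketch of what such a file-backed shared
pointer amounts to (my own illustration, not the actual Open MPI code):
each process locks the side file, reads the current offset, advances it
by the amount it is about to consume, and unlocks. Two unrelated jobs
that open the same fixed-name side file therefore update the same offset.

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

/* Illustration only (not the Open MPI implementation): fetch-and-add
 * on an offset kept in a side file, serialized with an fcntl() lock.
 * Independent jobs opening the same side file share this offset. */
static int64_t shared_offset_fetch_add(int fd, int64_t inc)
{
  struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
  int64_t off = 0, nxt;

  fcntl(fd, F_SETLKW, &fl);           /* lock the whole side file */
  pread(fd, &off, sizeof(off), 0);    /* current shared offset */
  nxt = off + inc;
  pwrite(fd, &nxt, sizeof(nxt), 0);   /* advance it */
  fl.l_type = F_UNLCK;
  fcntl(fd, F_SETLK, &fl);            /* unlock */
  return off;                         /* offset this caller reads at */
}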

Running 2 instances of the following command line (source code attached)
on the same machine shows the problematic behaviour: both runs advance
the same shared offset, so each instance only sees part of the file.

mpirun -n 1 --mca sharedfp lockedfile ./shrread -v input.dat
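
Any way of starting the two runs at about the same time will do; I use
something like:

mpirun -n 1 --mca sharedfp lockedfile ./shrread -v input.dat &
mpirun -n 1 --mca sharedfp lockedfile ./shrread -v input.dat &
wait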

Confirmed with lsof(8) output:

njoly@tars [~]> lsof input.dat.lockedfile
COMMAND  PID  USER   FD   TYPE DEVICE SIZE/OFF              NODE NAME
shrread 5876 njoly   21w   REG   0,30        8 13510798885996031 input.dat.lockedfile
shrread 5884 njoly   21w   REG   0,30        8 13510798885996031 input.dat.lockedfile

Thanks in advance.

-- 
Nicolas Joly

Cluster & Computing Group
Biology IT Center
Institut Pasteur, Paris.
#include <sys/statvfs.h>
#include <sys/utsname.h>

#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <mpi.h>

int main(int argc, char **argv) {
  char *rbuf, *file;
  int i, res, cnt, dry, inf;
  size_t len;
  struct statvfs stv; 
  struct utsname nam;

  MPI_Comm comm = MPI_COMM_WORLD;
  MPI_Info info = MPI_INFO_NULL;
  MPI_File rfh;
  MPI_Offset size, read, rsum;
  MPI_Status sts;

  dry = inf = 0;
  while ((i = getopt(argc, argv, "nv")) != -1) {
    switch (i) {
    case 'n': dry++; break; /* dryrun */
    case 'v': inf++; break; /* verbose */
    default: return EXIT_FAILURE; }
  }

  if (argc - optind != 1)
    errx(1, "usage: %s [-nv] <file>", *argv);
  file = *(argv+optind);

  res = statvfs(file, &stv);
  assert(res == 0);

  len = 10UL * 1024 * 1024;
  len -= (len % stv.f_bsize);
  rbuf = malloc(len);
  assert(rbuf != NULL);

  res = MPI_Init(&argc, &argv);
  assert(res == MPI_SUCCESS);

  res = MPI_File_open(comm, file, MPI_MODE_RDONLY, info, &rfh);
  assert(res == MPI_SUCCESS);
  res = MPI_File_get_size(rfh, &size);
  assert(res == MPI_SUCCESS);

  read = 0;
  /* Read the whole file through the shared file pointer; each call
     advances the shared offset maintained by the sharedfp component. */
  while (1) {

    res = MPI_File_read_shared(rfh, rbuf, (int)len, MPI_CHAR, &sts);
    assert(res == MPI_SUCCESS);
    res = MPI_Get_count(&sts, MPI_CHAR, &cnt);
    assert(res == MPI_SUCCESS);
    if (cnt == 0) break;

    read += cnt;
    assert(read <= size);
  }

  res = MPI_File_close(&rfh);
  assert(res == MPI_SUCCESS);

  rsum = 0;
  /* Total bytes read across all ranks; expected to match the file size. */
  res = MPI_Allreduce(&read, &rsum, 1, MPI_OFFSET, MPI_SUM, comm);
  assert(res == MPI_SUCCESS);

  res = MPI_Finalize();
  assert(res == MPI_SUCCESS);

  free(rbuf);

  if (inf != 0) {
    res = uname(&nam);
    assert(res == 0);
    printf("%s: %lld/%lld/%lld\n", nam.nodename, (long long)read,
           (long long)rsum, (long long)size); }
  assert(rsum == size);

  return 0; }