diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/acinclude.m4 NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/acinclude.m4
--- ompi-trunk/ompi/mca/io/romio/romio/acinclude.m4	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/acinclude.m4	2010-11-23 15:20:44.000000000 +0100
@@ -1,848 +1,7 @@
-dnl
-dnl This files contains additional macros for using autoconf to 
-dnl build configure scripts.
-dnl
-dnl Almost all of this file is taken from the aclocal.m4 of MPICH
-dnl
-dnl Get the format of Fortran names.  Uses F77, FFLAGS, and sets WDEF.
-dnl If the test fails, sets NOF77 to 1, HAVE_FORTRAN to 0.
-dnl
-dnl
-AC_DEFUN([PAC_GET_FORTNAMES],[
-   rm -f confftest.f confftest.o
-   cat > confftest.f <<EOF
-       subroutine mpir_init_fop( a )
-       integer a
-       a = 1
-       return
-       end
-EOF
-   $F77 $FFLAGS -c confftest.f > /dev/null 2>&1
-   if test ! -s confftest.o ; then
-	AC_MSG_WARN([Unable to test Fortran compiler.  Compiling a test 
-program failed to produce an object file])
-	NOF77=1
-   elif test -z "$FORTRANNAMES" ; then
-     # MAC OS X (and probably FreeBSD need strings - (not strings -a)
-     # Cray doesn't accept -a ...
-     allstrings="-a"
-     if test $arch_CRAY ; then 
-	allstrings="" 
-     elif strings - confftest.o < /dev/null >/dev/null 2>&1 ; then
-         allstrings="-"
-     elif strings -a confftest.o < /dev/null >/dev/null 2>&1 ; then
-         allstrings="-a"
-     fi
-    
-     nameform1=`strings $allstrings confftest.o | grep mpir_init_fop_  | head -n 1`
-     nameform2=`strings $allstrings confftest.o | grep MPIR_INIT_FOP   | head -n 1`
-     nameform3=`strings $allstrings confftest.o | grep mpir_init_fop   | head -n 1`
-     nameform4=`strings $allstrings confftest.o | grep mpir_init_fop__ | head -n 1`
-    rm -f confftest.f confftest.o
-    if test -n "$nameform4" ; then
-	echo "Fortran externals are lower case and have two trailing underscores"
-	FORTRANNAMES="FORTRANDOUBLEUNDERSCORE"
-    elif test -n "$nameform1" ; then
-        # We don't set this in CFLAGS; it is a default case
-        echo "Fortran externals have a trailing underscore and are lowercase"
-	FORTRANNAMES="FORTRANUNDERSCORE"
-    elif test -n "$nameform2" ; then
-	echo "Fortran externals are uppercase"     
-	FORTRANNAMES="FORTRANCAPS" 
-    elif test -n "$nameform3" ; then
-	echo "Fortran externals are lower case"
-	FORTRANNAMES="FORTRANNOUNDERSCORE"
-    else
-	AC_MSG_WARN([Unable to determine the form of Fortran external names.
-Make sure that the compiler $F77 can be run on this system.
-Turning off Fortran (-nof77 being assumed)])
-	NOF77=1
-    fi
-    fi
-    if test -n "$FORTRANNAMES" ; then
-        WDEF="-D$FORTRANNAMES"
-    fi
-    rm -f confftest.f confftest.o
-    ])dnl
-dnl
-define(PAC_GET_SPECIAL_SYSTEM_INFO,[
-#
-if test -n "$arch_IRIX"; then
-   AC_MSG_CHECKING(for IRIX OS version)
-   dnl This block of code replaces a generic "IRIX" arch value with
-   dnl  IRIX_<version>_<chip>
-   dnl  For example
-   dnl  IRIX_5_4400 (IRIX 5.x, using MIPS 4400)
-   osversion=`uname -r | sed 's/\..*//'`
-   dnl Note that we need to allow brackets here, so we briefly turn off 
-   dnl the macro quotes
-   changequote(,)dnl
-   dnl Get the second field (looking for 6.1)
-   osvminor=`uname -r | sed 's/[0-9]\.\([0-9]*\)\..*/\1/'`
-   changequote([,])dnl
-   AC_MSG_RESULT($osversion)
-   dnl Get SGI processor count by quick hack
-   AC_MSG_CHECKING(for IRIX cpucount)
-   changequote(,)dnl
-   cpucount=`hinv | grep '[0-9]* [0-9]* MHZ IP[0-9]* Proc' | cut -f 1 -d' '`
-   if test "$cpucount" = "" ; then
-     cpucount=`hinv | grep 'Processor [0-9]*:' | wc -l | sed -e 's/ //g'`
-   fi
-   changequote([,])dnl
-   if test "$cpucount" = "" ; then
-     AC_MSG_RESULT([Could not determine cpucount.  Please send])
-     hinv
-     AC_MSG_ERROR([to romio-maint@mcs.anl.gov])
-   fi
-   AC_MSG_RESULT($cpucount)
-   dnl
-   AC_MSG_CHECKING(for IRIX cpumodel)
-   dnl The tail -1 is necessary for multiple processor SGI boxes
-   dnl We might use this to detect SGI multiprocessors and recommend
-   dnl -comm=shared
-   cputype=`hinv -t cpu | tail -1 | cut -f 3 -d' '`
-   if test -z "$cputype" ; then
-        AC_MSG_RESULT([Could not get cputype from hinv -t cpu command. Please send])
-        hinv -t cpu 2>&1
-        hinv -t cpu | cut -f 3 -d' ' 2>&1
-	AC_MSG_ERROR([to romio-maint@mcs.anl.gov])
-   fi
-   AC_MSG_RESULT($cputype)
-   dnl echo "checking for osversion and cputype"
-   dnl cputype may contain R4400, R2000A/R3000, or something else.  
-   dnl We may eventually need to look at it.
-   if test -z "$osversion" ; then
-        AC_MSG_RESULT([Could not determine OS version.  Please send])
-        uname -a
-        AC_MSG_ERROR([to romio-maint@mcs.anl.gov])
-   elif test $osversion = 4 ; then
-        true
-   elif test $osversion = 5 ; then
-        true
-   elif test $osversion = 6 ; then
-        true
-   else 
-       AC_MSG_RESULT([Could not recognize the version of IRIX (got $osversion).
-ROMIO knows about versions 4, 5 and 6; the version being returned from 
-uname -r is $osversion.  Please send])
-       uname -a 2>&1
-       hinv 2>&1
-       AC_MSG_ERROR([to romio-maint@mcs.anl.gov])
-   fi
-   AC_MSG_CHECKING(for cputype)
-   OLD_ARCH=IRIX
-   IRIXARCH="$ARCH_$osversion"
-   dnl Now, handle the chip set
-   changequote(,)dnl
-   cputype=`echo $cputype | sed -e 's%.*/%%' -e 's/R//' | tr -d "[A-Z]"`
-   changequote([,])dnl
-   case $cputype in 
-        3000) ;;
-        4000) ;;
-        4400) ;;
-        4600) ;;
-        5000) ;;
-        8000) ;;
-        10000);;
-	12000);;
-        *)
-	AC_MSG_WARN([Unexpected IRIX/MIPS chipset $cputype.  Please send the output])
-        uname -a 2>&1
-        hinv 2>&1 
-        AC_MSG_WARN([to romio-maint@mcs.anl.gov
-ROMIO will continue and assume that the cputype is
-compatible with a MIPS 4400 processor.])
-        cputype=4400
-        ;;
-   esac
-   AC_MSG_RESULT($cputype)
-   IRIXARCH="$IRIXARCH_$cputype"
-   echo "IRIX-specific architecture is $IRIXARCH"
-fi
-])dnl
-dnl
-dnl
-define(PAC_TEST_MPI,[
-  AC_MSG_CHECKING(if a simple MPI program compiles and links)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         MPI_Init(&argc,&argv);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test ! -x conftest ; then
-      rm -f conftest mpitest.c
-      AC_MSG_ERROR([Unable to compile a simple MPI program.
-Use environment variables to provide the location of MPI libraries and
-include directories])
-  else
-      rm -f conftest mpitest.c
-  fi
-AC_MSG_RESULT(yes)
-])dnl
-dnl
-dnl
-dnl
-define(PAC_NEEDS_FINT,[
-  AC_MSG_CHECKING(if MPI_Fint is defined in the MPI implementation)
-  cat > mpitest1.c <<EOF
-#include "mpi.h"
-     main()
-     {
-         MPI_Fint i;
-         i = 0;
-     }
-EOF
-  rm -f mpitest1.o
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -c mpitest1.c > /dev/null 2>&1
-  if test ! -s mpitest1.o ; then
-      NEEDS_MPI_FINT="#define NEEDS_MPI_FINT"
-      CFLAGS="$CFLAGS -DNEEDS_MPI_FINT"
-      AC_MSG_RESULT(no)
-      rm -f mpitest1.o mpitest1.c
-  else
-      NEEDS_MPI_FINT=""
-      AC_MSG_RESULT(yes)
-      rm -f mpitest1.o mpitest1.c
-  fi
-])dnl
-dnl
-define(PAC_MPI_LONG_LONG_INT,[
-  AC_MSG_CHECKING(if MPI_LONG_LONG_INT is defined in mpi.h)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         long long i;   
-         MPI_Init(&argc,&argv);
-         MPI_Send(&i, 1, MPI_LONG_LONG_INT, 0, 0, MPI_COMM_WORLD);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      AC_DEFINE(HAVE_MPI_LONG_LONG_INT,,[Define if mpi has long long it])
-  else
-      AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl PAC_LONG_LONG_64: check if there is a 64-bit long long
-dnl
-define(PAC_LONG_LONG_64,[
-if test -n "$longlongsize" ; then
-    if test "$longlongsize" = 8 ; then
-       echo "defining MPI_Offset as long long in C and integer*8 in Fortran" 
-       AC_DEFINE(HAVE_LONG_LONG_64,,[Define if long long is 64 bits])
-       DEFINE_MPI_OFFSET="typedef long long MPI_Offset;"
-       FORTRAN_MPI_OFFSET="integer*8"
-       LL="\%lld"
-    elif test "$longlongsize" = "int" ; then  # a hack to set MPI_Offset as int
-       echo "defining MPI_Offset as int in C and integer in Fortran"
-       DEFINE_MPI_OFFSET="typedef int MPI_Offset;"
-       FORTRAN_MPI_OFFSET="integer"
-       AC_DEFINE(MPI_OFFSET_IS_INT,,[Define if MPI_Offset is int])
-       LL="\%d"
-       MPI_OFFSET_KIND1="!"
-       MPI_OFFSET_KIND2="!"
-    else 
-       echo "defining MPI_Offset as long in C and integer in Fortran" 
-       DEFINE_MPI_OFFSET="typedef long MPI_Offset;"
-       FORTRAN_MPI_OFFSET="integer"
-       LL="\%ld"
-       MPI_OFFSET_KIND1="!"
-       MPI_OFFSET_KIND2="!"
-    fi
-else
-   PAC_GET_TYPE_SIZE(long long, longlongsize)
-   if test -n "$longlongsize" ; then
-      if test "$longlongsize" = 8 ; then
-         PAC_TEST_LONG_LONG()
-      else
-         echo "defining MPI_Offset as long in C and integer in Fortran" 
-         DEFINE_MPI_OFFSET="typedef long MPI_Offset;"
-         FORTRAN_MPI_OFFSET="integer"
-         LL="\%ld"
-         MPI_OFFSET_KIND1="!"
-         MPI_OFFSET_KIND2="!"
-      fi
-   else 
-dnl   check if longlong is not supported or only its size cannot be determined
-dnl   because the program cannot be run.
-      rm -f ltest.c
-      cat > ltest.c <<EOF
-        main()
-        {
-           long long i=8;
-           return 0;
-        }
-EOF
-      rm -f conftest
-      $CC $USER_CFLAGS -o conftest ltest.c > /dev/null 2>&1
-      if test -x conftest ; then
-         echo "assuming size of long long is 8bytes; use '-longlongsize' to indicate otherwise"
-         rm -f conftest ltest.c
-         echo "defining MPI_Offset as long long in C and integer*8 in Fortran" 
-         AC_DEFINE(HAVE_LONG_LONG_64,,[Define if long long is 64 bits])
-         DEFINE_MPI_OFFSET="typedef long long MPI_Offset;"
-         FORTRAN_MPI_OFFSET="integer*8"
-         LL="\%lld"
-      else 
-         echo "assuming long long is not available; use '-longlongsize' to indicate otherwise"
-         echo "defining MPI_Offset as long in C and integer in Fortran" 
-         DEFINE_MPI_OFFSET="typedef long MPI_Offset;"
-         FORTRAN_MPI_OFFSET="integer"
-         LL="\%ld"
-         MPI_OFFSET_KIND1="!"
-         MPI_OFFSET_KIND2="!"
-      fi
-   fi
-fi
-])dnl
-dnl
-dnl
-define(PAC_MPI_INFO,[
-  AC_MSG_CHECKING(if MPI_Info functions are defined in the MPI implementation)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         MPI_Info info;
-         MPI_Init(&argc,&argv);
-         MPI_Info_create(&info);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      AC_DEFINE(HAVE_MPI_INFO,1,[Define if MPI_Info available])
-      HAVE_MPI_INFO="#define HAVE_MPI_INFO"
-      MPI_FINFO1="!"
-      MPI_FINFO2="!"
-      MPI_FINFO3="!"
-      MPI_FINFO4="!"
-  else
-      AC_MSG_RESULT(no)
-      BUILD_MPI_INFO=1
-      MPI_FINFO1="      INTEGER MPI_MAX_INFO_KEY, MPI_MAX_INFO_VAL"
-      MPI_FINFO2="      PARAMETER (MPI_MAX_INFO_KEY=255, MPI_MAX_INFO_VAL=1024)"
-      MPI_FINFO3="      INTEGER MPI_INFO_NULL"
-      MPI_FINFO4="      PARAMETER (MPI_INFO_NULL=0)"
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl
-define(PAC_MPI_DARRAY_SUBARRAY,[
-  AC_MSG_CHECKING(if darray and subarray constructors are defined in the MPI implementation)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         int i=MPI_DISTRIBUTE_CYCLIC;
-         MPI_Datatype t;
-         MPI_Init(&argc,&argv);
-         MPI_Type_create_darray(i, i, i, &i, &i, &i, &i, i, MPI_INT, &t);
-         MPI_Type_create_subarray(i, &i, &i, &i, i, MPI_INT, &t);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      AC_DEFINE(HAVE_MPI_DARRAY_SUBARRAY,,[Define if MPI Darray available])
-      HAVE_MPI_DARRAY_SUBARRAY="#define HAVE_MPI_DARRAY_SUBARRAY"
-      MPI_FARRAY1="!"
-      MPI_FARRAY2="!"
-      MPI_FARRAY3="!"
-      MPI_FARRAY4="!"
-      MPI_FARRAY5="!"
-      MPI_FARRAY6="!"
-      MPI_FARRAY7="!"
-  else
-      AC_MSG_RESULT(no)
-      BUILD_MPI_ARRAY=1
-      MPI_FARRAY1="      INTEGER MPI_ORDER_C, MPI_ORDER_FORTRAN"
-      MPI_FARRAY2="      PARAMETER (MPI_ORDER_C=56, MPI_ORDER_FORTRAN=57)"
-      MPI_FARRAY3="      INTEGER MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_CYCLIC"
-      MPI_FARRAY4="      INTEGER MPI_DISTRIBUTE_NONE, MPI_DISTRIBUTE_DFLT_DARG"
-      MPI_FARRAY5="      PARAMETER (MPI_DISTRIBUTE_BLOCK=121, MPI_DISTRIBUTE_CYCLIC=122)"
-      MPI_FARRAY6="      PARAMETER (MPI_DISTRIBUTE_NONE=123)"
-      MPI_FARRAY7="      PARAMETER (MPI_DISTRIBUTE_DFLT_DARG=-49767)"
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl
-define(PAC_CHECK_MPI_SGI_INFO_NULL,[
-  AC_MSG_CHECKING([if MPI_INFO_NULL is defined in mpi.h])
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-	int i;
-	i = MPI_INFO_NULL;
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      cp adio/sgi/mpi3.1/*.h include
-  else
-      AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl
-dnl
-define(PAC_CHECK_MPIOF_H,[
-  AC_MSG_CHECKING(if mpiof.h is included in mpif.h)
-  rm -f mpitest.f
-  cat > mpitest.f <<EOF
-      program main
-      implicit none
-      include 'mpif.h'
-      integer i
-      i = MPI_MODE_RDWR
-      stop
-      end
-EOF
-  rm -f conftest
-  $F77 $FFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.f $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      MPIOF_H_INCLUDED=1
-  else
-      AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.f
-])dnl
-dnl
-dnl
-dnl check if pread64 is defined in IRIX. needed on IRIX 6.5
-dnl
-define(PAC_HAVE_PREAD64,[
-  AC_MSG_CHECKING(if pread64 is defined)
-  rm -f conftest.c
-  cat > conftest.c <<EOF
-#include <unistd.h>
-     main()
-     {
-         int fd=0, buf=0, i=0;
-         off64_t off=0;
-         pread64(fd, &buf, i, off);
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -o conftest conftest.c > /dev/null 2>&1
-  if test -x conftest ; then
-      AC_MSG_RESULT(yes)
-      AC_DEFINE(HAVE_PREAD64,,[Define if pread64 available])
-  else
-      AC_MSG_RESULT(no)
-  fi
-rm -f conftest conftest.c
-])dnl
-dnl
-dnl
-define(PAC_TEST_MPI_SGI_type_is_contig,[
-  AC_MSG_CHECKING(if MPI_SGI_type_is_contig is defined)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         MPI_Datatype type;
-         int i;
-
-         MPI_Init(&argc,&argv);
-         i = MPI_SGI_type_is_contig(type);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-  else
-     AC_MSG_RESULT(no)
-     AC_DEFINE(NO_MPI_SGI_type_is_contig,,[Define if no MPI type is contig])
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl
-dnl
-define(PAC_TEST_MPI_COMBINERS,[
-  AC_MSG_CHECKING(if MPI-2 combiners are defined in mpi.h)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-         int i;
-
-         MPI_Init(&argc,&argv);
-         i = MPI_COMBINER_STRUCT;
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     AC_DEFINE(HAVE_MPI_COMBINERS,,[Define if MPI combiners available])
-  else
-     AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
-dnl
-dnl PAC_MPI_OFFSET_KIND()
-dnl
-dnl tries to determine the Fortran 90 kind parameter for 8-byte integers
-dnl
-define(PAC_MPI_OFFSET_KIND,
-[rm -f conftest*
-# Determine the extension for Fortran 90 files (not all compilers accept
-# .f and not all accept .f90)
-if test -z "$ac_f90ext" ; then
-    if test -z "$F90" ; then
-       AC_CHECK_PROGS(F90,f90 xlf90 pgf90 ifort epcf90 f95 fort xlf95 lf95 pathf90 g95 fc ifc efc)
-    fi
-    AC_MSG_CHECKING([for extension for Fortran 90 programs])
-    ac_f90ext="f90"
-    ac_f90compile='${F90-f90} -c $F90FLAGS conftest.$ac_f90ext 1>&AC_FD_CC'
-    cat > conftest.$ac_f90ext <<EOF
-      program conftest
-      end
-EOF
-    if AC_TRY_EVAL(ac_f90compile) ; then
-        AC_MSG_RESULT([f90])
-    else
-        rm -f conftest*
-        ac_f90ext="f"
-        cat > conftest.$ac_f90ext <<EOF
-      program conftest
-      end
-EOF
-        if AC_TRY_EVAL(ac_f90compile) ; then
-            AC_MSG_RESULT([f])
-        else
-            AC_MSG_RESULT([unknown!])
-        fi
-    fi
-fi
-AC_MSG_CHECKING([for Fortran 90 KIND parameter for 8-byte integers])
-cat <<EOF > conftest.$ac_f90ext
-      program main
-      integer i
-      i = selected_int_kind(16)
-      open(8, file="conftest.out", form="formatted")
-      write (8,*) i
-      close(8)
-      stop
-      end
-EOF
-if test -z "$F90" ; then
-   F90=f90
-fi
-KINDVAL=""
-if $F90 -o conftest conftest.$ac_f90ext >/dev/null 2>&1 ; then
-    ./conftest >/dev/null 2>&1
-    if test -s conftest.out ; then 
-        KINDVAL=`cat conftest.out`
-    fi
-fi
-rm -f conftest*
-if test -n "$KINDVAL" -a "$KINDVAL" != "-1" ; then
-   AC_MSG_RESULT($KINDVAL)
-   MPI_OFFSET_KIND1="      INTEGER MPI_OFFSET_KIND"
-   MPI_OFFSET_KIND2="      PARAMETER (MPI_OFFSET_KIND=$KINDVAL)"
-else
-    AC_MSG_RESULT(unavailable)
-fi
-])dnl
-dnl
-dnl
-define(PAC_TEST_MPI_HAVE_OFFSET_KIND,[
-  AC_MSG_CHECKING(if MPI_OFFSET_KIND is defined in mpif.h)
-  rm -f mpitest.f
-  cat > mpitest.f <<EOF
-      program main
-      implicit none
-      include 'mpif.h'
-      integer i
-      i = MPI_OFFSET_KIND
-      stop
-      end
-EOF
-  rm -f conftest
-  $F77 $FFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.f $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     MPI_OFFSET_KIND1="!"
-     MPI_OFFSET_KIND2="!"
-  else
-     AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.f
-])dnl
-dnl
-dnl
-dnl PAC_GET_XFS_MEMALIGN
-dnl 
-dnl
-define(PAC_GET_XFS_MEMALIGN,
-[AC_MSG_CHECKING([for memory alignment needed for direct I/O])
-/bin/rm -f memalignval
-/bin/rm -f /tmp/romio_tmp.bin
-AC_TEST_PROGRAM([#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <stdio.h>
-main() { 
-  struct dioattr st;
-  int fd = open("/tmp/romio_tmp.bin", O_RDWR | O_CREAT, 0644);
-  FILE *f=fopen("memalignval","w");
-  if (fd == -1) exit(1);
-  if (!f) exit(1);
-  fcntl(fd, F_DIOINFO, &st);
-  fprintf( f, "%u\n", st.d_mem);
-  exit(0);
-}],Pac_CV_NAME=`cat memalignval`,Pac_CV_NAME="")
-/bin/rm -f memalignval
-/bin/rm -f /tmp/romio_tmp.bin
-if test -n "$Pac_CV_NAME" -a "$Pac_CV_NAME" != 0 ; then
-    AC_MSG_RESULT($Pac_CV_NAME)
-    CFLAGS="$CFLAGS -DXFS_MEMALIGN=$Pac_CV_NAME"
-else
-    AC_MSG_RESULT(unavailable, assuming 128)
-    CFLAGS="$CFLAGS -DXFS_MEMALIGN=128"
-fi
-])dnl
-dnl
-
-dnl
-dnl Look for a style of VPATH.  Known forms are
-dnl VPATH = .:dir
-dnl .PATH: . dir
-dnl
-dnl Defines VPATH or .PATH with . $(srcdir)
-dnl Requires that vpath work with implicit targets
-dnl NEED TO DO: Check that $< works on explicit targets.
-dnl
-define(PAC_MAKE_VPATH,[
-AC_SUBST(VPATH)
-AC_MSG_CHECKING(for virtual path format)
-rm -rf conftest*
-mkdir conftestdir
-cat >conftestdir/a.c <<EOF
-A sample file
-EOF
-cat > conftest <<EOF
-all: a.o
-VPATH=.:conftestdir
-.c.o:
-	@echo \$<
-EOF
-ac_out=`$MAKE -f conftest 2>&1 | grep 'conftestdir/a.c'`
-if test -n "$ac_out" ; then 
-    AC_MSG_RESULT(VPATH)
-    VPATH='VPATH=.:$(srcdir)'
-else
-    rm -f conftest
-    cat > conftest <<EOF
-all: a.o
-.PATH: . conftestdir
-.c.o:
-	@echo \$<
-EOF
-    ac_out=`$MAKE -f conftest 2>&1 | grep 'conftestdir/a.c'`
-    if test -n "$ac_out" ; then 
-        AC_MSG_RESULT(.PATH)
-        VPATH='.PATH: . $(srcdir)'
-    else
-	AC_MSG_RESULT(neither VPATH nor .PATH works)
-    fi
-fi
-rm -rf conftest*
-])dnl
-dnl
-define(PAC_HAVE_MOUNT_NFS,[
-  AC_MSG_CHECKING([if MOUNT_NFS is defined in the include files])
-  rm -f conftest.c
-  cat > conftest.c <<EOF
-#include <sys/param.h>
-#include <sys/mount.h>
-     main()
-     {
-         int i=MOUNT_NFS;
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -o conftest conftest.c > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     ROMIO_HAVE_MOUNT_NFS=1
-     AC_DEFINE(HAVE_MOUNT_NFS,,[Define if MOUNT_NFS defined])
-  else
-     ROMIO_HAVE_MOUNT_NFS=0
-     AC_MSG_RESULT(no)
-  fi
-  rm -f conftest conftest.c
-])dnl
-dnl
-dnl
-dnl PAC_MPI_OFFSET_KIND_4BYTE()
-dnl
-dnl tries to determine the Fortran 90 kind parameter for 4-byte integers
-dnl
-define(PAC_MPI_OFFSET_KIND_4BYTE,
-[AC_MSG_CHECKING([for Fortran 90 KIND parameter for 4-byte integers])
-rm -f kind.f kind.o kind
-cat <<EOF > kind.f
-      program main
-      integer i
-      i = selected_int_kind(8)
-      open(8, file="k.out", form="formatted")
-      write (8,*) i
-      close(8)
-      stop
-      end
-EOF
-if test -z "$F90" ; then
-   F90=f90
-fi
-KINDVAL=""
-if $F90 -o kind kind.f >/dev/null 2>&1 ; then
-    ./kind >/dev/null 2>&1
-    if test -s k.out ; then 
-        KINDVAL=`cat k.out`
-    fi
-fi
-rm -f kind k.out kind.f kind.o
-if test -n "$KINDVAL" -a "$KINDVAL" != "-1" ; then
-   AC_MSG_RESULT($KINDVAL)
-   MPI_OFFSET_KIND1="      INTEGER MPI_OFFSET_KIND"
-   MPI_OFFSET_KIND2="      PARAMETER (MPI_OFFSET_KIND=$KINDVAL)"
-else
-    AC_MSG_RESULT(unavailable)
-fi
-])dnl
-dnl
-dnl
-define(PAC_FUNC_STRERROR,[
-  AC_MSG_CHECKING([for strerror()])
-  rm -f conftest.c
-  cat > conftest.c <<EOF
-#include <string.h>
-     main()
-     {
-        char *s = strerror(5);
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -o conftest conftest.c >> config.log 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     AC_DEFINE(HAVE_STRERROR,,[Define if strerror available])
-  else
-     AC_MSG_RESULT(no)
-     AC_MSG_CHECKING([for sys_errlist])
-     rm -f conftest.c
-changequote(,)
-     cat > conftest.c <<EOF
-#include <stdio.h>
-        main()
-        {
-           extern char *sys_errlist[];
-	   printf("%s\n", sys_errlist[34]);
-        }
-EOF
-changequote([,])
-     rm -f conftest
-     $CC $USER_CFLAGS -o conftest conftest.c > config.log 2>&1
-     if test -x conftest ; then
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_SYSERRLIST,,[Define if syserrlist available])
-     else
-        AC_MSG_RESULT(no)
-     fi
-  fi
-  rm -f conftest conftest.c
-])dnl
-dnl
-define(PAC_TEST_MPIR_STATUS_SET_BYTES,[
-  AC_MSG_CHECKING(if MPIR_Status_set_bytes is defined)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-     main(int argc, char **argv)
-     {
-     	 MPI_Status status;
-         MPI_Datatype type;
-	 int err;
-
-         MPI_Init(&argc,&argv);
-         MPIR_Status_set_bytes(status,type,err);
-         MPI_Finalize(); 
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     AC_DEFINE(HAVE_STATUS_SET_BYTES,,[Define if status set bytes available])
-  else
-     AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.c
-])dnl
-define(PAC_TEST_MPI_GREQUEST,[
-  AC_MSG_CHECKING(support for generalized requests)
-  rm -f mpitest.c
-  cat > mpitest.c <<EOF
-#include "mpi.h"
-#include "stdio.h"
-    main(int argc, char **argv)
-    {
-       MPI_Request request;
-       MPI_Init(&argc, &argv);
-       MPI_Grequest_start(NULL, NULL, NULL, NULL, &request);
-       MPI_Finalize();
-     }
-EOF
-  rm -f conftest
-  $CC $USER_CFLAGS -I$MPI_INCLUDE_DIR -o conftest mpitest.c $MPI_LIB > /dev/null 2>&1
-  if test -x conftest ; then
-     AC_MSG_RESULT(yes)
-     AC_DEFINE(HAVE_MPI_GREQUEST,,[Define if generalized requests avaliable])
-  else
-     AC_MSG_RESULT(no)
-  fi
-  rm -f conftest mpitest.c
-])dnl
-dnl
 dnl/*D
+dnl Open MPI: This macro has been moved from confdb/aclocal_cc.m4 into acinclude.m4;
+dnl           it does not work otherwise.
+dnl
 dnl PAC_FUNC_NEEDS_DECL - Set NEEDS_<funcname>_DECL if a declaration is needed
 dnl
 dnl Synopsis:
@@ -860,12 +19,17 @@
 dnl
 dnl We use a 'double' as the first argument to try and catch varargs
 dnl routines that may use an int or pointer as the first argument.
+dnl
+dnl There is one difficulty - if the compiler has been instructed to
+dnl fail on implicitly declared functions, then this test will always
+dnl fail.
 dnl 
 dnl D*/
 AC_DEFUN([PAC_FUNC_NEEDS_DECL],[
 AC_CACHE_CHECK([whether $2 needs a declaration],
-[pac_cv_func_decl_$2],[
-AC_TRY_COMPILE([$1],[int a=$2(1.0,27,1.0,"foo");],
+pac_cv_func_decl_$2,[
+AC_TRY_COMPILE([$1
+int $2(double, int, double, const char *);],[int a=$2(1.0,27,1.0,"foo");],
 pac_cv_func_decl_$2=yes,pac_cv_func_decl_$2=no)])
 if test "$pac_cv_func_decl_$2" = "yes" ; then
 changequote(<<,>>)dnl
@@ -874,115 +38,4 @@
     AC_DEFINE_UNQUOTED(PAC_FUNC_NAME,1,[Define if $2 needs a declaration])
 undefine([PAC_FUNC_NAME])
 fi
-])dnl
-dnl
-dnl/*D
-dnl PAC_C_RESTRICT - Check if C supports restrict
-dnl
-dnl Synopsis:
-dnl PAC_C_RESTRICT
-dnl
-dnl Output Effect:
-dnl Defines 'restrict' if some version of restrict is supported; otherwise
-dnl defines 'restrict' as empty.  This allows you to include 'restrict' in 
-dnl declarations in the same way that 'AC_C_CONST' allows you to use 'const'
-dnl in declarations even when the C compiler does not support 'const'
-dnl
-dnl Note that some compilers accept restrict only with additional options.
-dnl DEC/Compaq/HP Alpha Unix (Tru64 etc.) -accept restrict_keyword
-dnl
-dnl D*/
-AC_DEFUN([PAC_C_RESTRICT],[
-AC_CACHE_CHECK([for restrict],
-[pac_cv_c_restrict],[
-AC_TRY_COMPILE(,[int * restrict a;],pac_cv_c_restrict="restrict",
-pac_cv_c_restrict="no")
-if test "$pac_cv_c_restrict" = "no" ; then
-   AC_TRY_COMPILE(,[int * _Restrict a;],pac_cv_c_restrict="_Restrict",
-   pac_cv_c_restrict="no")
-fi
-if test "$pac_cv_c_restrict" = "no" ; then
-   AC_TRY_COMPILE(,[int * __restrict a;],pac_cv_c_restrict="__restrict",
-   pac_cv_c_restrict="no")
-fi
 ])
-if test "$pac_cv_c_restrict" = "no" ; then
-  restrict_val=""
-elif test "$pac_cv_c_restrict" != "restrict" ; then
-  restrict_val=$pac_cv_c_restrict
-fi
-if test "$restrict_val" != "restrict" ; then 
-  AC_DEFINE_UNQUOTED(restrict,$restrict_val,[if C does not support restrict])
-fi
-])dnl
-dnl
-dnl
-dnl
-dnl This is a replacement for AC_PROG_CC that does not prefer gcc and
-dnl that does not mess with CFLAGS.  See acspecific.m4 for the original defn.
-dnl
-dnl/*D
-dnl PAC_PROG_CC - Find a working C compiler
-dnl
-dnl Synopsis:
-dnl PAC_PROG_CC
-dnl
-dnl Output Effect:
-dnl   Sets the variable CC if it is not already set
-dnl
-dnl Notes:
-dnl   Unlike AC_PROG_CC, this does not prefer gcc and does not set CFLAGS.
-dnl   It does check that the compiler can compile a simple C program.
-dnl   It also sets the variable GCC to yes if the compiler is gcc.  It does
-dnl   not yet check for some special options needed in particular for 
-dnl   parallel computers, such as -Tcray-t3e, or special options to get
-dnl   full ANSI/ISO C, such as -Aa for HP.
-dnl
-dnl D*/
-dnl 2.52 doesn't have AC_PROG_CC_GNU
-ifdef([AC_PROG_CC_GNU],,[AC_DEFUN([AC_PROG_CC_GNU],)])
-AC_DEFUN([PAC_PROG_CC],[
-AC_PROVIDE([AC_PROG_CC])
-AC_CHECK_PROGS([CC, cc xlC xlc pgcc icc gcc])
-test -z "$CC" && AC_MSG_ERROR([no acceptable cc found in \$PATH])
-PAC_PROG_CC_WORKS
-AC_PROG_CC_GNU
-if test "$ac_cv_prog_gcc" = yes; then
-  GCC=yes
-else
-  GCC=
-fi
-])
-dnl
-dnl
-dnl PAC_C_GNU_ATTRIBUTE - See if the GCC __attribute__ specifier is allow.
-dnl Use the following
-dnl #ifndef HAVE_GCC_ATTRIBUTE
-dnl #define __attribute__(a)
-dnl #endif
-dnl If *not*, define __attribute__(a) as null
-dnl
-dnl We start by requiring Gcc.  Some other compilers accept __attribute__
-dnl but generate warning messages, or have different interpretations 
-dnl (which seems to make __attribute__ just as bad as #pragma) 
-dnl For example, the Intel icc compiler accepts __attribute__ and
-dnl __attribute__((pure)) but generates warnings for __attribute__((format...))
-dnl
-AC_DEFUN([PAC_C_GNU_ATTRIBUTE],[
-AC_REQUIRE([AC_PROG_CC])
-if test "$ac_cv_prog_gcc" = "yes" ; then
-    AC_CACHE_CHECK([whether __attribute__ allowed],
-pac_cv_gnu_attr_pure,[
-AC_TRY_COMPILE([int foo(int) __attribute__ ((pure));],[int a;],
-pac_cv_gnu_attr_pure=yes,pac_cv_gnu_attr_pure=no)])
-AC_CACHE_CHECK([whether __attribute__((format)) allowed],
-pac_cv_gnu_attr_format,[
-AC_TRY_COMPILE([int foo(char *,...) __attribute__ ((format(printf,1,2)));],[int a;],
-pac_cv_gnu_attr_format=yes,pac_cv_gnu_attr_format=no)])
-    if test "$pac_cv_gnu_attr_pure" = "yes" -a "$pac_cv_gnu_attr_format" = "yes" ; then
-        AC_DEFINE(HAVE_GCC_ATTRIBUTE,1,[Define if GNU __attribute__ is supported])
-    fi
-fi
-])
-dnl
-
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,5 +1,6 @@
 /* ---------------------------------------------------------------- */
 /* (C)Copyright IBM Corp.  2007, 2008                               */
+/* ---------------------------------------------------------------- */
 /**
  * \file ad_bgl_aggrs.c
  * \brief The externally used function from this file is is declared in ad_bgl_aggrs.h
@@ -7,7 +8,7 @@
 
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
 /* 
- *   Copyright (C) 1997 University of Chicago. 
+ *   Copyright (C) 1997-2001 University of Chicago. 
  *   See COPYRIGHT notice in top-level directory.
  */
 
@@ -16,10 +17,49 @@
 #include "ad_bgl.h"
 #include "ad_bgl_pset.h"
 #include "ad_bgl_aggrs.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
+#ifdef USE_DBG_LOGGING
+  #define AGG_DEBUG 1
+#endif
 
 
-int aggrsInPsetSize=0;
-int *aggrsInPset=NULL;
+
+static int aggrsInPsetSize=0;
+static int *aggrsInPset=NULL;
+
+/* Comments copied from common:
+ * This file contains four functions:
+ *
+ * ADIOI_Calc_aggregator()
+ * ADIOI_Calc_file_domains()
+ * ADIOI_Calc_my_req()
+ * ADIOI_Calc_others_req()
+ *
+ * The last three of these were originally in ad_read_coll.c, but they are
+ * also shared with ad_write_coll.c.  I felt that they were better kept with
+ * the rest of the shared aggregation code.  
+ */
+
+/* Discussion of values available from above:
+ *
+ * ADIO_Offset st_offsets[0..nprocs-1]
+ * ADIO_Offset end_offsets[0..nprocs-1]
+ *    These contain a list of start and end offsets for each process in 
+ *    the communicator.  For example, an access at loc 10, size 10 would
+ *    have a start offset of 10 and end offset of 19.
+ * int nprocs
+ *    number of processors in the collective I/O communicator
+ * ADIO_Offset min_st_offset
+ * ADIO_Offset fd_start[0..nprocs_for_coll-1]
+ *    starting location of "file domain"; region that a given process will
+ *    perform aggregation for (i.e. actually do I/O)
+ * ADIO_Offset fd_end[0..nprocs_for_coll-1]
+ *    start + size - 1 roughly, but it can be less, or 0, in the case of 
+ *    uneven distributions
+ */
 
 /* forward declaration */
 static void 
@@ -219,8 +259,7 @@
 					ADIOI_BGL_ProcInfo_t *all_procInfo,
 					int *aggrsInPset )
 {
-#   define DEBUG 0
-#   if DEBUG
+#   if AGG_DEBUG
     int i; 
 #   endif
     int naggs; 
@@ -229,9 +268,10 @@
   /* compute the ranklist of IO aggregators and put into tmp_ranklist */
     tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));
 
-#   if DEBUG
-    for (i=0; i<confInfo->nProcs; i++) 
-    printf( "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
+#   if AGG_DEBUG
+    for (i=0; i<confInfo->nProcs; i++) {
+      DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
+    }
 #   endif
 
     naggs = 
@@ -239,7 +279,7 @@
 
 #   define VERIFY 0
 #   if VERIFY
-    printf( "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n", 
+    DBG_FPRINTF(stderr, "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n", 
 	    confInfo->PsetSize        ,
 	    confInfo->numPsets        ,
 	    confInfo->isVNM           ,
@@ -250,9 +290,10 @@
 	    naggs );
 #   endif
 
-#   if DEBUG
-    for (i=0; i<naggs; i++) 
-    printf( "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
+#   if AGG_DEBUG
+    for (i=0; i<naggs; i++) {
+      DBG_FPRINTF(stderr, "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
+    }
 #   endif
 
   /* copy the ranklist of IO aggregators to fd->hints */
@@ -267,7 +308,112 @@
     return;
 }
 
+/* Description from common/ad_aggregate.c.  (Does it completely apply to bgl?)
+ * ADIOI_Calc_aggregator()
+ *
+ * The intention here is to implement a function which provides basically 
+ * the same functionality as in Rajeev's original version of 
+ * ADIOI_Calc_my_req().  He used a ceiling division approach to assign the 
+ * file domains, and we use the same approach here when calculating the
+ * location of an offset/len in a specific file domain.  Further we assume
+ * this same distribution when calculating the rank_index, which is later
+ *  used to map to a specific process rank in charge of the file domain.
+ *
+ * A better (i.e. more general) approach would be to use the list of file
+ * domains only.  This would be slower in the case where the
+ * original ceiling division was used, but it would allow for arbitrary
+ * distributions of regions to aggregators.  We'd need to know the 
+ * nprocs_for_coll in that case though, which we don't have now.
+ *
+ * Note a significant difference between this function and Rajeev's old code:
+ * this code doesn't necessarily return a rank in the range
+ * 0..nprocs_for_coll; instead you get something in 0..nprocs.  This is a
+ * result of the rank mapping; any set of ranks in the communicator could be
+ * used now.
+ *
+ * Returns an integer representing a rank in the collective I/O communicator.
+ *
+ * The "len" parameter is also modified to indicate the amount of data
+ * actually available in this file domain.
+ */
+/* 
+ * This is a more general aggregator search function which does not rely on the assumption
+ * that each aggregator hosts a file domain of the same size.
+ */
+int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
+			      ADIO_Offset off,
+			      ADIO_Offset min_off,
+			      ADIO_Offset *len,
+			      ADIO_Offset fd_size,
+			      ADIO_Offset *fd_start,
+			      ADIO_Offset *fd_end)
+{
+    int rank_index, rank;
+    ADIO_Offset avail_bytes;
+
+    AD_BGL_assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) );
+
+    /* binary search --> rank_index is returned */
+    int ub = fd->hints->cb_nodes;
+    int lb = 0;
+    /* get an index into our array of aggregators */
+    /* Common code for striping - bgl doesn't use it but it's
+       here to make diff'ing easier.
+    rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
+
+    if (fd->hints->striping_unit > 0) {
+        * wkliao: implementation for file domain alignment
+           fd_start[] and fd_end[] have been aligned with file lock
+	   boundaries when returned from ADIOI_Calc_file_domains() so cannot
+	   just use simple arithmetic as above *
+        rank_index = 0;
+        while (off > fd_end[rank_index]) rank_index++;
+    } 
+    bgl does its own striping below 
+    */
+    rank_index = fd->hints->cb_nodes / 2;
+    while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
+	if ( off > fd_end  [rank_index] ) {
+	    lb = rank_index;
+	    rank_index = (rank_index + ub) / 2;
+	}
+	else 
+	if ( off < fd_start[rank_index] ) {
+	    ub = rank_index;
+	    rank_index = (rank_index + lb) / 2;
+	}
+    }
+    /* we index into fd_end with rank_index, and fd_end was allocated to be no
+     * bigger than fd->hints->cb_nodes.   If we ever violate that, we're
+     * overrunning arrays.  Obviously, we should never ever hit this abort */
+    if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
+        FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
+			rank_index,fd->hints->cb_nodes,fd_size,off);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    // DBG_FPRINTF ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
+
+    /* 
+     * remember here that even in Rajeev's original code it was the case that
+     * different aggregators could end up with different amounts of data to
+     * aggregate.  here we use fd_end[] to make sure that we know how much
+     * data this aggregator is working with.  
+     *
+     * the +1 is to take into account the end vs. length issue.
+     */
+    avail_bytes = fd_end[rank_index] + 1 - off;
+    if (avail_bytes < *len && avail_bytes > 0) {
+        /* this file domain only has part of the requested contig. region */
+
+        *len = avail_bytes;
+    }
 
+    /* map our index to a rank */
+    /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
+    rank = fd->hints->ranklist[rank_index];
+
+    return rank;
+}
 
 /* 
  * Compute a dynamic access range based file domain partition among I/O aggregators,
@@ -278,6 +424,10 @@
  * Additional effort is to make sure that each I/O aggregator get
  * a file domain that aligns to the GPFS block size.  So, there will 
  * not be any false sharing of GPFS file blocks among multiple I/O nodes. 
+ *  
+ * The common version of this now accepts a min_fd_size and striping_unit. 
+ * It doesn't seem necessary here (using GPFS block sizes) but keep it in mind
+ * (e.g. we could pass striping unit instead of using fs_ptr->blksize). 
  */
 void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
                                       ADIO_Offset *end_offsets,
@@ -291,13 +441,23 @@
 {
     ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
     int i, aggr;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5004, 0, NULL);
+#endif
+
+#   if AGG_DEBUG
     static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains";
+    DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n", 
+	    myname,__LINE__,nprocs_for_coll);
+#   endif
     __blksize_t blksize = 1048576; /* default to 1M */
     if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */
       blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;
-/*    FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);*/
-
-    /* find the range of all the requests */
+#   if AGG_DEBUG
+    DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);
+#   endif
+/* find min of start offsets and max of end offsets of all processes */
     min_st_offset  = st_offsets [0];
     max_end_offset = end_offsets[0];
     for (i=1; i<nprocs; i++) {
@@ -305,7 +465,7 @@
         max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
     }
 
-    // printf( "_calc_file_domains, min_st_offset, max_ = %qd, %qd\n", min_st_offset, max_end_offset );
+    // DBG_FPRINTF(stderr, "_calc_file_domains, min_st_offset, max_ = %qd, %qd\n", min_st_offset, max_end_offset );
 
     /* determine the "file domain (FD)" of each process, i.e., the portion of
        the file that will be "owned" by each process */
@@ -317,6 +477,18 @@
     ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
 
     int         naggs    = nprocs_for_coll;
+
+    /* Tweak the file domains so that no fd is smaller than a threshold.  We
+     * have to strike a balance between efficiency and parallelism: somewhere
+     * between 10k processes sending 32-byte requests and one process sending a
+     * 320k request is a (system-dependent) sweet spot 
+     
+    This is from the common code - the new min_fd_size parm that we didn't implement. 
+    (And common code uses a different declaration of fd_size so beware) 
+     
+    if (fd_size < min_fd_size)
+        fd_size = min_fd_size;
+    */
     fd_size              = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
     *fd_start_ptr        = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
     *fd_end_ptr          = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
@@ -332,7 +504,8 @@
         if (i < naggs_small) fd_size[i] = nb_cn_small     * blksize;
                         else fd_size[i] = (nb_cn_small+1) * blksize;
 
-/*     FPRINTF(stderr,"%s(%d): "
+#   if AGG_DEBUG
+     DBG_FPRINTF(stderr,"%s(%d): "
                    "gpfs_ub       %llu, "
                    "gpfs_lb       %llu, "
                    "gpfs_ub_rdoff %llu, "
@@ -354,7 +527,8 @@
                    naggs_large  ,
                    naggs_small
                    );
-*/
+#   endif
+
     fd_size[0]       -= gpfs_lb_rdoff;
     fd_size[naggs-1] -= gpfs_ub_rdoff;
 
@@ -369,178 +543,11 @@
     *fd_size_ptr = fd_size[0];
     *min_st_offset_ptr = min_st_offset;
 
-    ADIOI_Free (fd_size);
-}
-
-
-/* 
- * deprecated
- *
-void ADIOI_BGL_GPFS_Calc_file_domain0(ADIO_Offset *st_offsets, 
-				      ADIO_Offset *end_offsets, 
-				      int          nprocs, 
-				      int          nprocs_for_coll,
-				      ADIO_Offset *min_st_offset_ptr,
-				      ADIO_Offset **fd_start_ptr, 
-				      ADIO_Offset **fd_end_ptr, 
-				      ADIO_Offset *fd_size_ptr)
-{
-    ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
-    int i;
-static int GPFS_BSIZE=1048576;
-     * find the range of all the requests *
-    min_st_offset  = st_offsets [0];
-    max_end_offset = end_offsets[0];
-    for (i=1; i<nprocs; i++) {
-        min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
-        max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
-    }
-
-     * determine the "file domain (FD)" of each process, i.e., the portion of
-       the file that will be "owned" by each process *
-          
-     * GPFS specific, pseudo starting/end point has to round to GPFS_BSIZE *
-    ADIO_Offset gpfs_ub       = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1;
-    ADIO_Offset gpfs_lb       = min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
-    ADIO_Offset gpfs_ub_rdoff = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1 - max_end_offset;
-    ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
-    ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
-
-     * all computation of partition is based on the rounded pseudo-range *
-    ADIO_Offset fds_ub   = (fd_gpfs_range  +nprocs_for_coll-1) / nprocs_for_coll;
-    ADIO_Offset fds_lb   =  fd_gpfs_range                      / nprocs_for_coll;
-    int         naggs    = nprocs_for_coll;
-    int         npsets   = aggrsInPset[0];         * special meaning for element 0 *
-    fd_size              = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
-    *fd_start_ptr        = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
-    *fd_end_ptr          = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
-    fd_start             = *fd_start_ptr;
-    fd_end               = *fd_end_ptr;
-
-     * some pre-computation to determine rough ratio of when to up-fit, when to low-fit *
-     * 1. get the estimated data per pset *
-     * 2. determine a factor between up and down *
-        int           avg_aggrsInPset = (naggs +npsets-1)/npsets;
-        ADIO_Offset avg_bytes_perPset = fd_gpfs_range / npsets;
-        ADIO_Offset             resid = avg_bytes_perPset % GPFS_BSIZE;
-        ADIO_Offset             downr = GPFS_BSIZE - resid;
-        int                     small = (resid < downr);
-        int                     ratio = downr == 0 ? npsets + 2 : (resid +downr-1)/downr;
-        if (small)              ratio = resid == 0 ? npsets + 2 : (downr +resid-1)/resid;
-
-
-     * go through aggrsInfo of all PSETs *
-    ADIO_Offset fd_range = fd_gpfs_range;
-    int aggr = 0, pset; 
-    for (pset=0; pset<npsets; pset++) {
-
-        ADIO_Offset fds_try  = fds_lb;
-	int         my_naggs = aggrsInPset[pset+1];
-	ADIO_Offset fds_pset;
-
-	 * Last pset will deal with the residuals *
-	if (pset == npsets-1) 
-	    fds_pset = fd_range;
-	else 
-	{
-	    int cond1 = ((pset+1) % ratio == 0);
-	    int cond2 = ((pset+1) % ratio != 0);
-
-	    if (small) {
-		int temp = cond1; cond1 = cond2; cond2 = temp;
-	    }
-
-	    if (cond1) {
-		fds_pset = fds_try * my_naggs;
-		if (fds_pset % GPFS_BSIZE)   			// align to GPFS_BSIZE
-		    fds_pset = ((fds_pset +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;	
-	    }
-	    if (cond2) 
-	    {
-		fds_try = fds_ub;
-		fds_pset = fds_try * my_naggs;
-		if (fds_pset % GPFS_BSIZE)   			// align to GPFS_BSIZE
-		    fds_pset = (fds_pset / GPFS_BSIZE) * GPFS_BSIZE;	
-	    }
-	}
-
-	 * for aggrs in each PSET, divide evenly the data range *
-#define CN_ALIGN 1
-#if !CN_ALIGN
-	fd_range -= fds_pset;
-	if ( pset == 0        ) fds_pset -= gpfs_lb_rdoff;
-	if ( pset == npsets-1 ) fds_pset -= gpfs_ub_rdoff;
-        int p;
-        for (p=0; p<my_naggs; p++) {
-            fd_size[aggr]  = (fds_pset   +my_naggs-1) / my_naggs;
-            if (p== my_naggs-1)
-                fd_size[aggr] -= (fd_size[aggr]*my_naggs - fds_pset);
-
-            aggr++;
-        }
-#else
-        ADIO_Offset avg_bytes_perP = fds_pset / my_naggs;
-        ADIO_Offset resid2 = avg_bytes_perP % GPFS_BSIZE;
-        ADIO_Offset downr2 = GPFS_BSIZE - resid2;
-        int small2 = (resid2 < downr2);
-        int         ratio2 = downr2 == 0 ? my_naggs + 2 : (resid2 +downr2-1)/downr2;
-        if (small2) ratio2 = resid2 == 0 ? my_naggs + 2 : (downr2 +resid2-1)/resid2;
-        ADIO_Offset accu = 0;
-        int p;
-        for (p=0; p<my_naggs; p++) {
-            int cond1 = ((p+1) % ratio2 == 0);
-            int cond2 = ((p+1) % ratio2 != 0);
-            if (small2) {
-                int temp = cond1; cond1 = cond2; cond2 = temp;
-            }
-            fd_size[aggr]  = avg_bytes_perP;
-            if (cond2) fd_size[aggr] = ((fd_size[aggr] +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;
-            if (cond1) fd_size[aggr] = ((fd_size[aggr]              )/GPFS_BSIZE) * GPFS_BSIZE;
-            if (p== my_naggs-1)
-                fd_size[aggr] = (fds_pset - accu);
-
-            accu     += fd_size[aggr];
-            fd_range -= fd_size[aggr];
-            aggr++;
-        }
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5005, 0, NULL);
 #endif
-    }
-
-     * after scheduling, the first and the last region has to remove the round-off effect *
-
-#if CN_ALIGN
-    fd_size[0]       -= gpfs_lb_rdoff;
-    fd_size[naggs-1] -= gpfs_ub_rdoff;
-#endif
-    
-     * compute the file domain for each aggr *
-    ADIO_Offset offset = min_st_offset;
-    for (aggr=0; aggr<naggs; aggr++) {
-        fd_start[aggr] = offset;
-	fd_end  [aggr] = offset + fd_size[aggr] - 1;
-	offset += fd_size[aggr];
-    }
-
-     *
-    printf( "\t%6d : %12qd:%12qd, %12qd:%12qd:%12qd, %12qd:%12qd:%12qd\n", 
-	    naggs,
-	    min_st_offset,
-	    max_end_offset,
-	    fd_start[0],	
-	    fd_end  [0],	
-	    fd_size [0],	
-	    fd_start[naggs-1],	
-	    fd_end  [naggs-1],	
-	    fd_size [naggs-1] );	
-    *
-
-
-    *fd_size_ptr = fd_size[0];
-    *min_st_offset_ptr = min_st_offset;
-
     ADIOI_Free (fd_size);
 }
-*/
 
 /* 
  * When a process is an IO aggregator, this will return its index in the aggrs list.
@@ -555,72 +562,14 @@
 }
 
 /* 
- * This is more general aggregator search function which does not base on the assumption
- * that each aggregator hosts the file domain with the same size 
- */
-int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
-			      ADIO_Offset off,
-			      ADIO_Offset min_off,
-			      ADIO_Offset *len,
-			      ADIO_Offset fd_size,
-			      ADIO_Offset *fd_start,
-			      ADIO_Offset *fd_end)
-{
-    int rank_index, rank;
-    ADIO_Offset avail_bytes;
-
-    AD_BGL_assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) );
-
-    /* binary search --> rank_index is returned */
-    int ub = fd->hints->cb_nodes;
-    int lb = 0;
-    rank_index = fd->hints->cb_nodes / 2;
-    while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
-	if ( off > fd_end  [rank_index] ) {
-	    lb = rank_index;
-	    rank_index = (rank_index + ub) / 2;
-	}
-	else 
-	if ( off < fd_start[rank_index] ) {
-	    ub = rank_index;
-	    rank_index = (rank_index + lb) / 2;
-	}
-    }
-
-    // printf ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
-
-    /* 
-     * remember here that even in Rajeev's original code it was the case that
-     * different aggregators could end up with different amounts of data to
-     * aggregate.  here we use fd_end[] to make sure that we know how much
-     * data this aggregator is working with.  
-     *
-     * the +1 is to take into account the end vs. length issue.
-     */
-    avail_bytes = fd_end[rank_index] + 1 - off;
-    if (avail_bytes < *len && avail_bytes > 0) {
-        /* this file domain only has part of the requested contig. region */
-
-        *len = avail_bytes;
-    }
-
-    /* map our index to a rank */
-    /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
-    rank = fd->hints->ranklist[rank_index];
-
-    return rank;
-}
-
-
-/* 
  * ADIOI_BGL_Calc_my_req() overrides ADIOI_Calc_my_req for the default implementation 
  * is specific for static file domain partitioning.
  *
- * ADIOI_Calc_my_req() calculate what portions of the access requests
+ * ADIOI_Calc_my_req() - calculate what portions of the access requests
  * of this process are located in the file domains of various processes
  * (including this one)
  */
-void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list, 
+void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, 
 			   int contig_access_count, ADIO_Offset 
 			   min_st_offset, ADIO_Offset *fd_start,
 			   ADIO_Offset *fd_end, ADIO_Offset fd_size,
@@ -629,12 +578,17 @@
 			   int **count_my_req_per_proc_ptr,
 			   ADIOI_Access **my_req_ptr,
 			   int **buf_idx_ptr)
+/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets? 
+   They are used as memory buffer indices so it seems like the 2G limit is in effect */
 {
     int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
     int i, l, proc;
     ADIO_Offset fd_len, rem_len, curr_idx, off;
     ADIOI_Access *my_req;
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5024, 0, NULL);
+#endif
 
     *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); 
     count_my_req_per_proc = *count_my_req_per_proc_ptr;
@@ -656,10 +610,10 @@
      * contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
      */
     for (i=0; i < contig_access_count; i++) {
-
-	/* When there is no data being processed, bypass this loop */
-	if (len_list[i] == 0) continue;
-
+	/* short circuit offset/len processing if len == 0 
+	 * 	(zero-byte read/write) */
+	if (len_list[i] == 0) 
+		continue;
 	off = offset_list[i];
 	fd_len = len_list[i];
 	/* note: we set fd_len to be the total size of the access.  then
@@ -710,20 +664,24 @@
 /* now fill in my_req */
     curr_idx = 0;
     for (i=0; i<contig_access_count; i++) { 
-
-        /* When there is no data being processed, bypass this loop */
-        if (len_list[i] == 0) continue;
-
+	/* short circuit offset/len processing if len == 0 
+	 * 	(zero-byte read/write) */
+	if (len_list[i] == 0)
+		continue;
 	off = offset_list[i];
 	fd_len = len_list[i];
 	proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size, 
 				     fd_start, fd_end);
 
 	/* for each separate contiguous access from this process */
-	if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
+	if (buf_idx[proc] == -1)
+  {
+    ADIOI_Assert(curr_idx == (int) curr_idx);
+    buf_idx[proc] = (int) curr_idx;
+  }
 
 	l = my_req[proc].count;
-	curr_idx += (int) fd_len; /* NOTE: Why is curr_idx an int?  Fix? */
+	curr_idx += fd_len;
 
 	rem_len = len_list[i] - fd_len;
 
@@ -733,6 +691,7 @@
 	 * and the associated count. 
 	 */
 	my_req[proc].offsets[l] = off;
+  ADIOI_Assert(fd_len == (int) fd_len);
 	my_req[proc].lens[l] = (int) fd_len;
 	my_req[proc].count++;
 
@@ -742,13 +701,18 @@
 	    proc = ADIOI_BGL_Calc_aggregator(fd, off, min_st_offset, &fd_len, 
 					 fd_size, fd_start, fd_end);
 
-	    if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
+	    if (buf_idx[proc] == -1) 
+      {
+        ADIOI_Assert(curr_idx == (int) curr_idx);
+        buf_idx[proc] = (int) curr_idx;
+      }
 
 	    l = my_req[proc].count;
 	    curr_idx += fd_len;
 	    rem_len -= fd_len;
 
 	    my_req[proc].offsets[l] = off;
+      ADIOI_Assert(fd_len == (int) fd_len);
 	    my_req[proc].lens[l] = (int) fd_len;
 	    my_req[proc].count++;
 	}
@@ -757,27 +721,26 @@
 #ifdef AGG_DEBUG
     for (i=0; i<nprocs; i++) {
 	if (count_my_req_per_proc[i] > 0) {
-	    FPRINTF(stdout, "data needed from %d (count = %d):\n", i, 
+	    DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i, 
 		    my_req[i].count);
 	    for (l=0; l < my_req[i].count; l++) {
-		FPRINTF(stdout, "   off[%d] = %Ld, len[%d] = %d\n", l,
+		DBG_FPRINTF(stderr, "   off[%d] = %lld, len[%d] = %d\n", l,
 			my_req[i].offsets[l], l, my_req[i].lens[l]);
 	    }
 	}
-    }
-#if 0
-    for (i=0; i<nprocs; i++) {
-	FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
+	DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
     }
 #endif
-#endif
 
     *count_my_req_procs_ptr = count_my_req_procs;
     *buf_idx_ptr = buf_idx;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5025, 0, NULL);
+#endif
 }
 
 /*
- * ADIOI_Calc_others_req
+ * ADIOI_Calc_others_req (copied to bgl and switched to all to all for performance)
  *
  * param[in]  count_my_req_procs        Number of processes whose file domain my
  *                                        request touches.
@@ -826,7 +789,9 @@
 	 *recvBufForLens   =(void*)0xFFFFFFFF; 
 
 /* first find out how much to send/recv and from/to whom */
-
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5026, 0, NULL);
+#endif
     /* Send 1 int to each process.  count_my_req_per_proc[i] is the number of 
      * requests that my process will do to the file domain owned by process[i].
      * Receive 1 int from each process.  count_others_req_per_proc[i] is the number of
@@ -866,9 +831,9 @@
 	    others_req[i].lens = (int *)
 		ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(int)); 
 
-	    if ( (unsigned)others_req[i].offsets < (unsigned)recvBufForOffsets )
+	    if ( (MPIR_Upint)others_req[i].offsets < (MPIR_Upint)recvBufForOffsets )
 		recvBufForOffsets = others_req[i].offsets;
-	    if ( (unsigned)others_req[i].lens < (unsigned)recvBufForLens )
+	    if ( (MPIR_Upint)others_req[i].lens < (MPIR_Upint)recvBufForLens )
 		recvBufForLens = others_req[i].lens;
 
 	    others_req[i].mem_ptrs = (MPI_Aint *)
@@ -883,6 +848,9 @@
 	    others_req[i].lens    = NULL;
 	}
     }
+    /* If no recv buffer was allocated in the loop above, make it NULL */
+    if ( recvBufForOffsets == (void*)0xFFFFFFFF) recvBufForOffsets = NULL;
+    if ( recvBufForLens    == (void*)0xFFFFFFFF) recvBufForLens    = NULL;
     
     /* Now send the calculated offsets and lengths to respective processes */
 
@@ -894,14 +862,18 @@
     for (i=0; i<nprocs; i++)
     {
 	if ( (my_req[i].count) &&
-	     ((unsigned)my_req[i].offsets <= (unsigned)sendBufForOffsets) )
-	    sendBufForOffsets = my_req[i].offsets;
+	     ((MPIR_Upint)my_req[i].offsets <= (MPIR_Upint)sendBufForOffsets) )
+	  sendBufForOffsets = my_req[i].offsets;
 	   
 	if ( (my_req[i].count) &&
-	     ((unsigned)my_req[i].lens <= (unsigned)sendBufForLens) )
+	     ((MPIR_Upint)my_req[i].lens <= (MPIR_Upint)sendBufForLens) )
 	    sendBufForLens = my_req[i].lens;
     }
 
+    /* If no send buffer was found in the loop above, make it NULL */
+    if ( sendBufForOffsets == (void*)0xFFFFFFFF) sendBufForOffsets = NULL;
+    if ( sendBufForLens    == (void*)0xFFFFFFFF) sendBufForLens    = NULL;
+
     /* Calculate the displacements from the sendBufForOffsets/Lens */
     for (i=0; i<nprocs; i++)
     {
@@ -910,16 +882,20 @@
 	if ( scounts[i] == 0 )
 	    sdispls[i] = 0;
 	else
-	    sdispls[i] = ( (unsigned)my_req[i].offsets - 
-			   (unsigned)sendBufForOffsets ) / sizeof(ADIO_Offset);
+  	  sdispls[i] =  (int)
+	                ( ( (MPIR_Upint)my_req[i].offsets - 
+			   (MPIR_Upint)sendBufForOffsets ) / 
+			  (MPIR_Upint)sizeof(ADIO_Offset) );
 
 	// Receive these offsets from process i.
 	rcounts[i] = count_others_req_per_proc[i];
 	if ( rcounts[i] == 0 )
 	    rdispls[i] = 0;
 	else
-	    rdispls[i] = ( (unsigned)others_req[i].offsets - 
-			   (unsigned)recvBufForOffsets ) / sizeof(ADIO_Offset);
+	    rdispls[i] = (int)
+	                 ( ( (MPIR_Upint)others_req[i].offsets - 
+			     (MPIR_Upint)recvBufForOffsets ) / 
+			   (MPIR_Upint)sizeof(ADIO_Offset) );
     }
 
     /* Exchange the offsets */
@@ -940,16 +916,20 @@
 	if ( scounts[i] == 0 )
 	    sdispls[i] = 0;
 	else
-	    sdispls[i] = ( (unsigned)my_req[i].lens - 
-			   (unsigned)sendBufForLens ) / sizeof(int);
+	  sdispls[i] = (int)
+	               ( ( (MPIR_Upint)my_req[i].lens - 
+			   (MPIR_Upint)sendBufForLens ) / 
+			 (MPIR_Upint) sizeof(int) );
 	
 	// Receive these offsets from process i.
 	rcounts[i] = count_others_req_per_proc[i];
 	if ( rcounts[i] == 0 )
 	    rdispls[i] = 0;
 	else
-	    rdispls[i] = ( (unsigned)others_req[i].lens - 
-			   (unsigned)recvBufForLens ) / sizeof(int);
+	    rdispls[i] = (int)
+	                 ( ( (MPIR_Upint)others_req[i].lens - 
+			     (MPIR_Upint)recvBufForLens ) / 
+			   (MPIR_Upint) sizeof(int) );
     }
 
     /* Exchange the lengths */
@@ -967,4 +947,7 @@
     ADIOI_Free (rdispls);
 
     *count_others_req_procs_ptr = count_others_req_procs;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5027, 0, NULL);
+#endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.h	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.h	2010-11-15 15:02:47.000000000 +0100
@@ -1,5 +1,6 @@
 /* ---------------------------------------------------------------- */
 /* (C)Copyright IBM Corp.  2007, 2008                               */
+/* ---------------------------------------------------------------- */
 /**
  * \file ad_bgl_aggrs.h
  * \brief ???
@@ -22,13 +23,22 @@
 #include "adio.h"
 #include <sys/stat.h>
 
-    extern int *aggrsInPset;	/* defined in ad_bgl_aggrs.c */
-
+#if !defined(GPFS_SUPER_MAGIC)
+  #define GPFS_SUPER_MAGIC (0x47504653)
+#endif
+
+#if !defined(PVFS2_SUPER_MAGIC)
+  #define PVFS2_SUPER_MAGIC (0x20030528)
+#endif
 
     /* File system (BGL) specific information - 
          hung off of ADIOI_FileD file descriptor (fd->fs_ptr) at open */
     typedef struct ADIOI_BGL_fs_s {
       __blksize_t blksize;
+      int         fsync_aggr; /* "fsync aggregation" flags (below) */
+#define ADIOI_BGL_FSYNC_AGGREGATION_DISABLED  0x00
+#define ADIOI_BGL_FSYNC_AGGREGATION_ENABLED   0x01
+#define ADIOI_BGL_FSYNC_AGGREGATOR            0x10 /* This rank is an aggregator */
     }  ADIOI_BGL_fs;
 
     /* generate a list of I/O aggregators that utilizes BGL-PSET orginization. */
@@ -60,7 +70,7 @@
 
     /* overriding ADIOI_Calc_my_req for the default implementation is specific for 
        static file domain partitioning */
-    void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
+    void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
 				 int contig_access_count, ADIO_Offset
 				 min_st_offset, ADIO_Offset *fd_start,
 				 ADIO_Offset *fd_end, ADIO_Offset fd_size,
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,5 +1,6 @@
 /* ---------------------------------------------------------------- */
 /* (C)Copyright IBM Corp.  2007, 2008                               */
+/* ---------------------------------------------------------------- */
 /**
  * \file ad_bgl.c
  * \brief ???
@@ -18,6 +19,7 @@
 
 struct ADIOI_Fns_struct ADIO_BGL_operations = {
     ADIOI_BGL_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* Collective open */
     ADIOI_BGL_ReadContig, /* ReadContig */
     ADIOI_BGL_WriteContig, /* WriteContig */
 #if BGL_OPTIM_STEP1_2
@@ -51,7 +53,8 @@
     ADIOI_GEN_IOComplete, /* WriteComplete */
     ADIOI_GEN_IreadStrided, /* IreadStrided */
     ADIOI_GEN_IwriteStrided, /* IwriteStrided */
-    ADIOI_GEN_Flush, /* Flush */
+    ADIOI_BGL_Flush, /* Flush */
     ADIOI_GEN_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature, /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,7 +1,8 @@
 /* ---------------------------------------------------------------- */
 /* (C)Copyright IBM Corp.  2007, 2008                               */
+/* ---------------------------------------------------------------- */
 /**
- * \file ad_bgl_open.c
+ * \file ad_bgl_close.c
  * \brief ???
  */
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,5 +1,6 @@
 /* ---------------------------------------------------------------- */
 /* (C)Copyright IBM Corp.  2007, 2008                               */
+/* ---------------------------------------------------------------- */
 /**
  * \file ad_bgl_fcntl.c
  * \brief ???
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl: ad_bgl_flush.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h	2010-11-15 15:02:47.000000000 +0100
@@ -28,8 +28,10 @@
 #include <aio.h>
 #endif
 
+#if 0 
 int ADIOI_BGL_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
 		  int wr, void *handle);
+#endif
 
 void ADIOI_BGL_Open(ADIO_File fd, int *error_code);
 
@@ -87,6 +89,7 @@
 void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code);
 void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
 
+void ADIOI_BGL_Flush(ADIO_File fd, int *error_code);
 
 #include "ad_bgl_tuning.h"
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -38,8 +38,8 @@
 
     MPI_Info info;
     char *value;
-    int flag, intval, tmp_val, nprocs, nprocs_is_valid = 0;
-    static char myname[] = "ADIOI_GEN_SETINFO";
+    int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0;
+    static char myname[] = "ADIOI_BGL_SETINFO";
 
     int did_anything = 0;
 
@@ -61,15 +61,15 @@
 	did_anything = 1;
 
 	/* buffer size for collective I/O */
-	MPI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT); 
+	ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT); 
 	fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
 
 	/* default is to let romio automatically decide when to use
 	 * collective buffering
 	 */
-	MPI_Info_set(info, "romio_cb_read", "enable"); 
+	ADIOI_Info_set(info, "romio_cb_read", "enable"); 
 	fd->hints->cb_read = ADIOI_HINT_ENABLE;
-	MPI_Info_set(info, "romio_cb_write", "enable"); 
+	ADIOI_Info_set(info, "romio_cb_write", "enable"); 
 	fd->hints->cb_write = ADIOI_HINT_ENABLE;
 
    	if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
@@ -78,30 +78,54 @@
 	/* number of processes that perform I/O in collective I/O */
 	MPI_Comm_size(fd->comm, &nprocs);
 	nprocs_is_valid = 1;
-	sprintf(value, "%d", nprocs);
-	MPI_Info_set(info, "cb_nodes", value);
+	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
+	ADIOI_Info_set(info, "cb_nodes", value);
 	fd->hints->cb_nodes = -1;
 
 	/* hint indicating that no indep. I/O will be performed on this file */
-	MPI_Info_set(info, "romio_no_indep_rw", "false");
+	ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 	fd->hints->no_indep_rw = 0;
-	 /* deferred_open derrived from no_indep_rw and cb_{read,write} */
+
+	/* bgl does not implement file realms (ADIOI_IOStridedColl),
+	   so initialize the hint to disabled. */
+	/* hint instructing the use of persistent file realms */
+	ADIOI_Info_set(info, "romio_cb_pfr", "disable");
+	fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+	
+	/* hint guiding the assignment of persistent file realms */
+	ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
+	fd->hints->cb_fr_type = ADIOI_FR_AAR;
+
+	/* hint to align file realms with a certain byte value */
+	ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
+	fd->hints->cb_fr_alignment = 1;
+
+	/* hint to set a threshold percentage for a datatype's size/extent at
+	 * which data sieving should be done in collective I/O */
+	ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
+	fd->hints->cb_ds_threshold = 0;
+
+	/* hint to switch between point-to-point or all-to-all for two-phase */
+	ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
+	fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+
+	 /* deferred_open derived from no_indep_rw and cb_{read,write} */
 	fd->hints->deferred_open = 0;
 
 	/* buffer size for data sieving in independent reads */
-	MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
+	ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
 	fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
 
 	/* buffer size for data sieving in independent writes */
-	MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
+	ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
 	fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
 
   if(fd->file_system == ADIO_UFS)
   {
     /* default for ufs/pvfs is to disable data sieving  */
-    MPI_Info_set(info, "romio_ds_read", "disable"); 
+    ADIOI_Info_set(info, "romio_ds_read", "disable"); 
     fd->hints->ds_read = ADIOI_HINT_DISABLE;
-    MPI_Info_set(info, "romio_ds_write", "disable"); 
+    ADIOI_Info_set(info, "romio_ds_write", "disable"); 
     fd->hints->ds_write = ADIOI_HINT_DISABLE;
   }
   else
@@ -109,18 +133,23 @@
     /* default is to let romio automatically decide when to use data
      * sieving
      */
-    MPI_Info_set(info, "romio_ds_read", "automatic"); 
+    ADIOI_Info_set(info, "romio_ds_read", "automatic"); 
     fd->hints->ds_read = ADIOI_HINT_AUTO;
-    MPI_Info_set(info, "romio_ds_write", "automatic"); 
+    ADIOI_Info_set(info, "romio_ds_write", "automatic"); 
     fd->hints->ds_write = ADIOI_HINT_AUTO;
   }
 
-	fd->hints->initialized = 1;
+    /* still to do: tune this a bit for a variety of file systems. there's
+	 * no good default value so just leave it unset */
+    fd->hints->min_fdomain_size = 0;
+    fd->hints->striping_unit = 0;
+
+    fd->hints->initialized = 1;
     }
 
     /* add in user's info if supplied */
     if (users_info != MPI_INFO_NULL) {
-	MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval=atoi(value)) > 0)) {
 	    tmp_val = intval;
@@ -135,30 +164,106 @@
 	    }
 	    /* --END ERROR HANDLING-- */
 
-	    MPI_Info_set(info, "cb_buffer_size", value);
+	    ADIOI_Info_set(info, "cb_buffer_size", value);
 	    fd->hints->cb_buffer_size = intval;
 
 	}
+#if 0
+	/* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
+	/* aligning file realms to certain sizes (e.g. stripe sizes)
+	 * may benefit I/O performance */
+	ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) > 0)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_fr_alignment",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
+	    fd->hints->cb_fr_alignment = intval;
+
+	}
+
+	/* for collective I/O, try to be smarter about when to do data sieving
+	 * using a specific threshold for the datatype size/extent
+	 * (percentage 0-100%) */
+	ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) > 0)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_ds_threshold",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
+	    fd->hints->cb_ds_threshold = intval;
+
+	}
+	/* two-phase exchange style: store it in cb_alltoall, checked below */
+	ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
+		     &flag);
+	if (flag) {
+	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_ENABLE;
+	    }
+	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_DISABLE;
+	    }
+	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+	    }
+
+	    tmp_val = fd->hints->cb_alltoall;
 
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != fd->hints->cb_alltoall) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_alltoall",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+	}
+#endif
 	/* new hints for enabling/disabling coll. buffering on
 	 * reads/writes
 	 */
-	MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
+	ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
+		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_cb_read", value);
 		fd->hints->cb_read = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
 		    /* romio_cb_read overrides no_indep_rw */
-		MPI_Info_set(info, "romio_cb_read", value);
-		MPI_Info_set(info, "romio_no_indep_rw", "false");
+		ADIOI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 		fd->hints->cb_read = ADIOI_HINT_DISABLE;
 		fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_cb_read", value);
 		fd->hints->cb_read = ADIOI_HINT_AUTO;
 	    }
 
@@ -174,24 +279,25 @@
 	    }
 	    /* --END ERROR HANDLING-- */
 	}
-	MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag);
+	ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
+		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_cb_write", value);
 		fd->hints->cb_write = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
 	    {
 		/* romio_cb_write overrides no_indep_rw, too */
-		MPI_Info_set(info, "romio_cb_write", value);
-		MPI_Info_set(info, "romio_no_indep_rw", "false");
+		ADIOI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 		fd->hints->cb_write = ADIOI_HINT_DISABLE;
 		fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") ||
 		     !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_cb_write", value);
 		fd->hints->cb_write = ADIOI_HINT_AUTO;
 	    }
 	
@@ -208,23 +314,81 @@
 	    /* --END ERROR HANDLING-- */
 	}
 
+#if 0
+	/* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
+	/* enable/disable persistent file realms for collective I/O */
+	/* may want to check for no_indep_rdwr hint as well */
+	ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
+		     &flag);
+	if (flag) {
+	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
+	    }
+	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+	    }
+	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+	    {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_AUTO;
+	    }
+
+	    tmp_val = fd->hints->cb_pfr;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != fd->hints->cb_pfr) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_pfr",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+	}
+
+	/* file realm assignment types ADIOI_FR_AAR(0),
+	 ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
+	 a regular fr size in bytes. probably not the best way... */
+	ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) >= -2)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_fr_type",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_fr_type", value);
+	    fd->hints->cb_fr_type = intval;
+
+	}
+#endif
 	/* new hint for specifying no indep. read/write will be performed */
-	MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag);
+	ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
+		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
 		    /* if 'no_indep_rw' set, also hint that we will do
 		     * collective buffering: if we aren't doing independent io,
 		     * then we have to do collective  */
-		MPI_Info_set(info, "romio_no_indep_rw", value);
-		MPI_Info_set(info, "romio_cb_write", "enable");
-		MPI_Info_set(info, "romio_cb_read", "enable");
+		ADIOI_Info_set(info, "romio_no_indep_rw", value);
+		ADIOI_Info_set(info, "romio_cb_write", "enable");
+		ADIOI_Info_set(info, "romio_cb_read", "enable");
 		fd->hints->no_indep_rw = 1;
 		fd->hints->cb_read = 1;
 		fd->hints->cb_write = 1;
 		tmp_val = 1;
 	    }
 	    else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
-		MPI_Info_set(info, "romio_no_indep_rw", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", value);
 		fd->hints->no_indep_rw = 0;
 		tmp_val = 0;
 	    }
@@ -246,64 +410,80 @@
 	/* new hints for enabling/disabling data sieving on
 	 * reads/writes
 	 */
-	MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, 
+	ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, 
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_AUTO;
 	    }
 	    /* otherwise ignore */
 	}
-	MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, 
+	ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, 
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_AUTO;
 	    }
 	    /* otherwise ignore */
 	}
 
-	MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval = atoi(value)) > 0)) {
-	    MPI_Info_set(info, "ind_wr_buffer_size", value);
+	    ADIOI_Info_set(info, "ind_wr_buffer_size", value);
 	    fd->hints->ind_wr_buffer_size = intval;
 	}
 
-	MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval = atoi(value)) > 0)) {
-	    MPI_Info_set(info, "ind_rd_buffer_size", value);
+	    ADIOI_Info_set(info, "ind_rd_buffer_size", value);
 	    fd->hints->ind_rd_buffer_size = intval;
 	}
 
 	memset( value, 0, MPI_MAX_INFO_VAL+1 );
-	MPI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
+	ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
+			value, &flag);
+	if ( flag && ((intval = atoi(value)) > 0) ) {
+		ADIOI_Info_set(info, "romio_min_fdomain_size", value);
+		fd->hints->min_fdomain_size = intval;
+	}
+  /* Now we use striping unit in common code so we should
+     process hints for it. */
+	ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+			value, &flag);
+	if ( flag && ((intval = atoi(value)) > 0) ) {
+		ADIOI_Info_set(info, "striping_unit", value);
+		fd->hints->striping_unit = intval;
+	}
+
+	memset( value, 0, MPI_MAX_INFO_VAL+1 );
+        ADIOI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
 		     value, &flag);
 	if (flag && ((intval = atoi(value)) > 0)) {
 
 	    did_anything = 1;
-	    MPI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
+	    ADIOI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
 	    fd->hints->cb_nodes = intval;
 	}
     }
@@ -312,24 +492,30 @@
     if (did_anything) {
 	ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes);
     }
-
-    /* deferred_open won't be set by callers, but if the user doesn't
-     * explicitly disable collecitve buffering (two-phase) and does hint that
-     * io w/o independent io is going on, we'll set this internal hint as a
-     * convenience */
-    if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE) 
-	    && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
-	    && fd->hints->no_indep_rw ) ) 
-    {
-	    fd->hints->deferred_open = 1;
-    } else {
-	    /* setting romio_no_indep_rw enable and romio_cb_{read,write}
-	     * disable at the same time doesn't make sense. honor
-	     * romio_cb_{read,write} and force the no_indep_rw hint to
-	     * 'disable' */
-	    MPI_Info_set(info, "romio_no_indep_rw", "false");
-	    fd->hints->no_indep_rw = 0;
-	    fd->hints->deferred_open = 0;
+    /* ignore deferred open hints and do not enable it for bluegene: need all
+     * processors in the open path so we can stat-and-broadcast the blocksize
+     */
+    ADIOI_Info_set(info, "romio_no_indep_rw", "false");
+    fd->hints->no_indep_rw = 0;
+    fd->hints->deferred_open = 0;
+
+    /* BobC commented this out, but since hint processing runs on both bgl and
+     * bglockless, we need to keep DS writes enabled on gpfs and disabled on
+     * PVFS */
+    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
+    /* disable data sieving for fs that do not
+       support file locking */
+       	ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+		     value, &flag);
+	if (flag) {
+	    /* get rid of this value if it is set */
+	    ADIOI_Info_delete(info, "ind_wr_buffer_size");
+	}
+	/* note: leave ind_wr_buffer_size alone; used for other cases
+	 * as well. -- Rob Ross, 04/22/2003
+	 */
+	ADIOI_Info_set(info, "romio_ds_write", "disable");
+	fd->hints->ds_write = ADIOI_HINT_DISABLE;
     }
 
     ADIOI_Free(value);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -15,6 +15,181 @@
 #include "ad_bgl.h"
 #include "ad_bgl_aggrs.h"
 
+#include <sys/statfs.h>
+#include <sys/vfs.h>
+
+/* COPIED FROM ad_fstype.c since it is static in that file
+
+ ADIO_FileSysType_parentdir - determines a string pathname for the
+ parent directory of a given filename.
+
+Input Parameters:
+. filename - pointer to file name character array
+
+Output Parameters:
+. dirnamep - pointer to location in which to store a pointer to a string
+
+ Note that the caller should free the memory located at the pointer returned
+ after the string is no longer needed.
+*/
+
+#ifndef PATH_MAX
+#define PATH_MAX 65535
+#endif
+
+/* In a strict ANSI environment, S_ISLNK may not be defined.  Fix that
+   here.  We assume that S_ISLNK is *always* defined as a macro.  If
+   that is not universally true, then add a test to the romio
+   configure that tries to link a program that references S_ISLNK */
+#if !defined(S_ISLNK)
+#    if defined(S_IFLNK) && defined(S_IFMT)
+     /* Compare the whole file-type field; a bare bit test can match non-links */
+#    define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
+#    else
+     /* no way to check if it is a link, so say false */
+#    define S_ISLNK(mode) 0
+#    endif
+#endif /* !(S_ISLNK) */
+
+/* ADIO_FileSysType_parentdir
+ *
+ * Stores the parent directory of filename in *dirnamep.  The string is
+ * allocated with ADIOI_Strdup; the caller owns it and must release it.
+ */
+static void ADIO_FileSysType_parentdir(char *filename, char **dirnamep)
+{
+    int err;
+    char *dir = NULL, *slash;
+    struct stat statbuf;
+
+    err = lstat(filename, &statbuf);
+
+    if (err || (!S_ISLNK(statbuf.st_mode))) {
+	/* no such file, or file is not a link; these are the "normal"
+	 * cases where we can just return the parent directory.
+	 */
+	dir = ADIOI_Strdup(filename);
+    }
+    else {
+	/* filename is a symlink.  we've presumably already tried
+	 * to stat it and found it to be missing (dangling link),
+	 * but this code doesn't care if the target is really there
+	 * or not.
+	 */
+	int namelen;
+	char *linkbuf;
+
+	linkbuf = ADIOI_Malloc(PATH_MAX+1);
+	/* read at most PATH_MAX bytes so the terminator below stays in bounds */
+	namelen = readlink(filename, linkbuf, PATH_MAX);
+	if (namelen == -1) {
+	    /* something strange has happened between the time that
+	     * we determined that this was a link and the time that
+	     * we attempted to read it; punt and use the old name.
+	     */
+	    dir = ADIOI_Strdup(filename);
+	}
+	else {
+	    linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */
+	    dir = ADIOI_Strdup(linkbuf);
+	}
+	ADIOI_Free(linkbuf); /* release on both paths; it used to leak on failure */
+    }
+
+    slash = strrchr(dir, '/');
+    if (!slash) ADIOI_Strncpy(dir, ".", 2);
+    else {
+	if (slash == dir) *(dir + 1) = '\0';
+	else *slash = '\0';
+    }
+
+    *dirnamep = dir;
+    return;
+}
+
+/* scaleable_stat
+ *
+ * Rank 0 looks up the block size (stat64) and file system type (statfs,
+ * retried on the parent directory) and broadcasts both over fd->comm;
+ * results go into fd->fs_ptr, which must already be allocated.
+ */
+static void scaleable_stat(ADIO_File fd)
+{
+    struct stat64 bgl_stat;
+    struct statfs bgl_statfs;
+    int rank, rc;
+    char * dir;
+    long buf[2];
+    MPI_Comm_rank(fd->comm, &rank);
+
+    if (rank == 0) {
+	/* Get the (real) underlying file system block size */
+	rc = stat64(fd->filename, &bgl_stat);
+	if (rc >= 0) {
+	    buf[0] = bgl_stat.st_blksize;
+	    DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
+		    fd->filename,bgl_stat.st_blksize);
+	}
+	else {
+	    /* keep the value already in fs_ptr rather than broadcasting garbage */
+	    buf[0] = ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize;
+	    DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n",
+		    fd->filename,rc,errno);
+	}
+	/* Get the (real) underlying file system type so we can 
+	 * plan our fsync scaling strategy */
+	rc = statfs(fd->filename,&bgl_statfs);
+	if (rc >= 0) {
+	    DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",
+		    fd->filename,bgl_statfs.f_type);
+	    buf[1] = bgl_statfs.f_type;
+	}
+	else {
+	    DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",
+		    fd->filename,rc,errno);
+	    ADIO_FileSysType_parentdir(fd->filename, &dir);
+	    rc = statfs(dir,&bgl_statfs);
+	    if (rc >= 0) {
+		DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",dir,bgl_statfs.f_type);
+		buf[1] = bgl_statfs.f_type;
+	    }
+	    else {
+		/* Hmm.  Guess we'll assume the worst-case, that it's not GPFS
+		 * or PVFS2 below */
+		buf[1] = -1; /* bogus magic number */
+		DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno);
+	    }
+	    ADIOI_Free(dir); /* dir was allocated by ADIOI_Strdup */
+	}
+    }
+    /* now we can broadcast the stat/statfs data to everyone else */
+    MPI_Bcast(buf, 2, MPI_LONG, 0, fd->comm);
+    bgl_stat.st_blksize = buf[0];
+    bgl_statfs.f_type = buf[1];
+
+    /* data from stat64 */
+    /* store the blksize in the file system specific storage */
+    ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
+
+    /* data from statfs */
+    if ((bgl_statfs.f_type == GPFS_SUPER_MAGIC) ||
+	    (bgl_statfs.f_type == PVFS2_SUPER_MAGIC)) {
+	((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr = 
+	    ADIOI_BGL_FSYNC_AGGREGATION_ENABLED;
+
+	/* Only one rank is an "fsync aggregator" because only one 
+	 * fsync is needed */
+	if (rank == 0) {
+	    ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr |= 
+		ADIOI_BGL_FSYNC_AGGREGATOR;
+	    DBG_FPRINTF(stderr,"fsync aggregator %d\n",rank);
+	}
+	else ; /* aggregation enabled but this rank is not an aggregator*/
+    }
+    else; /* Other filesystems default to no fsync aggregation */
+}
+
+
 void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
 {
     int perm, old_mask, amode;
@@ -41,8 +216,14 @@
 	amode = amode | O_RDWR;
     if (fd->access_mode & ADIO_EXCL)
 	amode = amode | O_EXCL;
-
+#ifdef ADIOI_MPE_LOGGING
+    MPE_Log_event(ADIOI_MPE_open_a, 0, NULL);
+#endif
     fd->fd_sys = open(fd->filename, amode, perm);
+#ifdef ADIOI_MPE_LOGGING
+    MPE_Log_event(ADIOI_MPE_open_b, 0, NULL);
+#endif
+  DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,errno);
     fd->fd_direct = -1;
 
     if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
@@ -50,18 +231,29 @@
 
     if(fd->fd_sys != -1)
     {
-      struct stat64 bgl_stat;
-      int rc = stat64(fd->filename,&bgl_stat);
-      if (rc >= 0)
-      {
-        /* store the blksize in the file system specific storage */
+        /* Initialize the ad_bgl file system specific information:
+         * allocate the per-file state and give it defaults first, then
+         * let scaleable_stat() fill in what rank 0 learns from the file
+         * system.  (No locals are needed here any more; the stat64 and
+         * statfs buffers live in scaleable_stat().)
+         */
         AD_BGL_assert(fd->fs_ptr == NULL);
         fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs));
-        ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
-/*        FPRINTF(stderr,"%s(%d):Successful stat '%s'.  Blocksize=%ld\n",myname,__LINE__,fd->filename,bgl_stat.st_blksize);*/
-      }
-/*      else
-        FPRINTF(stderr,"%s(%d):Stat '%s' failed with rc=%d, errno=%d\n",myname,__LINE__,fd->filename,rc,errno);*/
+
+        ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = 1048576; /* default to 1M */
+
+        /* default is no fsync aggregation */
+        ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr = 
+	    ADIOI_BGL_FSYNC_AGGREGATION_DISABLED; 
+
+
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL);
+#endif
+        scaleable_stat(fd);
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL);
+#endif
     }
 
     if (fd->fd_sys == -1) {
@@ -112,3 +304,6 @@
     }
     else *error_code = MPI_SUCCESS;
 }
+/* 
+ *vim: ts=8 sts=4 sw=4 noexpandtab 
+ */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,6 +8,7 @@
 
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
 /* 
+ *
  *   Copyright (C) 1997 University of Chicago. 
  *   See COPYRIGHT notice in top-level directory.
  */
@@ -22,18 +23,25 @@
 #include "mpe.h"
 #endif
 
+#ifdef USE_DBG_LOGGING
+  #define RDCOLL_DEBUG 1
+#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
 /* prototypes of functions used for collective reads only. */
 static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
 				datatype, int nprocs,
 				int myrank, ADIOI_Access
 				*others_req, ADIO_Offset *offset_list,
-				int *len_list, int contig_access_count, 
+				ADIO_Offset *len_list, int contig_access_count, 
 				ADIO_Offset
 				min_st_offset, ADIO_Offset fd_size,
 				ADIO_Offset *fd_start, ADIO_Offset *fd_end,
 				int *buf_idx, int *error_code);
 static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-				  *flat_buf, ADIO_Offset *offset_list, int
+				  *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
 				  *len_list, int *send_size, int *recv_size,
 				  int *count, int *start_pos, 
 				  int *partial_send, 
@@ -47,7 +55,7 @@
 				  int iter, 
 				  MPI_Aint buftype_extent, int *buf_idx);
 static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-                                  *flat_buf, ADIO_Offset *offset_list, int
+                                  *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
                                   *len_list, int *send_size, int *recv_size,
                                   int *count, int *start_pos,
                                   int *partial_send,
@@ -62,8 +70,8 @@
                                   MPI_Aint buftype_extent, int *buf_idx);
 static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
 				   *flat_buf, char **recv_buf, ADIO_Offset 
-				   *offset_list, int *len_list, 
-				   int *recv_size, 
+				   *offset_list, ADIO_Offset *len_list, 
+				   unsigned *recv_size, 
 				   MPI_Request *requests, MPI_Status *statuses,
 				   int *recd_from_proc, int nprocs,
 				   int contig_access_count, 
@@ -74,7 +82,7 @@
 
 extern void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
 			    datatype, int file_ptr_type, ADIO_Offset
-			    offset, ADIO_Offset **offset_list_ptr, int
+			    offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
 			    **len_list_ptr, ADIO_Offset *start_offset_ptr,
 			    ADIO_Offset *end_offset_ptr, int
 			   *contig_access_count_ptr);
@@ -99,25 +107,15 @@
        whose request lies in this process's file domain. */
 
     int i, filetype_is_contig, nprocs, nprocs_for_coll, myrank;
-    int contig_access_count, interleave_count = 0, buftype_is_contig;
+    int contig_access_count=0, interleave_count = 0, buftype_is_contig;
     int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
     ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off;
     ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
 	*fd_end = NULL, *end_offsets = NULL;
     ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
     int  ii;
-    int *len_list = NULL, *buf_idx = NULL;
-
-    double io_time = 0., all_time, max_all_time; 
-    double tstep1, max_tstep1;
-    double tstep1_1, max_tstep1_1;
-    double tstep1_2, max_tstep1_2;
-    double tstep1_3, max_tstep1_3;
-    double tstep2, max_tstep2;
-    double tstep3, max_tstep3;
-    double tstep4, max_tstep4;
-    double sum_sz;
-
+    ADIO_Offset *len_list = NULL;
+    int *buf_idx = NULL;
 #if BGL_PROFILE 
     BGLMPIO_T_CIO_RESET( 0, r )
 #endif
@@ -126,6 +124,14 @@
     int bufsize, size;
 #endif
 
+#if 0
+/*   From common code - not implemented for bgl, so kept compiled out. */
+    if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+        ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, 
+			file_ptr_type, offset, status, error_code);
+        return;
+    }
+#endif
 #ifdef PROFILE
         MPE_Log_event(13, 0, "start computation");
 #endif
@@ -157,14 +163,16 @@
     BGLMPIO_T_CIO_SET_GET( 0, r, 1, 1, 1, BGLMPIO_CIO_GATHER, BGLMPIO_CIO_LCOMP )
 #endif
 
-	/*    for (i=0; i<contig_access_count; i++) {
-	      FPRINTF(stderr, "rank %d  off %ld  len %d\n", myrank, offset_list[i], 
-	      len_list[i]);
-	      }*/
+#ifdef RDCOLL_DEBUG
+    for (i=0; i<contig_access_count; i++) {
+	      DBG_FPRINTF(stderr, "rank %d  off %lld  len %lld\n", 
+			      myrank, offset_list[i], len_list[i]);
+    }
+#endif
 
 	/* each process communicates its start and end offsets to other 
-	   processes. The result is an array each of start and end offsets stored
-	   in order of process rank. */ 
+	   processes. The result is an array each of start and end offsets
+	   stored in order of process rank. */ 
     
 	st_offsets   = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
 	end_offsets  = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
@@ -200,7 +208,9 @@
 
 	/* are the accesses of different processes interleaved? */
 	for (i=1; i<nprocs; i++)
-	    if (st_offsets[i] < end_offsets[i-1]) interleave_count++;
+	    if ((st_offsets[i] < end_offsets[i-1]) && 
+                (st_offsets[i] <= end_offsets[i]))
+                interleave_count++;
 	/* This is a rudimentary check for interleaving, but should suffice
 	   for the moment. */
     }
@@ -223,7 +233,7 @@
 
 	if (buftype_is_contig && filetype_is_contig) {
 	    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
-		off = fd->disp + (fd->etype_size) * offset;
+		off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
 		ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
                        off, status, error_code);
 	    }
@@ -263,7 +273,9 @@
     else
     ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
 			    nprocs_for_coll, &min_st_offset,
-			    &fd_start, &fd_end, &fd_size);
+			    &fd_start, &fd_end,
+			    fd->hints->min_fdomain_size, &fd_size, 
+			    fd->hints->striping_unit);
 
 #if BGL_PROFILE 
     BGLMPIO_T_CIO_SET_GET( 0, r, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
@@ -381,205 +393,11 @@
     fd->fp_sys_posn = -1;   /* set it to null. */
 }
 
-#if 0
-void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
-			    datatype, int file_ptr_type, ADIO_Offset
-			    offset, ADIO_Offset **offset_list_ptr, int
-			    **len_list_ptr, ADIO_Offset *start_offset_ptr,
-			    ADIO_Offset *end_offset_ptr, int
-			   *contig_access_count_ptr)
-{
-    int filetype_size, buftype_size, etype_size;
-    int i, j, k, frd_size=0, old_frd_size=0, st_index=0;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
-    int contig_access_count, *len_list, flag, filetype_is_contig;
-    MPI_Aint filetype_extent, filetype_lb;
-    ADIOI_Flatlist_node *flat_file;
-    ADIO_Offset *offset_list, off, end_offset=0, disp;
-    
-/* For this process's request, calculate the list of offsets and
-   lengths in the file and determine the start and end offsets. */
-
-    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
-    MPI_Type_size(fd->filetype, &filetype_size);
-    MPI_Type_extent(fd->filetype, &filetype_extent);
-    MPI_Type_lb(fd->filetype, &filetype_lb);
-    MPI_Type_size(datatype, &buftype_size);
-    etype_size = fd->etype_size;
-
-    if ( ! filetype_size ) {
-	*contig_access_count_ptr = 0;
-	*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-	*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
-        /* 2 is for consistency. everywhere I malloc one more than needed */
-
-	offset_list = *offset_list_ptr;
-	len_list = *len_list_ptr;
-        offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
-	len_list[0] = 0;
-	*start_offset_ptr = offset_list[0];
-	*end_offset_ptr = offset_list[0] + len_list[0] - 1;
-	
-	return;
-    }
-
-    if (filetype_is_contig) {
-	*contig_access_count_ptr = 1;        
-	*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-	*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
-        /* 2 is for consistency. everywhere I malloc one more than needed */
-
-	offset_list = *offset_list_ptr;
-	len_list = *len_list_ptr;
-        offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
-	len_list[0] = bufcount * buftype_size;
-	*start_offset_ptr = offset_list[0];
-	*end_offset_ptr = offset_list[0] + len_list[0] - 1;
-
-	/* update file pointer */
-	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = *end_offset_ptr + 1;
-    }
-
-    else {
-
-       /* First calculate what size of offset_list and len_list to allocate */
-   
-       /* filetype already flattened in ADIO_Open or ADIO_Fcntl */
-	flat_file = ADIOI_Flatlist;
-	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-	disp = fd->disp;
-
-	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-		n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-			(ADIO_Offset) n_filetypes*filetype_extent + 
-			flat_file->blocklens[i] >= offset) 
-		    {
-			st_index = i;
-			frd_size = (int) (disp + flat_file->indices[i] + 
-			    (ADIO_Offset) n_filetypes*filetype_extent
-			        + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
-		}
-	    }
-	}
-	else {
-	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
-	    size_in_filetype = etype_in_filetype * etype_size;
- 
-	    sum = 0;
-	    for (i=0; i<flat_file->count; i++) {
-		sum += flat_file->blocklens[i];
-		if (sum > size_in_filetype) {
-		    st_index = i;
-		    frd_size = sum - size_in_filetype;
-		    abs_off_in_filetype = flat_file->indices[i] +
-			size_in_filetype - (sum - flat_file->blocklens[i]);
-		    break;
-		}
-	    }
-
-	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
-		abs_off_in_filetype;
-	}
-
-         /* calculate how much space to allocate for offset_list, len_list */
-
-	old_frd_size = frd_size;
-	contig_access_count = i = 0;
-	j = st_index;
-	bufsize = buftype_size * bufcount;
-	frd_size = ADIOI_MIN(frd_size, bufsize);
-	while (i < bufsize) {
-	    if (frd_size) contig_access_count++;
-	    i += frd_size;
-	    j = (j + 1) % flat_file->count;
-	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
-	}
-
-        /* allocate space for offset_list and len_list */
-
-	*offset_list_ptr = (ADIO_Offset *)
-	         ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));  
-	*len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int));
-        /* +1 to avoid a 0-size malloc */
-
-	offset_list = *offset_list_ptr;
-	len_list = *len_list_ptr;
-
-      /* find start offset, end offset, and fill in offset_list and len_list */
-
-	*start_offset_ptr = offset; /* calculated above */
-
-	i = k = 0;
-	j = st_index;
-	off = offset;
-	frd_size = ADIOI_MIN(old_frd_size, bufsize);
-	while (i < bufsize) {
-	    if (frd_size) {
-		offset_list[k] = off;
-		len_list[k] = frd_size;
-		k++;
-	    }
-	    i += frd_size;
-	    end_offset = off + frd_size - 1;
-
-     /* Note: end_offset points to the last byte-offset that will be accessed.
-         e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
-
-	    if (off + frd_size < disp + flat_file->indices[j] +
-		flat_file->blocklens[j] + 
-		(ADIO_Offset) n_filetypes*filetype_extent)
-	    {
-		off += frd_size;
-		/* did not reach end of contiguous block in filetype.
-		 * no more I/O needed. off is incremented by frd_size. 
-		 */
-	    }
-	    else {
-		if (j < (flat_file->count - 1)) j++;
-		else {
-		    /* hit end of flattened filetype; 
-		     * start at beginning again 
-		     */
-		    j = 0;
-		    n_filetypes++;
-		}
-		off = disp + flat_file->indices[j] + 
-		    (ADIO_Offset) n_filetypes*filetype_extent;
-		frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
-	    }
-	}
-
-	/* update file pointer */
-	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
-
-	*contig_access_count_ptr = contig_access_count;
-	*end_offset_ptr = end_offset;
-    }
-}
-#endif
-
 static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
 			 datatype, int nprocs,
 			 int myrank, ADIOI_Access
 			 *others_req, ADIO_Offset *offset_list,
-			 int *len_list, int contig_access_count, ADIO_Offset
+			 ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
                          min_st_offset, ADIO_Offset fd_size,
 			 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
                          int *buf_idx, int *error_code)
@@ -594,19 +412,21 @@
    array from a file, where each local array is 8Mbytes, requiring
    at least another 8Mbytes of temp space is unacceptable. */
 
-    int i, j, m, size, ntimes, max_ntimes, buftype_is_contig;
+    int i, j, m, ntimes, max_ntimes, buftype_is_contig;
     ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
     char *read_buf = NULL, *tmp_buf;
     int *curr_offlen_ptr, *count, *send_size, *recv_size;
-    int *partial_send, *recd_from_proc, *start_pos, for_next_iter;
-    int real_size, req_len, flag, for_curr_iter, rank;
+    int *partial_send, *recd_from_proc, *start_pos;
+    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+    ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
+    int req_len, flag, rank;
     MPI_Status status;
     ADIOI_Flatlist_node *flat_buf=NULL;
     MPI_Aint buftype_extent;
     int coll_bufsize;
-
+#ifdef RDCOLL_DEBUG
     int iii;
-
+#endif
     *error_code = MPI_SUCCESS;  /* changed below if error */
     /* only I/O errors are currently reported */
     
@@ -738,7 +558,7 @@
 #ifdef PROFILE
         MPE_Log_event(13, 0, "start computation");
 #endif
-	size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); 
+	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); 
 	real_off = off - for_curr_iter;
 	real_size = size + for_curr_iter;
 
@@ -746,7 +566,9 @@
 	for_next_iter = 0;
 
 	for (i=0; i<nprocs; i++) {
-	    /* FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); */
+#ifdef RDCOLL_DEBUG
+	    DBG_FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); 
+#endif
 	    if (others_req[i].count) {
 		start_pos[i] = curr_offlen_ptr[i];
 		for (j=curr_offlen_ptr[i]; j<others_req[i].count;
@@ -769,22 +591,22 @@
 		    }
 		    if (req_off < real_off + real_size) {
 			count[i]++;
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+req_off-real_off) == (ADIO_Offset)(MPIR_Upint)(read_buf+req_off-real_off));
 			MPI_Address(read_buf+req_off-real_off, 
                                &(others_req[i].mem_ptrs[j]));
-			send_size[i] += (int)(ADIOI_MIN(real_off + (ADIO_Offset)real_size - 
-						  req_off, req_len));
+      ADIOI_Assert((real_off + real_size - req_off) == (int)(real_off + real_size - req_off));
+			send_size[i] += (int)(ADIOI_MIN(real_off + real_size - req_off, 
+                                      (ADIO_Offset)(unsigned)req_len)); 
 
-			if (real_off+real_size-req_off < req_len) {
-			    partial_send[i] = (int) (real_off+real_size-
-						     req_off);
+			if (real_off+real_size-req_off < (ADIO_Offset)(unsigned)req_len) {
+			    partial_send[i] = (int) (real_off + real_size - req_off);
 			    if ((j+1 < others_req[i].count) && 
                                  (others_req[i].offsets[j+1] < 
                                      real_off+real_size)) { 
 				/* this is the case illustrated in the
 				   figure above. */
-				for_next_iter = (int) (ADIOI_MAX(for_next_iter,
-					  real_off + real_size - 
-                                             others_req[i].offsets[j+1])); 
+				for_next_iter = ADIOI_MAX(for_next_iter,
+					  real_off + real_size - others_req[i].offsets[j+1]); 
 				/* max because it must cover requests 
 				   from different processes */
 			    }
@@ -805,13 +627,14 @@
         MPE_Log_event(14, 0, "end computation");
 #endif
 	if (flag) {
-	    ADIO_ReadContig(fd, read_buf+for_curr_iter, size, MPI_BYTE,
+      ADIOI_Assert(size == (int)size);
+	    ADIO_ReadContig(fd, read_buf+for_curr_iter, (int)size, MPI_BYTE,
 			    ADIO_EXPLICIT_OFFSET, off, &status, error_code);
-/*
-	    printf( "\tread_coll: 700, data read [%3d] = ", size );
-	    for (iii=0; iii<size; iii++) { printf( "%3d,", *((unsigned char *)read_buf + for_curr_iter + iii) ); }
-	    printf( "\n" );
- */
+#ifdef RDCOLL_DEBUG
+	    DBG_FPRINTF(stderr, "\tread_coll: 700, data read [%lld] = ", size );
+	    for (iii=0; iii<size && iii<80; iii++) { DBGV_FPRINTF(stderr, "%3d,", *((unsigned char *)read_buf + for_curr_iter + iii) ); }
+	    DBG_FPRINTF(stderr, "\n" );
+#endif
 
 	    if (*error_code != MPI_SUCCESS) return;
 	}
@@ -849,6 +672,8 @@
 
 	if (for_next_iter) {
 	    tmp_buf = (char *) ADIOI_Malloc(for_next_iter);
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+real_size-for_next_iter) == (ADIO_Offset)(MPIR_Upint)(read_buf+real_size-for_next_iter));
+      ADIOI_Assert((for_next_iter+coll_bufsize) == (size_t)(for_next_iter+coll_bufsize));
 	    memcpy(tmp_buf, read_buf+real_size-for_next_iter, for_next_iter);
 	    ADIOI_Free(read_buf);
 	    read_buf = (char *) ADIOI_Malloc(for_next_iter+coll_bufsize);
@@ -902,7 +727,7 @@
 }
 
 static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-			 *flat_buf, ADIO_Offset *offset_list, int
+			 *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
                          *len_list, int *send_size, int *recv_size,
 			 int *count, int *start_pos, int *partial_send, 
 			 int *recd_from_proc, int nprocs, 
@@ -937,10 +762,14 @@
 /* post recvs. if buftype_is_contig, data can be directly recd. into
    user buf at location given by buf_idx. else use recv_buf. */
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5032, 0, NULL);
+#endif
+
     if (buftype_is_contig) {
 	j = 0;
 	for (i=0; i < nprocs; i++) 
-	    if (recv_size[i]) {
+	    if (recv_size[i]) { 
 		MPI_Irecv(((char *) buf) + buf_idx[i], recv_size[i], 
 		  MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j);
 		j++;
@@ -960,8 +789,10 @@
 		    MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, 
 			      myrank+i+100*iter, fd->comm, requests+j);
 		    j++;
-		    /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", 
-		       myrank, recv_size[i], myrank+i+100*iter); */
+#ifdef RDCOLL_DEBUG
+		    DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", 
+		       myrank, recv_size[i], myrank+i+100*iter); 
+#endif
 		}
     }
 
@@ -1006,7 +837,7 @@
 	/* if noncontiguous, to the copies from the recv buffers */
 	if (!buftype_is_contig) 
 	    ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
-				   offset_list, len_list, recv_size, 
+				   offset_list, len_list, (unsigned*)recv_size, 
 				   requests, statuses, recd_from_proc, 
 				   nprocs, contig_access_count,
 				   min_st_offset, fd_size, fd_start, fd_end,
@@ -1024,9 +855,11 @@
 	    if (recv_size[i]) ADIOI_Free(recv_buf[i]);
 	ADIOI_Free(recv_buf);
     }
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5033, 0, NULL);
+#endif
 }
 
-
 #define ADIOI_BUF_INCR \
 { \
     while (buf_incr) { \
@@ -1040,7 +873,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
 	    flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
 	} \
 	buf_incr -= size_in_buf; \
@@ -1052,9 +885,11 @@
 { \
     while (size) { \
 	size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+  ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)(buf + user_buf_idx)); \
+  ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
 	memcpy(((char *) buf) + user_buf_idx, \
 	       &(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \
-	recv_buf_idx[p] += size_in_buf; \
+	recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \
 	user_buf_idx += size_in_buf; \
 	flat_buf_sz -= size_in_buf; \
 	if (!flat_buf_sz) { \
@@ -1064,7 +899,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
 	    flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
 	} \
 	size -= size_in_buf; \
@@ -1073,11 +908,10 @@
     ADIOI_BUF_INCR \
 }
 
-
 static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
 				   *flat_buf, char **recv_buf, ADIO_Offset 
-				   *offset_list, int *len_list, 
-				   int *recv_size, 
+				   *offset_list, ADIO_Offset *len_list, 
+				   unsigned *recv_size, 
 				   MPI_Request *requests, MPI_Status *statuses,
 				   int *recd_from_proc, int nprocs,
 				   int contig_access_count, 
@@ -1086,13 +920,18 @@
 				   ADIO_Offset *fd_end,
 				   MPI_Aint buftype_extent)
 {
+
 /* this function is only called if buftype is not contig */
 
-    int i, p, flat_buf_idx, size, buf_incr;
-    int flat_buf_sz, size_in_buf, n_buftypes;
+    int i, p, flat_buf_idx;
+    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+    int n_buftypes;
     ADIO_Offset off, len, rem_len, user_buf_idx;
+    /* Not sure unsigned is necessary, but it makes the math safer */
+    unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx;
 
-    int *curr_from_proc, *done_from_proc, *recv_buf_idx;
+    ADIOI_UNREFERENCED_ARG(requests);
+    ADIOI_UNREFERENCED_ARG(statuses);
 
 /*  curr_from_proc[p] = amount of data recd from proc. p that has already
                         been accounted for so far
@@ -1100,9 +939,9 @@
                         filled into user buffer in previous iterations
     user_buf_idx = current location in user buffer 
     recv_buf_idx[p] = current location in recv_buf of proc. p  */
-    curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-    done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-    recv_buf_idx   = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+    curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+    done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+    recv_buf_idx   = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
 
     for (i=0; i < nprocs; i++) {
 	recv_buf_idx[i] = curr_from_proc[i] = 0;
@@ -1120,7 +959,7 @@
 
     for (i=0; i<contig_access_count; i++) { 
 	off     = offset_list[i];
-	rem_len = (ADIO_Offset) len_list[i];
+	rem_len = len_list[i];
 
 	/* this request may span the file domains of more than one process */
 	while (rem_len > 0) {
@@ -1140,29 +979,32 @@
 	    if (recv_buf_idx[p] < recv_size[p]) {
 		if (curr_from_proc[p]+len > done_from_proc[p]) {
 		    if (done_from_proc[p] > curr_from_proc[p]) {
-			size = (int)ADIOI_MIN(curr_from_proc[p] + len - 
+			size = ADIOI_MIN(curr_from_proc[p] + len - 
 			      done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
 			buf_incr = done_from_proc[p] - curr_from_proc[p];
 			ADIOI_BUF_INCR
-			buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]);
+			buf_incr = curr_from_proc[p]+len-done_from_proc[p];
+      ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size));
 			curr_from_proc[p] = done_from_proc[p] + size;
 			ADIOI_BUF_COPY
 		    }
 		    else {
-			size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
-			buf_incr = (int)len;
-			curr_from_proc[p] += size;
+			size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
+			buf_incr = len;
+      ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size));
+			curr_from_proc[p] += (unsigned) size;
 			ADIOI_BUF_COPY
 		    }
 		}
 		else {
-		    curr_from_proc[p] += (int)len;
-		    buf_incr = (int)len;
+        ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len));
+		    curr_from_proc[p] += (unsigned) len;
+		    buf_incr = len;
 		    ADIOI_BUF_INCR
 		}
 	    }
 	    else {
-		buf_incr = (int)len;
+		buf_incr = len;
 		ADIOI_BUF_INCR
 	    }
 	    off     += len;
@@ -1179,7 +1021,7 @@
 
 static void ADIOI_R_Exchange_data_alltoallv(
                 ADIO_File fd, void *buf, ADIOI_Flatlist_node
-                *flat_buf, ADIO_Offset *offset_list, int
+                *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
                 *len_list, int *send_size, int *recv_size, 
                 int *count, int *start_pos, int *partial_send,
                 int *recd_from_proc, int nprocs,
@@ -1192,9 +1034,8 @@
 {   
     int i, j, k=0, tmp=0, nprocs_recv, nprocs_send;
     char **recv_buf = NULL;
-    MPI_Request *requests;
-    MPI_Datatype send_type;
-    MPI_Status *statuses;
+    MPI_Request *requests=NULL;
+    MPI_Status *statuses=NULL;
     int rtail, stail;
     char *sbuf_ptr, *from_ptr;
     int  len;
@@ -1238,7 +1079,8 @@
 	    }
             sbuf_ptr = all_send_buf + sdispls[i];
             for (j=0; j<count[i]; j++) {
-                from_ptr = (char *)( others_req[i].mem_ptrs[ start_pos[i]+j ] );
+                ADIOI_ENSURE_AINT_FITS_IN_PTR( others_req[i].mem_ptrs[ start_pos[i]+j ]);
+                from_ptr = (char *) ADIOI_AINT_CAST_TO_VOID_PTR ( others_req[i].mem_ptrs[ start_pos[i]+j ] );
                 len      =           others_req[i].lens[     start_pos[i]+j ]  ;
                 memcpy( sbuf_ptr, from_ptr, len );
                 sbuf_ptr += len;
@@ -1247,26 +1089,19 @@
         }
     }
 
-#if 0
-    printf( "\tsend_size = " );
-    for (i=0; i<nprocs; i++) { printf( "%2d,", send_size[i] ); }
-    printf( "\n" );
-    printf( "\trecv_size = " );
-    for (i=0; i<nprocs; i++) { printf( "%2d,", recv_size[i] ); }
-    printf( "\n" );
-    printf( "\tsdispls   = " );
-    for (i=0; i<nprocs; i++) { printf( "%2d,", sdispls  [i] ); }
-    printf( "\n" );
-    printf( "\trdispls   = " );
-    for (i=0; i<nprocs; i++) { printf( "%2d,", rdispls  [i] ); }
-    printf( "\n" );
-    printf( "\ttails = %4d, %4d\n", stail, rtail );
-#endif
-#if 0
+#ifdef RDCOLL_DEBUG /* same guard form as the other RDCOLL_DEBUG dumps in this file */
+    DBG_FPRINTF(stderr, "\tsend_size = [%d]%2d,",0,send_size[0]);
+    for (i=1; i<nprocs; i++) if(send_size[i-1]!=send_size[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,send_size[i] ); }
+    DBG_FPRINTF(stderr, "\trecv_size =  [%d]%2d,",0,recv_size[0]);
+    for (i=1; i<nprocs; i++) if(recv_size[i-1]!=recv_size[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,recv_size[i] ); }
+    DBG_FPRINTF(stderr, "\tsdispls   =  [%d]%2d,",0,sdispls[0]);
+    for (i=1; i<nprocs; i++) if(sdispls[i-1]!=sdispls[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,sdispls  [i] ); }
+    DBG_FPRINTF(stderr, "\trdispls   =  [%d]%2d,",0,rdispls[0]);
+    for (i=1; i<nprocs; i++) if(rdispls[i-1]!=rdispls[i]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i,rdispls  [i] ); }
+    DBG_FPRINTF(stderr, "\ttails = %4d, %4d\n", stail, rtail );
     if (nprocs_send) {
-    printf( "\tall_send_buf = " );
-    for (i=0; i<nprocs; i++) { printf( "%2d,", all_send_buf  [i*131072] ); }
-    printf( "\n" );
+    DBG_FPRINTF(stderr, "\tall_send_buf =  [%d]%2d,",0,all_send_buf[0]);
+    for (i=1; i<nprocs; i++) if(all_send_buf[(i-1)*131072]!=all_send_buf[i*131072]){ DBG_FPRINTF(stderr, "\t\t[%d]%2d,", i, all_send_buf  [i*131072] ); }
     }
 #endif
     
@@ -1277,16 +1112,16 @@
             fd->comm ); 
 
 #if 0
-    printf( "\tall_recv_buf = " );
-    for (i=131072; i<131073; i++) { printf( "%2d,", all_recv_buf  [i] ); }
-    printf( "\n" );
+    DBG_FPRINTF(stderr, "\tall_recv_buf = " );
+    for (i=131072; i<131073; i++) { DBG_FPRINTF(stderr, "%2d,", all_recv_buf  [i] ); }
+    DBG_FPRINTF(stderr, "\n" );
 #endif
     
   /* unpack at the receiver side */
     if (nprocs_recv) { 
         if (!buftype_is_contig)
             ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
-                                   offset_list, len_list, recv_size,
+                                   offset_list, len_list, (unsigned*)recv_size,
                                    requests, statuses,          /* never used inside */
                                    recd_from_proc,
                                    nprocs, contig_access_count,
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -21,9 +21,9 @@
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
 {
-    int err=-1, datatype_size, len;
+    int err=-1, datatype_size;
+    ADIO_Offset len;
     static char myname[] = "ADIOI_BGL_READCONTIG";
-
 #if BGL_PROFILE
 		/* timing */
 		double io_time, io_time2;
@@ -35,7 +35,8 @@
 #endif
 
     MPI_Type_size(datatype, &datatype_size);
-    len = datatype_size * count;
+    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+    ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
 
 #if BGL_PROFILE
 
@@ -48,7 +49,7 @@
 	    ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
 	else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
         	if (bglmpio_timing2) io_time2 = MPI_Wtime();
-	err = read(fd->fd_sys, buf, len);
+	err = read(fd->fd_sys, buf, (unsigned int)len);
         	if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_sys_posn = offset + err;
@@ -64,7 +65,7 @@
 	    ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
 	else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
         	if (bglmpio_timing2) io_time2 = MPI_Wtime();
-	err = read(fd->fd_sys, buf, len);
+	err = read(fd->fd_sys, buf, (unsigned int)len);
         	if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_ind += err;
@@ -79,7 +80,7 @@
 	if (fd->atomicity)
 	    ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
 	else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
-	err = read(fd->fd_sys, buf, len);
+	err = read(fd->fd_sys, buf, (unsigned int)len);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_sys_posn = offset + err;
 	/* individual file pointer not updated */        
@@ -91,7 +92,7 @@
 	if (fd->atomicity)
 	    ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
 	else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
-	err = read(fd->fd_sys, buf, len);
+	err = read(fd->fd_sys, buf, (unsigned int)len);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_ind += err;
 	fd->fp_sys_posn = fd->fp_ind;
@@ -120,12 +121,11 @@
 }
 
 
-
 #define ADIOI_BUFFERED_READ \
 { \
     if (req_off >= readbuf_off + readbuf_len) { \
 	readbuf_off = req_off; \
-	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
+	readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
 	lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
         if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
         err = read(fd->fd_sys, readbuf, readbuf_len);\
@@ -133,6 +133,7 @@
         if (err == -1) err_flag = 1; \
     } \
     while (req_len > readbuf_off + readbuf_len - req_off) { \
+  ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
 	partial_read = (int) (readbuf_off + readbuf_len - req_off); \
 	tmp_buf = (char *) ADIOI_Malloc(partial_read); \
 	memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
@@ -141,7 +142,7 @@
 	memcpy(readbuf, tmp_buf, partial_read); \
 	ADIOI_Free(tmp_buf); \
 	readbuf_off += readbuf_len-partial_read; \
-	readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
+	readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
 				       end_offset-readbuf_off+1)); \
 	lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
         if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
@@ -149,6 +150,7 @@
         if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
         if (err == -1) err_flag = 1; \
     } \
+    ADIOI_Assert(req_len == (size_t)req_len); \
     memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
 }
 
@@ -160,20 +162,23 @@
 {
 /* offset is in units of etype relative to the filetype. */
 
+
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
-    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
+    ADIO_Offset i_offset, new_brd_size, brd_size, size;
+    int i, j, k, err=-1, st_index=0;
+    ADIO_Offset frd_size=0, new_frd_size, st_frd_size;
+    unsigned num, bufsize; 
+    int n_etypes_in_filetype;
+    ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
     ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len, partial_read;
+    int filetype_size, etype_size, buftype_size, partial_read;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
-    ADIO_Offset userbuf_off;
+    ADIO_Offset userbuf_off, req_len, sum;
     ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
     char *readbuf, *tmp_buf, *value;
-    int flag, st_frd_size, st_n_filetypes, readbuf_len;
-    int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
-
+    int err_flag=0, info_flag;
+    unsigned max_bufsize, readbuf_len;
     static char myname[] = "ADIOI_BGL_READSTRIDED";
 
     if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
@@ -207,12 +212,13 @@
     MPI_Type_extent(datatype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
 /* get max_bufsize from the info object. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     max_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -226,13 +232,13 @@
 	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
         off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
+                 fd->disp + (ADIO_Offset)etype_size * offset;
 
 	start_off = off;
 	end_offset = off + bufsize - 1;
         readbuf_off = off;
         readbuf = (char *) ADIOI_Malloc(max_bufsize);
-        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+        readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
 
 /* if atomicity is true, lock (exclusive) the region to be accessed */
         if (fd->atomicity)
@@ -245,13 +251,16 @@
         if (err == -1) err_flag = 1;
 
         for (j=0; j<count; j++) 
-            for (i=0; i<flat_buf->count; i++) {
-                userbuf_off = j*buftype_extent + flat_buf->indices[i];
-		req_off = off;
-		req_len = flat_buf->blocklens[i];
-		ADIOI_BUFFERED_READ
-                off += flat_buf->blocklens[i];
-            }
+        {
+          int i;
+              for (i=0; i<flat_buf->count; i++) {
+                  userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+      req_off = off;
+      req_len = flat_buf->blocklens[i];
+      ADIOI_BUFFERED_READ
+                  off += flat_buf->blocklens[i];
+              }
+        }
 
         if (fd->atomicity)
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
@@ -277,29 +286,36 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			frd_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
+	    /* Wei-keng reworked type processing to be a bit more efficient */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+	    offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+	    /* now offset is local to this extent */
+
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* frd_size is from offset to the end of block i */
+		if (dist == 0) {
+		    i++;
+		    offset   = flat_file->indices[i];
+		    frd_size = flat_file->blocklens[i];
+		    break;
+		}
+		if (dist > 0) {
+                    frd_size = dist;
+		    break;
 		}
 	    }
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
 	}
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -315,32 +331,63 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		    abs_off_in_filetype;
 	}
 
         start_off = offset;
 
+	/* Wei-keng Liao: read request is within a single flat_file contig
+	 * block e.g. with subarray types that actually describe the whole
+	 * array */
+	if (buftype_is_contig && bufsize <= frd_size) {
+            ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+	    if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte that 
+		 * can be accessed in the fileview. */
+		fd->fp_ind = offset + bufsize;
+		if (bufsize == frd_size) {
+		    do {
+			st_index++;
+			if (st_index == flat_file->count) {
+			    st_index = 0;
+			    n_filetypes++;
+			}
+                    } while (flat_file->blocklens[st_index] == 0);
+		    fd->fp_ind = disp + flat_file->indices[st_index]
+                               + n_filetypes*filetype_extent;
+		}
+	    }
+	    fd->fp_sys_posn = -1;   /* set it to null. */ 
+#ifdef HAVE_STATUS_SET_BYTES
+	    MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif 
+            return;
+	}
+
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
 
 	st_frd_size = frd_size;
 	st_n_filetypes = n_filetypes;
-	i = 0;
+	i_offset = 0;
 	j = st_index;
 	off = offset;
 	frd_size = ADIOI_MIN(st_frd_size, bufsize);
-	while (i < bufsize) {
-	    i += frd_size;
+	while (i_offset < bufsize) {
+	    i_offset += frd_size;
 	    end_offset = off + frd_size - 1;
 
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
+	    j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+		j = (j+1) % flat_file->count;
+		n_filetypes += (j == 0) ? 1 : 0;
 	    }
-
-	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
-	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+	    off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
+	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	}
 
 /* if atomicity is true, lock (exclusive) the region to be accessed */
@@ -350,7 +397,7 @@
         /* initial read into readbuf */
 	readbuf_off = offset;
 	readbuf = (char *) ADIOI_Malloc(max_bufsize);
-	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+	readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
 
 	lseek(fd->fd_sys, offset, SEEK_SET);
         if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
@@ -364,12 +411,12 @@
 /* contiguous in memory, noncontiguous in file. should be the most
    common case. */
 
-	    i = 0;
+	    i_offset = 0;
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
 	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
-	    while (i < bufsize) {
+	    while (i_offset < bufsize) {
                 if (frd_size) { 
                     /* TYPE_UB and TYPE_LB can result in 
                        frd_size = 0. save system call in such cases */ 
@@ -378,25 +425,26 @@
 
 		    req_off = off;
 		    req_len = frd_size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_READ
 		}
-		i += frd_size;
+		i_offset += frd_size;
 
                 if (off + frd_size < disp + flat_file->indices[j] +
-                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                        off += frd_size;
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by frd_size. */
                 else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
 		    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
-		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+                                        n_filetypes*(ADIO_Offset)filetype_extent;
+		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 		}
 	    }
 	}
@@ -408,7 +456,7 @@
 	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
 	    k = num = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
@@ -423,7 +471,7 @@
 
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_READ
 		}
 
@@ -432,18 +480,19 @@
 
 		if (size == frd_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
 		    }
 
 		    off = disp + flat_file->indices[j] + 
-                                              (ADIO_Offset) n_filetypes*filetype_extent;
+                                              n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_frd_size = flat_file->blocklens[j];
 		    if (size != brd_size) {
-			i += size;
+			i_offset += size;
 			new_brd_size -= size;
 		    }
 		}
@@ -453,7 +502,7 @@
 
 		    k = (k + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
+		    i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
 			flat_buf->indices[k]); 
 		    new_brd_size = flat_buf->blocklens[k];
 		    if (size != frd_size) {
@@ -461,6 +510,7 @@
 			new_frd_size -= size;
 		    }
 		}
+    ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
 		num += size;
 		frd_size = new_frd_size;
                 brd_size = new_brd_size;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c	2010-11-15 15:02:47.000000000 +0100
@@ -3,7 +3,13 @@
 /* ---------------------------------------------------------------- */
 /**
  * \file ad_bgl_tuning.c
- * \brief ???
+ * \brief defines ad_bgl performance tuning
+ */
+
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/* 
+ *   Copyright (C) 2008 University of Chicago. 
+ *   See COPYRIGHT notice in top-level directory.
  */
 
 /*---------------------------------------------------------------------
@@ -26,6 +32,40 @@
 double	bglmpio_prof_cr    [BGLMPIO_CIO_LAST];
 
 /* set internal variables for tuning environment variables */
+/** \page env_vars Environment Variables
+ * - BGLMPIO_COMM - Define how data is exchanged on collective
+ *   reads and writes.  Possible values:
+ *   - 0 - Use MPI_Alltoallv.
+ *   - 1 - Use MPI_Isend/MPI_Irecv.
+ *   - Default is 0.
+ *
+ * - BGLMPIO_TIMING - collect timing breakdown for MPI I/O collective calls.
+ *   Must also compile the library with BGL_PROFILE defined. Possible values:
+ *   - 0 - Do not collect/report timing.
+ *   - 1 - Collect/report timing.
+ *   - Default is 0.
+ *
+ * - BGLMPIO_TIMING2 - collect additional averages for MPI I/O collective calls.
+ *   Must also compile the library with BGL_PROFILE defined. Possible values:
+ *   - 0 - Do not collect/report averages.
+ *   - 1 - Collect/report averages.
+ *   - Default is 0.
+ *
+ * - BGLMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated
+ *   for aggregator collective i/o.  Possible values:
+ *   - 0 - Use two MPI_Allgather's to collect starting and ending offsets.
+ *   - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets.
+ *   - Default is 1.
+ *
+ * - BGLMPIO_TUNEBLOCKING - Tune how aggregate file domains are 
+ *   calculated (block size).  Possible values:
+ *   - 0 - Evenly calculate file domains across aggregators.  Also use 
+ *   MPI_Isend/MPI_Irecv to exchange domain information.
+ *   - 1 - Align file domains with the underlying file system's block size.  Also use 
+ *   MPI_Alltoallv to exchange domain information.
+ *   - Default is 1.
+ *
+*/
 void ad_bgl_get_env_vars() {
     char *x;
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c	2010-11-15 15:02:47.000000000 +0100
@@ -18,6 +18,9 @@
 #include "ad_bgl_pset.h"
 #include "ad_bgl_aggrs.h"
 
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
 #ifdef PROFILE
 #include "mpe.h"
 #endif
@@ -26,13 +29,13 @@
 static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
                          datatype, int nprocs, int myrank, ADIOI_Access
                          *others_req, ADIO_Offset *offset_list,
-                         int *len_list, int contig_access_count, ADIO_Offset
+                         ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
                          min_st_offset, ADIO_Offset fd_size,
                          ADIO_Offset *fd_start, ADIO_Offset *fd_end,
                          int *buf_idx, int *error_code);
 static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
                          ADIOI_Flatlist_node *flat_buf, ADIO_Offset 
-                         *offset_list, int *len_list, int *send_size, 
+                         *offset_list, ADIO_Offset *len_list, int *send_size, 
                          int *recv_size, ADIO_Offset off, int size,
                          int *count, int *start_pos, int *partial_recv, 
                          int *sent_to_proc, int nprocs, 
@@ -49,7 +52,7 @@
 		char *write_buf,					/* 1 */
 		ADIOI_Flatlist_node *flat_buf, 
 		ADIO_Offset *offset_list, 
-		int *len_list, int *send_size, int *recv_size, 
+		ADIO_Offset *len_list, int *send_size, int *recv_size, 
 		ADIO_Offset off, int size,				/* 2 */
 		int *count, int *start_pos, int *partial_recv,
 		int *sent_to_proc, int nprocs, int myrank, 
@@ -65,7 +68,7 @@
 		int *error_code);
 static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, ADIO_Offset
@@ -76,7 +79,7 @@
                            MPI_Aint buftype_extent);
 static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, ADIO_Offset
@@ -118,26 +121,27 @@
     ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
     int  ii;
 
-    int *buf_idx = NULL, *len_list = NULL;
-
-    double io_time = 0, all_time, max_all_time;
-    double tstep1, max_tstep1;
-    double tstep1_1, max_tstep1_1;
-    double tstep1_2, max_tstep1_2;
-    double tstep1_3, max_tstep1_3;
-    double tstep2, max_tstep2;
-    double tstep3, max_tstep3;
-    double tstep4, max_tstep4;
-    double sum_sz;
-
+    int *buf_idx = NULL;
+    ADIO_Offset *len_list = NULL;
 #if BGL_PROFILE 
     BGLMPIO_T_CIO_RESET( 0, w )
 #endif
-
+#if 0
+    /* From common code - not implemented for bgl.*/
+    int old_error, tmp_error;
+#endif
 #ifdef PROFILE
 	MPE_Log_event(13, 0, "start computation");
 #endif
 
+#if 0
+/*   From common code - not implemented for bgl. */
+     if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { 
+	ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype, 
+			file_ptr_type, offset, status, error_code);
+	return;
+    }
+#endif
     MPI_Comm_size(fd->comm, &nprocs);
     MPI_Comm_rank(fd->comm, &myrank);
 
@@ -207,7 +211,8 @@
 	/* are the accesses of different processes interleaved? */
 	for (i=1; i<nprocs; i++)
       if ((st_offsets[i] < end_offsets[i-1]) &&
-          (st_offsets[i] <= end_offsets[i])) interleave_count++;                   
+                (st_offsets[i] <= end_offsets[i]))
+                interleave_count++;
 	/* This is a rudimentary check for interleaving, but should suffice
 	   for the moment. */
     }
@@ -231,7 +236,7 @@
         if (buftype_is_contig && filetype_is_contig) {
 
             if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
-                off = fd->disp + (fd->etype_size) * offset;
+                off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
                 ADIO_WriteContig(fd, buf, count, datatype,
 				 ADIO_EXPLICIT_OFFSET,
 				 off, status, error_code);
@@ -260,7 +265,9 @@
     else
     ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
 			    nprocs_for_coll, &min_st_offset,
-			    &fd_start, &fd_end, &fd_size);   
+			    &fd_start, &fd_end,
+			    fd->hints->min_fdomain_size, &fd_size,
+			    fd->hints->striping_unit);   
 
 #if BGL_PROFILE 
     BGLMPIO_T_CIO_SET_GET( 0, w, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
@@ -329,9 +336,50 @@
 
     BGLMPIO_T_CIO_REPORT( 0, w, fd, myrank )
 #endif
-	
+#if 0
+    /* From common code - not implemented for bgl.
+     * 
+     * If this collective write is followed by an independent write,
+     * it's possible to have those subsequent writes on other processes
+     * race ahead and sneak in before the read-modify-write completes.
+     * We carry out a collective communication at the end here so no one
+     * can start independent i/o before collective I/O completes. 
+     *
+     * need to do some gymnastics with the error codes so that if something
+     * went wrong, all processes report error, but if a process has a more
+     * specific error code, we can still have that process report the
+     * additional information */
+
+    old_error = *error_code;
+    if (*error_code != MPI_SUCCESS) *error_code = MPI_ERR_IO;
+
+     /* optimization: if only one process performing i/o, we can perform
+     * a less-expensive Bcast  */
+#ifdef ADIOI_MPE_LOGGING
+    MPE_Log_event( ADIOI_MPE_postwrite_a, 0, NULL );
+#endif
+    if (fd->hints->cb_nodes == 1) 
+	    MPI_Bcast(error_code, 1, MPI_INT, 
+			    fd->hints->ranklist[0], fd->comm);
+    else {
+	    tmp_error = *error_code;
+	    MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT, 
+			    MPI_MAX, fd->comm);
+    }
+#ifdef ADIOI_MPE_LOGGING
+    MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL );
+#endif
+#ifdef AGGREGATION_PROFILE
+	MPE_Log_event (5012, 0, NULL);
+#endif
 
+    if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) )
+	    *error_code = old_error;
+
+
+#endif
 /* free all memory allocated for collective I/O */
+    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
 
     for (i=0; i<nprocs; i++) {
 	if (others_req[i].count) {
@@ -363,6 +411,9 @@
 #endif
 
     fd->fp_sys_posn = -1;   /* set it to null. */
+#ifdef AGGREGATION_PROFILE
+	MPE_Log_event (5013, 0, NULL);
+#endif
 }
 
 
@@ -371,12 +422,12 @@
  * code is created and returned in error_code.
  */
 static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
-				 datatype, int nprocs, int myrank,
+				 datatype, int nprocs, 
+				 int myrank,
 				 ADIOI_Access
 				 *others_req, ADIO_Offset *offset_list,
-				 int *len_list, int contig_access_count,
-				 ADIO_Offset
-				 min_st_offset, ADIO_Offset fd_size,
+				 ADIO_Offset *len_list, int contig_access_count,
+				 ADIO_Offset min_st_offset, ADIO_Offset fd_size,
 				 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
 				 int *buf_idx, int *error_code)
 {
@@ -389,7 +440,9 @@
    array to a file, where each local array is 8Mbytes, requiring
    at least another 8Mbytes of temp space is unacceptable. */
 
-    int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig;
+    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+    ADIO_Offset size=0;
+    int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
     ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
     char *write_buf=NULL;
     int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
@@ -410,7 +463,7 @@
    That gives the no. of communication phases as well. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     coll_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -526,7 +579,7 @@
 #endif
 	for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;
 
-	size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); 
+	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); 
 
 	for (i=0; i < nprocs; i++) {
 	    if (others_req[i].count) {
@@ -550,12 +603,14 @@
 		    }
 		    if (req_off < off + size) {
 			count[i]++;
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
 			MPI_Address(write_buf+req_off-off, 
                                &(others_req[i].mem_ptrs[j]));
-			recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size - 
-						  req_off, req_len));
+      ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
+			recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, 
+                                      (unsigned)req_len));
 
-			if (off+size-req_off < req_len)
+			if (off+size-req_off < (unsigned)req_len)
 			{
 			    partial_recv[i] = (int) (off + size - req_off);
 
@@ -618,7 +673,8 @@
 	    if (count[i]) flag = 1;
 
 	if (flag) {
-	    ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
+      ADIOI_Assert(size == (int)size);
+	    ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
                         off, &status, error_code);
 	    if (*error_code != MPI_SUCCESS) return;
 	}
@@ -678,7 +734,7 @@
  */
 static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
 				  ADIOI_Flatlist_node *flat_buf, ADIO_Offset 
-				  *offset_list, int *len_list, int *send_size, 
+				  *offset_list, ADIO_Offset *len_list, int *send_size, 
 				  int *recv_size, ADIO_Offset off, int size,
 				  int *count, int *start_pos,
 				  int *partial_recv,
@@ -758,19 +814,26 @@
         }
     ADIOI_Free(tmp_len);
 
-/* check if there are any holes */
+    /* check if there are any holes. If yes, must do read-modify-write.
+     * holes can be in three places.  'middle' is what you'd expect: the
+     * processes are operating on noncontigous data.  But holes can also show
+     * up at the beginning or end of the file domain (see John Bent ROMIO REQ
+     * #835). Missing these holes would result in us writing more data than
+     * received by everyone else. */
     *hole = 0;
-    /* See if there are holes before the first request or after the last request*/
-    if((srt_off[0] > off) || 
-       ((srt_off[sum-1] + srt_len[sum-1]) < (off + size)))
-    {
-       *hole = 1;
-    }
-    else /* See if there are holes between the requests, if there are more than one */
-    for (i=0; i<sum-1; i++)
-	if (srt_off[i]+srt_len[i] < srt_off[i+1]) {
-	    *hole = 1;
-	    break;
+    if (off != srt_off[0]) /* hole at the front */
+        *hole = 1;
+    else { /* coalesce the sorted offset-length pairs */
+        for (i=1; i<sum; i++) {
+            if (srt_off[i] <= srt_off[0] + srt_len[0]) {
+		int new_len = srt_off[i] + srt_len[i] - srt_off[0];
+		if (new_len > srt_len[0]) srt_len[0] = new_len;
+	    }
+            else
+                break;
+        }
+        if (i < sum || size != srt_len[0]) /* hole in middle or end */
+            *hole = 1;
 	}
 
     ADIOI_Free(srt_off);
@@ -821,6 +884,9 @@
 /* post sends. if buftype_is_contig, data can be directly sent from
    user buf at location given by buf_idx. else use send_buf. */
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5032, 0, NULL);
+#endif
     if (buftype_is_contig) {
 	j = 0;
 	for (i=0; i < nprocs; i++) 
@@ -895,6 +961,9 @@
         MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
 #endif
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5033, 0, NULL);
+#endif
     ADIOI_Free(statuses);
     ADIOI_Free(requests);
     if (!buftype_is_contig && nprocs_send) {
@@ -918,7 +987,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
             flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
         } \
         buf_incr -= size_in_buf; \
@@ -930,6 +999,8 @@
 { \
     while (size) { \
         size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+  ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+  ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
         memcpy(&(send_buf[p][send_buf_idx[p]]), \
                ((char *) buf) + user_buf_idx, size_in_buf); \
         send_buf_idx[p] += size_in_buf; \
@@ -942,7 +1013,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
             flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
         } \
         size -= size_in_buf; \
@@ -951,11 +1022,9 @@
     ADIOI_BUF_INCR \
 }
 
-
-
 static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, 
@@ -967,8 +1036,9 @@
 {
 /* this function is only called if buftype is not contig */
 
-    int i, p, flat_buf_idx, size;
-    int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+    int i, p, flat_buf_idx;
+    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+    int jj, n_buftypes;
     ADIO_Offset off, len, rem_len, user_buf_idx;
 
 /*  curr_to_proc[p] = amount of data sent to proc. p that has already
@@ -995,7 +1065,7 @@
 
     for (i=0; i<contig_access_count; i++) { 
 	off     = offset_list[i];
-	rem_len = (ADIO_Offset) len_list[i];
+	rem_len = len_list[i];
 
 	/*this request may span the file domains of more than one process*/
   while (rem_len != 0) {
@@ -1015,17 +1085,20 @@
 	    if (send_buf_idx[p] < send_size[p]) {
 		if (curr_to_proc[p]+len > done_to_proc[p]) {
 		    if (done_to_proc[p] > curr_to_proc[p]) {
-			size = (int)ADIOI_MIN(curr_to_proc[p] + len - 
+			size = ADIOI_MIN(curr_to_proc[p] + len - 
                                 done_to_proc[p], send_size[p]-send_buf_idx[p]);
 			buf_incr = done_to_proc[p] - curr_to_proc[p];
 			ADIOI_BUF_INCR
-		        buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+      ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+		        buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+      ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
 			curr_to_proc[p] = done_to_proc[p] + size;
 		        ADIOI_BUF_COPY
 		    }
 		    else {
-			size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
-			buf_incr = (int)len;
+			size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+			buf_incr = len;
+      ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
 			curr_to_proc[p] += size;
 			ADIOI_BUF_COPY
 		    }
@@ -1036,13 +1109,14 @@
 		    }
 		}
 		else {
-		    curr_to_proc[p] += (int)len;
-		    buf_incr = (int)len;
+        ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
+		    curr_to_proc[p] += len;
+		    buf_incr = len;
 		    ADIOI_BUF_INCR
 		}
 	    }
 	    else {
-		buf_incr = (int)len;
+		buf_incr = len;
 		ADIOI_BUF_INCR
             }
 	    off     += len;
@@ -1181,7 +1255,7 @@
 		char *write_buf,					/* 1 */
 		ADIOI_Flatlist_node *flat_buf, 
 		ADIO_Offset *offset_list, 
-		int *len_list, int *send_size, int *recv_size, 
+		ADIO_Offset *len_list, int *send_size, int *recv_size, 
 		ADIO_Offset off, int size,				/* 2 */
 		int *count, int *start_pos, int *partial_recv,
 		int *sent_to_proc, int nprocs, int myrank, 
@@ -1196,11 +1270,10 @@
 		int iter, MPI_Aint buftype_extent, int *buf_idx,
 		int *error_code)
 {   
-    int i, j, k=0, tmp=0, nprocs_recv, nprocs_send, erri, *tmp_len, err;
+    int i, j, k=0, nprocs_recv, nprocs_send, *tmp_len, err;
     char **send_buf = NULL;
-    MPI_Request *requests, *send_req;
-    MPI_Datatype recv_type;
-    MPI_Status *statuses, status;
+    MPI_Request *send_req=NULL;
+    MPI_Status status;
     int rtail, stail;
     char *sbuf_ptr, *to_ptr;
     int  len;
@@ -1324,7 +1397,8 @@
 
             sbuf_ptr = all_recv_buf + rdispls[i];
             for (j=0; j<count[i]; j++) {
-                to_ptr = (char *)( others_req[i].mem_ptrs[ start_pos[i]+j ] );
+                ADIOI_ENSURE_AINT_FITS_IN_PTR(others_req[i].mem_ptrs[ start_pos[i]+j ]);
+                to_ptr = (char *) ADIOI_AINT_CAST_TO_VOID_PTR ( others_req[i].mem_ptrs[ start_pos[i]+j ] );
                 len    =           others_req[i].lens[     start_pos[i]+j ]  ;
                 memcpy( to_ptr, sbuf_ptr, len );
                 sbuf_ptr += len;
@@ -1349,7 +1423,7 @@
 
 static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, 
@@ -1361,8 +1435,9 @@
 {
 /* this function is only called if buftype is not contig */
 
-    int i, p, flat_buf_idx, size;
-    int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+    int i, p, flat_buf_idx;
+    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+    int jj, n_buftypes;
     ADIO_Offset off, len, rem_len, user_buf_idx;
 
 /*  curr_to_proc[p] = amount of data sent to proc. p that has already
@@ -1389,7 +1464,7 @@
 
     for (i=0; i<contig_access_count; i++) { 
 	off     = offset_list[i];
-	rem_len = (ADIO_Offset) len_list[i];
+	rem_len = len_list[i];
 
 	/*this request may span the file domains of more than one process*/
   while (rem_len != 0) {
@@ -1409,17 +1484,20 @@
 	    if (send_buf_idx[p] < send_size[p]) {
 		if (curr_to_proc[p]+len > done_to_proc[p]) {
 		    if (done_to_proc[p] > curr_to_proc[p]) {
-			size = (int)ADIOI_MIN(curr_to_proc[p] + len - 
+			size = ADIOI_MIN(curr_to_proc[p] + len - 
                                 done_to_proc[p], send_size[p]-send_buf_idx[p]);
 			buf_incr = done_to_proc[p] - curr_to_proc[p];
 			ADIOI_BUF_INCR
-		        buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+      ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+		        buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+      ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
 			curr_to_proc[p] = done_to_proc[p] + size;
 		        ADIOI_BUF_COPY
 		    }
 		    else {
-			size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
-			buf_incr = (int)len;
+			size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+			buf_incr = len;
+      ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
 			curr_to_proc[p] += size;
 			ADIOI_BUF_COPY
 		    }
@@ -1433,13 +1511,14 @@
 		    */
 		}
 		else {
+        ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
 		    curr_to_proc[p] += (int)len;
-		    buf_incr = (int)len;
+		    buf_incr = len;
 		    ADIOI_BUF_INCR
 		}
 	    }
 	    else {
-		buf_incr = (int)len;
+		buf_incr = len;
 		ADIOI_BUF_INCR
             }
 	    off     += len;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -17,13 +17,20 @@
 
 #include "ad_bgl_tuning.h"
 
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
 void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count, 
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
 {
-    int err=-1, datatype_size, len;
+    int err=-1, datatype_size;
+    ADIO_Offset len;
     static char myname[] = "ADIOI_BGL_WRITECONTIG";
-
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5036, 0, NULL);
+#endif
 #if BGL_PROFILE
 		/* timing */
 		double io_time, io_time2;
@@ -35,7 +42,8 @@
 #endif
 			  
     MPI_Type_size(datatype, &datatype_size);
-    len = datatype_size * count;
+    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+    ADIOI_Assert(len == (unsigned int) len); /* write takes an unsigned int parm */
 
 #if BGL_PROFILE
 
@@ -46,7 +54,7 @@
         	if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
 	ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
         	if (bglmpio_timing2) io_time2 = MPI_Wtime();
-	err = write(fd->fd_sys, buf, len);
+	err = write(fd->fd_sys, buf, (unsigned int)len);
         	if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_sys_posn = offset + err;
@@ -60,7 +68,7 @@
         	if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
 	ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
         	if (bglmpio_timing2) io_time2 = MPI_Wtime();
-	err = write(fd->fd_sys, buf, len);
+	err = write(fd->fd_sys, buf, (unsigned int)len);
         	if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_ind += err;
@@ -73,7 +81,7 @@
 	if (fd->fp_sys_posn != offset)
 	    lseek(fd->fd_sys, offset, SEEK_SET);
 	ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
-	err = write(fd->fd_sys, buf, len);
+	err = write(fd->fd_sys, buf, (unsigned int)len);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_sys_posn = offset + err;
 	/* individual file pointer not updated */        
@@ -83,7 +91,7 @@
 	if (fd->fp_sys_posn != fd->fp_ind)
 	    lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
 	ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
-	err = write(fd->fd_sys, buf, len);
+	err = write(fd->fd_sys, buf, (unsigned int)len);
 	ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
 	fd->fp_ind += err;
 	fd->fp_sys_posn = fd->fp_ind;
@@ -110,11 +118,12 @@
 #endif
 
     *error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5037, 0, NULL);
+#endif
 }
 
 
-
-
 #define ADIOI_BUFFERED_WRITE \
 { \
     if (req_off >= writebuf_off + writebuf_len) { \
@@ -123,7 +132,7 @@
         if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
         if (err == -1) err_flag = 1; \
 	writebuf_off = req_off; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
 	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
 	err = read(fd->fd_sys, writebuf, writebuf_len); \
@@ -135,7 +144,8 @@
 	    return; \
         } \
     } \
-    write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
     memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
     while (write_sz != req_len) { \
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
@@ -145,7 +155,7 @@
         req_len -= write_sz; \
         userbuf_off += write_sz; \
         writebuf_off += writebuf_len; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
 	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
 	err = read(fd->fd_sys, writebuf, writebuf_len); \
@@ -173,9 +183,10 @@
         if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
         if (err == -1) err_flag = 1; \
 	writebuf_off = req_off; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
     } \
-    write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
     memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
     while (write_sz != req_len) { \
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
@@ -186,7 +197,7 @@
         req_len -= write_sz; \
         userbuf_off += write_sz; \
         writebuf_off += writebuf_len; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
         write_sz = ADIOI_MIN(req_len, writebuf_len); \
         memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
     } \
@@ -201,19 +212,23 @@
 {
 /* offset is in units of etype relative to the filetype. */
 
+
+
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
-    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
+    ADIO_Offset i_offset, sum, size_in_filetype;
+    int i, j, k, err=-1, st_index=0;
+    int n_etypes_in_filetype;
+    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
     ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len;
+    int filetype_size, etype_size, buftype_size;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
     ADIO_Offset userbuf_off;
     ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
     char *writebuf, *value;
-    int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
-    int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
+    unsigned bufsize, writebuf_len, max_bufsize, write_sz;
+    int err_flag=0, info_flag;
+    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
     static char myname[] = "ADIOI_BGL_WRITESTRIDED";
 
     if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
@@ -247,12 +262,13 @@
     MPI_Type_extent(datatype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
 /* get max_bufsize from the info object. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     max_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -272,20 +288,23 @@
 	end_offset = off + bufsize - 1;
         writebuf_off = off;
         writebuf = (char *) ADIOI_Malloc(max_bufsize);
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
 
 /* if atomicity is true, lock the region to be accessed */
         if (fd->atomicity) 
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
         for (j=0; j<count; j++) 
+        {
+          int i;
             for (i=0; i<flat_buf->count; i++) {
-                userbuf_off = j*buftype_extent + flat_buf->indices[i];
+                userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
 		req_off = off;
 		req_len = flat_buf->blocklens[i];
 		ADIOI_BUFFERED_WRITE_WITHOUT_READ
                 off += flat_buf->blocklens[i];
             }
+        }
 
         /* write the buffer out finally */
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
@@ -317,29 +336,37 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			fwr_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
-		}
-	    }
+	/* Wei-keng reworked type processing to be a bit more efficient */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
+            /* now offset is local to this extent */
+
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* fwr_size is from offset to the end of block i */
+                if (dist == 0) {
+                    i++;
+                    offset   = flat_file->indices[i];
+                    fwr_size = flat_file->blocklens[i];
+                    break;
+                }
+                if (dist > 0) {
+                    fwr_size = dist;
+                    break;
+                }
+            }
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
 	}
 	else {
+    int i;
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -355,32 +382,64 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		    abs_off_in_filetype;
 	}
 
         start_off = offset;
+        /* Wei-keng Liao:write request is within single flat_file contig block*/
+	/* this could happen, for example, with subarray types that are
+	 * actually fairly contiguous */
+        if (buftype_is_contig && bufsize <= fwr_size) {
+            ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+	    if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte 
+		 * that can be accessed in the fileview. */
+                fd->fp_ind = offset + bufsize;
+                if (bufsize == fwr_size) {
+                    do {
+                        st_index++;
+                        if (st_index == flat_file->count) {
+                            st_index = 0;
+                            n_filetypes++;
+                        }
+                    } while (flat_file->blocklens[st_index] == 0);
+                    fd->fp_ind = disp + flat_file->indices[st_index]
+                               + (ADIO_Offset)n_filetypes*filetype_extent;
+                }
+            }
+	    fd->fp_sys_posn = -1;   /* set it to null. */ 
+#ifdef HAVE_STATUS_SET_BYTES
+	    MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif 
+            return;
+        }
 
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
 
 	st_fwr_size = fwr_size;
 	st_n_filetypes = n_filetypes;
-	i = 0;
+	i_offset = 0;
 	j = st_index;
 	off = offset;
 	fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
-	while (i < bufsize) {
-	    i += fwr_size;
+	while (i_offset < bufsize) {
+	    i_offset += fwr_size;
 	    end_offset = off + fwr_size - 1;
 
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
-	    }
+            j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+                j = (j+1) % flat_file->count;
+                n_filetypes += (j == 0) ? 1 : 0;
+            }
 
-	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
-	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+	    off = disp + flat_file->indices[j] + 
+		    n_filetypes*(ADIO_Offset)filetype_extent;
+	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	}
 
 /* if atomicity is true, lock the region to be accessed */
@@ -390,7 +449,7 @@
         /* initial read for the read-modify-write */
         writebuf_off = offset;
         writebuf = (char *) ADIOI_Malloc(max_bufsize);
-        writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
+        writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
 	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
 	lseek(fd->fd_sys, writebuf_off, SEEK_SET); 
 	err = read(fd->fd_sys, writebuf, writebuf_len); 
@@ -408,39 +467,41 @@
 /* contiguous in memory, noncontiguous in file. should be the most
    common case. */
 
-	    i = 0;
+	    i_offset = 0;
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
 	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
-	    while (i < bufsize) {
+	    while (i_offset < bufsize) {
                 if (fwr_size) { 
                     /* TYPE_UB and TYPE_LB can result in 
                        fwr_size = 0. save system call in such cases */ 
 		    /* lseek(fd->fd_sys, off, SEEK_SET);
-		    err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
+		    err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
 
 		    req_off = off;
 		    req_len = fwr_size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_WRITE
 		}
-		i += fwr_size;
+		i_offset += fwr_size;
 
                 if (off + fwr_size < disp + flat_file->indices[j] +
-                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                        off += fwr_size;
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by fwr_size. */
                 else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
-		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+                                    n_filetypes*(ADIO_Offset)filetype_extent;
+		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], 
+				    bufsize-i_offset);
 		}
 	    }
 	}
@@ -452,7 +513,7 @@
 	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
 	    k = num = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
@@ -463,11 +524,11 @@
 		size = ADIOI_MIN(fwr_size, bwr_size);
 		if (size) {
 		    /* lseek(fd->fd_sys, off, SEEK_SET);
-		    err = write(fd->fd_sys, ((char *) buf) + i, size); */
+		    err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
 
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_WRITE
 		}
 
@@ -476,18 +537,19 @@
 
 		if (size == fwr_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
+ 		    j = (j+1) % flat_file->count;
+ 		    n_filetypes += (j == 0) ? 1 : 0;
+ 		    while (flat_file->blocklens[j]==0) {
+ 			j = (j+1) % flat_file->count;
+ 			n_filetypes += (j == 0) ? 1 : 0;
 		    }
 
 		    off = disp + flat_file->indices[j] + 
-                                  (ADIO_Offset) n_filetypes*filetype_extent;
+                                  n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_fwr_size = flat_file->blocklens[j];
 		    if (size != bwr_size) {
-			i += size;
+			i_offset += size;
 			new_bwr_size -= size;
 		    }
 		}
@@ -497,8 +559,8 @@
 
 		    k = (k + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
-			flat_buf->indices[k]); 
+		    i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+			flat_buf->indices[k]; 
 		    new_bwr_size = flat_buf->blocklens[k];
 		    if (size != fwr_size) {
 			off += size;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am	2010-11-16 09:16:31.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -26,6 +26,7 @@
         ad_bgl.c \
         ad_bgl_close.c \
         ad_bgl_fcntl.c \
+        ad_bgl_flush.c \
         ad_bgl_getsh.c \
         ad_bgl.h \
         ad_bgl_hints.c \
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bgl: .state-cache
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c	2010-11-15 15:02:47.000000000 +0100
@@ -6,12 +6,14 @@
  */
 
 #include "../ad_bgl/ad_bgl.h"
+#include "ad_bglockless.h"
 
 /* adioi.h has the ADIOI_Fns_struct define */
 #include "adioi.h"
 
 struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
     ADIOI_BGL_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* Collective open */
     ADIOI_GEN_ReadContig, /* ReadContig */
     ADIOI_GEN_WriteContig, /* WriteContig */
     ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
@@ -35,7 +37,8 @@
     ADIOI_GEN_IOComplete, /* WriteComplete */
     ADIOI_GEN_IreadStrided, /* IreadStrided */
     ADIOI_GEN_IwriteStrided, /* IwriteStrided */
-    ADIOI_GEN_Flush, /* Flush */
+    ADIOI_BGL_Flush, /* Flush */
     ADIOI_GEN_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_BGLOCKLESS_Feature  /* Features */
 };
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless: ad_bglockless_features.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless: ad_bglockless.h
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -21,4 +21,6 @@
 
 noinst_LTLIBRARIES = libadio_bglockless.la
 libadio_bglockless_la_SOURCES = \
-        ad_bglockless.c
+        ad_bglockless.c \
+        ad_bglockless.h \
+        ad_bglockless_features.c
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_bglockless: .state-cache
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c	2010-11-15 15:02:47.000000000 +0100
@@ -33,4 +33,5 @@
     ADIOI_GRIDFTP_Flush, /* Flush */
     ADIOI_GRIDFTP_Resize, /* Resize */
     ADIOI_GRIDFTP_Delete, /* Delete */
+    ADIOI_GRIDFTP_Feature, /* Features */
 };
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp: ad_gridftp_features.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -56,8 +56,8 @@
 			    MPI_Info_get_valuelen(users_info,key,&valuelen,&flag);
 			    if (flag)
 				{
-				    MPI_Info_get(users_info,key,valuelen,value,&flag);
-				    if (flag) MPI_Info_set(fd->info,key,value);
+				    ADIOI_Info_get(users_info,key,valuelen,value,&flag);
+				    if (flag) ADIOI_Info_set(fd->info,key,value);
 				}
 			}
 		}
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c	2010-11-15 15:03:31.000000000 +0100
@@ -136,7 +136,7 @@
        oattr[] (eg. parallelism, striping, etc.) goes here */
     if ( fd->info!=MPI_INFO_NULL )
 	{
-	    MPI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
+	    ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
 	    if ( keyfound )
 		{
 		    if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) && 
@@ -153,7 +153,7 @@
 			globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
 		}
 
-	    MPI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
+	    ADIOI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
 	    if ( keyfound )
 		{
 		    int nftpthreads;
@@ -170,14 +170,14 @@
 			}
 		}
 
-	    MPI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
+	    ADIOI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
 	    if ( keyfound )
 		{
 		    /* if set to "true" or "enable", set up round-robin block layout */
 		    if ( !strncmp("true",hintval,4) || !strncmp("TRUE",hintval,4) ||
 			 !strncmp("enable",hintval,4) || !strncmp("ENABLE",hintval,4) )
 			{
-			    MPI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
+			    ADIOI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
 			    if ( keyfound )
 				{
 				    int striping_factor;
@@ -197,7 +197,7 @@
 			}
 		}
 
-	    MPI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
+	    ADIOI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
 	    if ( keyfound )
 		{
 		    /* set tcp buffer size */
@@ -214,7 +214,7 @@
 			}
 		}
 
-	    MPI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
+	    ADIOI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
 	    if ( keyfound )
 		{
 		    globus_ftp_control_type_t filetype;
@@ -340,84 +340,4 @@
 		}
 	}
     num_gridftp_handles++;
-    
-#if 0
-    /* Debugging info for testing PASV mode behind firewalls */
-    if ( myrank==0 )
-	{
-	    globus_bool_t striped;
-	    globus_ftp_control_mode_t mode;
-	    globus_ftp_control_type_t filetype;
-	    globus_ftp_control_parallelism_t parallelism;
-
-	    FPRINTF(stderr,"--gridftp details for %s--\n",
-		    fd->filename);
-
-	    /* 
-	    FPRINTF(stderr,"Connection caching: ");
-	    globus_ftp_client_handleattr_get_cache_all(&hattr,&cached);
-	    if ( cached==GLOBUS_TRUE )
-		FPRINTF(stderr,"Y\n");
-	    else
-		FPRINTF(stderr,"N\n");
-	    */
-
-	    FPRINTF(stderr,"Control mode:  ");
-	    globus_ftp_client_operationattr_get_mode(&(oattr[fd->fd_sys]),&mode);
-	    if ( mode==GLOBUS_FTP_CONTROL_MODE_BLOCK )
-		FPRINTF(stderr,"block\n");
-	    else if ( mode==GLOBUS_FTP_CONTROL_MODE_COMPRESSED )
-		FPRINTF(stderr,"compressed\n");
-	    else if ( mode==GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK )
-		FPRINTF(stderr,"extended block\n");
-	    else if ( mode==GLOBUS_FTP_CONTROL_MODE_STREAM )
-		FPRINTF(stderr,"stream\n");
-	    else
-		FPRINTF(stderr,"unknown\n");
-
-	    FPRINTF(stderr,"File type:  ");
-	    globus_ftp_client_operationattr_get_type(&(oattr[fd->fd_sys]),&filetype);
-	    if ( filetype==GLOBUS_FTP_CONTROL_TYPE_ASCII )
-		FPRINTF(stderr,"ASCII\n");
-	    else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_IMAGE )
-		FPRINTF(stderr,"binary\n");
-	    else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_EBCDIC )
-		FPRINTF(stderr,"EBCDIC\n");
-	    else
-		FPRINTF(stderr,"unknown\n");
-
-	    FPRINTF(stderr,"Parallelism:  ");
-	    globus_ftp_client_operationattr_get_parallelism(&(oattr[fd->fd_sys]),&parallelism);
-	    if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_NONE )
-		FPRINTF(stderr,"none\n");
-	    else if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_FIXED )
-		FPRINTF(stderr,"fixed with %d streams\n",parallelism.fixed.size);
-	    else
-		FPRINTF(stderr,"unknown\n");
-
-	    FPRINTF(stderr,"Striping:  ");
-	    globus_ftp_client_operationattr_get_striped(&(oattr[fd->fd_sys]),&striped);
-	    if ( striped==GLOBUS_TRUE )
-		{
-		    globus_ftp_control_layout_t layout;
-
-		    FPRINTF(stderr,"Y\nLayout:  ");
-		    globus_ftp_client_operationattr_get_layout(&(oattr[fd->fd_sys]),
-									       &layout);
-		    if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_NONE )
-			FPRINTF(stderr,"none\n");
-		    else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_PARTITIONED )
-			FPRINTF(stderr,"partitioned, size=%d\n",layout.partitioned.size);
-		    else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_BLOCKED_ROUND_ROBIN )
-			FPRINTF(stderr,"round-robin, block size=%d\n",layout.round_robin.block_size);
-		    else
-			FPRINTF(stderr,"unknown\n");
-		}
-	    else
-		FPRINTF(stderr,"N\n");
-
-	    fflush(stderr);
-	}
-#endif
-
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c	2010-11-15 15:03:31.000000000 +0100
@@ -50,10 +50,6 @@
 	readcontig_data_cb: buffer 0x404c0008 length 65536 offset 32112640 eof 0
 	readcontig_data_cb: buffer 0x404d0008 length 65536 offset 32178176 eof 0
      */
-#if 0
-    FPRINTF(stderr, "%s: buffer %p length %d offset %Ld eof %d\n",
-      __func__, buffer, length, offset, eof);
-#endif
     if ( !eof )
 	    globus_ftp_client_register_read(handle,
 					    buffer+length,
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c	2010-11-15 15:03:31.000000000 +0100
@@ -364,10 +364,6 @@
     {
 	fd->fp_ind += extent;
 	fd->fp_sys_posn = fd->fp_ind;
-#if 0
-	FPRINTF(stdout, "[%d/%d]    new file position is %Ld\n", myrank, 
-		nprocs, (long long) fd->fp_ind);
-#endif
     }
     else {
 	fd->fp_sys_posn = offset + extent;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -25,6 +25,7 @@
         ad_gridftp_close.c \
         ad_gridftp_delete.c \
         ad_gridftp_fcntl.c \
+        ad_gridftp_features.c \
         ad_gridftp_flush.c \
         ad_gridftp_hints.c \
         ad_gridftp_open.c \
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c	2010-11-16 09:16:07.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,6 +8,9 @@
 #include "ad_hfs.h"
 #include "adio_extern.h"
 
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
 void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
 {
     int  i, ntimes, err;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c	2010-11-16 09:16:07.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,6 +7,10 @@
 
 #include "ad_hfs.h"
 
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
 void ADIOI_HFS_Open(ADIO_File fd, int *error_code)
 {
     int perm, old_mask, amode;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c	2010-11-16 09:16:07.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,6 +7,10 @@
 
 #include "ad_hfs.h"
 
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
 void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count, 
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c	2010-11-16 09:16:07.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,6 +7,10 @@
 
 #include "ad_hfs.h"
 
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
 void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count, 
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre: ad_lustre_aggregate.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,24 +1,27 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* 
- *   Copyright (C) 2001 University of Chicago. 
+/*
+ *   Copyright (C) 2001 University of Chicago.
  *   See COPYRIGHT notice in top-level directory.
  *
  *   Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ *   Copyright (C) 2008 Sun Microsystems, Lustre group
  */
 
 #include "ad_lustre.h"
 
 struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
     ADIOI_LUSTRE_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* OpenColl */
     ADIOI_LUSTRE_ReadContig, /* ReadContig */
     ADIOI_LUSTRE_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
-    ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
+    ADIOI_LUSTRE_WriteStridedColl, /* WriteStridedColl */
     ADIOI_GEN_SeekIndividual, /* SeekIndividual */
     ADIOI_GEN_Fcntl, /* Fcntl */
     ADIOI_LUSTRE_SetInfo, /* SetInfo */
     ADIOI_GEN_ReadStrided, /* ReadStrided */
-    ADIOI_GEN_WriteStrided, /* WriteStrided */
+    ADIOI_LUSTRE_WriteStrided, /* WriteStrided */
     ADIOI_GEN_Close, /* Close */
 #if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE)
     ADIOI_GEN_IreadContig, /* IreadContig */
@@ -36,4 +39,5 @@
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_GEN_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature, /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c	2010-11-15 15:02:47.000000000 +0100
@@ -25,7 +25,7 @@
 	if (fd->fp_sys_posn != -1) 
 	     lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
 	if (fcntl_struct->fsize == -1) {
-	    *error_code = MPIR_Err_create_code(MPI_SUCCESS, 
+	    *error_code = MPIO_Err_create_code(MPI_SUCCESS, 
 		    MPIR_ERR_RECOVERABLE, myname, __LINE__, 
 		    MPI_ERR_IO, "**io", "**io %s", strerror(errno));
 	}
@@ -56,7 +56,7 @@
 	    ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
 			    &status, error_code);
 	    if (*error_code != MPI_SUCCESS) {
-		*error_code = MPIR_Err_create_code(MPI_SUCCESS, 
+		*error_code = MPIO_Err_create_code(MPI_SUCCESS, 
 			MPIR_ERR_RECOVERABLE, myname, __LINE__, 
 			MPI_ERR_IO, "**io", "**io %s", strerror(errno));
                 return;  
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h	2010-11-15 15:02:47.000000000 +0100
@@ -1,9 +1,11 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* 
- *   Copyright (C) 1997 University of Chicago. 
+/*
+ *   Copyright (C) 1997 University of Chicago.
  *   See COPYRIGHT notice in top-level directory.
  *
  *   Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ *   Copyright (C) 2008 Sun Microsystems, Lustre group
  */
 
 #ifndef AD_UNIX_INCLUDE
@@ -17,6 +19,7 @@
 
 #ifdef __linux__
 #  include <sys/ioctl.h>                            /* necessary for: */
+#  include <time.h>
 #  define __USE_GNU                                 /* O_DIRECT and */
 #  include <fcntl.h>                                /* IO operations */
 #  undef __USE_GNU
@@ -24,7 +27,7 @@
 
 /*#include <fcntl.h>*/
 #include <sys/ioctl.h>
-#include "lustre/lustre_user.h"
+#include <lustre/lustre_user.h>
 #include "adio.h"
 /*#include "adioi.h"*/
 
@@ -41,24 +44,48 @@
 
 void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
 void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
-void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, 
-                      MPI_Datatype datatype, int file_ptr_type,
-                     ADIO_Offset offset, ADIO_Status *status, int
-		     *error_code);
-void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, 
-                      MPI_Datatype datatype, int file_ptr_type,
-                      ADIO_Offset offset, ADIO_Status *status, int
-		      *error_code);   
+void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
+                             MPI_Datatype datatype, int file_ptr_type,
+                             ADIO_Offset offset, ADIO_Status *status,
+                             int *error_code);
+void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
+                              MPI_Datatype datatype, int file_ptr_type,
+                              ADIO_Offset offset, ADIO_Status *status,
+                              int *error_code);
+void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
+			       MPI_Datatype datatype, int file_ptr_type,
+			       ADIO_Offset offset, ADIO_Status *status,
+			       int *error_code);
 void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
-		       MPI_Datatype datatype, int file_ptr_type,
-		       ADIO_Offset offset, ADIO_Status *status, int
-		       *error_code);
+		                   MPI_Datatype datatype, int file_ptr_type,
+		                   ADIO_Offset offset, ADIO_Status *status,
+                                   int *error_code);
 void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
-		       MPI_Datatype datatype, int file_ptr_type,
-		       ADIO_Offset offset, ADIO_Status *status, int
-		       *error_code);
+		                  MPI_Datatype datatype, int file_ptr_type,
+		                  ADIO_Offset offset, ADIO_Status *status,
+                                  int *error_code);
+void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
+			      MPI_Datatype datatype, int file_ptr_type,
+			      ADIO_Offset offset, ADIO_Status *status,
+                              int *error_code);
 void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
 	               int *error_code);
 void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
 
+/* the lustre utilities: */
+int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
+			   ADIO_Offset *len_list, int nprocs);
+
+void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
+				    int mode);
+void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
+			      ADIO_Offset *len_list, int contig_access_count,
+			      int *striping_info, int nprocs,
+                              int *count_my_req_procs_ptr,
+			      int **count_my_req_per_proc_ptr,
+			      ADIOI_Access **my_req_ptr,
+			      int ***buf_idx_ptr);
+
+int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
+                                 ADIO_Offset *len, int *striping_info);
 #endif /* End of AD_UNIX_INCLUDE */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,9 +1,11 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* 
- *   Copyright (C) 1997 University of Chicago. 
+/*
+ *   Copyright (C) 1997 University of Chicago.
  *   See COPYRIGHT notice in top-level directory.
  *
  *   Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ *   Copyright (C) 2008 Sun Microsystems, Lustre group
  */
 
 #include "ad_lustre.h"
@@ -11,70 +13,81 @@
 
 void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
 {
-    char *value, *value_in_fd;
-    int flag, tmp_val[3], str_factor=-1, str_unit=0, start_iodev=-1;
+    char *value;
+    int flag, stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
     struct lov_user_md lum = { 0 };
     int err, myrank, fd_sys, perm, amode, old_mask;
+    int int_val, tmp_val;
+    static char myname[] = "ADIOI_LUSTRE_SETINFO";
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
     if ( (fd->info) == MPI_INFO_NULL) {
-	/* This must be part of the open call. can set striping parameters 
-           if necessary. */ 
+	/* This must be part of the open call. can set striping parameters
+           if necessary. */
 	MPI_Info_create(&(fd->info));
 
-	MPI_Info_set(fd->info, "direct_read", "false");
-	MPI_Info_set(fd->info, "direct_write", "false");
+	ADIOI_Info_set(fd->info, "direct_read", "false");
+	ADIOI_Info_set(fd->info, "direct_write", "false");
 	fd->direct_read = fd->direct_write = 0;
-	
-	/* has user specified striping or server buffering parameters 
+        /* initialize lustre hints */
+	ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
+        fd->hints->fs_hints.lustre.co_ratio = 1;
+	ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
+        fd->hints->fs_hints.lustre.coll_threshold = 0;
+	ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
+        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;
+
+	/* has user specified striping or server buffering parameters
            and do they have the same value on all processes? */
 	if (users_info != MPI_INFO_NULL) {
-	    MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
+            /* striping information */
+	    ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
 			 value, &flag);
-	    if (flag) 
+	    if (flag)
 		str_unit=atoi(value);
 
-	    MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
 			 value, &flag);
-	    if (flag) 
+	    if (flag)
 		str_factor=atoi(value);
 
-	    MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
-			 value, &flag);
-	    if (flag) 
+	    ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
+                         MPI_MAX_INFO_VAL, value, &flag);
+	    if (flag)
 		start_iodev=atoi(value);
 
-	    MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, 
-			     value, &flag);
+            /* direct read and write */
+	    ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
+			 value, &flag);
 	    if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
-		MPI_Info_set(fd->info, "direct_read", "true");
+		ADIOI_Info_set(fd->info, "direct_read", "true");
 		fd->direct_read = 1;
 	    }
-
-	    MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
 			     value, &flag);
 	    if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
-		MPI_Info_set(fd->info, "direct_write", "true");
+		ADIOI_Info_set(fd->info, "direct_write", "true");
 		fd->direct_write = 1;
 	    }
 	}
 
+        /* set striping information with ioctl */
 	MPI_Comm_rank(fd->comm, &myrank);
 	if (myrank == 0) {
-	    tmp_val[0] = str_factor;
-	    tmp_val[1] = str_unit;
-	    tmp_val[2] = start_iodev;
+	    stripe_val[0] = str_factor;
+	    stripe_val[1] = str_unit;
+	    stripe_val[2] = start_iodev;
 	}
-	MPI_Bcast(tmp_val, 3, MPI_INT, 0, fd->comm);
+	MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);
 
-	if (tmp_val[0] != str_factor 
-		|| tmp_val[1] != str_unit 
-		|| tmp_val[2] != start_iodev) {
+	if (stripe_val[0] != str_factor
+		|| stripe_val[1] != str_unit
+		|| stripe_val[2] != start_iodev) {
 	    FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
 		    "-striping_factor:striping_unit:start_iodevice "
 		    "need to be identical across all processes\n");
 	    MPI_Abort(MPI_COMM_WORLD, 1);
-       	} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
+	} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
 	     /* if user has specified striping info, process 0 tries to set it */
 	    if (!myrank) {
 		if (fd->perm == ADIO_PERM_NULL) {
@@ -100,9 +113,9 @@
 		amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
 
 		fd_sys = open(fd->filename, amode, perm);
-		if (fd_sys == -1) { 
-		    if (errno != EEXIST) 
-			fprintf(stderr, 
+		if (fd_sys == -1) {
+		    if (errno != EEXIST)
+			fprintf(stderr,
 				"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
 		} else {
 		    lum.lmm_magic = LOV_USER_MAGIC;
@@ -112,25 +125,73 @@
 		    lum.lmm_stripe_offset = start_iodev;
 
 		    err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
-		    if (err == -1 && errno != EEXIST) { 
+		    if (err == -1 && errno != EEXIST) {
 			fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
 		    }
 		    close(fd_sys);
 	       }
 	    } /* End of striping parameters validation */
 	}
-	
 	MPI_Barrier(fd->comm);
-	/* set the values for collective I/O and data sieving parameters */
-	ADIOI_GEN_SetInfo(fd, users_info, error_code);
-    } else {
-	/* The file has been opened previously and fd->fd_sys is a valid
-           file descriptor. cannot set striping parameters now. */
-	
-	/* set the values for collective I/O and data sieving parameters */
-	ADIOI_GEN_SetInfo(fd, users_info, error_code);
     }
- 
+    /* get other hint */
+    if (users_info != MPI_INFO_NULL) {
+        /* CO: IO Clients/OST,
+         * to keep the load balancing between clients and OSTs */
+        ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value,
+                     &flag);
+	if (flag && (int_val = atoi(value)) > 0) {
+            tmp_val = int_val;
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    if (tmp_val != int_val) {
+                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                   "romio_lustre_co_ratio",
+                                                   error_code);
+                ADIOI_Free(value);
+		return;
+	    }
+	    ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
+            fd->hints->fs_hints.lustre.co_ratio = atoi(value);
+	}
+        /* coll_threshold:
+         * if the req size is bigger than this, collective IO may not be performed.
+         */
+	ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value,
+                     &flag);
+	if (flag && (int_val = atoi(value)) > 0) {
+            tmp_val = int_val;
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    if (tmp_val != int_val) {
+	        MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+		                                   "romio_lustre_coll_threshold",
+	                                           error_code);
+                ADIOI_Free(value);
+	        return;
+	    }
+	    ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
+            fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
+        }
+        /* ds_in_coll: "disable" turns off data sieving in collective IO */
+	ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
+	             value, &flag);
+	if (flag && (!strcmp(value, "disable") ||
+                     !strcmp(value, "DISABLE"))) {
+            tmp_val = int_val = 2; /* marker compared against rank 0's copy */
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); /* tmp_val is ONE int */
+	    if (tmp_val != int_val) {
+	        MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+		                                   "romio_lustre_ds_in_coll",
+						   error_code);
+                ADIOI_Free(value);
+                return;
+	    }
+	    ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
+            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
+	}
+    }
+    /* set the values for collective I/O and data sieving parameters */
+    ADIOI_GEN_SetInfo(fd, users_info, error_code);
+
     if (ADIOI_Direct_read) fd->direct_read = 1;
     if (ADIOI_Direct_write) fd->direct_write = 1;
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c	2010-11-15 15:03:31.000000000 +0100
@@ -1,17 +1,25 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* 
- *   Copyright (C) 1997 University of Chicago. 
+/*
+ *   Copyright (C) 1997 University of Chicago.
  *   See COPYRIGHT notice in top-level directory.
  *
  *   Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ *   Copyright (C) 2008 Sun Microsystems, Lustre group
  */
 
 #include "ad_lustre.h"
 
+/* what is the basis for this define?
+ * what happens if there are more than 1k UUIDs? */
+
+#define MAX_LOV_UUID_COUNT      1000
+
 void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
 {
     int perm, old_mask, amode, amode_direct;
-    struct lov_user_md lum = { 0 };
+    int lumlen;
+    struct lov_user_md *lum = NULL;
     char *value;
 
 #if defined(MPICH2) || !defined(PRINT_ERR_MSG)
@@ -44,23 +52,37 @@
     if (fd->fd_sys != -1) {
         int err;
 
-        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-
         /* get file striping information and set it in info */
-        lum.lmm_magic = LOV_USER_MAGIC;
-        err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum);
-
+	/* odd malloc here because lov_user_md contains some fixed data and
+	 * then a list of 'lmm_objects' representing stripe */
+        lumlen = sizeof(struct lov_user_md) +
+                 MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
+	/* furthermore, Pascal Deveze reports that, even though we pass a
+	 * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
+	 * struct are uninitialized, the call can give an error.  calloc in case
+	 * there are other members that must be initialized and in case
+	 * lov_user_md struct changes in future */
+	lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
+        lum->lmm_magic = LOV_USER_MAGIC;
+        err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
         if (!err) {
-            sprintf(value, "%d", lum.lmm_stripe_size);
-            MPI_Info_set(fd->info, "striping_unit", value);
+            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
+
+            fd->hints->striping_unit = lum->lmm_stripe_size;
+            sprintf(value, "%d", lum->lmm_stripe_size);
+            ADIOI_Info_set(fd->info, "striping_unit", value);
+
+            fd->hints->striping_factor = lum->lmm_stripe_count;
+            sprintf(value, "%d", lum->lmm_stripe_count);
+            ADIOI_Info_set(fd->info, "striping_factor", value);
 
-            sprintf(value, "%d", lum.lmm_stripe_count);
-            MPI_Info_set(fd->info, "striping_factor", value);
+            fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
+            sprintf(value, "%d", lum->lmm_stripe_offset);
+            ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
 
-            sprintf(value, "%d", lum.lmm_stripe_offset);
-            MPI_Info_set(fd->info, "start_iodevice", value);
+            ADIOI_Free(value);
         }
-        ADIOI_Free(value);
+        ADIOI_Free(lum);
 
         if (fd->access_mode & ADIO_APPEND)
             fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,9 +1,11 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* 
- *   Copyright (C) 1997 University of Chicago. 
+/*
+ *   Copyright (C) 1997 University of Chicago.
  *   See COPYRIGHT notice in top-level directory.
  *
  *   Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ *   Copyright (C) 2008 Sun Microsystems, Lustre group
  */
 
 #define _XOPEN_SOURCE 600
@@ -18,7 +20,7 @@
 static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, 
               ADIO_Offset offset, int *err)
 {
-    int ntimes, rem, newrem, i, size, nbytes;
+    int rem, size, nbytes;
     if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
 	*err = pwrite(fd->fd_direct, buf, len, offset);
     } else if (len < fd->d_miniosz) {
@@ -37,7 +39,7 @@
 static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len, 
               ADIO_Offset offset, int *err)
 {
-    int ntimes, rem, newrem, i, size, nbytes;
+    int rem, size, nbytes;
     if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz))
 	*err = pread(fd->fd_direct, buf, len, offset);
     else if (len < fd->d_miniosz)
@@ -59,7 +61,6 @@
 {
     int err=-1, diff, size=len, nbytes = 0;
     void *newbuf;
-    static char myname[] = "ADIOI_LUSTRE_Directio";
 
     if (offset % fd->d_miniosz) {
 	diff = fd->d_miniosz - (offset % fd->d_miniosz);
@@ -87,7 +88,7 @@
 		memcpy(newbuf, buf, size);
 		ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
 		nbytes += err;
-		free(newbuf);
+		ADIOI_Free(newbuf);
 	    }
 	    else nbytes += pwrite(fd->fd_sys, buf, size, offset);
 	}
@@ -102,7 +103,7 @@
 		ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
 		if (err > 0) memcpy(buf, newbuf, err);
 		nbytes += err;
-		free(newbuf);
+		ADIOI_Free(newbuf);
 	    }
 	    else nbytes += pread(fd->fd_sys, buf, size, offset);
 	}
@@ -136,10 +137,23 @@
 	    if (err == -1) goto ioerr;
 	}
 	
-	if (io_mode)
+	if (io_mode) {
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
+#endif
 	    err = write(fd->fd_sys, buf, len);
-	else 
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
+#endif
+        } else {
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_read_a, 0, NULL);
+#endif
 	    err = read(fd->fd_sys, buf, len);
+#ifdef ADIOI_MPE_LOGGING
+        MPE_Log_event(ADIOI_MPE_read_b, 0, NULL);
+#endif
+        }
     } else {
 	err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode);
     }
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre: ad_lustre_wrcoll.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre: ad_lustre_wrstr.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -24,8 +24,11 @@
 noinst_LTLIBRARIES = libadio_lustre.la
 libadio_lustre_la_SOURCES = \
     ad_lustre.c \
+    ad_lustre_aggregate.c \
     ad_lustre_fcntl.c \
     ad_lustre.h \
     ad_lustre_hints.c \
     ad_lustre_open.c \
-    ad_lustre_rwcontig.c
+    ad_lustre_wrcoll.c \
+    ad_lustre_rwcontig.c \
+    ad_lustre_wrstr.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/README NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/README
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre/README	2010-11-16 09:16:04.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_lustre/README	2010-11-15 15:02:47.000000000 +0100
@@ -5,6 +5,21 @@
   o To post the code for ParColl (Partitioned collective IO)
  
 -----------------------------------------------------
+V05: 
+-----------------------------------------------------
+Improved data redistribution
+  o Improve I/O pattern identification. Besides checking for interleaving,
+    collective I/O is also performed when the request I/O size is small.
+    The hint bigsize (info key romio_lustre_coll_threshold) sets that size.
+  o Provide the hint CO (info key romio_lustre_co_ratio) for load balancing,
+    to control the number of I/O clients for each OST.
+  o Produce stripe-contiguous I/O pattern that Lustre prefers
+  o Control read-modify-write in data sieving in collective I/O
+    with the hint ds_in_coll (info key romio_lustre_ds_in_coll).
+  o Reduce extent lock conflicts by making each OST be accessed by one or
+    more constant clients.
+
+-----------------------------------------------------
 V04: 
 -----------------------------------------------------
   o Direct IO and Lockless IO support
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_lustre: .state-cache
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_NFS_operations = {
     ADIOI_NFS_Open, /* Open */
+    ADIOI_FAILSAFE_OpenColl, /* OpenColl */
     ADIOI_NFS_ReadContig, /* ReadContig */
     ADIOI_NFS_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -35,4 +36,5 @@
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_NFS_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_NFS_Feature, /* Features */
 };
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs: ad_nfs_features.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h	2010-11-15 15:02:47.000000000 +0100
@@ -78,5 +78,6 @@
 			 int *error_code);
 void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
 void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
+int ADIOI_NFS_Feature(ADIO_File fd, int feature_flag);
 
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c	2010-11-15 15:03:31.000000000 +0100
@@ -59,6 +59,7 @@
 
     struct aiocb *aiocbp;
     ADIOI_AIO_Request *aio_req;
+    MPI_Status status;
 
     fd_sys = fd->fd_sys;
 
@@ -108,7 +109,7 @@
         /* exceeded the max. no. of outstanding requests.
            complete all previous async. requests and try again. */
 	    ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
-			    offset, NULL, &error_code);
+			    offset, &status, &error_code);
 	    MPIO_Completed_request_create(&fd, len, &error_code, request);
 	    return 0;
 	} else {
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -177,7 +177,7 @@
     ADIO_Offset userbuf_off;
     ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
     char *readbuf, *tmp_buf, *value;
-    int flag, st_frd_size, st_n_filetypes, readbuf_len;
+    int st_frd_size, st_n_filetypes, readbuf_len;
     int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
 
     static char myname[] = "ADIOI_NFS_READSTRIDED";
@@ -201,7 +201,7 @@
 /* get max_bufsize from the info object. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     max_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -278,25 +278,32 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			frd_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
-		}
-	    }
-	}
+          /* Wei-keng reworked type processing to be a bit more efficient */
+           offset       = fd->fp_ind - disp;
+           n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+          offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+          /* now offset is local to this extent */
+ 
+           /* find the block where offset is located, skip blocklens[i]==0 */
+           for (i=0; i<flat_file->count; i++) {
+               ADIO_Offset dist;
+               if (flat_file->blocklens[i] == 0) continue;
+               dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
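+               /* frd_size runs from offset to the end of block i. NOTE(review): if dist == 0 on the last block, i++ below makes i == flat_file->count; confirm that cannot happen */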
+              if (dist == 0) {
+                  i++;
+                  offset   = flat_file->indices[i];
+                  frd_size = flat_file->blocklens[i];
+                  break;
+              }
+              if (dist > 0 ) { 
+                   frd_size = dist;
+		   break;
+              }
+          }
+           st_index = i;  /* starting index in flat_file->indices[] */
+           offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+       }
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
 	    n_filetypes = (int) (offset / n_etypes_in_filetype);
@@ -316,11 +323,42 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		    abs_off_in_filetype;
 	}
 
         start_off = offset;
 
+       /* Wei-keng Liao: fast path - the read request fits within a single
+        * contiguous flat_file block, e.g. with subarray types that actually
+        * describe the whole array */
+       if (buftype_is_contig && bufsize <= frd_size) {
+            ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+           if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte that
+                * can be accessed in the fileview. */
+               fd->fp_ind = offset + bufsize;
+               if (bufsize == frd_size) {
+                   do {   /* block fully consumed: skip zero-length blocks */
+                       st_index++;
+                       if (st_index == flat_file->count) {
+                           st_index = 0;
+                           n_filetypes++;
+                       }
+                    } while (flat_file->blocklens[st_index] == 0);
+                   fd->fp_ind = disp + flat_file->indices[st_index]
+                       + (ADIO_Offset) n_filetypes*filetype_extent;
+               }
+           }
+           fd->fp_sys_posn = -1;   /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+           MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+            return;
+       }
+
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
 
@@ -333,11 +371,11 @@
 	while (i < bufsize) {
 	    i += frd_size;
 	    end_offset = off + frd_size - 1;
-
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
+            j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+               j = (j+1) % flat_file->count;
+               n_filetypes += (j == 0) ? 1 : 0;
 	    }
 
 	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
@@ -402,11 +440,12 @@
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by frd_size. */
                 else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
                                         (ADIO_Offset) n_filetypes*filetype_extent;
 		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
@@ -445,12 +484,12 @@
 
 		if (size == frd_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
-
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
                                               (ADIO_Offset) n_filetypes*filetype_extent;
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c	2010-11-15 15:03:31.000000000 +0100
@@ -10,120 +10,6 @@
 			    int *error_code)
 {
 	return;
-#if 0
-#ifdef ROMIO_HAVE_WORKING_AIO
-    int err;
-    static char myname[] = "ADIOI_NFS_READCOMPLETE";
-#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_HANDLE
-    struct aiocb *tmp1;
-#endif
-#endif
-
-    if (*request == ADIO_REQUEST_NULL) {
-	*error_code = MPI_SUCCESS;
-	return;
-    }
-    
-#ifdef ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS
-/* old IBM */
-    if ((*request)->queued) {
-	do {
-#if !defined(_AIO_AIX_SOURCE) && !defined(_NO_PROTO)
-	    err = aio_suspend((*request)->handle,1,NULL);
-#else
-	    err = aio_suspend(1, (struct aiocb **) &((*request)->handle));
-#endif
-	} while ((err == -1) && (errno == EINTR));
-
-	tmp1 = (struct aiocb *) (*request)->handle;
-	if (err != -1) {
-	    err = aio_return(tmp1->aio_handle);
-	    (*request)->nbytes = err;
-	    errno = aio_error(tmp1->aio_handle);
-	}
-	else (*request)->nbytes = -1;
-
-/* on DEC, it is required to call aio_return to dequeue the request.
-   IBM man pages don't indicate what function to use for dequeue.
-   I'm assuming it is aio_return! */
-
-	if (err == -1) {
-	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
-					       MPIR_ERR_RECOVERABLE, myname,
-					       __LINE__, MPI_ERR_IO, "**io",
-					       "**io %s", strerror(errno));
-	}
-	else *error_code = MPI_SUCCESS;
-    }
-    else *error_code = MPI_SUCCESS;  /* if ( (*request)->queued ) */
-
-#ifdef HAVE_STATUS_SET_BYTES
-    if ((*request)->nbytes != -1)
-	MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-
-#elif defined(ROMIO_HAVE_WORKING_AIO)
-/* all other aio types */
-    if ((*request)->queued) {
-	do {
-	    err = aio_suspend((const struct aiocb **) &((*request)->handle), 1, 0);
-	} while ((err == -1) && (errno == EINTR));
-
-	if (err != -1) {
-	    err = aio_return((struct aiocb *) (*request)->handle); 
-	    (*request)->nbytes = err;
-	    errno = aio_error((struct aiocb *) (*request)->handle);
-	}
-	else (*request)->nbytes = -1;
-
-	if (err == -1) {
-	    *error_code = MPIO_Err_create_code(MPI_SUCCESS,
-					       MPIR_ERR_RECOVERABLE, myname,
-					       __LINE__, MPI_ERR_IO, "**io",
-					       "**io %s", strerror(errno));
-	}
-	else *error_code = MPI_SUCCESS;
-    }
-    else *error_code = MPI_SUCCESS;  /* if ((*request)->queued) ... */
-#ifdef HAVE_STATUS_SET_BYTES
-    if ((*request)->nbytes != -1)
-	MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-#endif
-
-#ifdef ROMIO_HAVE_WORKING_AIO
-    if ((*request)->queued != -1) {
-
-	/* queued = -1 is an internal hack used when the request must
-	   be completed, but the request object should not be
-	   freed. This is used in ADIOI_Complete_async, because the user
-	   will call MPI_Wait later, which would require status to
-	   be filled. Ugly but works. queued = -1 should be used only
-	   in ADIOI_Complete_async. 
-           This should not affect the user in any way. */
-
-	/* if request is still queued in the system, it is also there
-           on ADIOI_Async_list. Delete it from there. */
-	if ((*request)->queued) ADIOI_Del_req_from_list(request);
-
-	(*request)->fd->async_count--;
-	if ((*request)->handle) ADIOI_Free((*request)->handle);
-	ADIOI_Free_request((ADIOI_Req_node *) (*request));
-	*request = ADIO_REQUEST_NULL;
-    }
-
-#else
-/* no aio */
-
-#ifdef HAVE_STATUS_SET_BYTES
-    MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-    (*request)->fd->async_count--;
-    ADIOI_Free_request((ADIOI_Req_node *) (*request));
-    *request = ADIO_REQUEST_NULL;
-    *error_code = MPI_SUCCESS;
-#endif    
-#endif
 }
 
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -281,7 +281,7 @@
     ADIO_Offset userbuf_off;
     ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
     char *writebuf, *value;
-    int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
+    int st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
     int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
     static char myname[] = "ADIOI_NFS_WRITESTRIDED";
 
@@ -304,7 +304,7 @@
 /* get max_bufsize from the info object. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     max_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -381,25 +381,32 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			fwr_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
-		}
-	    }
-	}
+       /* Wei-keng reworked type processing to be a bit more efficient */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
+            /* now offset is local to this extent */
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* at end of block i: go to next non-empty block, wrapping */
+                if (dist == 0) {
+                    do {i = (i+1) % flat_file->count; n_filetypes += (i == 0);}
+                    while (flat_file->blocklens[i] == 0);
+                    offset   = flat_file->indices[i];
+                    fwr_size = flat_file->blocklens[i];
+                    break;
+                }
+                if (dist > 0) {
+                    fwr_size = dist;  /* bytes from offset to end of block i */
+                    break;
+                }
+            }
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+        }
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
 	    n_filetypes = (int) (offset / n_etypes_in_filetype);
@@ -419,10 +426,40 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		abs_off_in_filetype;
 	}
 
         start_off = offset;
+       /* Wei-keng Liao: write request is within a single flat_file contig block */
+       /* this could happen, for example, with subarray types that are
+        * actually fairly contiguous */
+        if (buftype_is_contig && bufsize <= fwr_size) {
+            ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+           if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte 
+                * that can be accessed in the fileview. */
+                fd->fp_ind = offset + bufsize;
+                if (bufsize == fwr_size) {
+                    do {
+                        st_index++;
+                        if (st_index == flat_file->count) {
+                            st_index = 0;
+                            n_filetypes++;
+                        }
+                    } while (flat_file->blocklens[st_index] == 0);
+                    fd->fp_ind = disp + flat_file->indices[st_index]
+                               + (ADIO_Offset)n_filetypes*filetype_extent;
+                }
+            }
+           fd->fp_sys_posn = -1;   /* set it to null. */ 
+#ifdef HAVE_STATUS_SET_BYTES
+           MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif 
+            return;
+        }
 
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
@@ -436,14 +473,15 @@
 	while (i < bufsize) {
 	    i += fwr_size;
 	    end_offset = off + fwr_size - 1;
-
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
+            j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+                j = (j+1) % flat_file->count;
+                n_filetypes += (j == 0) ? 1 : 0;
 	    }
 
-	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
+	    off = disp + flat_file->indices[j] + 
+		    (ADIO_Offset) n_filetypes*filetype_extent;
 	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
 	}
 
@@ -509,13 +547,14 @@
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by fwr_size. */
                 else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
+                                      (ADIO_Offset) n_filetypes*filetype_extent;
 		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
 		}
 	    }
@@ -552,10 +591,11 @@
 
 		if (size == fwr_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
+                   j = (j+1) % flat_file->count;
+                   n_filetypes += (j == 0) ? 1 : 0;
+                   while (flat_file->blocklens[j]==0) {
+                       j = (j+1) % flat_file->count;
+                       n_filetypes += (j == 0) ? 1 : 0;
 		    }
 
 		    off = disp + flat_file->indices[j] + 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am	2010-11-16 09:16:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -24,6 +24,7 @@
         ad_nfs.h \
         ad_nfs_done.c \
         ad_nfs_fcntl.c \
+        ad_nfs_features.c \
         ad_nfs_getsh.c \
         ad_nfs_hints.c \
         ad_nfs_iread.c \
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_NTFS_operations = {
     ADIOI_NTFS_Open, /* Open */
+	ADIOI_GEN_OpenColl, /* OpenColl */
     ADIOI_NTFS_ReadContig, /* ReadContig */
     ADIOI_NTFS_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -33,4 +34,5 @@
     ADIOI_NTFS_Flush, /* Flush */
     ADIOI_NTFS_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c	2010-11-15 15:03:31.000000000 +0100
@@ -60,7 +60,6 @@
         /* TODO: unsure how to handle this */    
         }
     }else{
-        MPIR_Nest_incr();
         mpi_errno = MPI_Grequest_complete(aio_req->req);
 	    if (mpi_errno != MPI_SUCCESS) {
 		    mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
@@ -69,7 +68,6 @@
 				    MPI_ERR_IO, "**mpi_grequest_complete",
 				    0);
 	    }
-        MPIR_Nest_decr();
     }
     return mpi_errno;
 }
@@ -111,16 +109,14 @@
                 aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes), 
                 FALSE)){
         	/* XXX: mark completed requests as 'done'*/
-	        MPIR_Nest_incr();
-	        mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req);
+            mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req);
     	    if (mpi_errno != MPI_SUCCESS) {
 	    	    mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
 				    MPIR_ERR_RECOVERABLE,
 				    "ADIOI_NTFS_aio_wait_fn", __LINE__,
 				    MPI_ERR_IO, "**mpi_grequest_complete",
 				    0);
-	        }
-	        MPIR_Nest_decr();
+            }
         }else{
             if(GetLastError() == ERROR_IO_INCOMPLETE){
             /* IO in progress */
@@ -146,7 +142,6 @@
 
 	MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); 
 
-	/* do i need to nest_incr/nest_decr  here? */
 	/* can never cancel so always true */ 
 	MPI_Status_set_cancelled(status, 0); 
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c	2010-11-16 09:16:33.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -13,6 +13,7 @@
 
 struct ADIOI_Fns_struct ADIO_PANFS_operations = {
     ADIOI_PANFS_Open, /* Open */
+    ADIOI_GEN_OpenColl,
     ADIOI_PANFS_ReadContig, /* ReadContig */
     ADIOI_PANFS_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -39,4 +40,5 @@
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_PANFS_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature,
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c	2010-11-16 09:16:33.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -36,7 +36,7 @@
         if (users_info != MPI_INFO_NULL) {
 	        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-            MPI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag) {
                 concurrent_write = strtoul(value,NULL,10);
@@ -46,10 +46,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_concurrent_write\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_concurrent_write", value); 
+	            ADIOI_Info_set(fd->info, "panfs_concurrent_write", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag) {
                 layout_type = strtoul(value,NULL,10);
@@ -59,10 +59,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_type\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_type", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_type", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag) {
                 layout_stripe_unit = strtoul(value,NULL,10);
@@ -72,10 +72,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_stripe_unit\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_stripe_unit", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
                 layout_parity_stripe_width = strtoul(value,NULL,10);
@@ -85,10 +85,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_width\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
                 layout_parity_stripe_depth = strtoul(value,NULL,10);
@@ -98,10 +98,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_depth\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag) {
                 layout_total_num_comps = strtoul(value,NULL,10);
@@ -111,10 +111,10 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_total_num_comps\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_total_num_comps", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", value); 
             }
 
-            MPI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, 
+            ADIOI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, 
                  value, &flag);
             if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE || layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
                 layout_visit_policy = strtoul(value,NULL,10);
@@ -124,7 +124,7 @@
                     FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_visit_policy\" must be the same on all processes\n");
                     MPI_Abort(MPI_COMM_WORLD, 1);
                 }
-	            MPI_Info_set(fd->info, "panfs_layout_visit_policy", value); 
+	            ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", value); 
             }
 
 	        ADIOI_Free(value);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c	2010-11-16 09:16:33.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -39,32 +39,32 @@
 
         *error_code = MPI_SUCCESS;
         value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-        MPI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_type = strtoul(value,NULL,10);
         }
-        MPI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_stripe_unit = strtoul(value,NULL,10);
         }
-        MPI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_total_num_comps = strtoul(value,NULL,10);
         }
-        MPI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_parity_stripe_width = strtoul(value,NULL,10);
         }
-        MPI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_parity_stripe_depth = strtoul(value,NULL,10);
         }
-        MPI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, 
+        ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL, 
                  value, &flag);
         if (flag) {
             layout_visit_policy = strtoul(value,NULL,10);
@@ -266,7 +266,7 @@
 	amode = amode | O_EXCL;
 
 	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-	MPI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag) {
         unsigned long int concurrent_write = strtoul(value,NULL,10);
@@ -291,41 +291,41 @@
         if (rc < 0)
         {
             /* Error - set layout type to unknown */
-	        MPI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
+	        ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
         }
         else 
         {
             ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
-            MPI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
+            ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
             if (file_query_args.layout.layout_is_valid == 1)
             {
                 switch (file_query_args.layout.agg_type)
                 {
                     case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
-                        MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
-                        MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                         break;
                     case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
-                        MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
-                        MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
-                        MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
-                        MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
-                        MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                         break;
                     case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
-                        MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
-                        MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
                         ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
-                        MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
+                        ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
                         break;
                 }
             }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c	2010-11-16 09:16:23.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -24,7 +24,7 @@
 	if (users_info != MPI_INFO_NULL) {
 	    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-	    MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_factor=atoi(value);
@@ -40,7 +40,7 @@
 		/* --END ERROR HANDLING-- */
 	    }
 
-	    MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_unit=atoi(value);
@@ -56,7 +56,7 @@
 		/* --END ERROR HANDLING-- */
 	    }
 
-	    MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		start_iodev=atoi(value);
@@ -119,15 +119,15 @@
 	       If so, mark it as true in fd->info and turn it on in 
 	       ADIOI_PFS_Open after the file is opened */
 
-	    MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag && (!strcmp(value, "true")))
-		MPI_Info_set(fd->info, "pfs_svr_buf", "true");
-	    else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+		ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
+	    else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
 
 	    ADIOI_Free(value);
 	}
-	else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+	else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
 	
 	/* set the values for collective I/O and data sieving parameters */
 	ADIOI_GEN_SetInfo(fd, users_info, error_code);
@@ -144,23 +144,23 @@
 	if (users_info != MPI_INFO_NULL) {
 	    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-	    MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) {
 		value_in_fd = (char *) 
                           ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-		MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
+		ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
 			 value_in_fd, &flag);
 		if (strcmp(value, value_in_fd)) {
 		    if (!strcmp(value, "true")) {
 			err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
 			if (!err) 
-			    MPI_Info_set(fd->info, "pfs_svr_buf", "true");
+			    ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
 		    }
 		    else {
 			err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE);
 			if (!err) 
-			    MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+			    ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
 		    }
 		}
 		ADIOI_Free(value_in_fd);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c	2010-11-16 09:16:23.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -49,11 +49,11 @@
            to ADIOI_PFS_SetInfo. Turn it on now, since we now have a 
            valid file descriptor. */
 
-	MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && (!strcmp(value, "true"))) {
 	    err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
-	    if (err) MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+	    if (err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
 	}
 
         /* get file striping information and set it in info */
@@ -61,13 +61,13 @@
 
 	if (!err) {
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sunitsize);
-	    MPI_Info_set(fd->info, "striping_unit", value);
+	    ADIOI_Info_set(fd->info, "striping_unit", value);
 
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sfactor);
-	    MPI_Info_set(fd->info, "striping_factor", value);
+	    ADIOI_Info_set(fd->info, "striping_factor", value);
 
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_start_sdir);
-	    MPI_Info_set(fd->info, "start_iodevice", value);
+	    ADIOI_Info_set(fd->info, "start_iodevice", value);
 	}
 	ADIOI_Free(value);
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c	2010-11-16 09:16:08.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c	2010-11-15 15:02:47.000000000 +0100
@@ -33,4 +33,5 @@
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_GEN_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_PIOFS_Feature, 
 };
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs: ad_piofs_features.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h	2010-11-16 09:16:08.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h	2010-11-15 15:02:47.000000000 +0100
@@ -35,4 +35,6 @@
 		       *error_code);
 void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
 
+void ADIOI_PIOFS_Feature(ADIO_File fd, int flag);
+
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c	2010-11-16 09:16:08.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -25,7 +25,7 @@
 	if (users_info != MPI_INFO_NULL) {
 	    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-	    MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_factor=atoi(value);
@@ -37,7 +37,7 @@
 		}
 	    }
 
-	    MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_unit=atoi(value);
@@ -49,7 +49,7 @@
 		}
 	    }
 
-	    MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		start_iodev=atoi(value);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c	2010-11-16 09:16:08.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -49,13 +49,13 @@
 
 	if (!err) {
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_bsu);
-	    MPI_Info_set(fd->info, "striping_unit", value);
+	    ADIOI_Info_set(fd->info, "striping_unit", value);
 
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_cells);
-	    MPI_Info_set(fd->info, "striping_factor", value);
+	    ADIOI_Info_set(fd->info, "striping_factor", value);
 
 	    ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_base_node);
-	    MPI_Info_set(fd->info, "start_iodevice", value);
+	    ADIOI_Info_set(fd->info, "start_iodevice", value);
 	}
 	ADIOI_Free(value);
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am	2010-11-16 09:16:08.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am	2010-11-15 15:03:30.000000000 +0100
@@ -25,6 +25,7 @@
         ad_piofs.c \
         ad_piofs.h \
         ad_piofs_fcntl.c \
+        ad_piofs_features.c \
         ad_piofs_hints.c \
         ad_piofs_open.c \
         ad_piofs_read.c \
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -33,4 +33,5 @@
     ADIOI_PVFS_Flush, /* Flush */
     ADIOI_PVFS_Resize, /* Resize */
     ADIOI_PVFS_Delete, /* Delete */
+    ADIOI_PVFS_Feature, /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -17,8 +17,8 @@
 	/* This must be part of the open call. can set striping parameters 
            if necessary. */ 
 	MPI_Info_create(&(fd->info));
-	MPI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
-	MPI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
+	ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
+	ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
 	fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
 	fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
 	
@@ -27,7 +27,7 @@
 	if (users_info != MPI_INFO_NULL) {
 	    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-	    MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_factor=atoi(value);
@@ -41,10 +41,10 @@
 		    return;
 		    /* --END ERROR HANDLING-- */
 		}
-		else MPI_Info_set(fd->info, "striping_factor", value);
+		else ADIOI_Info_set(fd->info, "striping_factor", value);
 	    }
 
-	    MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		str_unit=atoi(value);
@@ -58,10 +58,10 @@
 		    return;
 		    /* --END ERROR HANDLING-- */
 		}
-		else MPI_Info_set(fd->info, "striping_unit", value);
+		else ADIOI_Info_set(fd->info, "striping_unit", value);
 	    }
 
-	    MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
+	    ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	    if (flag) {
 		start_iodev=atoi(value);
@@ -75,25 +75,25 @@
 		    return;
 		    /* --END ERROR HANDLING-- */
 		}
-		else MPI_Info_set(fd->info, "start_iodevice", value);
+		else ADIOI_Info_set(fd->info, "start_iodevice", value);
 	    }
 
-	    MPI_Info_get(users_info, "romio_pvfs_listio_read",
+	    ADIOI_Info_get(users_info, "romio_pvfs_listio_read",
 			 MPI_MAX_INFO_VAL,
 			 value, &flag);
 	    if (flag) {
 		if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) 
 		{
-		    MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
+		    ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
 		    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE;
 		} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) 
 		{
-		    MPI_Info_set(fd->info , "romio_pvfs_listio_read", value);
+		    ADIOI_Info_set(fd->info , "romio_pvfs_listio_read", value);
 		    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
 		}
 		else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) 
 		{
-		    MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
+		    ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
 		    fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO;
 		}
 		tmp_val = fd->hints->fs_hints.pvfs.listio_read;
@@ -107,21 +107,21 @@
 		    /* --END ERROR HANDLING-- */
 		}
 	    }
-	    MPI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
+	    ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
 			 value, &flag);
 	    if (flag) {
 		if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE")) 
 		{
-		    MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+		    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
 		    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE;
 		} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE")) 
 		{
-		    MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+		    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
 		    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
 		}
 		else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC")) 
 		{
-		    MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+		    ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
 		    fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO;
 		}
 		tmp_val = fd->hints->fs_hints.pvfs.listio_write;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -37,15 +37,15 @@
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-    MPI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, 
+    ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL, 
 		 value, &flag);
     if (flag && (atoi(value) > 0)) pstat.pcount = atoi(value);
 
-    MPI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, 
+    ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL, 
 		 value, &flag);
     if (flag && (atoi(value) > 0)) pstat.ssize = atoi(value);
 
-    MPI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, 
+    ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL, 
 		 value, &flag);
     if (flag && (atoi(value) >= 0)) pstat.base = atoi(value);
 
@@ -71,11 +71,11 @@
     if (fd->fd_sys != -1) {
 	pvfs_ioctl(fd->fd_sys, GETMETA, &pstat);
 	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.pcount);
-	MPI_Info_set(fd->info, "striping_factor", value);
+	ADIOI_Info_set(fd->info, "striping_factor", value);
 	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.ssize);
-	MPI_Info_set(fd->info, "striping_unit", value);
+	ADIOI_Info_set(fd->info, "striping_unit", value);
 	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.base);
-	MPI_Info_set(fd->info, "start_iodevice", value);
+	ADIOI_Info_set(fd->info, "start_iodevice", value);
     }
 
     ADIOI_Free(value);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -43,7 +43,8 @@
 #ifdef ADIOI_MPE_LOGGING
         MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
 #endif
-	fd->fp_sys_posn = offset + err;
+	if (err>0)
+		fd->fp_sys_posn = offset + err;
 	/* individual file pointer not updated */        
     }
     else {  /* read from curr. location of ind. file pointer */
@@ -63,7 +64,8 @@
 #ifdef ADIOI_MPE_LOGGING
         MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
 #endif
-	fd->fp_ind += err; 
+	if (err > 0)
+		fd->fp_ind += err; 
 	fd->fp_sys_posn = fd->fp_ind;
     }         
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -43,7 +43,8 @@
 #ifdef ADIOI_MPE_LOGGING
         MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
 #endif
-	fd->fp_sys_posn = offset + err;
+	if (err > 0)
+		fd->fp_sys_posn = offset + err;
 	/* individual file pointer not updated */        
     }
     else { /* write from curr. location of ind. file pointer */
@@ -63,7 +64,8 @@
 #ifdef ADIOI_MPE_LOGGING
         MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
 #endif
-	fd->fp_ind += err;
+	if (err > 0)
+		fd->fp_ind += err;
 	fd->fp_sys_posn = fd->fp_ind;
     }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c	2010-11-15 15:03:31.000000000 +0100
@@ -17,7 +17,6 @@
 #define READ 0
 #define WRITE 1
 
-#ifdef ROMIO_HAVE_WORKING_AIO
 static int ADIOI_PVFS2_greq_class = 0;
 int ADIOI_PVFS2_aio_free_fn(void *extra_state);
 int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status);
@@ -168,12 +167,10 @@
     aio_req = (ADIOI_AIO_Request *)extra_state;
 
     /* BUG: cannot PVFS_sys_testsome: does not work for a specific request */
-    ret = PVFS_sys_wait(aio_req->op_id, __FUNCTION__, &error);
+    ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_aio_poll_fn", &error);
     if (ret == 0) {
 	aio_req->nbytes = aio_req->resp_io.total_completed;
-	MPIR_Nest_incr();
 	MPI_Grequest_complete(aio_req->req);
-	MPIR_Nest_decr();
 	return MPI_SUCCESS;
     } else
 	return MPI_UNDEFINED; /* TODO: what's this error? */
@@ -186,7 +183,7 @@
 
     ADIOI_AIO_Request **aio_reqlist;
     PVFS_sys_op_id *op_id_array;
-    int i,j, greq_count;
+    int i,j, greq_count, completed_count=0;
     int *error_array;
 
     aio_reqlist = (ADIOI_AIO_Request **)array_of_states;
@@ -195,25 +192,27 @@
     error_array = (int *)ADIOI_Calloc(count, sizeof(int));
     greq_count = count;
 
+
     /* PVFS-2.6: testsome actually tests all requests and fills in op_id_array
      * with the ones that have completed.  count is an in/out parameter.
      * returns with the number of completed operations.  what a mess! */
-    PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);
-    for (i=0; i< count; i++) {
-	for (j=0; j<greq_count; j++) {
-	    if (op_id_array[i] == aio_reqlist[j]->op_id) {
-		aio_reqlist[j]->nbytes = 
-		    aio_reqlist[j]->resp_io.total_completed;
-		MPIR_Nest_incr();
-		MPI_Grequest_complete(aio_reqlist[j]->req);
-		MPIR_Nest_decr();
+    while (completed_count < greq_count ) {
+	count = greq_count;
+	PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);
+	completed_count += count;
+	for (i=0; i< count; i++) {
+	    for (j=0; j<greq_count; j++) {
+		if (op_id_array[i] == aio_reqlist[j]->op_id) {
+		    aio_reqlist[j]->nbytes = 
+			aio_reqlist[j]->resp_io.total_completed;
+		    MPI_Grequest_complete(aio_reqlist[j]->req);
+		}
 	    }
 	}
     }
     return MPI_SUCCESS; /* TODO: no idea how to deal with errors */
 }
 
-#endif
 
 /*
  * vim: ts=8 sts=4 sw=4 noexpandtab 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
     ADIOI_PVFS2_Open, /* Open */
+    ADIOI_SCALEABLE_OpenColl, /* OpenColl */
     ADIOI_PVFS2_ReadContig, /* ReadContig */
     ADIOI_PVFS2_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -22,13 +23,8 @@
     ADIOI_PVFS2_ReadStrided, /* ReadStrided */
     ADIOI_PVFS2_WriteStrided, /* WriteStrided */
     ADIOI_PVFS2_Close, /* Close */
-#ifdef ROMIO_HAVE_WORKING_AIO
     ADIOI_PVFS2_IReadContig, /* IreadContig */
     ADIOI_PVFS2_IWriteContig, /* IwriteContig */
-#else
-    ADIOI_FAKE_IreadContig, /* IreadContig */
-    ADIOI_FAKE_IwriteContig, /* IwriteContig */
-#endif
     ADIOI_FAKE_IODone, /* ReadDone */
     ADIOI_FAKE_IODone, /* WriteDone */
     ADIOI_FAKE_IOComplete, /* ReadComplete */
@@ -38,6 +34,7 @@
     ADIOI_PVFS2_Flush, /* Flush */
     ADIOI_PVFS2_Resize, /* Resize */
     ADIOI_PVFS2_Delete, /* Delete */
+    ADIOI_PVFS2_Feature, 
 };
 
 /* 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c	2010-11-15 15:02:47.000000000 +0100
@@ -42,6 +42,7 @@
 {
     int error_code;
     ADIOI_PVFS2_End(&error_code);
+    MPI_Keyval_free(&keyval);
     return error_code;
 }
 
@@ -81,7 +82,7 @@
 		      &ADIOI_PVFS2_Initialized, (void *)0); 
     /* just like romio does, we make a dummy attribute so we 
      * get cleaned up */
-    MPI_Attr_put(MPI_COMM_WORLD, ADIOI_PVFS2_Initialized, (void *)0);
+    MPI_Attr_put(MPI_COMM_SELF, ADIOI_PVFS2_Initialized, (void *)0);
 }
 
 void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs)
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_features.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h	2010-11-15 15:02:47.000000000 +0100
@@ -17,7 +17,6 @@
 #include "pvfs2-compat.h"
 #endif
 
-
 void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code);
 void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code);
 void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, 
@@ -42,6 +41,8 @@
 void ADIOI_PVFS2_Delete(char *filename, int *error_code);
 void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
 void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+int ADIOI_PVFS2_Feature(ADIO_File fd, int flag);
+
 void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count, 
 			    MPI_Datatype datatype, int file_ptr_type,
 			    ADIO_Offset offset, MPI_Request *request,
@@ -54,4 +55,12 @@
 			    MPI_Datatype datatype, int file_ptr_type,
 			    ADIO_Offset offset, MPI_Request *request,
 			    int flag, int *error_code);
+void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
+		       MPI_Datatype datatype, int file_ptr_type,
+		       ADIO_Offset offset, ADIO_Status *status, int
+		       *error_code);
+void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
+		       MPI_Datatype datatype, int file_ptr_type,
+		       ADIO_Offset offset, ADIO_Status *status, int
+		       *error_code);
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -17,20 +17,37 @@
     if ((fd->info) == MPI_INFO_NULL) {
 	/* part of the open call */
 	MPI_Info_create(&(fd->info));
-	MPI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
+	ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
 	fd->hints->fs_hints.pvfs2.debugmask = 0;
 
-	MPI_Info_set(fd->info, "striping_factor", "0");
+	ADIOI_Info_set(fd->info, "striping_factor", "0");
 	fd->hints->striping_factor = 0;
 
-	MPI_Info_set(fd->info, "striping_unit", "0");
+	ADIOI_Info_set(fd->info, "striping_unit", "0");
 	fd->hints->striping_unit = 0;
+
+	/* disable the aggressive strided optimizations by default */
+        ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", "disable");
+        ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", "disable");
+        fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
+        fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
+
+        ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", "disable");
+        ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", "disable");
+        fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
+        fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
+
+        ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", "disable");
+        ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", "disable");
+        fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
+        fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
+
 	
 	/* any user-provided hints? */
 	if (users_info != MPI_INFO_NULL) {
 	    /* pvfs2 debugging */
 	    value = (char *) ADIOI_Malloc( (MPI_MAX_INFO_VAL+1)*sizeof(char));
-	    MPI_Info_get(users_info, "romio_pvfs2_debugmask", 
+	    ADIOI_Info_get(users_info, "romio_pvfs2_debugmask", 
 		    MPI_MAX_INFO_VAL, value, &flag);
 	    if (flag) {
 		tmp_value = fd->hints->fs_hints.pvfs2.debugmask = 
@@ -46,11 +63,11 @@
 		}
 		/* --END ERROR HANDLING-- */
 		
-		MPI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
+		ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
 	    }
 
 	    /* the striping factor */
-	    MPI_Info_get(users_info, "striping_factor", 
+	    ADIOI_Info_get(users_info, "striping_factor", 
 		    MPI_MAX_INFO_VAL, value, &flag);
 	    if (flag) {
 		tmp_value = fd->hints->striping_factor =  atoi(value);
@@ -65,11 +82,11 @@
 		}
 		/* --END ERROR HANDLING-- */
 		
-		MPI_Info_set(fd->info, "striping_factor", value);
+		ADIOI_Info_set(fd->info, "striping_factor", value);
 	    }
 
 	    /* the striping unit */
-	    MPI_Info_get(users_info, "striping_unit",
+	    ADIOI_Info_get(users_info, "striping_unit",
 		    MPI_MAX_INFO_VAL, value, &flag);
 	    if (flag) {
 		tmp_value = fd->hints->striping_unit = atoi(value);
@@ -83,16 +100,167 @@
 		}
 		/* --END ERROR HANDLING-- */
 
-		MPI_Info_set(fd->info, "striping_unit", value);
+		ADIOI_Info_set(fd->info, "striping_unit", value);
 	    }
 
 	    /* distribution name */
-	    MPI_Info_get(users_info, "romio_pvfs2_distribution_name",
+	    ADIOI_Info_get(users_info, "romio_pvfs2_distribution_name",
 		    MPI_MAX_INFO_VAL, value, &flag);
 	    if (flag) {
 	    }
 
+
+	    /* POSIX read */
+            ADIOI_Info_get(users_info, "romio_pvfs2_posix_read",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value);
+                    fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_posix_read", value);
+                    fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.posix_read;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.posix_read) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "posix_read",
+                                                       error_code);
+                    return;
+                }
+            }
+
+            /* POSIX write */
+            ADIOI_Info_get(users_info, "romio_pvfs2_posix_write",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value);
+                    fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_posix_write", value);
+                    fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.posix_write;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.posix_write) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "posix_write",
+                                                       error_code);
+                    return;
+                }
+            }
+
+	    /* Datatype read */
+            ADIOI_Info_get(users_info, "romio_pvfs2_dtype_read",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value);
+                    fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_read", value);
+                    fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.dtype_read;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_read) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "dtype_read",
+                                                       error_code);
+                    return;
+                }
+            }
+
+            /* Datatype write */
+            ADIOI_Info_get(users_info, "romio_pvfs2_dtype_write",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value);
+                    fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_write", value);
+                    fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.dtype_write;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_write) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "dtype_write",
+                                                       error_code);
+                    return;
+                }
+            }
+
+	    /* Listio read */
+            ADIOI_Info_get(users_info, "romio_pvfs2_listio_read",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value);
+                    fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_listio_read", value);
+                    fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.listio_read;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.listio_read) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "listio_read",
+                                                       error_code);
+                    return;
+                }
+            }
+
+            /* Listio write */
+            ADIOI_Info_get(users_info, "romio_pvfs2_listio_write",
+                         MPI_MAX_INFO_VAL, value, &flag);
+            if (flag) {
+                if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+                {
+                    ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value);
+                    fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_ENABLE;
+                }
+                else if ( !strcmp(value, "disable") ||
+                          !strcmp(value, "DISABLE"))
+                {
+                    ADIOI_Info_set(fd->info , "romio_pvfs2_listio_write", value);
+                    fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
+                }
+                tmp_value = fd->hints->fs_hints.pvfs2.listio_write;
+                MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+                if (tmp_value != fd->hints->fs_hints.pvfs2.listio_write) {
+                    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                       "listio_write",
+                                                       error_code);
+                    return;
+                }
+            }
             ADIOI_Free(value);
+
 	}
     }
     /* set the values for collective I/O and data sieving parameters */
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_io_dtype.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_io.h
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_io_list.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,7 +8,7 @@
 #include "adio.h"
 #include "adio_extern.h"
 #include "ad_pvfs2.h"
-
+#include "ad_pvfs2_io.h"
 #include "ad_pvfs2_common.h"
 
 void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, 
@@ -92,899 +92,77 @@
     return;
 }
 
+static int ADIOI_PVFS2_ReadStridedListIO(ADIO_File fd, void *buf, int count,
+				  MPI_Datatype datatype, int file_ptr_type,
+				  ADIO_Offset offset, ADIO_Status *status,
+				  int *error_code)
+{   /* Thin wrapper: forwards every argument to ADIOI_PVFS2_StridedListIO with READ as the final argument and returns its result. */
+    return ADIOI_PVFS2_StridedListIO(fd, buf, count,
+				     datatype, file_ptr_type,
+				     offset, status,
+				     error_code, READ);
+}
+
+static int ADIOI_PVFS2_ReadStridedDtypeIO(ADIO_File fd, void *buf, int count,
+				   MPI_Datatype datatype, int file_ptr_type,
+				   ADIO_Offset offset, ADIO_Status *status, 
+				   int *error_code)
+{   /* Thin wrapper: forwards every argument to ADIOI_PVFS2_StridedDtypeIO with READ as the final argument; ADIOI_PVFS2_ReadStrided treats a nonzero return as failure. */
+    return ADIOI_PVFS2_StridedDtypeIO(fd, buf, count,
+				      datatype, file_ptr_type,
+				      offset, status, error_code,
+				      READ);
+}
 
 void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
 			     MPI_Datatype datatype, int file_ptr_type,
 			     ADIO_Offset offset, ADIO_Status *status, int
 			     *error_code)
 {
-    /* offset is in units of etype relative to the filetype. */
-    ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k,  brd_size, frd_size=0, st_index=0;
-    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size;
-    MPI_Aint filetype_extent, buftype_extent; 
-    int buf_count, buftype_is_contig, filetype_is_contig;
-    ADIO_Offset off, disp, start_off, initial_off;
-    int flag, st_frd_size, st_n_filetypes;
-
-    int mem_list_count, file_list_count;
-    PVFS_size *mem_offsets;
-    int64_t *file_offsets;
-    int *mem_lengths;
-    int32_t *file_lengths;
-    int total_blks_to_read;
-
-    int max_mem_list, max_file_list;
-
-    int b_blks_read;
-    int f_data_read;
-    int size_read=0, n_read_lists, extra_blks;
-
-    int end_brd_size, end_frd_size;
-    int start_k, start_j, new_file_read, new_buffer_read;
-    int start_mem_offset;
-    PVFS_Request mem_req, file_req;
-    ADIOI_PVFS2_fs * pvfs_fs;
-    PVFS_sysresp_io resp_io;
-    int err_flag=0;
-    MPI_Offset total_bytes_read = 0;
-    static char myname[] = "ADIOI_PVFS2_ReadStrided";
-
-#define MAX_ARRAY_SIZE 64
-
-    *error_code = MPI_SUCCESS;  /* changed below if error */
-
-    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
-    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
-    /* the HDF5 tests showed a bug in this list processing code (see many many
-     * lines down below).  We added a workaround, but common HDF5 file types
-     * are actually contiguous and do not need the expensive workarond */
-    if (!filetype_is_contig) {
-	flat_file = ADIOI_Flatlist;
-	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-	if (flat_file->count == 1 && !buftype_is_contig)
-	    filetype_is_contig = 1;
-    }
-
-    MPI_Type_size(fd->filetype, &filetype_size);
-    if ( ! filetype_size ) {
-	*error_code = MPI_SUCCESS; 
+    /* four ways (to date) that we can carry out strided i/o accesses:
+     * - naive posix
+     * - 'true' Datatype (from avery)
+     * - new List I/O (from avery)
+     * - classic List I/O  (the one that's always been in ROMIO)
+     * I imagine we'll keep Datatype as an optional optimization, and after a
+     * release or two promote it to the default 
+     */
+    int ret = -1;
+
+    if (fd->hints->fs_hints.pvfs2.posix_read == ADIOI_HINT_ENABLE) {
+	ADIOI_GEN_ReadStrided(fd, buf, count, datatype, 
+		file_ptr_type, offset, status, error_code);
 	return;
     }
-
-    MPI_Type_extent(fd->filetype, &filetype_extent);
-    MPI_Type_size(datatype, &buftype_size);
-    MPI_Type_extent(datatype, &buftype_extent);
-    etype_size = fd->etype_size;
-
-    bufsize = buftype_size * count;
-    
-    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
-
-    if (!buftype_is_contig && filetype_is_contig) {
-
-/* noncontiguous in memory, contiguous in file. */
-        int64_t file_offsets;
-	int32_t file_lengths;
-
-	ADIOI_Flatten_datatype(datatype);
-	flat_buf = ADIOI_Flatlist;
-	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
-	off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-	    fd->disp + etype_size * offset;
-
-	file_list_count = 1;
-	file_offsets = off;
-	file_lengths = 0;
-	total_blks_to_read = count*flat_buf->count;
-	b_blks_read = 0;
-
-	/* allocate arrays according to max usage */
-	if (total_blks_to_read > MAX_ARRAY_SIZE)
-	    mem_list_count = MAX_ARRAY_SIZE;
-	else mem_list_count = total_blks_to_read;
-	mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
-	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
-
-	/* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
-
-	j = 0;
-	/* step through each block in memory, filling memory arrays */
-	while (b_blks_read < total_blks_to_read) {
-	    for (i=0; i<flat_buf->count; i++) {
-		mem_offsets[b_blks_read % MAX_ARRAY_SIZE] = 
-		    /* TODO: fix this compiler warning */
-		    ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
-		mem_lengths[b_blks_read % MAX_ARRAY_SIZE] = 
-		    flat_buf->blocklens[i];
-		file_lengths += flat_buf->blocklens[i];
-		b_blks_read++;
-		if (!(b_blks_read % MAX_ARRAY_SIZE) ||
-		    (b_blks_read == total_blks_to_read)) {
-
-		    /* in the case of the last read list call,
-		       adjust mem_list_count */
-		    if (b_blks_read == total_blks_to_read) {
-		        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
-			/* in case last read list call fills max arrays */
-			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
-		    }
-		    err_flag = PVFS_Request_hindexed(mem_list_count, 
-			    mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
-		    if (err_flag < 0) break;
-		    err_flag = PVFS_Request_contiguous(file_lengths, 
-			    PVFS_BYTE, &file_req);
-		    if (err_flag < 0) break;
-#ifdef ADIOI_MPE_LOGGING
-                    MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
-#endif
-		    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 
-			    file_offsets, PVFS_BOTTOM, mem_req, 
-			    &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-                    MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
-		    /* --BEGIN ERROR HANDLING-- */
-		    if (err_flag != 0) {
-			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-							   MPIR_ERR_RECOVERABLE,
-							   myname, __LINE__,
-							   ADIOI_PVFS2_error_convert(err_flag),
-							   "Error in PVFS_sys_read", 0);
-			goto error_state;
-		    }
-		    PVFS_Request_free(&mem_req);
-		    PVFS_Request_free(&file_req);
-		    total_bytes_read += resp_io.total_completed;
-		    /* --END ERROR HANDLING-- */
-		  
-		    /* in the case of error or the last read list call, 
-		     * leave here */
-		    if (err_flag || b_blks_read == total_blks_to_read) break;
-
-		    file_offsets += file_lengths;
-		    file_lengths = 0;
-		} 
-	    } /* for (i=0; i<flat_buf->count; i++) */
-	    j++;
-	} /* while (b_blks_read < total_blks_to_read) */
-	ADIOI_Free(mem_offsets);
-	ADIOI_Free(mem_lengths);
-
-        if (file_ptr_type == ADIO_INDIVIDUAL) 
-	    fd->fp_ind += total_bytes_read;
-
-	fd->fp_sys_posn = -1;  /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
-	MPIR_Status_set_bytes(status, datatype, bufsize);
-	/* This isa temporary way of filling in status.  The right way is to
-	   keep tracke of how much data was actually read adn placed in buf
-	   by ADIOI_BUFFERED_READ. */
-#endif
-	ADIOI_Delete_flattened(datatype);
-
+    if (fd->hints->fs_hints.pvfs2.dtype_read == ADIOI_HINT_ENABLE) {
+        ret = ADIOI_PVFS2_ReadStridedDtypeIO(fd, buf, count,
+                                             datatype, file_ptr_type,
+                                             offset, status, error_code);
+
+        /* Fall back to list I/O if datatype I/O didn't work */
+        if (ret != 0)
+        {
+            fprintf(stderr,
+                    "Falling back to list I/O since datatype I/O failed\n");
+            ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count,
+                                                datatype, file_ptr_type,
+                                                offset, status, error_code);
+        }
+        return;
+    }
+    if (fd->hints->fs_hints.pvfs2.listio_read == ADIOI_HINT_ENABLE) {
+	ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count, datatype,
+		file_ptr_type, offset, status, error_code);
 	return;
-    } /* if (!buftype_is_contig && filetype_is_contig) */
-
-    /* know file is noncontiguous from above */
-    /* noncontiguous in file */
-
-    /* filetype already flattened in ADIO_Open */
-    flat_file = ADIOI_Flatlist;
-    while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-
-    disp = fd->disp;
-    initial_off = offset;
-
-
-    /* for each case - ADIO_Individual pointer or explicit, find the file
-       offset in bytes (offset), n_filetypes (how many filetypes into
-       file to start), frd_size (remaining amount of data in present
-       file block), and st_index (start point in terms of blocks in
-       starting filetype) */
-    if (file_ptr_type == ADIO_INDIVIDUAL) {
-        offset = fd->fp_ind; /* in bytes */
-	n_filetypes = -1;
-	flag = 0;
-	while (!flag) {
-	    n_filetypes++;
-	    for (i=0; i<flat_file->count; i++) {
-	        if (disp + flat_file->indices[i] + 
-		    ((ADIO_Offset) n_filetypes)*filetype_extent +
-		    flat_file->blocklens[i]  >= offset) {
-		    st_index = i;
-		    frd_size = (int) (disp + flat_file->indices[i] + 
-				    ((ADIO_Offset) n_filetypes)*filetype_extent
-				      + flat_file->blocklens[i] - offset);
-		    flag = 1;
-		    break;
-		}
-	    }
-	} /* while (!flag) */
-    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
-    else {
-        n_etypes_in_filetype = filetype_size/etype_size;
-	n_filetypes = (int) (offset / n_etypes_in_filetype);
-	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
-	size_in_filetype = etype_in_filetype * etype_size;
-	
-	sum = 0;
-	for (i=0; i<flat_file->count; i++) {
-	    sum += flat_file->blocklens[i];
-	    if (sum > size_in_filetype) {
-	        st_index = i;
-		frd_size = sum - size_in_filetype;
-		abs_off_in_filetype = flat_file->indices[i] +
-		    size_in_filetype - (sum - flat_file->blocklens[i]);
-		break;
-	    }
-	}
-	
-	/* abs. offset in bytes in the file */
-	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + 
-	    abs_off_in_filetype;
-    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
-
-    start_off = offset;
-    st_frd_size = frd_size;
-    st_n_filetypes = n_filetypes;
-    
-    if (buftype_is_contig && !filetype_is_contig) {
-
-/* contiguous in memory, noncontiguous in file. should be the most
-   common case. */
-
-        int mem_lengths;
-	char *mem_offsets;
-	
-	i = 0;
-	j = st_index;
-	n_filetypes = st_n_filetypes;
-	
-	mem_list_count = 1;
-	
-	/* determine how many blocks in file to read */
-	f_data_read = ADIOI_MIN(st_frd_size, bufsize);
-	total_blks_to_read = 1;
-	if (j < (flat_file->count-1)) j++;
-	else {
-	    j = 0;
-	    n_filetypes++;
-	}
-	while (f_data_read < bufsize) {
-	    f_data_read += flat_file->blocklens[j];
-	    total_blks_to_read++;
-	    if (j<(flat_file->count-1)) j++;
-	    else j = 0;	
-	}
-      
-	j = st_index;
-	n_filetypes = st_n_filetypes;
-	n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
-	extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
-	
-	mem_offsets = buf;
-	mem_lengths = 0;
-	
-	/* if at least one full readlist, allocate file arrays
-	   at max array size and don't free until very end */
-	if (n_read_lists) {
-	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
-						  sizeof(int64_t));
-	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
-						  sizeof(int32_t));
-	}
-	/* if there's no full readlist allocate file arrays according
-	   to needed size (extra_blks) */
-	else {
-	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
-						  sizeof(int64_t));
-	    file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
-						  sizeof(int32_t));
-	}
-	
-	/* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
-	for (i=0; i<n_read_lists; i++) {
-	    file_list_count = MAX_ARRAY_SIZE;
-	    if(!i) {
-	        file_offsets[0] = offset;
-		file_lengths[0] = st_frd_size;
-		mem_lengths = st_frd_size;
-	    }
-	    for (k=0; k<MAX_ARRAY_SIZE; k++) {
-	        if (i || k) {
-		    file_offsets[k] = disp + 
-			((ADIO_Offset)n_filetypes)*filetype_extent
-		      + flat_file->indices[j];
-		    file_lengths[k] = flat_file->blocklens[j];
-		    mem_lengths += file_lengths[k];
-		}
-		if (j<(flat_file->count - 1)) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
-	    err_flag = PVFS_Request_contiguous(mem_lengths, 
-					       PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_contiguous (memory)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-					     file_offsets, PVFS_BYTE,
-					     &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (file)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    /* PVFS_Request_hindexed already expresses the offsets into the
-	     * file, so we should not pass in an offset if we are using
-	     * hindexed for the file type */
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, 
-				     mem_offsets, mem_req,
-				     &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_read", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDING-- */
-	    PVFS_Request_free(&mem_req);
-	    PVFS_Request_free(&file_req);
-
-	    total_bytes_read += resp_io.total_completed;
-
-	    mem_offsets += mem_lengths;
-	    mem_lengths = 0;
-	} /* for (i=0; i<n_read_lists; i++) */
-
-	/* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
-	if (extra_blks) {
-	    file_list_count = extra_blks;
-	    if(!i) {
-	        file_offsets[0] = offset;
-		file_lengths[0] = st_frd_size;
-	    }
-	    for (k=0; k<extra_blks; k++) {
-	        if(i || k) {
-		    file_offsets[k] = disp + 
-			((ADIO_Offset)n_filetypes)*filetype_extent +
-			flat_file->indices[j];
-		    if (k == (extra_blks - 1)) {
-		        file_lengths[k] = bufsize - (int32_t) mem_lengths
-			  - (int32_t) mem_offsets + (int32_t)  buf;
-		    }
-		    else file_lengths[k] = flat_file->blocklens[j];
-		} /* if(i || k) */
-		mem_lengths += file_lengths[k];
-		if (j<(flat_file->count - 1)) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (k=0; k<extra_blks; k++) */
-	    err_flag = PVFS_Request_contiguous(mem_lengths,
-					       PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_contiguous (memory)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-		    file_offsets, PVFS_BYTE, &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (file)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    /* as above, use 0 for 'offset' when using hindexed file type */
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, 
-		    mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_read", 0);		
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-	    PVFS_Request_free(&mem_req);
-	    PVFS_Request_free(&file_req);
-	    total_bytes_read += resp_io.total_completed;
-	}
     }
-    else {
-/* noncontiguous in memory as well as in file */
-      
-        ADIOI_Flatten_datatype(datatype);
-	flat_buf = ADIOI_Flatlist;
-	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
-	size_read = 0;
-	n_filetypes = st_n_filetypes;
-	frd_size = st_frd_size;
-	brd_size = flat_buf->blocklens[0];
-	buf_count = 0;
-	start_mem_offset = 0;
-	start_k = k = 0;
-	start_j = st_index;
-	max_mem_list = 0;
-	max_file_list = 0;
-
-	/* run through and file max_file_list and max_mem_list so that you 
-	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
-	   if possible */
-
-	while (size_read < bufsize) {
-	    k = start_k;
-	    new_buffer_read = 0;
-	    mem_list_count = 0;
-	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		   (new_buffer_read < bufsize-size_read)) {
-	        /* find mem_list_count and file_list_count such that both are
-		   less than MAX_ARRAY_SIZE, the sum of their lengths are
-		   equal, and the sum of all the data read and data to be
-		   read in the next immediate read list is less than
-		   bufsize */
-	        if(mem_list_count) {
-		    if((new_buffer_read + flat_buf->blocklens[k] + 
-			size_read) > bufsize) {
-		        end_brd_size = new_buffer_read + 
-			    flat_buf->blocklens[k] - (bufsize - size_read);
-			new_buffer_read = bufsize - size_read;
-		    }
-		    else {
-		        new_buffer_read += flat_buf->blocklens[k];
-			end_brd_size = flat_buf->blocklens[k];
-		    }
-		}
-		else {
-		    if (brd_size > (bufsize - size_read)) {
-		        new_buffer_read = bufsize - size_read;
-			brd_size = new_buffer_read;
-		    }
-		    else new_buffer_read = brd_size;
-		}
-		mem_list_count++;
-		k = (k + 1)%flat_buf->count;
-	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-	       (new_buffer_read < bufsize-size_read)) */
-	    j = start_j;
-	    new_file_read = 0;
-	    file_list_count = 0;
-	    while ((file_list_count < MAX_ARRAY_SIZE) && 
-		   (new_file_read < new_buffer_read)) {
-	        if(file_list_count) {
-		    if((new_file_read + flat_file->blocklens[j]) > 
-		       new_buffer_read) {
-		        end_frd_size = new_buffer_read - new_file_read;
-			new_file_read = new_buffer_read;
-			j--;
-		    }
-		    else {
-		        new_file_read += flat_file->blocklens[j];
-			end_frd_size = flat_file->blocklens[j];
-		    }
-		}
-		else {
-		    if (frd_size > new_buffer_read) {
-		        new_file_read = new_buffer_read;
-			frd_size = new_file_read;
-		    }
-		    else new_file_read = frd_size;
-		}
-		file_list_count++;
-		if (j < (flat_file->count - 1)) j++;
-		else j = 0;
-		
-		k = start_k;
-		if ((new_file_read < new_buffer_read) && 
-		    (file_list_count == MAX_ARRAY_SIZE)) {
-		    new_buffer_read = 0;
-		    mem_list_count = 0;
-		    while (new_buffer_read < new_file_read) {
-		        if(mem_list_count) {
-			    if((new_buffer_read + flat_buf->blocklens[k]) >
-			       new_file_read) {
-			        end_brd_size = new_file_read - new_buffer_read;
-				new_buffer_read = new_file_read;
-				k--;
-			    }
-			    else {
-			        new_buffer_read += flat_buf->blocklens[k];
-				end_brd_size = flat_buf->blocklens[k];
-			    }
-			}
-			else {
-			    new_buffer_read = brd_size;
-			    if (brd_size > (bufsize - size_read)) {
-			        new_buffer_read = bufsize - size_read;
-				brd_size = new_buffer_read;
-			    }
-			}
-			mem_list_count++;
-			k = (k + 1)%flat_buf->count;
-		    } /* while (new_buffer_read < new_file_read) */
-		} /* if ((new_file_read < new_buffer_read) && (file_list_count
-		     == MAX_ARRAY_SIZE)) */
-	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		 (new_buffer_read < bufsize-size_read)) */
-
-	    /*  fakes filling the readlist arrays of lengths found above  */
-	    k = start_k;
-	    j = start_j;
-	    for (i=0; i<mem_list_count; i++) {	     
-		if(i) {
-		    if (i == (mem_list_count - 1)) {
-			if (flat_buf->blocklens[k] == end_brd_size)
-			    brd_size = flat_buf->blocklens[(k+1)%
-							  flat_buf->count];
-			else {
-			    brd_size = flat_buf->blocklens[k] - end_brd_size;
-			    k--;
-			    buf_count--;
-			}
-		    }
-		}
-		buf_count++;
-		k = (k + 1)%flat_buf->count;
-	    } /* for (i=0; i<mem_list_count; i++) */
-	    for (i=0; i<file_list_count; i++) {
-		if (i) {
-		    if (i == (file_list_count - 1)) {
-			if (flat_file->blocklens[j] == end_frd_size)
-			    frd_size = flat_file->blocklens[(j+1)%
-							  flat_file->count];   
-			else {
-			    frd_size = flat_file->blocklens[j] - end_frd_size;
-			    j--;
-			}
-		    }
-		}
-		if (j < flat_file->count - 1) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (i=0; i<file_list_count; i++) */
-	    size_read += new_buffer_read;
-	    start_k = k;
-	    start_j = j;
-	    if (max_mem_list < mem_list_count)
-	        max_mem_list = mem_list_count;
-	    if (max_file_list < file_list_count)
-	        max_file_list = file_list_count;
-	} /* while (size_read < bufsize) */
-
-	/* one last check before we actually carry out the operation:
-	 * this code has hard-to-fix bugs when a noncontiguous file type has
-	 * such large pieces that the sum of the lengths of the memory type is
-	 * not larger than one of those pieces (and vice versa for large memory
-	 * types and many pices of file types.  In these cases, give up and
-	 * fall back to naive reads and writes.  The testphdf5 test created a
-	 * type with two very large memory regions and 600 very small file
-	 * regions.  The same test also created a type with one very large file
-	 * region and many (700) very small memory regions.  both cases caused
-	 * problems for this code */
-
-	if ( ( (file_list_count == 1) && 
-		    (new_file_read < flat_file->blocklens[0] ) ) ||
-		((mem_list_count == 1) && 
-		    (new_buffer_read < flat_buf->blocklens[0]) ) ||
-		((file_list_count == MAX_ARRAY_SIZE) && 
-		    (new_file_read < flat_buf->blocklens[0]) ) ||
-		( (mem_list_count == MAX_ARRAY_SIZE) &&
-		    (new_buffer_read < flat_file->blocklens[0])) )
-	{
-
-	    ADIOI_Delete_flattened(datatype);
-	    ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
-		    file_ptr_type, initial_off, status, error_code);
-	    return;
-	}
-
-	mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
-	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
-	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
-	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
-	    
-	size_read = 0;
-	n_filetypes = st_n_filetypes;
-	frd_size = st_frd_size;
-	brd_size = flat_buf->blocklens[0];
-	buf_count = 0;
-	start_mem_offset = 0;
-	start_k = k = 0;
-	start_j = st_index;
-
-	/*  this section calculates mem_list_count and file_list_count
-	    and also finds the possibly odd sized last array elements
-	    in new_frd_size and new_brd_size  */
-	
-	while (size_read < bufsize) {
-	    k = start_k;
-	    new_buffer_read = 0;
-	    mem_list_count = 0;
-	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		   (new_buffer_read < bufsize-size_read)) {
-	        /* find mem_list_count and file_list_count such that both are
-		   less than MAX_ARRAY_SIZE, the sum of their lengths are
-		   equal, and the sum of all the data read and data to be
-		   read in the next immediate read list is less than
-		   bufsize */
-	        if(mem_list_count) {
-		    if((new_buffer_read + flat_buf->blocklens[k] + 
-			size_read) > bufsize) {
-		        end_brd_size = new_buffer_read + 
-			    flat_buf->blocklens[k] - (bufsize - size_read);
-			new_buffer_read = bufsize - size_read;
-		    }
-		    else {
-		        new_buffer_read += flat_buf->blocklens[k];
-			end_brd_size = flat_buf->blocklens[k];
-		    }
-		}
-		else {
-		    if (brd_size > (bufsize - size_read)) {
-		        new_buffer_read = bufsize - size_read;
-			brd_size = new_buffer_read;
-		    }
-		    else new_buffer_read = brd_size;
-		}
-		mem_list_count++;
-		k = (k + 1)%flat_buf->count;
-	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-	       (new_buffer_read < bufsize-size_read)) */
-	    j = start_j;
-	    new_file_read = 0;
-	    file_list_count = 0;
-	    while ((file_list_count < MAX_ARRAY_SIZE) && 
-		   (new_file_read < new_buffer_read)) {
-	        if(file_list_count) {
-		    if((new_file_read + flat_file->blocklens[j]) > 
-		       new_buffer_read) {
-		        end_frd_size = new_buffer_read - new_file_read;
-			new_file_read = new_buffer_read;
-			j--;
-		    }
-		    else {
-		        new_file_read += flat_file->blocklens[j];
-			end_frd_size = flat_file->blocklens[j];
-		    }
-		}
-		else {
-		    if (frd_size > new_buffer_read) {
-		        new_file_read = new_buffer_read;
-			frd_size = new_file_read;
-		    }
-		    else new_file_read = frd_size;
-		}
-		file_list_count++;
-		if (j < (flat_file->count - 1)) j++;
-		else j = 0;
-		
-		k = start_k;
-		if ((new_file_read < new_buffer_read) && 
-		    (file_list_count == MAX_ARRAY_SIZE)) {
-		    new_buffer_read = 0;
-		    mem_list_count = 0;
-		    while (new_buffer_read < new_file_read) {
-		        if(mem_list_count) {
-			    if((new_buffer_read + flat_buf->blocklens[k]) >
-			       new_file_read) {
-			        end_brd_size = new_file_read - new_buffer_read;
-				new_buffer_read = new_file_read;
-				k--;
-			    }
-			    else {
-			        new_buffer_read += flat_buf->blocklens[k];
-				end_brd_size = flat_buf->blocklens[k];
-			    }
-			}
-			else {
-			    new_buffer_read = brd_size;
-			    if (brd_size > (bufsize - size_read)) {
-			        new_buffer_read = bufsize - size_read;
-				brd_size = new_buffer_read;
-			    }
-			}
-			mem_list_count++;
-			k = (k + 1)%flat_buf->count;
-		    } /* while (new_buffer_read < new_file_read) */
-		} /* if ((new_file_read < new_buffer_read) && (file_list_count
-		     == MAX_ARRAY_SIZE)) */
-	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		 (new_buffer_read < bufsize-size_read)) */
-
-	    /*  fills the allocated readlist arrays  */
-	    k = start_k;
-	    j = start_j;
-	    for (i=0; i<mem_list_count; i++) {	     
-	        mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
-					 (buf_count/flat_buf->count) +
-					 (int)flat_buf->indices[k]);
-		if(!i) {
-		    mem_lengths[0] = brd_size;
-		    mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
-		}
-		else {
-		    if (i == (mem_list_count - 1)) {
-		        mem_lengths[i] = end_brd_size;
-			if (flat_buf->blocklens[k] == end_brd_size)
-			    brd_size = flat_buf->blocklens[(k+1)%
-							  flat_buf->count];
-			else {
-			    brd_size = flat_buf->blocklens[k] - end_brd_size;
-			    k--;
-			    buf_count--;
-			}
-		    }
-		    else {
-		        mem_lengths[i] = flat_buf->blocklens[k];
-		    }
-		}
-		buf_count++;
-		k = (k + 1)%flat_buf->count;
-	    } /* for (i=0; i<mem_list_count; i++) */
-	    for (i=0; i<file_list_count; i++) {
-	        file_offsets[i] = disp + flat_file->indices[j] + 
-		    ((ADIO_Offset)n_filetypes) * filetype_extent;
-	        if (!i) {
-		    file_lengths[0] = frd_size;
-		    file_offsets[0] += flat_file->blocklens[j] - frd_size;
-		}
-		else {
-		    if (i == (file_list_count - 1)) {
-		        file_lengths[i] = end_frd_size;
-			if (flat_file->blocklens[j] == end_frd_size)
-			    frd_size = flat_file->blocklens[(j+1)%
-							  flat_file->count];   
-			else {
-			    frd_size = flat_file->blocklens[j] - end_frd_size;
-			    j--;
-			}
-		    }
-		    else file_lengths[i] = flat_file->blocklens[j];
-		}
-		if (j < flat_file->count - 1) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (i=0; i<file_list_count; i++) */
-	    err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, 
-		    mem_offsets, PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0 ) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (memory)", 0);
-		goto error_state;
-	    }
-	    /* -- END ERROR HANDLING-- */
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-		    file_offsets, PVFS_BYTE, &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (file)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
+    /* Base case: no hints were given, so use classic list I/O */
 
-	    /* offset will be expressed in memory and file datatypes */
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0, 
-		    PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_read", 0);
-	    }
-	    /* --END ERROR HANDLING-- */
-	    PVFS_Request_free(&mem_req);
-	    PVFS_Request_free(&file_req);
-	    total_bytes_read += resp_io.total_completed;
-	    size_read += new_buffer_read;
-	    start_k = k;
-	    start_j = j;
-	} /* while (size_read < bufsize) */
-	ADIOI_Free(mem_offsets);
-	ADIOI_Free(mem_lengths);
-    }
-    ADIOI_Free(file_offsets);
-    ADIOI_Free(file_lengths);
-    
-    /* Other ADIO routines will convert absolute bytes into counts of datatypes */
-    /* when incrementing fp_ind, need to also take into account the file type:
-     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
-     * if we wrote N elements, offset needs to point at beginning of type, not
-     * at empty region at offset N+1) */
-    if (file_ptr_type == ADIO_INDIVIDUAL) {
-	/* this is closer, but still incorrect for the cases where a small
-	 * amount of a file type is "leftover" after a write */
-	fd->fp_ind = disp + flat_file->indices[j] + 
-	    ((ADIO_Offset)n_filetypes)*filetype_extent;
-    }
-    if (err_flag == 0) *error_code = MPI_SUCCESS;
-
-error_state:
-    fd->fp_sys_posn = -1;   /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
-    MPIR_Status_set_bytes(status, datatype, bufsize);
-    /* This is a temporary way of filling in status. The right way is to 
-       keep track of how much data was actually read and placed in buf 
-       by ADIOI_BUFFERED_READ. */
-#endif
-    
-    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+    ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype, 
+	    file_ptr_type, offset, status, error_code);
+    return;
 }
 
+
 /*
  * vim: ts=8 sts=4 sw=4 noexpandtab 
  */
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_read_list_classic.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,7 +7,7 @@
 
 #include "ad_pvfs2.h"
 #include "adio_extern.h"
-
+#include "ad_pvfs2_io.h"
 #include "ad_pvfs2_common.h"
 
 void ADIOI_PVFS2_WriteContig(ADIO_File fd, void *buf, int count, 
@@ -104,950 +104,78 @@
     return;
 }
 
+/* Write wrapper: calls ADIOI_PVFS2_StridedListIO with WRITE, returns its result. */
+int ADIOI_PVFS2_WriteStridedListIO(ADIO_File fd, void *buf, int count,
+				   MPI_Datatype datatype, int file_ptr_type,
+				   ADIO_Offset offset, ADIO_Status *status,
+				   int *error_code)
+{
+    int rc = ADIOI_PVFS2_StridedListIO(fd, buf, count, datatype, file_ptr_type,
+				       offset, status, error_code, WRITE);
+    return rc;
+}
+
+/* Write wrapper: calls ADIOI_PVFS2_StridedDtypeIO with WRITE, returns its result. */
+int ADIOI_PVFS2_WriteStridedDtypeIO(ADIO_File fd, void *buf, int count,
+				    MPI_Datatype datatype, int file_ptr_type,
+				    ADIO_Offset offset, ADIO_Status *status, 
+				    int *error_code)
+{
+    int rc = ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, datatype, file_ptr_type,
+					offset, status, error_code, WRITE);
+    return rc;
+}
+
+
 void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
 			      MPI_Datatype datatype, int file_ptr_type,
 			      ADIO_Offset offset, ADIO_Status *status,
 			      int *error_code)
 {
-    /* as with all the other WriteStrided functions, offset is in units of
-     * etype relative to the filetype */
-
-    /* Since PVFS2 does not support file locking, can't do buffered writes
-       as on Unix */
-
-    ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, bwr_size, fwr_size=0, st_index=0;
-    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size;
-    MPI_Aint filetype_extent, buftype_extent;
-    int buf_count, buftype_is_contig, filetype_is_contig;
-    ADIO_Offset off, disp, start_off, initial_off;
-    int flag, st_fwr_size, st_n_filetypes;
-    int err_flag=0;
-
-    int mem_list_count, file_list_count;
-    PVFS_size * mem_offsets;
-    int64_t *file_offsets;
-    int *mem_lengths;
-    int32_t *file_lengths;
-    int total_blks_to_write;
-
-    int max_mem_list, max_file_list;
-
-    int b_blks_wrote;
-    int f_data_wrote;
-    int size_wrote=0, n_write_lists, extra_blks;
-
-    int end_bwr_size, end_fwr_size;
-    int start_k, start_j, new_file_write, new_buffer_write;
-    int start_mem_offset;
-    PVFS_Request mem_req, file_req;
-    ADIOI_PVFS2_fs * pvfs_fs;
-    PVFS_sysresp_io resp_io;
-    MPI_Offset total_bytes_written=0;
-    static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
-
-    /* note: don't increase this: several parts of PVFS2 now 
-     * assume this limit*/
-#define MAX_ARRAY_SIZE 64
-
-    /* --BEGIN ERROR HANDLING-- */
-    if (fd->atomicity) {
-	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-					   MPIR_ERR_RECOVERABLE,
-					   myname, __LINE__,
-					   MPI_ERR_ARG,
-					   "Atomic noncontiguous writes are not supported by PVFS2", 0);
-	return;
-    }
-    /* --END ERROR HANDLING-- */
-
-    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
-    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
-    /* the HDF5 tests showed a bug in this list processing code (see many many
-     * lines down below).  We added a workaround, but common HDF5 file types
-     * are actually contiguous and do not need the expensive workarond */
-    if (!filetype_is_contig) {
-	flat_file = ADIOI_Flatlist;
-	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-	if (flat_file->count == 1 && !buftype_is_contig)
-	    filetype_is_contig = 1;
-    }
-
-    MPI_Type_size(fd->filetype, &filetype_size);
-    if ( ! filetype_size ) {
-	*error_code = MPI_SUCCESS; 
-	return;
-    }
-
-    MPI_Type_extent(fd->filetype, &filetype_extent);
-    MPI_Type_size(datatype, &buftype_size);
-    MPI_Type_extent(datatype, &buftype_extent);
-    etype_size = fd->etype_size;
-    
-    bufsize = buftype_size * count;
-
-    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
-
-    if (!buftype_is_contig && filetype_is_contig) {
-
-/* noncontiguous in memory, contiguous in file.  */
-        int64_t file_offsets;
-	int32_t file_lengths;
-
-	ADIOI_Flatten_datatype(datatype);
-	flat_buf = ADIOI_Flatlist;
-	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-	
-	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
-	    off = fd->disp + etype_size * offset;
-	}
-	else off = fd->fp_ind;
-
-	file_list_count = 1;
-	file_offsets = off;
-	file_lengths = 0;
-	total_blks_to_write = count*flat_buf->count;
-	b_blks_wrote = 0;
-
-	/* allocate arrays according to max usage */
-	if (total_blks_to_write > MAX_ARRAY_SIZE)
-	    mem_list_count = MAX_ARRAY_SIZE;
-	else mem_list_count = total_blks_to_write;
-	mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
-	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
-
-	j = 0;
-	/* step through each block in memory, filling memory arrays */
-	while (b_blks_wrote < total_blks_to_write) {
-	    for (i=0; i<flat_buf->count; i++) {
-		mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = 
-		    /* TODO: fix this warning by casting to an integer that's
-		     * the same size as a char * and /then/ casting to
-		     * PVFS_size */
-		    ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
-		mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = 
-		    flat_buf->blocklens[i];
-		file_lengths += flat_buf->blocklens[i];
-		b_blks_wrote++;
-		if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
-		    (b_blks_wrote == total_blks_to_write)) {
-
-		    /* in the case of the last write list call,
-		       adjust mem_list_count */
-		    if (b_blks_wrote == total_blks_to_write) {
-		        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
-			/* in case last write list call fills max arrays */
-			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
-		    }
-		    err_flag = PVFS_Request_hindexed(mem_list_count, 
-						     mem_lengths, mem_offsets,
-						     PVFS_BYTE, &mem_req);
-		    /* --BEGIN ERROR HANDLING-- */
-		    if (err_flag != 0) {
-			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-							   MPIR_ERR_RECOVERABLE,
-							   myname, __LINE__,
-							   ADIOI_PVFS2_error_convert(err_flag),
-							   "Error in PVFS_Request_hindexed (memory)", 0);
-			break;
-		    }
-		    /* --END ERROR HANDLING-- */
-
-		    err_flag = PVFS_Request_contiguous(file_lengths, 
-						       PVFS_BYTE, &file_req);
-		    /* --BEGIN ERROR HANDLING-- */
-		    if (err_flag != 0) {
-			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-							   MPIR_ERR_RECOVERABLE,
-							   myname, __LINE__,
-							   ADIOI_PVFS2_error_convert(err_flag),
-							   "Error in PVFS_Request_contiguous (file)", 0);
-			break;
-		    }
-		    /* --END ERROR HANDLING-- */
-
-#ifdef ADIOI_MPE_LOGGING
-                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
-#endif
-		    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 
-					      file_offsets, PVFS_BOTTOM,
-					      mem_req, 
-					      &(pvfs_fs->credentials),
-					      &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
-		    total_bytes_written += resp_io.total_completed;
-		  
-		    /* in the case of error or the last write list call, 
-		     * leave here */
-		    /* --BEGIN ERROR HANDLING-- */
-		    if (err_flag) {
-			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-							   MPIR_ERR_RECOVERABLE,
-							   myname, __LINE__,
-							   ADIOI_PVFS2_error_convert(err_flag),
-							   "Error in PVFS_sys_write", 0);
-			break;
-		    }
-		    /* --END ERROR HANDLING-- */
-		    if (b_blks_wrote == total_blks_to_write) break;
-
-		    file_offsets += file_lengths;
-		    file_lengths = 0;
-		    PVFS_Request_free(&mem_req);
-		    PVFS_Request_free(&file_req);
-		} 
-	    } /* for (i=0; i<flat_buf->count; i++) */
-	    j++;
-	} /* while (b_blks_wrote < total_blks_to_write) */
-	ADIOI_Free(mem_offsets);
-	ADIOI_Free(mem_lengths);
-
-	if (file_ptr_type == ADIO_INDIVIDUAL) 
-	    fd->fp_ind += total_bytes_written;
-
-	if (!err_flag)  *error_code = MPI_SUCCESS;
-
-	fd->fp_sys_posn = -1;   /* clear this. */
-
-#ifdef HAVE_STATUS_SET_BYTES
-	MPIR_Status_set_bytes(status, datatype, bufsize);
-/* This is a temporary way of filling in status. The right way is to 
-   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
-#endif
-
-	ADIOI_Delete_flattened(datatype);
-	return;
-    } /* if (!buftype_is_contig && filetype_is_contig) */
-
-    /* already know that file is noncontiguous from above */
-    /* noncontiguous in file */
-
-/* filetype already flattened in ADIO_Open */
-    flat_file = ADIOI_Flatlist;
-    while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-
-    disp = fd->disp;
-    initial_off = offset;
-
-    /* for each case - ADIO_Individual pointer or explicit, find offset
-       (file offset in bytes), n_filetypes (how many filetypes into file 
-       to start), fwr_size (remaining amount of data in present file
-       block), and st_index (start point in terms of blocks in starting
-       filetype) */
-    if (file_ptr_type == ADIO_INDIVIDUAL) {
-        offset = fd->fp_ind; /* in bytes */
-	n_filetypes = -1;
-	flag = 0;
-	while (!flag) {
-	    n_filetypes++;
-	    for (i=0; i<flat_file->count; i++) {
-	        if (disp + flat_file->indices[i] + 
-		    ((ADIO_Offset) n_filetypes)*filetype_extent +
-		      flat_file->blocklens[i] >= offset) {
-		  st_index = i;
-		  fwr_size = disp + flat_file->indices[i] + 
-		    ((ADIO_Offset) n_filetypes)*filetype_extent
-		    + flat_file->blocklens[i] - offset;
-		  flag = 1;
-		  break;
-		}
-	    }
-	} /* while (!flag) */
-    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
-    else {
-        n_etypes_in_filetype = filetype_size/etype_size;
-	n_filetypes = (int) (offset / n_etypes_in_filetype);
-	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
-	size_in_filetype = etype_in_filetype * etype_size;
-	
-	sum = 0;
-	for (i=0; i<flat_file->count; i++) {
-	    sum += flat_file->blocklens[i];
-	    if (sum > size_in_filetype) {
-	        st_index = i;
-		fwr_size = sum - size_in_filetype;
-		abs_off_in_filetype = flat_file->indices[i] +
-		    size_in_filetype - (sum - flat_file->blocklens[i]);
-		break;
-	    }
-	}
-
-	/* abs. offset in bytes in the file */
-	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
-	    abs_off_in_filetype;
-    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
-
-    start_off = offset;
-    st_fwr_size = fwr_size;
-    st_n_filetypes = n_filetypes;
-    
-    if (buftype_is_contig && !filetype_is_contig) {
-
-/* contiguous in memory, noncontiguous in file. should be the most
-   common case. */
-
-        int mem_lengths;
-	char *mem_offsets;
-        
-	i = 0;
-	j = st_index;
-	off = offset;
-	n_filetypes = st_n_filetypes;
-        
-	mem_list_count = 1;
-        
-	/* determine how many blocks in file to write */
-	f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
-	total_blks_to_write = 1;
-	if (j < (flat_file->count -1)) j++;
-	else {
-	    j = 0;
-	    n_filetypes++;
-	}
-	while (f_data_wrote < bufsize) {
-	    f_data_wrote += flat_file->blocklens[j];
-	    total_blks_to_write++;
-	    if (j<(flat_file->count-1)) j++;
-	    else j = 0; 
-	}
-	    
-	j = st_index;
-	n_filetypes = st_n_filetypes;
-	n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
-	extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
-        
-	mem_offsets = buf;
-	mem_lengths = 0;
-        
-	/* if at least one full writelist, allocate file arrays
-	   at max array size and don't free until very end */
-	if (n_write_lists) {
-	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
-						  sizeof(int64_t));
-	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
-						  sizeof(int32_t));
-	}
-	/* if there's no full writelist allocate file arrays according
-	   to needed size (extra_blks) */
-	else {
-	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
-                                                  sizeof(int64_t));
-            file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
-                                                  sizeof(int32_t));
+    /* four ways (to date) that we can carry out strided i/o accesses:
+     * - naive posix
+     * - 'true' Datatype (from avery)
+     * - new List I/O (from avery)
+     * - classic List I/O  (the one that's always been in ROMIO)
+     * I imagine we'll keep Datatype as an optional optimization, and after a
+     * release or two promote it to the default 
+     */
+
+    /* a lot of near-duplication from ADIOI_PVFS2_ReadStrided: for
+     * debugging/testing it's helpful to be able to turn on and off these
+     * optimizations separately for the read and write cases */
+    int ret = -1;
+    if ( fd->hints->fs_hints.pvfs2.posix_write == ADIOI_HINT_ENABLE) {
+        ADIOI_GEN_WriteStrided_naive(fd, buf, count,
+                                     datatype, file_ptr_type,
+                                     offset, status, error_code);
+        return;
+    }
+    if ( fd->hints->fs_hints.pvfs2.dtype_write == ADIOI_HINT_ENABLE) {
+        ret = ADIOI_PVFS2_WriteStridedDtypeIO(fd, buf, count,
+                                              datatype, file_ptr_type,
+                                              offset, status, error_code);
+
+        /* Fall back to list I/O if datatype I/O didn't work */
+        if (ret != 0)
+        {
+            fprintf(stderr,
+                    "Falling back to list I/O since datatype I/O failed\n");
+            ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count,
+                                                 datatype, file_ptr_type,
+                                                 offset, status, error_code);
         }
-        
-        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
-        for (i=0; i<n_write_lists; i++) {
-            file_list_count = MAX_ARRAY_SIZE;
-            if(!i) {
-                file_offsets[0] = offset;
-                file_lengths[0] = st_fwr_size;
-                mem_lengths = st_fwr_size;
-            }
-            for (k=0; k<MAX_ARRAY_SIZE; k++) {
-                if (i || k) {
-                    file_offsets[k] = disp + 
-			((ADIO_Offset)n_filetypes)*filetype_extent
-			+ flat_file->indices[j];
-                    file_lengths[k] = flat_file->blocklens[j];
-                    mem_lengths += file_lengths[k];
-                }
-                if (j<(flat_file->count - 1)) j++;
-                else {
-                    j = 0;
-                    n_filetypes++;
-                }
-            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
-
-	    err_flag = PVFS_Request_contiguous(mem_lengths, 
-					       PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_contiguous (memory)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-					     file_offsets, PVFS_BYTE,
-					     &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (file)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    /* PVFS_Request_hindexed already expresses the offsets into the
-	     * file, so we should not pass in an offset if we are using
-	     * hindexed for the file type */
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, 
-				      mem_offsets, mem_req,
-				      &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_write", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-	    total_bytes_written += resp_io.total_completed;
-
-            mem_offsets += mem_lengths;
-            mem_lengths = 0;
-	    PVFS_Request_free(&file_req);
-	    PVFS_Request_free(&mem_req);
-
-        } /* for (i=0; i<n_write_lists; i++) */
-
-        /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
-        if (extra_blks) {
-            file_list_count = extra_blks;
-            if(!i) {
-                file_offsets[0] = offset;
-                file_lengths[0] = st_fwr_size;
-            }
-            for (k=0; k<extra_blks; k++) {
-                if(i || k) {
-                    file_offsets[k] = disp + 
-			((ADIO_Offset)n_filetypes)*filetype_extent +
-			flat_file->indices[j];
-                    if (k == (extra_blks - 1)) {
-                        file_lengths[k] = bufsize - (int32_t) mem_lengths
-                          - (int32_t) mem_offsets + (int32_t)  buf;
-                    }
-                    else file_lengths[k] = flat_file->blocklens[j];
-                } /* if(i || k) */
-                mem_lengths += file_lengths[k];
-                if (j<(flat_file->count - 1)) j++;
-                else {
-                    j = 0;
-                    n_filetypes++;
-                }
-            } /* for (k=0; k<extra_blks; k++) */
-
-	    err_flag = PVFS_Request_contiguous(mem_lengths, 
-					       PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_contiguous (memory)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-					     file_offsets, PVFS_BYTE,
-					     &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed(file)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    /* as above, use 0 for 'offset' when using hindexed file type*/
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, 
-				      mem_offsets, mem_req,
-				      &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_write", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-	    total_bytes_written += resp_io.total_completed;
-	    PVFS_Request_free(&mem_req);
-	    PVFS_Request_free(&file_req);
-        }
-    } 
-    else {
-        /* noncontiguous in memory as well as in file */
-
-        ADIOI_Flatten_datatype(datatype);
-	flat_buf = ADIOI_Flatlist;
-	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
-	size_wrote = 0;
-	n_filetypes = st_n_filetypes;
-	fwr_size = st_fwr_size;
-	bwr_size = flat_buf->blocklens[0];
-	buf_count = 0;
-	start_mem_offset = 0;
-	start_k = k = 0;
-	start_j = st_index;
-	max_mem_list = 0;
-	max_file_list = 0;
-
-	/* run through and file max_file_list and max_mem_list so that you 
-	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
-	   if possible */
-
-	while (size_wrote < bufsize) {
-	    k = start_k;
-	    new_buffer_write = 0;
-	    mem_list_count = 0;
-	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		   (new_buffer_write < bufsize-size_wrote)) {
-	        /* find mem_list_count and file_list_count such that both are
-		   less than MAX_ARRAY_SIZE, the sum of their lengths are
-		   equal, and the sum of all the data written and data to be
-		   written in the next immediate write list is less than
-		   bufsize */
-	        if(mem_list_count) {
-		    if((new_buffer_write + flat_buf->blocklens[k] + 
-			size_wrote) > bufsize) {
-		        end_bwr_size = new_buffer_write + 
-			    flat_buf->blocklens[k] - (bufsize - size_wrote);
-			new_buffer_write = bufsize - size_wrote;
-		    }
-		    else {
-		        new_buffer_write += flat_buf->blocklens[k];
-			end_bwr_size = flat_buf->blocklens[k];
-		    }
-		}
-		else {
-		    if (bwr_size > (bufsize - size_wrote)) {
-		        new_buffer_write = bufsize - size_wrote;
-			bwr_size = new_buffer_write;
-		    }
-		    else new_buffer_write = bwr_size;
-		}
-		mem_list_count++;
-		k = (k + 1)%flat_buf->count;
-	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-	       (new_buffer_write < bufsize-size_wrote)) */
-	    j = start_j;
-	    new_file_write = 0;
-	    file_list_count = 0;
-	    while ((file_list_count < MAX_ARRAY_SIZE) && 
-		   (new_file_write < new_buffer_write)) { 
-	        if(file_list_count) {
-		    if((new_file_write + flat_file->blocklens[j]) > 
-		       new_buffer_write) {
-		        end_fwr_size = new_buffer_write - new_file_write;
-			new_file_write = new_buffer_write;
-			j--;
-		    }
-		    else {
-		        new_file_write += flat_file->blocklens[j];
-			end_fwr_size = flat_file->blocklens[j];
-		    }
-		}
-		else {
-		    if (fwr_size > new_buffer_write) {
-		        new_file_write = new_buffer_write;
-			fwr_size = new_file_write;
-		    }
-		    else new_file_write = fwr_size;
-		}
-		file_list_count++;
-		if (j < (flat_file->count - 1)) j++;
-		else j = 0;
-		
-		k = start_k;
-		if ((new_file_write < new_buffer_write) && 
-		    (file_list_count == MAX_ARRAY_SIZE)) {
-		    new_buffer_write = 0;
-		    mem_list_count = 0;
-		    while (new_buffer_write < new_file_write) {
-		        if(mem_list_count) {
-			    if((new_buffer_write + flat_buf->blocklens[k]) >
-			       new_file_write) {
-			        end_bwr_size = new_file_write - 
-				    new_buffer_write;
-				new_buffer_write = new_file_write;
-				k--;
-			    }
-			    else {
-			        new_buffer_write += flat_buf->blocklens[k];
-				end_bwr_size = flat_buf->blocklens[k];
-			    }
-			}
-			else {
-			    new_buffer_write = bwr_size;
-			    if (bwr_size > (bufsize - size_wrote)) {
-			        new_buffer_write = bufsize - size_wrote;
-				bwr_size = new_buffer_write;
-			    }
-			}
-			mem_list_count++;
-			k = (k + 1)%flat_buf->count;
-		    } /* while (new_buffer_write < new_file_write) */
-		} /* if ((new_file_write < new_buffer_write) &&
-		     (file_list_count == MAX_ARRAY_SIZE)) */
-	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		 (new_buffer_write < bufsize-size_wrote)) */
-
-	    /*  fakes filling the writelist arrays of lengths found above  */
-	    k = start_k;
-	    j = start_j;
-	    for (i=0; i<mem_list_count; i++) {	     
-		if(i) {
-		    if (i == (mem_list_count - 1)) {
-			if (flat_buf->blocklens[k] == end_bwr_size)
-			    bwr_size = flat_buf->blocklens[(k+1)%
-							  flat_buf->count];
-			else {
-			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
-			    k--;
-			    buf_count--;
-			}
-		    }
-		}
-		buf_count++;
-		k = (k + 1)%flat_buf->count;
-	    } /* for (i=0; i<mem_list_count; i++) */
-	    for (i=0; i<file_list_count; i++) {
-		if (i) {
-		    if (i == (file_list_count - 1)) {
-			if (flat_file->blocklens[j] == end_fwr_size)
-			    fwr_size = flat_file->blocklens[(j+1)%
-							  flat_file->count];   
-			else {
-			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
-			    j--;
-			}
-		    }
-		}
-		if (j < flat_file->count - 1) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (i=0; i<file_list_count; i++) */
-	    size_wrote += new_buffer_write;
-	    start_k = k;
-	    start_j = j;
-	    if (max_mem_list < mem_list_count)
-	        max_mem_list = mem_list_count;
-	    if (max_file_list < file_list_count)
-	        max_file_list = file_list_count;
-	} /* while (size_wrote < bufsize) */
-
-	/* one last check before we actually carry out the operation:
-	 * this code has hard-to-fix bugs when a noncontiguous file type has
-	 * such large pieces that the sum of the lengths of the memory type is
-	 * not larger than one of those pieces (and vice versa for large memory
-	 * types and many pices of file types.  In these cases, give up and
-	 * fall back to naive reads and writes.  The testphdf5 test created a
-	 * type with two very large memory regions and 600 very small file
-	 * regions.  The same test also created a type with one very large file
-	 * region and many (700) very small memory regions.  both cases caused
-	 * problems for this code */
-
-	if ( ( (file_list_count == 1) && 
-		    (new_file_write < flat_file->blocklens[0] ) ) ||
-		((mem_list_count == 1) && 
-		    (new_buffer_write < flat_buf->blocklens[0]) ) ||
-		((file_list_count == MAX_ARRAY_SIZE) && 
-		    (new_file_write < flat_buf->blocklens[0]) ) ||
-		( (mem_list_count == MAX_ARRAY_SIZE) &&
-		    (new_buffer_write < flat_file->blocklens[0])) )
-	{
-	    ADIOI_Delete_flattened(datatype);
-	    ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
-		    file_ptr_type, initial_off, status, error_code);
-	    return;
-	}
-
-
-	mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
-	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
-	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
-	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
-	    
-	size_wrote = 0;
-	n_filetypes = st_n_filetypes;
-	fwr_size = st_fwr_size;
-	bwr_size = flat_buf->blocklens[0];
-	buf_count = 0;
-	start_mem_offset = 0;
-	start_k = k = 0;
-	start_j = st_index;
-
-	/*  this section calculates mem_list_count and file_list_count
-	    and also finds the possibly odd sized last array elements
-	    in new_fwr_size and new_bwr_size  */
-	
-	while (size_wrote < bufsize) {
-	    k = start_k;
-	    new_buffer_write = 0;
-	    mem_list_count = 0;
-	    while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		   (new_buffer_write < bufsize-size_wrote)) {
-	        /* find mem_list_count and file_list_count such that both are
-		   less than MAX_ARRAY_SIZE, the sum of their lengths are
-		   equal, and the sum of all the data written and data to be
-		   written in the next immediate write list is less than
-		   bufsize */
-	        if(mem_list_count) {
-		    if((new_buffer_write + flat_buf->blocklens[k] + 
-			size_wrote) > bufsize) {
-		        end_bwr_size = new_buffer_write + 
-			    flat_buf->blocklens[k] - (bufsize - size_wrote);
-			new_buffer_write = bufsize - size_wrote;
-		    }
-		    else {
-		        new_buffer_write += flat_buf->blocklens[k];
-			end_bwr_size = flat_buf->blocklens[k];
-		    }
-		}
-		else {
-		    if (bwr_size > (bufsize - size_wrote)) {
-		        new_buffer_write = bufsize - size_wrote;
-			bwr_size = new_buffer_write;
-		    }
-		    else new_buffer_write = bwr_size;
-		}
-		mem_list_count++;
-		k = (k + 1)%flat_buf->count;
-	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-	       (new_buffer_write < bufsize-size_wrote)) */
-	    j = start_j;
-	    new_file_write = 0;
-	    file_list_count = 0;
-	    while ((file_list_count < MAX_ARRAY_SIZE) && 
-		   (new_file_write < new_buffer_write)) {
-	        if(file_list_count) {
-		    if((new_file_write + flat_file->blocklens[j]) > 
-		       new_buffer_write) {
-		        end_fwr_size = new_buffer_write - new_file_write;
-			new_file_write = new_buffer_write;
-			j--;
-		    }
-		    else {
-		        new_file_write += flat_file->blocklens[j];
-			end_fwr_size = flat_file->blocklens[j];
-		    }
-		}
-		else {
-		    if (fwr_size > new_buffer_write) {
-		        new_file_write = new_buffer_write;
-			fwr_size = new_file_write;
-		    }
-		    else new_file_write = fwr_size;
-		}
-		file_list_count++;
-		if (j < (flat_file->count - 1)) j++;
-		else j = 0;
-		
-		k = start_k;
-		if ((new_file_write < new_buffer_write) && 
-		    (file_list_count == MAX_ARRAY_SIZE)) {
-		    new_buffer_write = 0;
-		    mem_list_count = 0;
-		    while (new_buffer_write < new_file_write) {
-		        if(mem_list_count) {
-			    if((new_buffer_write + flat_buf->blocklens[k]) >
-			       new_file_write) {
-			        end_bwr_size = new_file_write -
-				  new_buffer_write;
-				new_buffer_write = new_file_write;
-				k--;
-			    }
-			    else {
-			        new_buffer_write += flat_buf->blocklens[k];
-				end_bwr_size = flat_buf->blocklens[k];
-			    }
-			}
-			else {
-			    new_buffer_write = bwr_size;
-			    if (bwr_size > (bufsize - size_wrote)) {
-			        new_buffer_write = bufsize - size_wrote;
-				bwr_size = new_buffer_write;
-			    }
-			}
-			mem_list_count++;
-			k = (k + 1)%flat_buf->count;
-		    } /* while (new_buffer_write < new_file_write) */
-		} /* if ((new_file_write < new_buffer_write) &&
-		     (file_list_count == MAX_ARRAY_SIZE)) */
-	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) && 
-		 (new_buffer_write < bufsize-size_wrote)) */
-
-	    /*  fills the allocated writelist arrays  */
-	    k = start_k;
-	    j = start_j;
-	    for (i=0; i<mem_list_count; i++) {	     
-		/* TODO: fix this warning by casting to an integer that's the
-		 * same size as a char * and /then/ casting to PVFS_size */
-	        mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
-				  (buf_count/flat_buf->count) +
-				  (int)flat_buf->indices[k]);
-		
-		if(!i) {
-		    mem_lengths[0] = bwr_size;
-		    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
-		}
-		else {
-		    if (i == (mem_list_count - 1)) {
-		        mem_lengths[i] = end_bwr_size;
-			if (flat_buf->blocklens[k] == end_bwr_size)
-			    bwr_size = flat_buf->blocklens[(k+1)%
-							  flat_buf->count];
-			else {
-			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
-			    k--;
-			    buf_count--;
-			}
-		    }
-		    else {
-		        mem_lengths[i] = flat_buf->blocklens[k];
-		    }
-		}
-		buf_count++;
-		k = (k + 1)%flat_buf->count;
-	    } /* for (i=0; i<mem_list_count; i++) */
-	    for (i=0; i<file_list_count; i++) {
-	        file_offsets[i] = disp + flat_file->indices[j] + 
-		    ((ADIO_Offset)n_filetypes) * filetype_extent;
-	        if (!i) {
-		    file_lengths[0] = fwr_size;
-		    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
-		}
-		else {
-		    if (i == (file_list_count - 1)) {
-		        file_lengths[i] = end_fwr_size;
-			if (flat_file->blocklens[j] == end_fwr_size)
-			    fwr_size = flat_file->blocklens[(j+1)%
-							  flat_file->count];   
-			else {
-			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
-			    j--;
-			}
-		    }
-		    else file_lengths[i] = flat_file->blocklens[j];
-		}
-		if (j < flat_file->count - 1) j++;
-		else {
-		    j = 0;
-		    n_filetypes++;
-		}
-	    } /* for (i=0; i<file_list_count; i++) */
-
-	    err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, 
-					     mem_offsets, PVFS_BYTE, &mem_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0 ) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed (memory)", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, 
-					     file_offsets, PVFS_BYTE,
-					     &file_req);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_Request_hindexed", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    /* offset will be expressed in memory and file datatypes */
-
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
-#endif
-	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, 
-				      PVFS_BOTTOM, mem_req,
-				      &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
-            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (err_flag != 0) {
-		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
-						   MPIR_ERR_RECOVERABLE,
-						   myname, __LINE__,
-						   ADIOI_PVFS2_error_convert(err_flag),
-						   "Error in PVFS_sys_write", 0);
-		goto error_state;
-	    }
-	    /* --END ERROR HANDLING-- */
-
-	    size_wrote += new_buffer_write;
-	    total_bytes_written += resp_io.total_completed;
-	    start_k = k;
-	    start_j = j;
-	    PVFS_Request_free(&mem_req);
-	    PVFS_Request_free(&file_req);
-	} /* while (size_wrote < bufsize) */
-	ADIOI_Free(mem_offsets);
-	ADIOI_Free(mem_lengths);
+        return;
     }
-    ADIOI_Free(file_offsets);
-    ADIOI_Free(file_lengths);
-
-    /* when incrementing fp_ind, need to also take into account the file type:
-     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
-     * if we wrote N elements, offset needs to point at beginning of type, not
-     * at empty region at offset N+1) */
-    if (file_ptr_type == ADIO_INDIVIDUAL) {
-	/* this is closer, but still incorrect for the cases where a small
-	 * amount of a file type is "leftover" after a write */
-	fd->fp_ind = disp + flat_file->indices[j] + 
-	    ((ADIO_Offset)n_filetypes)*filetype_extent;
+    /* Use the new list I/O when the listio_write hint is enabled */
+    if (fd->hints->fs_hints.pvfs2.listio_write == ADIOI_HINT_ENABLE) {
+	ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, datatype, 
+			file_ptr_type, offset, status, error_code);
+	return;
     }
-    *error_code = MPI_SUCCESS;
 
-error_state:
-    fd->fp_sys_posn = -1;   /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
-    MPIR_Status_set_bytes(status, datatype, bufsize);
-/* This is a temporary way of filling in status. The right way is to 
-   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
-#endif
-
-    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+    /* No hint selected another method: fall back to classic list I/O */
+    ADIOI_PVFS2_OldWriteStrided(fd, buf, count, datatype,
+	    file_ptr_type, offset, status, error_code);
+    return;
 }
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2: ad_pvfs2_write_list_classic.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am	2010-11-16 09:16:27.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am	2010-11-15 15:03:31.000000000 +0100
@@ -28,9 +28,15 @@
         ad_pvfs2_common.c \
         ad_pvfs2_delete.c \
         ad_pvfs2_fcntl.c \
+        ad_pvfs2_features.c \
         ad_pvfs2_flush.c \
         ad_pvfs2_hints.c \
+        ad_pvfs2_io.h \
+        ad_pvfs2_io_dtype.c \
+        ad_pvfs2_io_list.c \
         ad_pvfs2_open.c \
         ad_pvfs2_read.c \
+        ad_pvfs2_read_list_classic.c \
         ad_pvfs2_resize.c \
-        ad_pvfs2_write.c
+        ad_pvfs2_write.c \
+        ad_pvfs2_write_list_classic.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c	2010-11-16 09:16:34.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_TESTFS_operations = {
     ADIOI_TESTFS_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* OpenColl */
     ADIOI_TESTFS_ReadContig, /* ReadContig */
     ADIOI_TESTFS_WriteContig, /* WriteContig */
     ADIOI_TESTFS_ReadStridedColl, /* ReadStridedColl */
@@ -33,4 +34,5 @@
     ADIOI_TESTFS_Flush, /* Flush */
     ADIOI_TESTFS_Resize, /* Resize */
     ADIOI_TESTFS_Delete, /* Delete */
+    ADIOI_GEN_Feature, /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c	2010-11-16 09:16:34.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,7 +7,9 @@
 
 #include "ad_testfs.h"
 #include "adioi.h"
-
+#ifdef ROMIO_BGL
+#include "../ad_bgl/ad_bgl.h"
+#endif
 void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
 {
     int myrank, nprocs;
@@ -21,5 +23,10 @@
     FPRINTF(stdout, "[%d/%d]    calling ADIOI_GEN_SetInfo\n", 
 	    myrank, nprocs);
 
+#ifdef ROMIO_BGL   /* BlueGene support for pvfs through ufs */
+    /* BlueGene hack: force testfs to mimic BlueGene hints */
+    ADIOI_BGL_SetInfo(fd, users_info, error_code);
+#else
     ADIOI_GEN_SetInfo(fd, users_info, error_code);
+#endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c	2010-11-16 09:16:34.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c	2010-11-15 15:03:31.000000000 +0100
@@ -26,10 +26,6 @@
 	offset = fd->fp_ind;
 	fd->fp_ind += datatype_size * count;
 	fd->fp_sys_posn = fd->fp_ind;
-#if 0
-	FPRINTF(stdout, "[%d/%d]    new file position is %lld\n", myrank, 
-		nprocs, (long long) fd->fp_ind);
-#endif
     }
     else {
 	fd->fp_sys_posn = offset + datatype_size * count;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c	2010-11-16 09:16:34.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c	2010-11-15 15:02:47.000000000 +0100
@@ -26,8 +26,8 @@
     ADIO_Offset off;
     ADIOI_Flatlist_node *flat_file;
     int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int size_in_filetype, sum;
+    ADIO_Offset abs_off_in_filetype=0, sum;
+    int size_in_filetype;
     int filetype_size, etype_size, filetype_is_contig;
     MPI_Aint filetype_extent;
 
@@ -54,6 +54,7 @@
 	}
 
 	n_etypes_in_filetype = filetype_size/etype_size;
+  ADIOI_Assert((offset / n_etypes_in_filetype) == (int) (offset / n_etypes_in_filetype));
 	n_filetypes = (int) (offset / n_etypes_in_filetype);
 	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
 	size_in_filetype = etype_in_filetype * etype_size;
@@ -70,7 +71,7 @@
 	}
 
 	/* abs. offset in bytes in the file */
-	off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent +
+	off = fd->disp + (ADIO_Offset)n_filetypes * (ADIO_Offset)filetype_extent +
                 abs_off_in_filetype;
     }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c	2010-11-16 09:16:34.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -23,7 +23,7 @@
 	    nprocs, fd->filename);
     FPRINTF(stdout, "[%d/%d]    writing (buf = %p, loc = %lld, sz = %lld)\n",
 	    myrank, nprocs, buf, (long long) offset, 
-	    (long long) datatype_size * count);
+	    (long long)datatype_size * (long long)count);
 
     if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
     {
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c	2010-11-16 09:16:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_UFS_operations = {
     ADIOI_UFS_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* OpenColl */
     ADIOI_GEN_ReadContig, /* ReadContig */
     ADIOI_GEN_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -38,4 +39,5 @@
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_GEN_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature, /* Features */
 };
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,6 +12,7 @@
 
 struct ADIOI_Fns_struct ADIO_XFS_operations = {
     ADIOI_XFS_Open, /* Open */
+    ADIOI_GEN_OpenColl, /* OpenColl */
     ADIOI_XFS_ReadContig, /* ReadContig */
     ADIOI_XFS_WriteContig, /* WriteContig */
     ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -22,15 +23,21 @@
     ADIOI_GEN_ReadStrided, /* ReadStrided */
     ADIOI_GEN_WriteStrided, /* WriteStrided */
     ADIOI_GEN_Close, /* Close */
-    ADIOI_XFS_IreadContig, /* IreadContig */
-    ADIOI_XFS_IwriteContig, /* IwriteContig */
-    ADIOI_XFS_ReadDone, /* ReadDone */
-    ADIOI_XFS_WriteDone, /* WriteDone */
-    ADIOI_XFS_ReadComplete, /* ReadComplete */
-    ADIOI_XFS_WriteComplete, /* WriteComplete */
+#if defined(ROMIO_HAVE_WORKING_AIO)
+    ADIOI_GEN_IreadContig, /* IreadContig */
+    ADIOI_GEN_IwriteContig, /* IwriteContig */
+#else
+    ADIOI_FAKE_IreadContig, /* IreadContig */
+    ADIOI_FAKE_IwriteContig, /* IwriteContig */
+#endif /* ROMIO_HAVE_WORKING_AIO */
+    ADIOI_GEN_IODone, /* ReadDone */
+    ADIOI_GEN_IODone, /* WriteDone */
+    ADIOI_GEN_IOComplete, /* ReadComplete */
+    ADIOI_GEN_IOComplete, /* WriteComplete */
     ADIOI_GEN_IreadStrided, /* IreadStrided */
     ADIOI_GEN_IwriteStrided, /* IwriteStrided */
     ADIOI_GEN_Flush, /* Flush */
     ADIOI_XFS_Resize, /* Resize */
     ADIOI_GEN_Delete, /* Delete */
+    ADIOI_GEN_Feature, /* Features */
 };
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs: ad_xfs_done.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,6 +7,11 @@
 
 #include "ad_xfs.h"
 #include "adio_extern.h"
+#include <sys/ioctl.h>
+
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
 
 void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
 {
@@ -37,7 +42,7 @@
 	fl.l_len = fcntl_struct->diskspace;
 
 #if defined(LINUX) && defined(MPISGI)
-	err = fcntl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
+	err = ioctl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
 #else
 	err = fcntl(fd->fd_sys, F_RESVSP64, &fl);
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h	2010-11-15 15:02:47.000000000 +0100
@@ -8,20 +8,19 @@
 #ifndef AD_XFS_INCLUDE
 #define AD_XFS_INCLUDE
 
+#define _XOPEN_SOURCE 500
 #include <unistd.h>
 #include <sys/types.h>
 #include <fcntl.h>
 #include "adio.h"
-#include <aio.h>
 
-int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
-		  int wr, void *handle);
-
-#if (defined(HAVE_PREAD64) && (_ABIO32 == 1))
-#  define pread pread64
-#  define pwrite pwrite64
+#if defined(MPISGI)
+#include "xfs/xfs_fs.h"
+#ifndef  __USE_LARGEFILE64
+#define  __USE_LARGEFILE64
+#endif
+typedef struct aiocb64 aiocb64_t;
 #endif
-/* above needed for IRIX 6.5 */
 
 void ADIOI_XFS_Open(ADIO_File fd, int *error_code);
 void ADIOI_XFS_Close(ADIO_File fd, int *error_code);
@@ -33,22 +32,6 @@
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
 		      *error_code);   
-void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count, 
-                      MPI_Datatype datatype, int file_ptr_type,
-                      ADIO_Offset offset, ADIO_Request *request, int
-		      *error_code);   
-void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count, 
-                      MPI_Datatype datatype, int file_ptr_type,
-                      ADIO_Offset offset, ADIO_Request *request, int
-		      *error_code);   
-int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
-		       *error_code);
-int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
-		       *error_code);
-void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
-		       *error_code); 
-void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
-			int *error_code); 
 void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
 		*error_code);
 void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,36 +8,76 @@
 #include "ad_xfs.h"
 #include "adio_extern.h"
 
+static unsigned xfs_direct_read_chunk_size;
+static unsigned xfs_direct_write_chunk_size;
+
 void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
 {
-    char *value;
+    char *value, * c;
     int flag;
+    static char xfs_initialized = 0;
 
     if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
 
-    /* the nightly builds say somthing is calling MPI_Info_set w/ a null info,
-     * so protect the calls to MPI_Info_set */
-    if (fd->info != MPI_INFO_NULL ) {
-	    MPI_Info_set(fd->info, "direct_read", "false");
-	    MPI_Info_set(fd->info, "direct_write", "false");
-	    fd->direct_read = fd->direct_write = 0;
-    }
-	
-    /* has user specified values for keys "direct_read" and "direct wirte"? */
+    ADIOI_Info_set(fd->info, "direct_read", "false");
+    ADIOI_Info_set(fd->info, "direct_write", "false");
+    fd->direct_read = fd->direct_write = 0;
+
+	if (!xfs_initialized) {
+		xfs_initialized = 1;
+		c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
+		if (c) {
+			int io;
+			io = atoi(c);
+			if (io <= 0) {
+				fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
+"     It must be set to a positive integer value.\n");
+			} else {
+				xfs_direct_read_chunk_size = io;
+			}
+		} else {
+			xfs_direct_read_chunk_size = 0;
+		}
+
+		c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
+		if (c) {
+			int io;
+			io = atoi(c);
+			if (io <= 0) {
+				fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
+"     It must be set to a positive integer value.\n");
+			} else {
+				xfs_direct_write_chunk_size = io;
+			}
+		} else {
+			xfs_direct_write_chunk_size = 0;
+		}
+	}
+
+	if (!fd->hints->initialized) {
+		fd->hints->fs_hints.xfs.read_chunk_sz =
+			xfs_direct_read_chunk_size;
+		fd->hints->fs_hints.xfs.write_chunk_sz =
+			xfs_direct_write_chunk_size;
+	}
+
+    /* has user specified values for keys "direct_read" and "direct write"? */
     if (users_info != MPI_INFO_NULL) {
 	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 
-	MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	if (flag && !strcmp(value, "true")) {
-	    MPI_Info_set(fd->info, "direct_read", "true");
+	    ADIOI_Info_set(fd->info, "direct_read", "true");
 	    fd->direct_read = 1;
 	}
 
-	MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL, 
 			 value, &flag);
 	if (flag && !strcmp(value, "true")) {
-	    MPI_Info_set(fd->info, "direct_write", "true");
+	    ADIOI_Info_set(fd->info, "direct_write", "true");
 	    fd->direct_write = 1;
 	}
 
@@ -47,8 +87,10 @@
     /* set the values for collective I/O and data sieving parameters */
     ADIOI_GEN_SetInfo(fd, users_info, error_code);
 
+    /* Environment variables override MPI_Info hints */
     if (ADIOI_Direct_read) fd->direct_read = 1;
     if (ADIOI_Direct_write) fd->direct_write = 1;
+
     /* environment variables checked in ADIO_Init */
 
     *error_code = MPI_SUCCESS;
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs: ad_xfs_iread.c
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs: ad_xfs_iwrite.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -5,22 +5,26 @@
  *   See COPYRIGHT notice in top-level directory.
  */
 
+#define _GNU_SOURCE          /* for O_DIRECT */
+
 #include "ad_xfs.h"
+#include <sys/ioctl.h>
 #ifdef HAVE_STDDEF_H
 #include <stddef.h>
 #endif
 
-#if defined(MPISGI)
-#include <mpitypedefs.h>
-#include <mpifunctions.h>
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
 #endif
 
 void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
 {
-    int perm, amode, amode_direct;
+    int perm, amode, amode_direct, factor;
     unsigned int old_mask;
     struct dioattr st;
     static char myname[] = "ADIOI_XFS_OPEN";
+    unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
+    unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
 
     if (fd->perm == ADIO_PERM_NULL) {
 	old_mask = umask(022);
@@ -49,7 +53,7 @@
     fd->fd_direct = open(fd->filename, amode_direct, perm);
     if (fd->fd_direct != -1) {
 
-#if defined(LINUX) && defined(MPISGI)
+#if defined(MPISGI)
 	ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
 #else
 	fcntl(fd->fd_direct, F_DIOINFO, &st);
@@ -57,7 +61,34 @@
 
 	fd->d_mem = st.d_mem;
 	fd->d_miniosz = st.d_miniosz;
-	fd->d_maxiosz = st.d_maxiosz;
+
+	if (read_chunk_sz == 0) {
+		fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
+	} else {
+		/*
+		 * MPIO_DIRECT_READ_CHUNK_SIZE was set.
+		 * Make read_chunk_sz a multiple of d_miniosz.
+		 */
+		factor = read_chunk_sz / fd->d_miniosz;
+		if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
+			fd->hints->fs_hints.xfs.read_chunk_sz =
+				fd->d_miniosz * (factor + 1);
+		}
+	}
+
+	if (write_chunk_sz == 0) {
+		fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
+	} else {
+		/*
+		 * MPIO_DIRECT_WRITE_CHUNK_SIZE was set. 
+		 * Make write_chunk_sz a multiple of d_miniosz.
+		 */
+		factor = write_chunk_sz / fd->d_miniosz;
+		if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
+			fd->hints->fs_hints.xfs.write_chunk_sz =
+				fd->d_miniosz * (factor + 1);
+		}
+	}
 
 	if (fd->d_mem > XFS_MEMALIGN) {
 	    FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -63,7 +63,7 @@
 		    ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
 		    if (err > 0) memcpy(buf, newbuf, err);
 		    nbytes += err;
-		    free(newbuf);
+		    ADIOI_Free(newbuf);
 		}
 		else nbytes += pread(fd->fd_sys, buf, size, offset);
 	    }
@@ -77,7 +77,7 @@
 	    if (newbuf) {
 		ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, len, offset, &err);
 		if (err > 0) memcpy(buf, newbuf, err);
-		free(newbuf);
+		ADIOI_Free(newbuf);
 	    }
 	    else err = pread(fd->fd_sys, buf, len, offset);
 	}
@@ -102,6 +102,7 @@
               ADIO_Offset offset, int *err)
 {
     int ntimes, rem, newrem, i, size, nbytes;
+    unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
 
     /* memory buffer is aligned, offset in file is aligned,
        io_size may or may not be of the right size.
@@ -109,33 +110,33 @@
        use buffered I/O for remaining. */
 
     if (!(len % fd->d_miniosz) && 
-	(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
+	(len >= fd->d_miniosz) && (len <= read_chunk_sz))
 	*err = pread(fd->fd_direct, buf, len, offset);
     else if (len < fd->d_miniosz)
 	*err = pread(fd->fd_sys, buf, len, offset);
-    else if (len > fd->d_maxiosz) {
-	ntimes = len/(fd->d_maxiosz);
-	rem = len - ntimes * fd->d_maxiosz;
+    else if (len > read_chunk_sz) {
+	ntimes = len/(read_chunk_sz);
+	rem = len - ntimes * read_chunk_sz;
 	nbytes = 0;
 	for (i=0; i<ntimes; i++) {
-	    nbytes += pread(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
-			 fd->d_maxiosz, offset);
-	    offset += fd->d_maxiosz;
+	    nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz,
+			 read_chunk_sz, offset);
+	    offset += read_chunk_sz;
 	}
 	if (rem) {
 	    if (!(rem % fd->d_miniosz))
 		nbytes += pread(fd->fd_direct, 
-		     ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
+		     ((char *)buf) + ntimes * read_chunk_sz, rem, offset);
 	    else {
 		newrem = rem % fd->d_miniosz;
 		size = rem - newrem;
 		if (size) {
 		    nbytes += pread(fd->fd_direct, 
-		         ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+		         ((char *)buf) + ntimes * read_chunk_sz, size, offset);
 		    offset += size;
 		}
 		nbytes += pread(fd->fd_sys, 
-	              ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+	              ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset);
 	    }
 	}
 	*err = nbytes;
Only in ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs: ad_xfs_wait.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -13,14 +13,15 @@
 
 /* style: allow:free:2 sig:0 */
 
-static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, 
-					     ADIO_Offset offset, int *err);
+static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf,
+						  ADIO_Offset len, ADIO_Offset offset);
 
 void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count, 
                      MPI_Datatype datatype, int file_ptr_type,
 		     ADIO_Offset offset, ADIO_Status *status, int *error_code)
 {
-    int err=-1, datatype_size, len, diff, size, nbytes;
+    int err=-1, datatype_size, diff, size;
+    ssize_t len;
     void *newbuf;
     static char myname[] = "ADIOI_XFS_WRITECONTIG";
 
@@ -31,44 +32,48 @@
 
     if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
 
-    if (!(fd->direct_write))     /* direct I/O not enabled */
+    if (!(fd->direct_write)) {    /* direct I/O not enabled */
 	err = pwrite(fd->fd_sys, buf, len, offset);
-    else {       /* direct I/O enabled */
+	if (err < 0) {goto leaving;}
+    } else {       /* direct I/O enabled */
 
 	/* (1) if mem_aligned && file_aligned 
                     use direct I/O to write up to correct io_size
                     use buffered I/O for remaining  */
 
-	if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) 
-	    ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err);
+	if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) {
+	    err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset);
+	    if (err < 0) {goto leaving;}
 
         /* (2) if !file_aligned
                     use buffered I/O to write up to file_aligned
                     At that point, if still mem_aligned, use (1)
    		        else copy into aligned buf and then use (1) */
-	else if (offset % fd->d_miniosz) {
+	} else if (offset % fd->d_miniosz) {
 	    diff = fd->d_miniosz - (offset % fd->d_miniosz);
 	    diff = ADIOI_MIN(diff, len);
-	    nbytes = pwrite(fd->fd_sys, buf, diff, offset);
+	    err = pwrite(fd->fd_sys, buf, diff, offset);
+	    if (err < 0) {goto leaving;}
 
 	    buf = ((char *) buf) + diff;
 	    offset += diff;
 	    size = len - diff;
 	    if (!(((long) buf) % fd->d_mem)) {
-		ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
-		nbytes += err;
+		err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset);
+		if (err < 0) {goto leaving;}
 	    }
 	    else {
 		newbuf = (void *) memalign(XFS_MEMALIGN, size);
 		if (newbuf) {
 		    memcpy(newbuf, buf, size);
-		    ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
-		    nbytes += err;
-		    free(newbuf);
+		    err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset);
+		    ADIOI_Free(newbuf);
+		    if (err < 0) {goto leaving;}
+		} else {
+		    err = pwrite(fd->fd_sys, buf, size, offset);
+		    if (err < 0) {goto leaving;}
 		}
-		else nbytes += pwrite(fd->fd_sys, buf, size, offset);
 	    }
-	    err = nbytes;
 	}
 
         /* (3) if !mem_aligned && file_aligned
@@ -77,19 +82,22 @@
 	    newbuf = (void *) memalign(XFS_MEMALIGN, len);
 	    if (newbuf) {
 		memcpy(newbuf, buf, len);
-		ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err);
-		free(newbuf);
+		err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset);
+		ADIOI_Free(newbuf);
+	    } else {
+		 err = pwrite(fd->fd_sys, buf, len, offset);
 	    }
-	    else err = pwrite(fd->fd_sys, buf, len, offset);
+
+	    if (err < 0) {goto leaving;}
 	}
     }
 
-    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err;
+    if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
 
 #ifdef HAVE_STATUS_SET_BYTES
-    if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
+    if (err != -1) MPIR_Status_set_bytes(status, datatype, len);
 #endif
-
+leaving:
     if (err == -1) {
 	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
 					   myname, __LINE__, MPI_ERR_IO, "**io",
@@ -99,10 +107,13 @@
 }
 
 
-void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len, 
-              ADIO_Offset offset, int *err)
+static int
+ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len, 
+              ADIO_Offset offset)
 {
-    int ntimes, rem, newrem, i, size, nbytes;
+    unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
+    ADIO_Offset nbytes, rem, newrem, size;
+    int ntimes, i;
 
     /* memory buffer is aligned, offset in file is aligned,
        io_size may or may not be of the right size.
@@ -110,42 +121,50 @@
        use buffered I/O for remaining. */
 
     if (!(len % fd->d_miniosz) && 
-	(len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
-	*err = pwrite(fd->fd_direct, buf, len, offset);
-    else if (len < fd->d_miniosz)
-	*err = pwrite(fd->fd_sys, buf, len, offset);
-    else if (len > fd->d_maxiosz) {
-	ntimes = len/(fd->d_maxiosz);
-	rem = len - ntimes * fd->d_maxiosz;
+	 (len >= fd->d_miniosz) && (len <= write_chunk_sz)) {
+	nbytes = pwrite(fd->fd_direct, buf, len, offset);
+	if (nbytes < 0) {return -1;}
+    } else if (len < fd->d_miniosz) {
+	nbytes = pwrite(fd->fd_sys, buf, len, offset);
+	if (nbytes < 0) {return -1;}
+    } else if (len > write_chunk_sz) {
+	ntimes = len/(write_chunk_sz);
+	rem = len - ntimes * write_chunk_sz;
 	nbytes = 0;
 	for (i=0; i<ntimes; i++) {
-	    nbytes += pwrite(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
-			 fd->d_maxiosz, offset);
-	    offset += fd->d_maxiosz;
+	    nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz,
+			 write_chunk_sz, offset);
+	    offset += write_chunk_sz;
+	    if (nbytes < 0) {return -1;}
 	}
 	if (rem) {
-	    if (!(rem % fd->d_miniosz))
-		nbytes += pwrite(fd->fd_direct, 
-		             ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
-	    else {
+	    if (!(rem % fd->d_miniosz)) {
+		nbytes = pwrite(fd->fd_direct, 
+		             ((char *)buf) + ntimes * write_chunk_sz, rem, offset);
+		if (nbytes < 0) {return -1;}
+	    } else {
 		newrem = rem % fd->d_miniosz;
 		size = rem - newrem;
 		if (size) {
-		    nbytes += pwrite(fd->fd_direct, 
-		            ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+		    nbytes = pwrite(fd->fd_direct, 
+		            ((char *)buf) + ntimes * write_chunk_sz, size, offset);
 		    offset += size;
+		    if (nbytes < 0) {return -1;}
 		}
-		nbytes += pwrite(fd->fd_sys, 
-	              ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+		nbytes = pwrite(fd->fd_sys, 
+	              ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset);
+		if (nbytes < 0) {return -1;}
 	    }
 	}
-	*err = nbytes;
     }
     else {
 	rem = len % fd->d_miniosz;
 	size = len - rem;
 	nbytes = pwrite(fd->fd_direct, buf, size, offset);
-	nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
-	*err = nbytes;
+	if (nbytes < 0) {return -1;}
+	nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
+	if (nbytes < 0) {return -1;}
     }
+
+    return 0;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am	2010-11-16 09:16:25.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am	2010-11-15 15:03:31.000000000 +0100
@@ -22,13 +22,9 @@
 libadio_xfs_la_SOURCES = \
         ad_xfs.c \
         ad_xfs.h \
-        ad_xfs_done.c \
         ad_xfs_fcntl.c \
         ad_xfs_hints.c \
-        ad_xfs_iread.c \
-        ad_xfs_iwrite.c \
         ad_xfs_open.c \
         ad_xfs_read.c \
         ad_xfs_resize.c \
-        ad_xfs_wait.c \
         ad_xfs_write.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio: ad_zoidfs
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c	2010-11-15 15:03:31.000000000 +0100
@@ -7,6 +7,10 @@
 #include "adio.h"
 #include "adio_extern.h"
 
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
 #undef AGG_DEBUG
 
 /* This file contains four functions:
@@ -79,21 +83,26 @@
 
     ADIOI_UNREFERENCED_ARG(fd_start);
 
-#ifdef AGG_DEBUG
-#if 0
-    FPRINTF(stdout, "off = %lld, min_off = %lld, len = %lld, fd_size = %lld\n",
-	    off, min_off, *len, fd_size);
-#endif
-#endif
-    
     /* get an index into our array of aggregators */
     rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
 
+    if (fd->hints->striping_unit > 0) {
+        /* wkliao: file domain alignment.  fd_start[] and fd_end[] were
+           aligned to file lock boundaries by ADIOI_Calc_file_domains(), so
+           the simple arithmetic above cannot be used.  Scan fd_end[], but
+           never past cb_nodes entries; the check below reports overruns. */
+        rank_index = 0;
+        while (rank_index < fd->hints->cb_nodes && off > fd_end[rank_index]) rank_index++;
+    }
+
     /* we index into fd_end with rank_index, and fd_end was allocated to be no
      * bigger than fd->hins->cb_nodes.   If we ever violate that, we're
      * overrunning arrays.  Obviously, we should never ever hit this abort */
-    if (rank_index >= fd->hints->cb_nodes)
-	    MPI_Abort(MPI_COMM_WORLD, 1);
+    if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
+        FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
+			rank_index, fd->hints->cb_nodes, (long long) fd_size, (long long) off); /* ADIO_Offset need not be long long */
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
 
     /* remember here that even in Rajeev's original code it was the case that
      * different aggregators could end up with different amounts of data to
@@ -119,19 +128,21 @@
 			     *end_offsets, int nprocs, int nprocs_for_coll,
 			     ADIO_Offset *min_st_offset_ptr,
 			     ADIO_Offset **fd_start_ptr, ADIO_Offset 
-			     **fd_end_ptr, ADIO_Offset *fd_size_ptr)
+			     **fd_end_ptr, int min_fd_size, 
+			     ADIO_Offset *fd_size_ptr,
+			     int striping_unit)
 {
 /* Divide the I/O workload among "nprocs_for_coll" processes. This is
    done by (logically) dividing the file into file domains (FDs); each
    process may directly access only its own file domain. */
 
-	/* XXX: one idea: tweak the file domains so that no fd is smaller than
-	 * a threshold (one presumably well-suited to a file system).  We don't
-	 * do that, but this routine would be the place for it */
-
     ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, fd_size;
     int i;
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5004, 0, NULL);
+#endif
+
 #ifdef AGG_DEBUG
     FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n", 
 	    nprocs_for_coll);
@@ -156,6 +167,14 @@
 	       1)/nprocs_for_coll; 
     /* ceiling division as in HPF block distribution */
 
+    /* Tweak the file domains so that no fd is smaller than a threshold.  We
+     * have to strike a balance between efficiency and parallelism: somewhere
+     * between 10k processes sending 32-byte requests and one process sending a
+     * 320k request is a (system-dependent) sweet spot */
+
+    if (fd_size < min_fd_size)
+	fd_size = min_fd_size;
+
     *fd_start_ptr = (ADIO_Offset *)
 	ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset)); 
     *fd_end_ptr = (ADIO_Offset *)
@@ -164,12 +183,46 @@
     fd_start = *fd_start_ptr;
     fd_end = *fd_end_ptr;
 
-    fd_start[0] = min_st_offset;
-    fd_end[0] = min_st_offset + fd_size - 1;
-
-    for (i=1; i<nprocs_for_coll; i++) {
-	fd_start[i] = fd_end[i-1] + 1;
-	fd_end[i] = fd_start[i] + fd_size - 1;
+    /* Wei-keng Liao: implementation for file domain alignment to nearest file
+     * lock boundary (as specified by striping_unit hint).  Could also
+     * experiment with other alignment strategies here */
+    if (striping_unit > 0) {
+        ADIO_Offset end_off;
+        int         rem_front, rem_back;
+
+        /* align fd_end[0] to the nearest file lock boundary */
+        fd_start[0] = min_st_offset;
+        end_off     = fd_start[0] + fd_size;
+        rem_front   = end_off % striping_unit;
+        rem_back    = striping_unit - rem_front;
+        if (rem_front < rem_back) 
+		end_off -= rem_front;
+        else                      
+		end_off += rem_back;
+        fd_end[0] = end_off - 1;
+    
+        /* align fd_end[i] to the nearest file lock boundary */
+        for (i=1; i<nprocs_for_coll; i++) {
+            fd_start[i] = fd_end[i-1] + 1;
+            end_off     = min_st_offset + fd_size * (i+1);
+            rem_front   = end_off % striping_unit;
+            rem_back    = striping_unit - rem_front;
+            if (rem_front < rem_back) 
+		    end_off -= rem_front;
+            else                      
+		    end_off += rem_back;
+            fd_end[i] = end_off - 1;
+        }
+        fd_end[nprocs_for_coll-1] = max_end_offset;
+    }
+    else { /* no hints set: do things the 'old' way */
+        fd_start[0] = min_st_offset;
+        fd_end[0] = min_st_offset + fd_size - 1;
+
+        for (i=1; i<nprocs_for_coll; i++) {
+            fd_start[i] = fd_end[i-1] + 1;
+            fd_end[i] = fd_start[i] + fd_size - 1;
+        }
     }
 
 /* take care of cases in which the total file access range is not
@@ -187,6 +240,10 @@
 
     *fd_size_ptr = fd_size;
     *min_st_offset_ptr = min_st_offset;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5005, 0, NULL);
+#endif
 }
 
 
@@ -194,7 +251,7 @@
  * of this process are located in the file domains of various processes
  * (including this one)
  */
-void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list, 
+void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list, 
 		       int contig_access_count, ADIO_Offset 
 		       min_st_offset, ADIO_Offset *fd_start,
 		       ADIO_Offset *fd_end, ADIO_Offset fd_size,
@@ -203,12 +260,18 @@
 		       int **count_my_req_per_proc_ptr,
 		       ADIOI_Access **my_req_ptr,
 		       int **buf_idx_ptr)
+/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets? 
+   They are used as memory buffer indices so it seems like the 2G limit is in effect */
 {
     int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
     int i, l, proc;
     ADIO_Offset fd_len, rem_len, curr_idx, off;
     ADIOI_Access *my_req;
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5024, 0, NULL);
+#endif
+
     *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int)); 
     count_my_req_per_proc = *count_my_req_per_proc_ptr;
 /* count_my_req_per_proc[i] gives the no. of contig. requests of this
@@ -293,10 +356,14 @@
 				     fd_start, fd_end);
 
 	/* for each separate contiguous access from this process */
-	if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
+	if (buf_idx[proc] == -1) 
+  {
+    ADIOI_Assert(curr_idx == (int) curr_idx);
+    buf_idx[proc] = (int) curr_idx;
+  }
 
 	l = my_req[proc].count;
-	curr_idx += (int) fd_len; /* NOTE: Why is curr_idx an int?  Fix? */
+	curr_idx += fd_len; 
 
 	rem_len = len_list[i] - fd_len;
 
@@ -306,6 +373,7 @@
 	 * and the associated count. 
 	 */
 	my_req[proc].offsets[l] = off;
+  ADIOI_Assert(fd_len == (int) fd_len);
 	my_req[proc].lens[l] = (int) fd_len;
 	my_req[proc].count++;
 
@@ -315,13 +383,18 @@
 	    proc = ADIOI_Calc_aggregator(fd, off, min_st_offset, &fd_len, 
 					 fd_size, fd_start, fd_end);
 
-	    if (buf_idx[proc] == -1) buf_idx[proc] = (int) curr_idx;
+	    if (buf_idx[proc] == -1) 
+      {
+        ADIOI_Assert(curr_idx == (int) curr_idx);
+        buf_idx[proc] = (int) curr_idx;
+      }
 
 	    l = my_req[proc].count;
 	    curr_idx += fd_len;
 	    rem_len -= fd_len;
 
 	    my_req[proc].offsets[l] = off;
+      ADIOI_Assert(fd_len == (int) fd_len);
 	    my_req[proc].lens[l] = (int) fd_len;
 	    my_req[proc].count++;
 	}
@@ -336,17 +409,16 @@
 		FPRINTF(stdout, "   off[%d] = %lld, len[%d] = %d\n", l,
 			my_req[i].offsets[l], l, my_req[i].lens[l]);
 	    }
-	}
-    }
-#if 0
-    for (i=0; i<nprocs; i++) {
 	FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
+	}
     }
 #endif
-#endif
 
     *count_my_req_procs_ptr = count_my_req_procs;
     *buf_idx_ptr = buf_idx;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5025, 0, NULL);
+#endif
 }
 
 
@@ -373,7 +445,9 @@
     ADIOI_Access *others_req;
 
 /* first find out how much to send/recv and from/to whom */
-
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5026, 0, NULL);
+#endif
     count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
 
     MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
@@ -437,4 +511,7 @@
     ADIOI_Free(count_others_req_per_proc);
 
     *count_others_req_procs_ptr = count_others_req_procs;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5027, 0, NULL);
+#endif
 }
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_aggregate_new.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_close.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_close.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_close.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_close.c	2010-11-15 15:02:47.000000000 +0100
@@ -63,7 +63,29 @@
 
     if (fd->hints && fd->hints->ranklist) ADIOI_Free(fd->hints->ranklist);
     if (fd->hints && fd->hints->cb_config_list) ADIOI_Free(fd->hints->cb_config_list);
+
+    /* Persistent File Realms: release realm datatypes (hints may be NULL) */
+    if (fd->hints && fd->hints->cb_pfr == ADIOI_HINT_ENABLE) {
+	/* AAR, FSIZE, and User provided uniform File realms */
+	if (1) { /* NOTE(review): constant condition, the else branch is dead */
+	    ADIOI_Delete_flattened (fd->file_realm_types[0]);
+	    MPI_Type_free (&fd->file_realm_types[0]);
+	}
+	else {
+	    for (i=0; i<fd->hints->cb_nodes; i++) {
+		ADIOI_Datatype_iscontig(fd->file_realm_types[i], &is_contig);
+		if (!is_contig)
+		    ADIOI_Delete_flattened(fd->file_realm_types[i]);
+		MPI_Type_free (&fd->file_realm_types[i]);
+	    }
+	}
+	ADIOI_Free(fd->file_realm_st_offs);
+	ADIOI_Free(fd->file_realm_types);
+    }
     if (fd->hints) ADIOI_Free(fd->hints);
+
+
+
     MPI_Comm_free(&(fd->comm));
     /* deferred open: if we created an aggregator communicator, free it */
     if (fd->agg_comm != MPI_COMM_NULL) {
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_coll_build_req_new.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_coll_exch_new.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_darray.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_darray.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_darray.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_darray.c	2010-11-15 15:02:47.000000000 +0100
@@ -81,7 +81,7 @@
 	tmp_size = 1;
 	for (i=1; i<ndims; i++) {
 	    tmp_size *= array_of_gsizes[i-1];
-	    disps[1] += tmp_size*st_offsets[i];
+	    disps[1] += (MPI_Aint)tmp_size*st_offsets[i];
 	}
         /* rest done below for both Fortran and C order */
     }
@@ -119,14 +119,14 @@
 	tmp_size = 1;
 	for (i=ndims-2; i>=0; i--) {
 	    tmp_size *= array_of_gsizes[i+1];
-	    disps[1] += tmp_size*st_offsets[i];
+	    disps[1] += (MPI_Aint)tmp_size*st_offsets[i];
 	}
     }
 
     disps[1] *= orig_extent;
 
     disps[2] = orig_extent;
-    for (i=0; i<ndims; i++) disps[2] *= array_of_gsizes[i];
+    for (i=0; i<ndims; i++) disps[2] *= (MPI_Aint)array_of_gsizes[i];
 	
     disps[0] = 0;
     blklens[0] = blklens[1] = blklens[2] = 1;
@@ -183,7 +183,7 @@
 	if (dim == 0) 
 	    MPI_Type_contiguous(mysize, type_old, type_new);
 	else {
-	    for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
+	    for (i=0; i<dim; i++) stride *= (MPI_Aint)array_of_gsizes[i];
 	    MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
 	}
     }
@@ -191,13 +191,13 @@
 	if (dim == ndims-1) 
 	    MPI_Type_contiguous(mysize, type_old, type_new);
 	else {
-	    for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
+	    for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
 	    MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
 	}
 
     }
 
-    *st_offset = blksize * rank;
+    *st_offset = (MPI_Aint)blksize * (MPI_Aint)rank;
      /* in terms of no. of elements of type oldtype in this dimension */
     if (mysize == 0) *st_offset = 0;
 
@@ -241,10 +241,10 @@
     count = local_size/blksize;
     rem = local_size % blksize;
     
-    stride = nprocs*blksize*orig_extent;
+    stride = (MPI_Aint)nprocs*(MPI_Aint)blksize*orig_extent;
     if (order == MPI_ORDER_FORTRAN)
-	for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
-    else for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
+	for (i=0; i<dim; i++) stride *= (MPI_Aint)array_of_gsizes[i];
+    else for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
 
     MPI_Type_hvector(count, blksize, stride, type_old, type_new);
 
@@ -255,7 +255,7 @@
 	types[0] = *type_new;
 	types[1] = type_old;
 	disps[0] = 0;
-	disps[1] = count*stride;
+	disps[1] = (MPI_Aint)count*stride;
 	blklens[0] = 1;
 	blklens[1] = rem;
 
@@ -272,9 +272,9 @@
         types[0] = MPI_LB;
         disps[0] = 0;
         types[1] = *type_new;
-        disps[1] = rank * blksize * orig_extent;
+        disps[1] = (MPI_Aint)rank * (MPI_Aint)blksize * orig_extent;
         types[2] = MPI_UB;
-        disps[2] = orig_extent * array_of_gsizes[dim];
+        disps[2] = orig_extent * (MPI_Aint)array_of_gsizes[dim];
         blklens[0] = blklens[1] = blklens[2] = 1;
         MPI_Type_struct(3, blklens, disps, types, &type_tmp);
         MPI_Type_free(type_new);
@@ -284,7 +284,7 @@
                             the struct above */
     }
     else {
-        *st_offset = rank * blksize; 
+        *st_offset = (MPI_Aint)rank * (MPI_Aint)blksize; 
         /* st_offset is in terms of no. of elements of type oldtype in
          * this dimension */ 
     }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_end.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_end.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_end.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_end.c	2010-11-15 15:02:47.000000000 +0100
@@ -17,6 +17,10 @@
     
 /*    FPRINTF(stderr, "reached end\n"); */
 
+    /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure
+     * that our reference to that errhandler is released */
+    PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN);
+
 /* delete the flattened datatype list */
     curr = ADIOI_Flatlist;
     while (curr) {
@@ -39,7 +43,7 @@
     datarep = ADIOI_Datarep_head;
     while (datarep) {
         datarep_next = datarep->next;
-#ifdef MPICH2
+#ifdef HAVE_MPIU_FUNCS
         MPIU_Free(datarep->name);
 #else
         ADIOI_Free(datarep->name);
@@ -48,13 +52,16 @@
         datarep = datarep_next;
     }
 
+    if( ADIOI_syshints != MPI_INFO_NULL)
+	    MPI_Info_free(&ADIOI_syshints);
+
     *error_code = MPI_SUCCESS;
 }
 
 
 
 /* This is the delete callback function associated with
-   ADIO_Init_keyval when MPI_COMM_WORLD is freed */
+   ADIO_Init_keyval when MPI_COMM_SELF is freed */
 
 int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void
 		  *extra_state)
@@ -62,10 +69,10 @@
     int error_code;
 
     ADIOI_UNREFERENCED_ARG(comm);
-    ADIOI_UNREFERENCED_ARG(keyval);
     ADIOI_UNREFERENCED_ARG(attribute_val);
     ADIOI_UNREFERENCED_ARG(extra_state);
 
+    MPI_Keyval_free(&keyval);
     ADIO_End(&error_code);
     return error_code;
 }
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_features.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_fstype.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_fstype.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_fstype.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_fstype.c	2010-11-15 15:02:47.000000000 +0100
@@ -26,6 +26,10 @@
 #include "pvfs2.h"
 #endif
 
+#ifdef HAVE_ZOIDFS_H
+#include "zoidfs.h"
+#endif
+
 /* Notes on detection process:
  *
  * There are three more "general" mechanisms that we use for detecting
@@ -298,7 +302,7 @@
     /* --END ERROR HANDLING-- */
 #endif /* STATVFS APPROACH */
 
-#if defined(HAVE_STRUCT_STATFS) && defined(HAVE_STATFS)
+#ifdef HAVE_STRUCT_STATFS
     do {
 	err = statfs(filename, &fsbuf);
     } while (err && (errno == ESTALE));
@@ -342,14 +346,15 @@
     }
 # endif
 
-/*#if defined(LINUX) && defined(ROMIO_LUSTRE)*/
 #ifdef ROMIO_LUSTRE
-#define LL_SUPER_MAGIC 0x0BD00BD0
+# ifndef LL_SUPER_MAGIC
+#  define LL_SUPER_MAGIC 0x0BD00BD0
+# endif
     if (fsbuf.f_type == LL_SUPER_MAGIC) {
 	*fstype = ADIO_LUSTRE;
 	return;
     }
-# endif
+#endif
 
 # ifdef PAN_KERNEL_FS_CLIENT_SUPER_MAGIC
     if (fsbuf.f_type == PAN_KERNEL_FS_CLIENT_SUPER_MAGIC) {
@@ -386,6 +391,13 @@
     }
 # endif
 
+# ifdef XFS_SUPER_MAGIC
+    if (fsbuf.f_type == XFS_SUPER_MAGIC) {
+	    *fstype = ADIO_XFS;
+	    return;
+    }
+# endif
+
 # ifdef ROMIO_UFS
     /* if UFS support is enabled, default to that */
     *fstype = ADIO_UFS;
@@ -460,6 +472,8 @@
     *error_code = buf[1];
 }
 
+
+
 /*
   ADIO_FileSysType_prefix - determines file system type for a file using 
   a prefix on the file name.  upper layer should have already determined
@@ -511,6 +525,10 @@
     else if (!strncmp(filename, "pvfs2:", 6)||!strncmp(filename, "PVFS2:", 6)) {
 	*fstype = ADIO_PVFS2;
     }
+    else if (!strncmp(filename, "zoidfs:", 7)||
+		    !strncmp(filename, "ZOIDFS:", 7)) {
+	    *fstype = ADIO_ZOIDFS;
+    } 
     else if (!strncmp(filename, "testfs:", 7) 
 	     || !strncmp(filename, "TESTFS:", 7))
     {
@@ -803,6 +821,16 @@
 	*ops = &ADIO_LUSTRE_operations;
 #endif
     }
+    if (file_system == ADIO_ZOIDFS) {
+#ifndef ROMIO_ZOIDFS
+	*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
+					   myname, __LINE__, MPI_ERR_IO,
+					   "**iofstypeunsupported", 0);
+	return;
+#else
+	*ops = &ADIO_ZOIDFS_operations;
+#endif
+    }
     *error_code = MPI_SUCCESS;
     *fstype = file_system;
     return;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_hints.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -19,8 +19,18 @@
     MPI_Info info;
     char *value;
     int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0, len;
+    int ok_to_override_cb_nodes=0;
     static char myname[] = "ADIOI_GEN_SETINFO";
 
+
+    /* if we've already set up default hints and the user has not asked us to
+     * process any hints (MPI_INFO_NULL), then we can short-circuit hint
+     * processing */
+    if (fd->hints->initialized && fd->info == MPI_INFO_NULL) {
+	    *error_code = MPI_SUCCESS;
+	    return;
+    }
+
     if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
     info = fd->info;
 
@@ -37,17 +47,18 @@
      * previously initialized
      */
     if (!fd->hints->initialized) {
+
 	/* buffer size for collective I/O */
-	MPI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); 
+	ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT); 
 	fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT);
 
 	/* default is to let romio automatically decide when to use
 	 * collective buffering
 	 */
-	MPI_Info_set(info, "romio_cb_read", "automatic"); 
+	ADIOI_Info_set(info, "romio_cb_read", "automatic"); 
 	fd->hints->cb_read = ADIOI_HINT_AUTO;
-        MPI_Info_set(info, "romio_cb_write", "automatic"); 
-        fd->hints->cb_write = ADIOI_HINT_AUTO;
+	ADIOI_Info_set(info, "romio_cb_write", "automatic"); 
+	fd->hints->cb_write = ADIOI_HINT_AUTO;
 
 	fd->hints->cb_config_list = NULL;
 
@@ -55,37 +66,71 @@
 	MPI_Comm_size(fd->comm, &nprocs);
 	nprocs_is_valid = 1;
 	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
-	MPI_Info_set(info, "cb_nodes", value);
+	ADIOI_Info_set(info, "cb_nodes", value);
 	fd->hints->cb_nodes = nprocs;
 
 	/* hint indicating that no indep. I/O will be performed on this file */
-	MPI_Info_set(info, "romio_no_indep_rw", "false");
+	ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 	fd->hints->no_indep_rw = 0;
-	 /* deferred_open derrived from no_indep_rw and cb_{read,write} */
+
+	/* hint instructing the use of persistent file realms */
+	ADIOI_Info_set(info, "romio_cb_pfr", "disable");
+	fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+	
+	/* hint guiding the assignment of persistent file realms; the key must match the "romio_cb_fr_type" key read from the user's info */
+	ADIOI_Info_set(info, "romio_cb_fr_type", "aar");
+	fd->hints->cb_fr_type = ADIOI_FR_AAR;
+
+	/* hint to align file realms with a certain byte value */
+	ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
+	fd->hints->cb_fr_alignment = 1;
+
+	/* hint to set a threshold percentage for a datatype's size/extent at
+	 * which data sieving should be done in collective I/O */
+	ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
+	fd->hints->cb_ds_threshold = 0;
+
+	/* hint to switch between point-to-point or all-to-all for two-phase */
+	ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
+	fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+
+	 /* deferred_open derived from no_indep_rw and cb_{read,write} */
 	fd->hints->deferred_open = 0;
 
 	/* buffer size for data sieving in independent reads */
-	MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
+	ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
 	fd->hints->ind_rd_buffer_size = atoi(ADIOI_IND_RD_BUFFER_SIZE_DFLT);
 
 	/* buffer size for data sieving in independent writes */
-	MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
+	ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
 	fd->hints->ind_wr_buffer_size = atoi(ADIOI_IND_WR_BUFFER_SIZE_DFLT);
 
 	/* default is to let romio automatically decide when to use data
 	 * sieving
 	 */
-	MPI_Info_set(info, "romio_ds_read", "automatic"); 
+	ADIOI_Info_set(info, "romio_ds_read", "automatic"); 
 	fd->hints->ds_read = ADIOI_HINT_AUTO;
-	MPI_Info_set(info, "romio_ds_write", "automatic"); 
+	ADIOI_Info_set(info, "romio_ds_write", "automatic"); 
 	fd->hints->ds_write = ADIOI_HINT_AUTO;
 
+	/* still to do: tune this a bit for a variety of file systems. there's
+	 * no good default value so just leave it unset */
+	fd->hints->min_fdomain_size = 0;
+  fd->hints->striping_unit = 0;
+
 	fd->hints->initialized = 1;
+
+	/* ADIO_Open sets up collective buffering arrays.  If we are in this
+	 * path from say set_file_view, then we don't want to adjust the
+	 * array: we'll get a segfault during collective i/o.  We only want to
+	 * look at the user's cb_nodes if it's open time  */
+	ok_to_override_cb_nodes = 1;
+
     }
 
     /* add in user's info if supplied */
     if (users_info != MPI_INFO_NULL) {
-	MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval=atoi(value)) > 0)) {
 	    tmp_val = intval;
@@ -100,30 +145,104 @@
 	    }
 	    /* --END ERROR HANDLING-- */
 
-	    MPI_Info_set(info, "cb_buffer_size", value);
+	    ADIOI_Info_set(info, "cb_buffer_size", value);
 	    fd->hints->cb_buffer_size = intval;
 
 	}
+	/* aligning file realms to certain sizes (e.g. stripe sizes)
+	 * may benefit I/O performance */
+	ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) > 0)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_fr_alignment",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
+	    fd->hints->cb_fr_alignment = intval;
+
+	}
+
+	/* for collective I/O, try to be smarter about when to do data sieving
+	 * using a specific threshold for the datatype size/extent
+	 * (percentage 0-100%) */
+	ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) > 0)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_ds_threshold",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
+	    fd->hints->cb_ds_threshold = intval;
+
+	}
+	ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
+		     &flag);
+	if (flag) {
+	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_ENABLE;
+	    }
+	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_DISABLE;
+	    }
+	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+	    {
+		ADIOI_Info_set(info, "romio_cb_alltoall", value);
+		fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+	    }
+	    /* unrecognized values are ignored; every rank must match rank 0 */
+	    tmp_val = fd->hints->cb_alltoall;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != fd->hints->cb_alltoall) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_alltoall",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+	}
 
 	/* new hints for enabling/disabling coll. buffering on
 	 * reads/writes
 	 */
-	MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
+	ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
+		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_cb_read", value);
 		fd->hints->cb_read = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
 		    /* romio_cb_read overrides no_indep_rw */
-		MPI_Info_set(info, "romio_cb_read", value);
-		MPI_Info_set(info, "romio_no_indep_rw", "false");
+		ADIOI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 		fd->hints->cb_read = ADIOI_HINT_DISABLE;
 		fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_cb_read", value);
+		ADIOI_Info_set(info, "romio_cb_read", value);
 		fd->hints->cb_read = ADIOI_HINT_AUTO;
 	    }
 
@@ -139,25 +258,25 @@
 	    }
 	    /* --END ERROR HANDLING-- */
 	}
-	MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
+	ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_cb_write", value);
 		fd->hints->cb_write = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
 	    {
 		/* romio_cb_write overrides no_indep_rw, too */
-		MPI_Info_set(info, "romio_cb_write", value);
-		MPI_Info_set(info, "romio_no_indep_rw", "false");
+		ADIOI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 		fd->hints->cb_write = ADIOI_HINT_DISABLE;
 		fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") ||
 		     !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_cb_write", value);
+		ADIOI_Info_set(info, "romio_cb_write", value);
 		fd->hints->cb_write = ADIOI_HINT_AUTO;
 	    }
 	
@@ -174,24 +293,79 @@
 	    /* --END ERROR HANDLING-- */
 	}
 
+	/* enable/disable persistent file realms for collective I/O */
+	/* may want to check for no_indep_rdwr hint as well */
+	ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
+		     &flag);
+	if (flag) {
+	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
+	    }
+	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+	    }
+	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+	    {
+		ADIOI_Info_set(info, "romio_cb_pfr", value);
+		fd->hints->cb_pfr = ADIOI_HINT_AUTO;
+	    }
+
+	    tmp_val = fd->hints->cb_pfr;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != fd->hints->cb_pfr) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_pfr",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+	}
+
+	/* file realm assignment types ADIOI_FR_AAR(0),
+	 ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
+	 a regular fr size in bytes. probably not the best way... */
+	ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL, 
+		     value, &flag);
+	if (flag && ((intval=atoi(value)) >= -2)) {
+	    tmp_val = intval;
+
+	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	    /* --BEGIN ERROR HANDLING-- */
+	    if (tmp_val != intval) {
+		MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+						   "romio_cb_fr_type",
+						   error_code);
+		return;
+	    }
+	    /* --END ERROR HANDLING-- */
+
+	    ADIOI_Info_set(info, "romio_cb_fr_type", value);
+	    fd->hints->cb_fr_type = intval;
+
+	}
+
 	/* new hint for specifying no indep. read/write will be performed */
-	MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
+	ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
 		    /* if 'no_indep_rw' set, also hint that we will do
 		     * collective buffering: if we aren't doing independent io,
 		     * then we have to do collective  */
-		MPI_Info_set(info, "romio_no_indep_rw", value);
-		MPI_Info_set(info, "romio_cb_write", "enable");
-		MPI_Info_set(info, "romio_cb_read", "enable");
+		ADIOI_Info_set(info, "romio_no_indep_rw", value);
+		ADIOI_Info_set(info, "romio_cb_write", "enable");
+		ADIOI_Info_set(info, "romio_cb_read", "enable");
 		fd->hints->no_indep_rw = 1;
 		fd->hints->cb_read = 1;
 		fd->hints->cb_write = 1;
 		tmp_val = 1;
 	    }
 	    else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
-		MPI_Info_set(info, "romio_no_indep_rw", value);
+		ADIOI_Info_set(info, "romio_no_indep_rw", value);
 		fd->hints->no_indep_rw = 0;
 		tmp_val = 0;
 	    }
@@ -213,86 +387,91 @@
 	/* new hints for enabling/disabling data sieving on
 	 * reads/writes
 	 */
-	MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, 
+	ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value, 
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_ds_read", value);
+		ADIOI_Info_set(info, "romio_ds_read", value);
 		fd->hints->ds_read = ADIOI_HINT_AUTO;
 	    }
 	    /* otherwise ignore */
 	}
-	MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, 
+	ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value, 
 		     &flag);
 	if (flag) {
 	    if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_ENABLE;
 	    }
 	    else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_DISABLE;
 	    }
 	    else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
 	    {
-		MPI_Info_set(info, "romio_ds_write", value);
+		ADIOI_Info_set(info, "romio_ds_write", value);
 		fd->hints->ds_write = ADIOI_HINT_AUTO;
 	    }
 	    /* otherwise ignore */
 	}
 
-	MPI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL, 
-		     value, &flag);
-	if (flag && ((intval=atoi(value)) > 0)) {
-	    tmp_val = intval;
-
-	    MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
-	    /* --BEGIN ERROR HANDLING-- */
-	    if (tmp_val != intval) {
+	if (ok_to_override_cb_nodes) {
+		/* MPI_File_open path sets up some data structures that don't
+		 * get resized in the MPI_File_set_view path, so ignore
+		 * cb_nodes in the set_view case */
+	    ADIOI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL, 
+	  	     value, &flag);
+	    if (flag && ((intval=atoi(value)) > 0)) {
+	        tmp_val = intval;
+
+	        MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+	       /* --BEGIN ERROR HANDLING-- */
+	       if (tmp_val != intval) {
 		    MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
 						       "cb_nodes",
 						       error_code);
 		    return;
-	    }
-	    /* --END ERROR HANDLING-- */
+	       }
+	       /* --END ERROR HANDLING-- */
 
-	    if (!nprocs_is_valid) {
-		/* if hints were already initialized, we might not
-		 * have already gotten this?
-		 */
-		MPI_Comm_size(fd->comm, &nprocs);
-		nprocs_is_valid = 1;
-	    }
-	    if (intval <= nprocs) {
-		MPI_Info_set(info, "cb_nodes", value);
-		fd->hints->cb_nodes = intval;
-	    }
-	}
+	       if (!nprocs_is_valid) {
+		   /* if hints were already initialized, we might not
+		    * have already gotten this?
+		    */
+		   MPI_Comm_size(fd->comm, &nprocs);
+		   nprocs_is_valid = 1;
+	       }
+	       if (intval <= nprocs) {
+		   ADIOI_Info_set(info, "cb_nodes", value);
+		   fd->hints->cb_nodes = intval;
+	       }
+	   }
+	} /* if (ok_to_override_cb_nodes) */
 
-	MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval = atoi(value)) > 0)) {
-	    MPI_Info_set(info, "ind_wr_buffer_size", value);
+	    ADIOI_Info_set(info, "ind_wr_buffer_size", value);
 	    fd->hints->ind_wr_buffer_size = intval;
 	}
 
-	MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, 
+	ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, 
 		     value, &flag);
 	if (flag && ((intval = atoi(value)) > 0)) {
-	    MPI_Info_set(info, "ind_rd_buffer_size", value);
+	    ADIOI_Info_set(info, "ind_rd_buffer_size", value);
 	    fd->hints->ind_rd_buffer_size = intval;
 	}
 
-	MPI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL,
+	ADIOI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL,
 		     value, &flag);
 	if (flag) {
 	    if (fd->hints->cb_config_list == NULL) {
@@ -301,7 +480,7 @@
 		 * the cb_config_list hint will be set at file open time
 		 * either by the user or to the default
 		 */
-	    	MPI_Info_set(info, "cb_config_list", value);
+	    	ADIOI_Info_set(info, "cb_config_list", value);
 		len = (strlen(value)+1) * sizeof(char);
 		fd->hints->cb_config_list = ADIOI_Malloc(len);
 		if (fd->hints->cb_config_list == NULL) {
@@ -314,13 +493,27 @@
 	     * info value with a cb_config_list value in it in a couple
 	     * of calls, which would be irritating. */
 	}
+	ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
+			value, &flag);
+	if ( flag && ((intval = atoi(value)) > 0) ) {
+		ADIOI_Info_set(info, "romio_min_fdomain_size", value);
+		fd->hints->min_fdomain_size = intval;
+	}
+  /* Now we use striping unit in common code so we should
+     process hints for it. */
+	ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+			value, &flag);
+	if ( flag && ((intval = atoi(value)) > 0) ) {
+		ADIOI_Info_set(info, "striping_unit", value);
+		fd->hints->striping_unit = intval;
+	}
     }
 
     /* handle cb_config_list default value here; avoids an extra
      * free/alloc and insures it is always set
      */
     if (fd->hints->cb_config_list == NULL) {
-	MPI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
+	ADIOI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
 	len = (strlen(ADIOI_CB_CONFIG_LIST_DFLT)+1) * sizeof(char);
 	fd->hints->cb_config_list = ADIOI_Malloc(len);
 	if (fd->hints->cb_config_list == NULL) {
@@ -341,25 +534,24 @@
 	     * disable at the same time doesn't make sense. honor
 	     * romio_cb_{read,write} and force the no_indep_rw hint to
 	     * 'disable' */
-	    MPI_Info_set(info, "romio_no_indep_rw", "false");
+	    ADIOI_Info_set(info, "romio_no_indep_rw", "false");
 	    fd->hints->no_indep_rw = 0;
 	    fd->hints->deferred_open = 0;
     }
 
-    if ((fd->file_system == ADIO_PIOFS) || (fd->file_system == ADIO_PVFS) ||
-		    (fd->file_system == ADIO_PVFS2) ) {
-    /* no data sieving for writes in PIOFS, PVFS and PVFS2, because they do not
+    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
+    /* disable data sieving for writes on fs that do not
        support file locking */
-       	MPI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+       	ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
 		     value, &flag);
 	if (flag) {
 	    /* get rid of this value if it is set */
-	    MPI_Info_delete(info, "ind_wr_buffer_size");
+	    ADIOI_Info_delete(info, "ind_wr_buffer_size");
 	}
 	/* note: leave ind_wr_buffer_size alone; used for other cases
 	 * as well. -- Rob Ross, 04/22/2003
 	 */
-	MPI_Info_set(info, "romio_ds_write", "disable");
+	ADIOI_Info_set(info, "romio_ds_write", "disable");
 	fd->hints->ds_write = ADIOI_HINT_DISABLE;
     }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_init.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_init.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_init.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_init.c	2010-11-15 15:02:47.000000000 +0100
@@ -6,7 +6,6 @@
  */
 
 #include "adio.h"
-#include "adio_extern.h"
 
 ADIOI_Flatlist_node *ADIOI_Flatlist = NULL;
 ADIOI_Datarep *ADIOI_Datarep_head = NULL;
@@ -22,6 +21,8 @@
 int MPIR_Infotable_ptr = 0, MPIR_Infotable_max = 0;
 #endif
 
+MPI_Info ADIOI_syshints = MPI_INFO_NULL;
+
 #if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
 int ADIOI_Direct_read = 0, ADIOI_Direct_write = 0;
 #endif
@@ -57,6 +58,12 @@
     else ADIOI_Direct_write = 0;
 #endif
 
+    /* Assume system-wide hints won't change between runs: move hint processing
+     * from ADIO_Open to here */
+    /* FIXME should be checking error code from MPI_Info_create here */
+    MPI_Info_create(&ADIOI_syshints);
+    ADIOI_process_system_hints(ADIOI_syshints);
+
 #ifdef ADIOI_MPE_LOGGING
     {
         MPE_Log_get_state_eventIDs( &ADIOI_MPE_open_a, &ADIOI_MPE_open_b );
@@ -71,9 +78,12 @@
         MPE_Log_get_state_eventIDs( &ADIOI_MPE_unlock_a, &ADIOI_MPE_unlock_b );
         MPE_Log_get_state_eventIDs( &ADIOI_MPE_postwrite_a,
                                     &ADIOI_MPE_postwrite_b );
+	MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a, 
+			&ADIOI_MPE_openinternal_b);
+	MPE_Log_get_state_eventIDs( &ADIOI_MPE_stat_a, &ADIOI_MPE_stat_b);
 
         int  comm_world_rank;
-        PMPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank );
+        MPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank );
 
         if ( comm_world_rank == 0 ) {
             MPE_Describe_state( ADIOI_MPE_open_a, ADIOI_MPE_open_b,
@@ -94,6 +104,8 @@
                                 "unlock", "purple" );
             MPE_Describe_state( ADIOI_MPE_postwrite_a, ADIOI_MPE_postwrite_b,
                                 "postwrite", "ivory" );
+	    MPE_Describe_state( ADIOI_MPE_openinternal_a, ADIOI_MPE_openinternal_b, "open system", "blue");
+	    MPE_Describe_state( ADIOI_MPE_stat_a, ADIOI_MPE_stat_b, "stat", "purple");
         }
     }
 #endif
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_io_coll.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iread.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iread.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iread.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iread.c	2010-11-15 15:02:47.000000000 +0100
@@ -45,6 +45,7 @@
     static char myname[] = "ADIOI_GEN_IREADCONTIG";
 
     MPI_Type_size(datatype, &typesize);
+    ADIOI_Assert((count * typesize) == ((ADIO_Offset)(unsigned)count * (ADIO_Offset)typesize));
     len = count * typesize;
 
     if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
@@ -84,7 +85,7 @@
 
     if (*error_code == MPI_SUCCESS) {
 	MPI_Type_size(datatype, &typesize);
-	nbytes = count*typesize;
+	nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
     }
     MPIO_Completed_request_create(&fd, nbytes, error_code, request);
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c	2010-11-15 15:02:47.000000000 +0100
@@ -21,12 +21,13 @@
     MPI_Offset len;
 
     MPI_Type_size(datatype, &typesize);
-    len = count * typesize;
+    len = (MPI_Offset)count * (MPI_Offset)typesize;
 
     /* Call the blocking function.  It will create an error code
      * if necessary.
      */
-    ADIO_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, 
+    ADIOI_Assert(len == (int) len); /* the count is an int parm */
+    ADIO_ReadContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset, 
 		    &status, error_code);  
     if (*error_code != MPI_SUCCESS) {
 	    len=0;
@@ -54,7 +55,7 @@
 		     offset, &status, error_code);  
     if (*error_code == MPI_SUCCESS) {
 	MPI_Type_size(datatype, &typesize);
-	nbytes = count*typesize;
+	nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
     }
     MPIO_Completed_request_create(&fd, nbytes, error_code, request);
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c	2010-11-15 15:03:31.000000000 +0100
@@ -55,6 +55,7 @@
 
     MPI_Type_size(datatype, &typesize);
     len = count * typesize;
+    ADIOI_Assert((ADIO_Offset)len == (ADIO_Offset)count * (ADIO_Offset)typesize); /* len is passed on as an int parm: compare at 64-bit width so overflow is caught */
 
     if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
     aio_errno = ADIOI_GEN_aio(fd, buf, len, offset, 1, request);
@@ -85,10 +86,25 @@
     int error_code;
     struct aiocb *aiocbp;
     ADIOI_AIO_Request *aio_req;
-
+    MPI_Status status;
+#if defined(ROMIO_XFS)
+    unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
+	    fd->hints->fs_hints.xfs.read_chunk_sz;
+#endif /* ROMIO_XFS */
 
     fd_sys = fd->fd_sys;
 
+#if defined(ROMIO_XFS)
+    /* Use Direct I/O if desired and properly aligned */
+    if (fd->fns == &ADIO_XFS_operations &&
+	 ((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
+	 !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) && 
+	 !(len % fd->d_miniosz) && (len >= fd->d_miniosz) && 
+	 (len <= maxiosz)) {
+	    fd_sys = fd->fd_direct;
+    }
+#endif /* ROMIO_XFS */
+
     aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
     aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
     aiocbp->aio_offset = offset;
@@ -133,10 +149,10 @@
 	    treat this as a blocking request and return.  */
 	    if (wr) 
 		ADIO_WriteContig(fd, buf, len, MPI_BYTE, 
-			    ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code);  
+			    ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);  
 	    else
 		ADIO_ReadContig(fd, buf, len, MPI_BYTE,
-			    ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code);  
+			    ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);  
 		    
 	    MPIO_Completed_request_create(&fd, len, &error_code, request);
 	    return 0;
@@ -178,7 +194,7 @@
 
     if (*error_code == MPI_SUCCESS) {
 	MPI_Type_size(datatype, &typesize);
-	nbytes = count * typesize;
+	nbytes = (MPI_Offset)count * (MPI_Offset)typesize;
     }
     MPIO_Completed_request_create(&fd, nbytes, error_code, request);
 }
@@ -202,7 +218,6 @@
     } else if (errno == 0) {
 	    int n = aio_return(aio_req->aiocbp);
 	    aio_req->nbytes = n;
-	    MPIR_Nest_incr();
 	    errcode = MPI_Grequest_complete(aio_req->req);
 	    /* --BEGIN ERROR HANDLING-- */
 	    if (errcode != MPI_SUCCESS) {
@@ -213,7 +228,6 @@
 				    0);
 	    }
 	    /* --END ERROR HANDLING-- */
-	    MPIR_Nest_decr();
     }
     return errcode;
 }
@@ -263,7 +277,6 @@
 		    if (errno == 0) {
 			int n = aio_return(aio_reqlist[i]->aiocbp);
 			aio_reqlist[i]->nbytes = n;
-			MPIR_Nest_incr();
 			errcode = MPI_Grequest_complete(aio_reqlist[i]->req);
 			if (errcode != MPI_SUCCESS) {
 			    errcode = MPIO_Err_create_code(MPI_SUCCESS,
@@ -272,7 +285,6 @@
 				    __LINE__, MPI_ERR_IO, 
 				    "**mpi_grequest_complete", 0);
 			}
-			MPIR_Nest_decr();
 			ADIOI_Free(aio_reqlist[i]->aiocbp);
 			aio_reqlist[i]->aiocbp = NULL;
 			cblist[i] = NULL;
@@ -289,6 +301,19 @@
         return errcode;
 }
 
+int ADIOI_GEN_aio_free_fn(void *extra_state)
+{
+	ADIOI_AIO_Request *aio_req;
+	aio_req = (ADIOI_AIO_Request*)extra_state;
+
+	if (aio_req->aiocbp != NULL)
+		ADIOI_Free(aio_req->aiocbp);
+	ADIOI_Free(aio_req);
+
+	return MPI_SUCCESS;
+}
+#endif /* working AIO */
+
 int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status) 
 {
 	ADIOI_AIO_Request *aio_req;
@@ -298,7 +323,6 @@
 
 	MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes); 
 
-	/* do i need to nest_incr/nest_decr  here? */
 	/* can never cancel so always true */ 
 	MPI_Status_set_cancelled(status, 0); 
 
@@ -309,19 +333,6 @@
 	/* this generalized request never fails */ 
 	return MPI_SUCCESS; 
 }
-
-int ADIOI_GEN_aio_free_fn(void *extra_state)
-{
-	ADIOI_AIO_Request *aio_req;
-	aio_req = (ADIOI_AIO_Request*)extra_state;
-
-	if (aio_req->aiocbp != NULL)
-		ADIOI_Free(aio_req->aiocbp);
-	ADIOI_Free(aio_req);
-
-	return MPI_SUCCESS;
-}
-#endif /* working AIO */
 /* 
  * vim: ts=8 sts=4 sw=4 noexpandtab 
  */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c	2010-11-15 15:02:47.000000000 +0100
@@ -18,20 +18,22 @@
 			    int *error_code)  
 {
     ADIO_Status status;
-    int len, typesize;
+    MPI_Offset len;
+    int typesize;
     MPI_Offset nbytes=0;
 
     MPI_Type_size(datatype, &typesize);
-    len = count * typesize;
+    len = (MPI_Offset)count * (MPI_Offset)typesize;
 
     /* Call the blocking function.  It will create an error code
      * if necessary.
      */
-    ADIO_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
+    ADIOI_Assert(len == (int) len); /* the count is an int parm */
+    ADIO_WriteContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset,
 		     &status, error_code);  
     if (*error_code == MPI_SUCCESS) {
 	MPI_Type_size(datatype, &typesize);
-	nbytes = count*typesize;
+	nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
     }
     MPIO_Completed_request_create(&fd, nbytes, error_code, request);
 
@@ -57,7 +59,7 @@
 		      offset, &status, error_code);  
     if (*error_code == MPI_SUCCESS) {
 	MPI_Type_size(datatype, &typesize);
-	nbytes = count * typesize;
+	nbytes = (MPI_Offset)count * (MPI_Offset)typesize;
     }
     MPIO_Completed_request_create(&fd, nbytes, error_code, request);
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_open.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_open.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_open.c	2010-11-15 15:02:47.000000000 +0100
@@ -27,7 +27,7 @@
 {
     MPI_File mpi_fh;
     ADIO_File fd;
-    int orig_amode_excl, orig_amode_wronly, err, rank, procs;
+    int err, rank, procs;
     static char myname[] = "ADIO_OPEN";
     int  max_error_code;
     MPI_Info dupinfo;
@@ -59,6 +59,9 @@
     fd->filetype = filetype;    /* MPI_BYTE by default */
     fd->etype_size = 1;  /* default etype is MPI_BYTE */
 
+    fd->file_realm_st_offs = NULL;
+    fd->file_realm_types = NULL;
+
     fd->perm = perm;
 
     fd->async_count = 0;
@@ -70,7 +73,7 @@
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &procs);
 /* create and initialize info object */
-    fd->hints = (ADIOI_Hints *)ADIOI_Malloc(sizeof(struct ADIOI_Hints_struct));
+    fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct));
     if (fd->hints == NULL) {
 	/* NEED TO HANDLE ENOMEM ERRORS */
     }
@@ -79,18 +82,13 @@
     fd->hints->initialized = 0;
     fd->info = MPI_INFO_NULL;
 
-    if (info == MPI_INFO_NULL) 
-	*error_code = MPI_Info_create(&dupinfo);
-    else
-	*error_code = MPI_Info_dup(info, &dupinfo);
-    if (*error_code != MPI_SUCCESS)
-	goto fn_exit;
-
-    ADIOI_process_system_hints(dupinfo);
+    ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo);
     ADIO_SetInfo(fd, dupinfo, &err);
-    *error_code = MPI_Info_free(&dupinfo);
-    if (*error_code != MPI_SUCCESS)
-	goto fn_exit;
+    if (dupinfo != MPI_INFO_NULL) {
+	*error_code = MPI_Info_free(&dupinfo);
+	if (*error_code != MPI_SUCCESS)
+	    goto fn_exit;
+    }
 
      /* deferred open: 
      * we can only do this optimization if 'fd->hints->deferred_open' is set
@@ -103,9 +101,9 @@
 			    && uses_generic_write(fd))) {
 	    fd->hints->deferred_open = 0;
     }
-    if (fd->file_system == ADIO_PVFS2)
-	    /* disable deferred open on PVFS2 so that scalable broadcast will
-	     * always use the propper communicator */
+    if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN))
+	    /* disable deferred open on these fs so that scalable broadcast
+	     * will always use the proper communicator */
 	    fd->hints->deferred_open = 0;
 
 
@@ -123,134 +121,30 @@
       * IO */
     fd->agg_comm = MPI_COMM_NULL;
     fd->is_open = 0;
+    fd->my_cb_nodes_index = -2;
+    fd->is_agg = is_aggregator(rank, fd);
     if (fd->hints->deferred_open) {
 	    /* MPI_Comm_split will create a communication group of aggregators.
 	     * for non-aggregators it will return MPI_COMM_NULL .  we rely on
 	     * fd->agg_comm == MPI_COMM_NULL for non-aggregators in several
 	     * tests in the code  */
-	    if (is_aggregator(rank, fd)) {
+	    if (fd->is_agg) {
 		    MPI_Comm_split(fd->comm, 1, 0, &aggregator_comm);
 		    fd->agg_comm = aggregator_comm;
 	    } else {
 		    MPI_Comm_split(fd->comm, MPI_UNDEFINED, 0, &aggregator_comm);
 		    fd->agg_comm = aggregator_comm;
 	    }
-    }
-
-    orig_amode_excl = access_mode;
 
-    /* optimization: by having just one process create a file, close it, then
-     * have all N processes open it, we can possibly avoid contention for write
-     * locks on a directory for some file systems. 
-     *
-     * we used to special-case EXCL|CREATE, since when N processes are trying
-     * to create a file exclusively, only 1 will succeed and the rest will
-     * (spuriously) fail.   Since we are now carrying out the CREATE on one
-     * process anyway, the EXCL case falls out and we don't need to explicitly
-     * worry about it, other than turning off both the EXCL and CREATE flags 
-     */
-    /* pvfs2 handles opens specially, so it is actually more efficent for that
-     * file system if we skip this optimization */
-    /* NFS handles opens especially poorly, so we cannot use this optimization
-     * on that FS */
-    if (fd->file_system == ADIO_NFS) {
-        /* no optimizations for NFS: */
-	if ((access_mode & ADIO_CREATE) && (access_mode & ADIO_EXCL)) {
-	  /* the open should fail if the file exists. Only *1* process should
-	   check this. Otherwise, if all processes try to check and the file
-	   does not exist, one process will create the file and others who
-	   reach later will return error. */
-	    if(rank == fd->hints->ranklist[0]) {
-                fd->access_mode = access_mode;
-                (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-                MPI_Bcast(error_code, 1, MPI_INT, \
-                                fd->hints->ranklist[0], fd->comm);
-                /* if no error, close the file and reopen normally below */
-                if (*error_code == MPI_SUCCESS)
-                        (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
-	    }
-	    else MPI_Bcast(error_code, 1, MPI_INT, 
-			    fd->hints->ranklist[0], fd->comm); 
-	    if (*error_code != MPI_SUCCESS) {
-		    goto fn_exit;
-	    }
-	    else {
-	        /* turn off EXCL for real open */
-	        access_mode = access_mode ^ ADIO_EXCL;
-           }
-        }
-    } else {
-
-	    /* the actual optimized create on one, open on all */
-    if (access_mode & ADIO_CREATE && fd->file_system != ADIO_PVFS2) {
-       if(rank == fd->hints->ranklist[0]) {
-	   /* remove delete_on_close flag if set */
-	   if (access_mode & ADIO_DELETE_ON_CLOSE)
-	       fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
-	   else 
-	       fd->access_mode = access_mode;
-	       
-	   (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-	   MPI_Bcast(error_code, 1, MPI_INT, \
-		     fd->hints->ranklist[0], fd->comm);
-	   /* if no error, close the file and reopen normally below */
-	   if (*error_code == MPI_SUCCESS) 
-	       (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
-
-	   fd->access_mode = access_mode; /* back to original */
-       }
-       else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
-
-       if (*error_code != MPI_SUCCESS) {
-           goto fn_exit;
-       } 
-       else {
-           /* turn off CREAT (and EXCL if set) for real multi-processor open */
-           access_mode ^= ADIO_CREATE; 
-	   if (access_mode & ADIO_EXCL)
-		   access_mode ^= ADIO_EXCL;
-       }
-    }
     }
 
-    /* if we are doing deferred open, non-aggregators should return now */
-    if (fd->hints->deferred_open ) {
-        if (fd->agg_comm == MPI_COMM_NULL) {
-            /* we might have turned off EXCL for the aggregators.
-             * restore access_mode that non-aggregators get the right
-             * value from get_amode */
-            fd->access_mode = orig_amode_excl;
-            *error_code = MPI_SUCCESS;
-            goto fn_exit;
-        }
-    }
+    /* actual opens start here */
+    /* generic open: one process opens to create the file, all others open */
+    /* nfs open: everybody opens or else you'll end up with "file not found"
+     * due to stupid nfs consistency semantics */
+    /* scalable open: one process opens and broadcasts results to everyone */
 
-/* For writing with data sieving, a read-modify-write is needed. If 
-   the file is opened for write_only, the read will fail. Therefore,
-   if write_only, open the file as read_write, but record it as write_only
-   in fd, so that get_amode returns the right answer. */
-
-    orig_amode_wronly = access_mode;
-    if (access_mode & ADIO_WRONLY) {
-	access_mode = access_mode ^ ADIO_WRONLY;
-	access_mode = access_mode | ADIO_RDWR;
-    }
-    fd->access_mode = access_mode;
-
-    (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-
-    /* if error, may be it was due to the change in amode above. 
-       therefore, reopen with access mode provided by the user.*/ 
-    fd->access_mode = orig_amode_wronly;  
-    if (*error_code != MPI_SUCCESS) 
-        (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-
-    /* if we turned off EXCL earlier, then we should turn it back on */
-    if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
-
-    /* for deferred open: this process has opened the file (because if we are
-     * not an aggregaor and we are doing deferred open, we returned earlier)*/
-    fd->is_open = 1;
+    ADIOI_OpenColl(fd, rank, access_mode, error_code);
 
  fn_exit:
     MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm);
@@ -300,10 +194,18 @@
 int is_aggregator(int rank, ADIO_File fd ) {
         int i;
         
-        for (i=0; i< fd->hints->cb_nodes; i++ ) {
-                if ( rank == fd->hints->ranklist[i] )
-                        return 1;
+	if (fd->my_cb_nodes_index == -2) {
+	    for (i=0; i< fd->hints->cb_nodes; i++ ) {
+		if ( rank == fd->hints->ranklist[i] ) {
+		    fd->my_cb_nodes_index = i;
+		    return 1;
+		}
+	    }
+	    fd->my_cb_nodes_index = -1;
         }
+	else if (fd->my_cb_nodes_index != -1)
+	    return 1;
+
         return 0;
 }
 
@@ -369,7 +271,7 @@
 	/* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR FS-INDEP. */
 	value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
 	ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", rank_ct);
-	MPI_Info_set(fd->info, "cb_nodes", value);
+	ADIOI_Info_set(fd->info, "cb_nodes", value);
 	ADIOI_Free(value);
     }
 
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_opencoll.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_opencoll_failsafe.c
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: ad_opencoll_scalable.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c	2010-11-15 15:02:47.000000000 +0100
@@ -47,7 +47,10 @@
 
 	for (i=0; i<ntimes; i++) {
 	    len = ADIOI_MIN(size-done, ADIOI_PREALLOC_BUFSZ);
-	    ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
+	    ADIO_ReadContig(fd, buf, 
+                      len, /* len is ADIO_Offset but is <= ADIOI_PREALLOC_BUFSZ (16M), 
+                              so it fits in an int parameter */
+                      MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
 			    &status, error_code);
 	    if (*error_code != MPI_SUCCESS) {
 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@@ -58,7 +61,10 @@
 						   0);
                 return;  
 	    }
-	    ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
+	    ADIO_WriteContig(fd, buf, 
+                       len, /* len is ADIO_Offset but is <= ADIOI_PREALLOC_BUFSZ (16M), 
+                               so it fits in an int parameter */
+                       MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
                              done, &status, error_code);
 	    if (*error_code != MPI_SUCCESS) return;
 	    done += len;
@@ -70,7 +76,10 @@
 	    ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
 	    for (i=0; i<ntimes; i++) {
 		len = ADIOI_MIN(alloc_size-done, ADIOI_PREALLOC_BUFSZ);
-		ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
+		ADIO_WriteContig(fd, buf, 
+                     len, /* len is ADIO_Offset but is <= ADIOI_PREALLOC_BUFSZ (16M), 
+                             so it fits in an int parameter */
+                     MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
 				 done, &status, error_code);
 		if (*error_code != MPI_SUCCESS) return;
 		done += len;  
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read.c	2010-11-15 15:02:47.000000000 +0100
@@ -10,17 +10,25 @@
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
 
 void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, 
 			  MPI_Datatype datatype, int file_ptr_type,
 			  ADIO_Offset offset, ADIO_Status *status,
 			  int *error_code)
 {
-    int err = -1, datatype_size, len;
+    int err = -1, datatype_size;
+    ADIO_Offset len;
     static char myname[] = "ADIOI_GEN_READCONTIG";
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5034, 0, NULL);
+#endif
     MPI_Type_size(datatype, &datatype_size);
-    len = datatype_size * count;
+    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+    ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
 
     if (file_ptr_type == ADIO_INDIVIDUAL) {
 	offset = fd->fp_ind;
@@ -50,7 +58,7 @@
 #ifdef ADIOI_MPE_LOGGING
     MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
 #endif
-    err = read(fd->fd_sys, buf, len);
+    err = read(fd->fd_sys, buf, (unsigned int)len);
 #ifdef ADIOI_MPE_LOGGING
     MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
 #endif
@@ -77,4 +85,7 @@
 #endif
 
     *error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5035, 0, NULL);
+#endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,18 +8,25 @@
 #include "adio.h"
 #include "adio_extern.h"
 
+#ifdef USE_DBG_LOGGING
+  #define RDCOLL_DEBUG 1
+#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
 /* prototypes of functions used for collective reads only. */
 static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
 				datatype, int nprocs,
 				int myrank, ADIOI_Access
 				*others_req, ADIO_Offset *offset_list,
-				int *len_list, int contig_access_count, 
+				ADIO_Offset *len_list, int contig_access_count, 
 				ADIO_Offset
 				min_st_offset, ADIO_Offset fd_size,
 				ADIO_Offset *fd_start, ADIO_Offset *fd_end,
 				int *buf_idx, int *error_code);
 static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-				  *flat_buf, ADIO_Offset *offset_list, int
+				  *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
 				  *len_list, int *send_size, int *recv_size,
 				  int *count, int *start_pos, 
 				  int *partial_send, 
@@ -34,8 +41,8 @@
 				  MPI_Aint buftype_extent, int *buf_idx);
 static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
 				   *flat_buf, char **recv_buf, ADIO_Offset 
-				   *offset_list, int *len_list, 
-				   int *recv_size, 
+				   *offset_list, ADIO_Offset *len_list, 
+				   unsigned *recv_size, 
 				   MPI_Request *requests, MPI_Status *statuses,
 				   int *recd_from_proc, int nprocs,
 				   int contig_access_count, 
@@ -70,12 +77,20 @@
     ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off;
     ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
 	*fd_end = NULL, *end_offsets = NULL;
-    int *len_list = NULL, *buf_idx = NULL;
+    ADIO_Offset *len_list = NULL;
+    int *buf_idx = NULL;
 
 #ifdef HAVE_STATUS_SET_BYTES
     int bufsize, size;
 #endif
 
+    if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+        ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype, 
+			file_ptr_type, offset, status, error_code);
+        return;
+    }
+
+
     MPI_Comm_size(fd->comm, &nprocs);
     MPI_Comm_rank(fd->comm, &myrank);
 
@@ -85,24 +100,26 @@
 
     /* only check for interleaving if cb_read isn't disabled */
     if (fd->hints->cb_read != ADIOI_HINT_DISABLE) {
-	/* For this process's request, calculate the list of offsets and
-	   lengths in the file and determine the start and end offsets. */
+    /* For this process's request, calculate the list of offsets and
+       lengths in the file and determine the start and end offsets. */
 
-	/* Note: end_offset points to the last byte-offset that will be accessed.
-	   e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
+    /* Note: end_offset points to the last byte-offset that will be accessed.
+       e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
 
 	ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
 			      &offset_list, &len_list, &start_offset,
 			      &end_offset, &contig_access_count); 
     
-	/*    for (i=0; i<contig_access_count; i++) {
-	      FPRINTF(stderr, "rank %d  off %ld  len %d\n", myrank, offset_list[i], 
-	      len_list[i]);
-	      }*/
+#ifdef RDCOLL_DEBUG
+    for (i=0; i<contig_access_count; i++) {
+	      DBG_FPRINTF(stderr, "rank %d  off %lld  len %lld\n", 
+			      myrank, offset_list[i], len_list[i]);
+	      }
+#endif
 
 	/* each process communicates its start and end offsets to other 
-	   processes. The result is an array each of start and end offsets stored
-	   in order of process rank. */ 
+	   processes. The result is an array each of start and end offsets
+	   stored in order of process rank. */ 
     
 	st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
 	end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs*sizeof(ADIO_Offset));
@@ -170,7 +187,9 @@
      */
     ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
 			    nprocs_for_coll, &min_st_offset,
-			    &fd_start, &fd_end, &fd_size);
+			    &fd_start, &fd_end, 
+			    fd->hints->min_fdomain_size, &fd_size,
+			    fd->hints->striping_unit);
 
     /* calculate where the portions of the access requests of this process 
      * are located in terms of the file domains.  this could be on the same
@@ -257,20 +276,30 @@
 
 void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
 			    datatype, int file_ptr_type, ADIO_Offset
-			    offset, ADIO_Offset **offset_list_ptr, int
+			    offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
 			    **len_list_ptr, ADIO_Offset *start_offset_ptr,
 			    ADIO_Offset *end_offset_ptr, int
 			   *contig_access_count_ptr)
 {
-    int filetype_size, buftype_size, etype_size;
-    int i, j, k, frd_size=0, old_frd_size=0, st_index=0;
-    int n_filetypes, etype_in_filetype;
+    int filetype_size, etype_size;
+    unsigned buftype_size;
+    int i, j, k;
+    ADIO_Offset i_offset;
+    ADIO_Offset frd_size=0, old_frd_size=0;
+    int st_index=0;
+    ADIO_Offset n_filetypes, etype_in_filetype;
     ADIO_Offset abs_off_in_filetype=0;
-    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
-    int contig_access_count, *len_list, flag, filetype_is_contig;
+    ADIO_Offset bufsize;
+    ADIO_Offset sum, n_etypes_in_filetype, size_in_filetype;
+    int contig_access_count, filetype_is_contig;
+    ADIO_Offset *len_list;
     MPI_Aint filetype_extent, filetype_lb;
     ADIOI_Flatlist_node *flat_file;
     ADIO_Offset *offset_list, off, end_offset=0, disp;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5028, 0, NULL);
+#endif
     
 /* For this process's request, calculate the list of offsets and
    lengths in the file and determine the start and end offsets. */
@@ -280,19 +309,19 @@
     MPI_Type_size(fd->filetype, &filetype_size);
     MPI_Type_extent(fd->filetype, &filetype_extent);
     MPI_Type_lb(fd->filetype, &filetype_lb);
-    MPI_Type_size(datatype, &buftype_size);
+    MPI_Type_size(datatype, (int*)&buftype_size);
     etype_size = fd->etype_size;
 
     if ( ! filetype_size ) {
 	*contig_access_count_ptr = 0;
 	*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-	*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
+	*len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
         /* 2 is for consistency. everywhere I malloc one more than needed */
 
 	offset_list = *offset_list_ptr;
 	len_list = *len_list_ptr;
         offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
+                 fd->disp + (ADIO_Offset)etype_size * offset;
 	len_list[0] = 0;
 	*start_offset_ptr = offset_list[0];
 	*end_offset_ptr = offset_list[0] + len_list[0] - 1;
@@ -303,14 +332,14 @@
     if (filetype_is_contig) {
 	*contig_access_count_ptr = 1;        
 	*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-	*len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
+	*len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
         /* 2 is for consistency. everywhere I malloc one more than needed */
 
 	offset_list = *offset_list_ptr;
 	len_list = *len_list_ptr;
         offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
-	len_list[0] = bufcount * buftype_size;
+                 fd->disp + (ADIO_Offset)etype_size * offset;
+	len_list[0] = (ADIO_Offset)bufcount * (ADIO_Offset)buftype_size;
 	*start_offset_ptr = offset_list[0];
 	*end_offset_ptr = offset_list[0] + len_list[0] - 1;
 
@@ -327,31 +356,47 @@
 	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
 	disp = fd->disp;
 
+#ifdef RDCOLL_DEBUG 
+        {
+            int ii;
+            DBG_FPRINTF(stderr, "flattened %3d : ", flat_file->count );
+            for (ii=0; ii<flat_file->count; ii++) {
+                DBG_FPRINTF(stderr, "%16qd:%-16qd", flat_file->indices[ii], flat_file->blocklens[ii] );
+            }
+            DBG_FPRINTF(stderr, "\n" );
+        }
+#endif
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-		n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-			(ADIO_Offset) n_filetypes*filetype_extent + 
-			flat_file->blocklens[i] >= offset) 
-		    {
-			st_index = i;
-			frd_size = (int) (disp + flat_file->indices[i] + 
-			    (ADIO_Offset) n_filetypes*filetype_extent
-			        + flat_file->blocklens[i] - offset);
-			flag = 1;
+           /* Wei-keng reworked type processing to be a bit more efficient */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+             offset     -= (ADIO_Offset)n_filetypes * filetype_extent;
+	     	/* now offset is local to this extent */
+ 
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* frd_size is from offset to the end of block i */
+		if (dist == 0) {
+			i++;
+			offset   = flat_file->indices[i];
+			frd_size = flat_file->blocklens[i];
 			break;
-		    }
+		}
+		if (dist > 0) {
+                    frd_size = dist;
+		    break;
 		}
 	    }
-	}
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+        }
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -367,29 +412,29 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+	    offset = disp + n_filetypes* (ADIO_Offset)filetype_extent + 
 		abs_off_in_filetype;
 	}
 
          /* calculate how much space to allocate for offset_list, len_list */
 
 	old_frd_size = frd_size;
-	contig_access_count = i = 0;
+	contig_access_count = i_offset = 0;
 	j = st_index;
-	bufsize = buftype_size * bufcount;
+	bufsize = (ADIO_Offset)buftype_size * (ADIO_Offset)bufcount;
 	frd_size = ADIOI_MIN(frd_size, bufsize);
-	while (i < bufsize) {
+	while (i_offset < bufsize) {
 	    if (frd_size) contig_access_count++;
-	    i += frd_size;
+	    i_offset += frd_size;
 	    j = (j + 1) % flat_file->count;
-	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	}
 
         /* allocate space for offset_list and len_list */
 
 	*offset_list_ptr = (ADIO_Offset *)
 	         ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));  
-	*len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int));
+	*len_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));
         /* +1 to avoid a 0-size malloc */
 
 	offset_list = *offset_list_ptr;
@@ -399,17 +444,17 @@
 
 	*start_offset_ptr = offset; /* calculated above */
 
-	i = k = 0;
+	i_offset = k = 0;
 	j = st_index;
 	off = offset;
 	frd_size = ADIOI_MIN(old_frd_size, bufsize);
-	while (i < bufsize) {
+	while (i_offset < bufsize) {
 	    if (frd_size) {
 		offset_list[k] = off;
 		len_list[k] = frd_size;
 		k++;
 	    }
-	    i += frd_size;
+	    i_offset += frd_size;
 	    end_offset = off + frd_size - 1;
 
      /* Note: end_offset points to the last byte-offset that will be accessed.
@@ -417,7 +462,7 @@
 
 	    if (off + frd_size < disp + flat_file->indices[j] +
 		flat_file->blocklens[j] + 
-		(ADIO_Offset) n_filetypes*filetype_extent)
+		 n_filetypes* (ADIO_Offset)filetype_extent)
 	    {
 		off += frd_size;
 		/* did not reach end of contiguous block in filetype.
@@ -425,17 +470,17 @@
 		 */
 	    }
 	    else {
-		if (j < (flat_file->count - 1)) j++;
-		else {
-		    /* hit end of flattened filetype; 
-		     * start at beginning again 
-		     */
-		    j = 0;
-		    n_filetypes++;
+		j = (j+1) % flat_file->count;
+                n_filetypes += (j == 0) ? 1 : 0;
+                while (flat_file->blocklens[j]==0) {
+			j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    /* hit end of flattened filetype; start at beginning 
+		     * again */
 		}
 		off = disp + flat_file->indices[j] + 
-		    (ADIO_Offset) n_filetypes*filetype_extent;
-		frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+		     n_filetypes* (ADIO_Offset)filetype_extent;
+		frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	    }
 	}
 
@@ -443,15 +488,18 @@
 	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
 
 	*contig_access_count_ptr = contig_access_count;
-	*end_offset_ptr = end_offset;
+	 *end_offset_ptr = end_offset;
     }
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5029, 0, NULL);
+#endif
 }
 
 static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
 			 datatype, int nprocs,
 			 int myrank, ADIOI_Access
 			 *others_req, ADIO_Offset *offset_list,
-			 int *len_list, int contig_access_count, ADIO_Offset
+			 ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
                          min_st_offset, ADIO_Offset fd_size,
 			 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
                          int *buf_idx, int *error_code)
@@ -466,12 +514,14 @@
    array from a file, where each local array is 8Mbytes, requiring
    at least another 8Mbytes of temp space is unacceptable. */
 
-    int i, j, m, size, ntimes, max_ntimes, buftype_is_contig;
+    int i, j, m, ntimes, max_ntimes, buftype_is_contig;
     ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
     char *read_buf = NULL, *tmp_buf;
     int *curr_offlen_ptr, *count, *send_size, *recv_size;
-    int *partial_send, *recd_from_proc, *start_pos, for_next_iter;
-    int real_size, req_len, flag, for_curr_iter, rank;
+    int *partial_send, *recd_from_proc, *start_pos;
+    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+    ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
+    int req_len, flag, rank;
     MPI_Status status;
     ADIOI_Flatlist_node *flat_buf=NULL;
     MPI_Aint buftype_extent;
@@ -601,7 +651,7 @@
                        minus what was satisfied in previous iteration
              req_size = size corresponding to req_off */
 
-	size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); 
+	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); 
 	real_off = off - for_curr_iter;
 	real_size = size + for_curr_iter;
 
@@ -609,7 +659,9 @@
 	for_next_iter = 0;
 
 	for (i=0; i<nprocs; i++) {
-	    /* FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); */
+#ifdef RDCOLL_DEBUG
+	    DBG_FPRINTF(stderr, "rank %d, i %d, others_count %d\n", rank, i, others_req[i].count); 
+#endif
 	    if (others_req[i].count) {
 		start_pos[i] = curr_offlen_ptr[i];
 		for (j=curr_offlen_ptr[i]; j<others_req[i].count;
@@ -632,22 +684,22 @@
 		    }
 		    if (req_off < real_off + real_size) {
 			count[i]++;
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+req_off-real_off) == (ADIO_Offset)(MPIR_Upint)(read_buf+req_off-real_off));
 			MPI_Address(read_buf+req_off-real_off, 
                                &(others_req[i].mem_ptrs[j]));
-			send_size[i] += (int)(ADIOI_MIN(real_off + (ADIO_Offset)real_size - 
-						  req_off, req_len));
+      ADIOI_Assert((real_off + real_size - req_off) == (int)(real_off + real_size - req_off));
+			send_size[i] += (int)(ADIOI_MIN(real_off + real_size - req_off, 
+                                      (ADIO_Offset)(unsigned)req_len)); 
 
-			if (real_off+real_size-req_off < req_len) {
-			    partial_send[i] = (int) (real_off+real_size-
-						     req_off);
+			if (real_off+real_size-req_off < (ADIO_Offset)(unsigned)req_len) {
+			    partial_send[i] = (int) (real_off + real_size - req_off);
 			    if ((j+1 < others_req[i].count) && 
                                  (others_req[i].offsets[j+1] < 
                                      real_off+real_size)) { 
 				/* this is the case illustrated in the
 				   figure above. */
-				for_next_iter = (int) (ADIOI_MAX(for_next_iter,
-					  real_off + real_size - 
-                                             others_req[i].offsets[j+1])); 
+				for_next_iter = ADIOI_MAX(for_next_iter,
+					  real_off + real_size - others_req[i].offsets[j+1]); 
 				/* max because it must cover requests 
 				   from different processes */
 			    }
@@ -665,7 +717,8 @@
 	    if (count[i]) flag = 1;
 
 	if (flag) {
-	    ADIO_ReadContig(fd, read_buf+for_curr_iter, size, MPI_BYTE,
+      ADIOI_Assert(size == (int)size);
+	    ADIO_ReadContig(fd, read_buf+for_curr_iter, (int)size, MPI_BYTE,
 			    ADIO_EXPLICIT_OFFSET, off, &status, error_code);
 	    if (*error_code != MPI_SUCCESS) return;
 	}
@@ -684,6 +737,8 @@
 
 	if (for_next_iter) {
 	    tmp_buf = (char *) ADIOI_Malloc(for_next_iter);
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)read_buf)+real_size-for_next_iter) == (ADIO_Offset)(MPIR_Upint)(read_buf+real_size-for_next_iter));
+      ADIOI_Assert((for_next_iter+coll_bufsize) == (size_t)(for_next_iter+coll_bufsize));
 	    memcpy(tmp_buf, read_buf+real_size-for_next_iter, for_next_iter);
 	    ADIOI_Free(read_buf);
 	    read_buf = (char *) ADIOI_Malloc(for_next_iter+coll_bufsize);
@@ -718,7 +773,7 @@
 }
 
 static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-			 *flat_buf, ADIO_Offset *offset_list, int
+			 *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
                          *len_list, int *send_size, int *recv_size,
 			 int *count, int *start_pos, int *partial_send, 
 			 int *recd_from_proc, int nprocs, 
@@ -753,6 +808,10 @@
 /* post recvs. if buftype_is_contig, data can be directly recd. into
    user buf at location given by buf_idx. else use recv_buf. */
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5032, 0, NULL);
+#endif
+
     if (buftype_is_contig) {
 	j = 0;
 	for (i=0; i < nprocs; i++) 
@@ -776,8 +835,10 @@
 		    MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, 
 			      myrank+i+100*iter, fd->comm, requests+j);
 		    j++;
-		    /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", 
-		       myrank, recv_size[i], myrank+i+100*iter); */
+#ifdef RDCOLL_DEBUG
+		    DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", 
+		       myrank, recv_size[i], myrank+i+100*iter); 
+#endif
 		}
     }
 
@@ -822,7 +883,7 @@
 	/* if noncontiguous, to the copies from the recv buffers */
 	if (!buftype_is_contig) 
 	    ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
-				   offset_list, len_list, recv_size, 
+				   offset_list, len_list, (unsigned*)recv_size, 
 				   requests, statuses, recd_from_proc, 
 				   nprocs, contig_access_count,
 				   min_st_offset, fd_size, fd_start, fd_end,
@@ -840,9 +901,11 @@
 	    if (recv_size[i]) ADIOI_Free(recv_buf[i]);
 	ADIOI_Free(recv_buf);
     }
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5033, 0, NULL);
+#endif
 }
 
-
 #define ADIOI_BUF_INCR \
 { \
     while (buf_incr) { \
@@ -856,7 +919,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
 	    flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
 	} \
 	buf_incr -= size_in_buf; \
@@ -868,9 +931,11 @@
 { \
     while (size) { \
 	size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+  ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+  ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
 	memcpy(((char *) buf) + user_buf_idx, \
 	       &(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \
-	recv_buf_idx[p] += size_in_buf; \
+	recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \
 	user_buf_idx += size_in_buf; \
 	flat_buf_sz -= size_in_buf; \
 	if (!flat_buf_sz) { \
@@ -880,7 +945,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
 	    flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
 	} \
 	size -= size_in_buf; \
@@ -889,11 +954,10 @@
     ADIOI_BUF_INCR \
 }
 
-
 static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
 				   *flat_buf, char **recv_buf, ADIO_Offset 
-				   *offset_list, int *len_list, 
-				   int *recv_size, 
+				   *offset_list, ADIO_Offset *len_list, 
+				   unsigned *recv_size, 
 				   MPI_Request *requests, MPI_Status *statuses,
 				   int *recd_from_proc, int nprocs,
 				   int contig_access_count, 
@@ -902,12 +966,15 @@
 				   ADIO_Offset *fd_end,
 				   MPI_Aint buftype_extent)
 {
+
 /* this function is only called if buftype is not contig */
 
-    int i, p, flat_buf_idx, size, buf_incr;
-    int flat_buf_sz, size_in_buf, n_buftypes;
+    int i, p, flat_buf_idx;
+    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+    int n_buftypes;
     ADIO_Offset off, len, rem_len, user_buf_idx;
-    int *curr_from_proc, *done_from_proc, *recv_buf_idx;
+    /* Not sure unsigned is necessary, but it makes the math safer */
+    unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx;
 
     ADIOI_UNREFERENCED_ARG(requests);
     ADIOI_UNREFERENCED_ARG(statuses);
@@ -918,9 +985,9 @@
                         filled into user buffer in previous iterations
     user_buf_idx = current location in user buffer 
     recv_buf_idx[p] = current location in recv_buf of proc. p  */
-    curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-    done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-    recv_buf_idx   = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+    curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+    done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+    recv_buf_idx   = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
 
     for (i=0; i < nprocs; i++) {
 	recv_buf_idx[i] = curr_from_proc[i] = 0;
@@ -938,7 +1005,7 @@
 
     for (i=0; i<contig_access_count; i++) { 
 	off     = offset_list[i];
-	rem_len = (ADIO_Offset) len_list[i];
+	rem_len = len_list[i];
 
 	/* this request may span the file domains of more than one process */
 	while (rem_len != 0) {
@@ -958,29 +1025,32 @@
 	    if (recv_buf_idx[p] < recv_size[p]) {
 		if (curr_from_proc[p]+len > done_from_proc[p]) {
 		    if (done_from_proc[p] > curr_from_proc[p]) {
-			size = (int)ADIOI_MIN(curr_from_proc[p] + len - 
+			size = ADIOI_MIN(curr_from_proc[p] + len - 
 			      done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
 			buf_incr = done_from_proc[p] - curr_from_proc[p];
 			ADIOI_BUF_INCR
-			buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]);
+			buf_incr = curr_from_proc[p]+len-done_from_proc[p];
+      ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size));
 			curr_from_proc[p] = done_from_proc[p] + size;
 			ADIOI_BUF_COPY
 		    }
 		    else {
-			size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
-			buf_incr = (int)len;
-			curr_from_proc[p] += size;
+			size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
+			buf_incr = len;
+      ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size));
+			curr_from_proc[p] += (unsigned) size;
 			ADIOI_BUF_COPY
 		    }
 		}
 		else {
-		    curr_from_proc[p] += (int)len;
-		    buf_incr = (int)len;
+        ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len));
+		    curr_from_proc[p] += (unsigned) len;
+		    buf_incr = len;
 		    ADIOI_BUF_INCR
 		}
 	    }
 	    else {
-		buf_incr = (int)len;
+		buf_incr = len;
 		ADIOI_BUF_INCR
 	    }
 	    off     += len;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_str.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_str.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_str.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_str.c	2010-11-15 15:02:47.000000000 +0100
@@ -12,12 +12,13 @@
 { \
     if (req_off >= readbuf_off + readbuf_len) { \
 	readbuf_off = req_off; \
-	readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
+	readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
 	ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, \
               ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code); \
         if (*error_code != MPI_SUCCESS) return; \
     } \
     while (req_len > readbuf_off + readbuf_len - req_off) { \
+  ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
 	partial_read = (int) (readbuf_off + readbuf_len - req_off); \
 	tmp_buf = (char *) ADIOI_Malloc(partial_read); \
 	memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
@@ -26,13 +27,14 @@
 	memcpy(readbuf, tmp_buf, partial_read); \
 	ADIOI_Free(tmp_buf); \
 	readbuf_off += readbuf_len-partial_read; \
-	readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
+	readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
 				       end_offset-readbuf_off+1)); \
 	ADIO_ReadContig(fd, readbuf+partial_read, readbuf_len-partial_read, \
              MPI_BYTE, ADIO_EXPLICIT_OFFSET, readbuf_off+partial_read, \
              &status1, error_code); \
         if (*error_code != MPI_SUCCESS) return; \
     } \
+    ADIOI_Assert(req_len == (size_t)req_len); \
     memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
 }
 
@@ -42,21 +44,25 @@
                        ADIO_Offset offset, ADIO_Status *status, int
                        *error_code)
 {
+
+
 /* offset is in units of etype relative to the filetype. */
 
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, brd_size, frd_size=0, st_index=0;
-    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len, partial_read;
+    ADIO_Offset i_offset, new_brd_size, brd_size, size;
+    int i, j, k, st_index=0;
+    unsigned num, bufsize; 
+    int n_etypes_in_filetype;
+    ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
+    ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size;
+    int filetype_size, etype_size, buftype_size, partial_read;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
-    ADIO_Offset userbuf_off;
+    ADIO_Offset userbuf_off, req_len, sum;
     ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
     char *readbuf, *tmp_buf, *value;
-    int flag, st_frd_size, st_n_filetypes, readbuf_len;
-    int new_brd_size, new_frd_size, info_flag, max_bufsize;
+    int info_flag;
+    unsigned max_bufsize, readbuf_len;
     ADIO_Status status1;
 
     if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
@@ -90,12 +96,13 @@
     MPI_Type_extent(datatype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
 /* get max_bufsize from the info object. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     max_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -110,16 +117,16 @@
 	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
         off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
+                 fd->disp + (ADIO_Offset)etype_size * offset;
 
 	start_off = off;
 	end_offset = off + bufsize - 1;
         readbuf_off = off;
         readbuf = (char *) ADIOI_Malloc(max_bufsize);
-        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+        readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
 
 /* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
         ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, 
@@ -127,15 +134,17 @@
 	if (*error_code != MPI_SUCCESS) return;
 
         for (j=0; j<count; j++) 
-            for (i=0; i<flat_buf->count; i++) {
-                userbuf_off = j*buftype_extent + flat_buf->indices[i];
-		req_off = off;
-		req_len = flat_buf->blocklens[i];
-		ADIOI_BUFFERED_READ
-                off += flat_buf->blocklens[i];
-            }
+        {
+              for (i=0; i<flat_buf->count; i++) {
+                  userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+      req_off = off;
+      req_len = flat_buf->blocklens[i];
+      ADIOI_BUFFERED_READ
+                  off += flat_buf->blocklens[i];
+              }
+        }
 
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
         if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
@@ -151,29 +160,36 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			frd_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
+	    /* Wei-keng reworked type processing to be a bit more efficient */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+	    offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+	    /* now offset is local to this extent */
+
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* frd_size is from offset to the end of block i */
+		if (dist == 0) {
+		    i++;
+		    offset   = flat_file->indices[i];
+		    frd_size = flat_file->blocklens[i];
+		    break;
+		}
+		if (dist > 0) {
+                    frd_size = dist;
+		    break;
 		}
 	    }
-	}
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+        }
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -189,36 +205,67 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		    abs_off_in_filetype;
 	}
 
         start_off = offset;
 
+	/* Wei-keng Liao: read request is within a single flat_file contig
+	 * block e.g. with subarray types that actually describe the whole
+	 * array */
+	if (buftype_is_contig && bufsize <= frd_size) {
+            ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+	    if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte that 
+		 * can be accessed in the fileview. */
+		fd->fp_ind = offset + bufsize;
+		if (bufsize == frd_size) {
+		    do {
+			st_index++;
+			if (st_index == flat_file->count) {
+			    st_index = 0;
+			    n_filetypes++;
+			}
+                    } while (flat_file->blocklens[st_index] == 0);
+		    fd->fp_ind = disp + flat_file->indices[st_index]
+                               + n_filetypes*filetype_extent;
+		}
+	    }
+	    fd->fp_sys_posn = -1;   /* set it to null. */ 
+#ifdef HAVE_STATUS_SET_BYTES
+	    MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif 
+            return;
+	}
+
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
 
 	st_frd_size = frd_size;
 	st_n_filetypes = n_filetypes;
-	i = 0;
+	i_offset = 0;
 	j = st_index;
 	off = offset;
 	frd_size = ADIOI_MIN(st_frd_size, bufsize);
-	while (i < bufsize) {
-	    i += frd_size;
+	while (i_offset < bufsize) {
+	    i_offset += frd_size;
 	    end_offset = off + frd_size - 1;
 
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
+	    j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+		j = (j+1) % flat_file->count;
+		n_filetypes += (j == 0) ? 1 : 0;
 	    }
-
-	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
-	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+	    off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
+	    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	}
 
 /* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
 	readbuf_off = 0;
@@ -230,12 +277,12 @@
 /* contiguous in memory, noncontiguous in file. should be the most
    common case. */
 
-	    i = 0;
+	    i_offset = 0;
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
 	    frd_size = ADIOI_MIN(st_frd_size, bufsize);
-	    while (i < bufsize) {
+	    while (i_offset < bufsize) {
                 if (frd_size) { 
                     /* TYPE_UB and TYPE_LB can result in 
                        frd_size = 0. save system call in such cases */ 
@@ -244,25 +291,26 @@
 
 		    req_off = off;
 		    req_len = frd_size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_READ
 		}
-		i += frd_size;
+		i_offset += frd_size;
 
                 if (off + frd_size < disp + flat_file->indices[j] +
-                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                        off += frd_size;
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by frd_size. */
                 else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
-		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+                                        n_filetypes*(ADIO_Offset)filetype_extent;
+		    frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 		}
 	    }
 	}
@@ -274,7 +322,7 @@
 	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
 	    k = num = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
@@ -289,7 +337,7 @@
 
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_READ
 		}
 
@@ -298,18 +346,18 @@
 
 		if (size == frd_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
-
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
-                                              (ADIO_Offset) n_filetypes*filetype_extent;
+          n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_frd_size = flat_file->blocklens[j];
 		    if (size != brd_size) {
-			i += size;
+			i_offset += size;
 			new_brd_size -= size;
 		    }
 		}
@@ -319,7 +367,7 @@
 
 		    k = (k + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
+		    i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
 			flat_buf->indices[k]);
 		    new_brd_size = flat_buf->blocklens[k];
 		    if (size != frd_size) {
@@ -327,13 +375,14 @@
 			new_frd_size -= size;
 		    }
 		}
+    ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
 		num += size;
 		frd_size = new_frd_size;
                 brd_size = new_brd_size;
 	    }
 	}
 	
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c	2010-11-15 15:02:47.000000000 +0100
@@ -16,11 +16,13 @@
     /* offset is in units of etype relative to the filetype. */
 
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int brd_size, frd_size=0, b_index;
-    int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
+    ADIO_Offset size, brd_size, frd_size=0, req_len, sum;
+    int b_index;
+    int n_etypes_in_filetype;
+    ADIO_Offset n_filetypes, etype_in_filetype;
     ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len;
+    unsigned bufsize, filetype_size, buftype_size, size_in_filetype;
+    int etype_size;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
     ADIO_Offset userbuf_off;
@@ -32,17 +34,18 @@
     ADIOI_Datatype_iscontig(buftype, &buftype_is_contig);
     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
 
-    MPI_Type_size(fd->filetype, &filetype_size);
+    MPI_Type_size(fd->filetype, (int*)&filetype_size);
     if ( ! filetype_size ) {
 	*error_code = MPI_SUCCESS; 
 	return;
     }
 
     MPI_Type_extent(fd->filetype, &filetype_extent);
-    MPI_Type_size(buftype, &buftype_size);
+    MPI_Type_size(buftype,(int*) &buftype_size);
     MPI_Type_extent(buftype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
     /* contiguous in buftype and filetype is handled elsewhere */
@@ -62,8 +65,7 @@
 	end_offset = off + bufsize - 1;
 
 	/* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -73,11 +75,13 @@
 	 */
         for (b_count=0; b_count < count; b_count++) {
             for (b_index=0; b_index < flat_buf->count; b_index++) {
-                userbuf_off = b_count*buftype_extent + 
+                userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + 
 		              flat_buf->indices[b_index];
 		req_off = off;
 		req_len = flat_buf->blocklens[b_index];
 
+    ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+    ADIOI_Assert(req_len == (int) req_len);
 		ADIO_ReadContig(fd, 
 				(char *) buf + userbuf_off,
 				req_len, 
@@ -93,8 +97,7 @@
             }
 	}
 
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -104,7 +107,9 @@
     }
 
     else {  /* noncontiguous in file */
-    	int f_index, st_frd_size, st_index = 0, st_n_filetypes;
+    	int f_index, st_index = 0; 
+      ADIO_Offset st_n_filetypes;
+      ADIO_Offset st_frd_size;
 	int flag;
 
         /* First we're going to calculate a set of values for use in all
@@ -134,15 +139,15 @@
                 n_filetypes++;
 		for (f_index=0; f_index < flat_file->count; f_index++) {
 		    if (disp + flat_file->indices[f_index] + 
-                       (ADIO_Offset) n_filetypes*filetype_extent + 
+                       n_filetypes*(ADIO_Offset)filetype_extent + 
 		       flat_file->blocklens[f_index] >= start_off) 
 		    {
 		    	/* this block contains our starting position */
 
 			st_index = f_index;
-			frd_size = (int) (disp + flat_file->indices[f_index] + 
-		 	           (ADIO_Offset) n_filetypes*filetype_extent + 
-				   flat_file->blocklens[f_index] - start_off);
+			frd_size = disp + flat_file->indices[f_index] + 
+		 	           n_filetypes*(ADIO_Offset)filetype_extent + 
+				   flat_file->blocklens[f_index] - start_off;
 			flag = 1;
 			break;
 		    }
@@ -151,9 +156,9 @@
 	}
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
 	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
-	    size_in_filetype = etype_in_filetype * etype_size;
+	    size_in_filetype = (unsigned)etype_in_filetype * (unsigned)etype_size;
  
 	    sum = 0;
 	    for (f_index=0; f_index < flat_file->count; f_index++) {
@@ -169,7 +174,7 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+	    start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + 
 	    	        abs_off_in_filetype;
 	}
 
@@ -198,9 +203,9 @@
 	    }
 
 	    off = disp + flat_file->indices[f_index] + 
-	          (ADIO_Offset) n_filetypes*filetype_extent;
+	          n_filetypes*(ADIO_Offset)filetype_extent;
 	    frd_size = ADIOI_MIN(flat_file->blocklens[f_index], 
-	                         bufsize-(int)userbuf_off);
+	                         bufsize-(unsigned)userbuf_off);
 	}
 
 	/* End of calculations.  At this point the following values have
@@ -213,8 +218,7 @@
 	 */
 
 	/* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -238,6 +242,8 @@
 		    req_off = off;
 		    req_len = frd_size;
 
+        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+        ADIOI_Assert(req_len == (int) req_len);
 		    ADIO_ReadContig(fd, 
 				    (char *) buf + userbuf_off,
 				    req_len, 
@@ -252,7 +258,7 @@
 
                 if (off + frd_size < disp + flat_file->indices[f_index] +
                    flat_file->blocklens[f_index] + 
-		   (ADIO_Offset) n_filetypes*filetype_extent)
+		   n_filetypes*(ADIO_Offset)filetype_extent)
 		{
 		    /* important that this value be correct, as it is
 		     * used to set the offset in the fd near the end of
@@ -270,14 +276,14 @@
 			n_filetypes++;
 		    }
 		    off = disp + flat_file->indices[f_index] + 
-                          (ADIO_Offset) n_filetypes*filetype_extent;
+                          n_filetypes*(ADIO_Offset)filetype_extent;
 		    frd_size = ADIOI_MIN(flat_file->blocklens[f_index], 
-		                         bufsize-(int)userbuf_off);
+		                         bufsize-(unsigned)userbuf_off);
 		}
 	    }
 	}
 	else {
-	    int i, tmp_bufsize = 0;
+	    ADIO_Offset i_offset, tmp_bufsize = 0;
 	    /* noncontiguous in memory as well as in file */
 
 	    ADIOI_Flatten_datatype(buftype);
@@ -285,7 +291,7 @@
 	    while (flat_buf->type != buftype) flat_buf = flat_buf->next;
 
 	    b_index = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    f_index = st_index;
 	    off = start_off;
 	    n_filetypes = st_n_filetypes;
@@ -294,14 +300,16 @@
 
 	    /* while we haven't read size * count bytes, keep going */
 	    while (tmp_bufsize < bufsize) {
-    		int new_brd_size = brd_size, new_frd_size = frd_size;
+    		ADIO_Offset new_brd_size = brd_size, new_frd_size = frd_size;
 
 		size = ADIOI_MIN(frd_size, brd_size);
 		if (size) {
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 
+        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+        ADIOI_Assert(req_len == (int) req_len);
 		    ADIO_ReadContig(fd, 
 				    (char *) buf + userbuf_off,
 				    req_len, 
@@ -322,11 +330,11 @@
 		    }
 
 		    off = disp + flat_file->indices[f_index] + 
-                          (ADIO_Offset) n_filetypes*filetype_extent;
+                          n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_frd_size = flat_file->blocklens[f_index];
 		    if (size != brd_size) {
-			i += size;
+			i_offset += size;
 			new_brd_size -= size;
 		    }
 		}
@@ -336,8 +344,8 @@
 
 		    b_index = (b_index + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
-			flat_buf->indices[b_index]);
+		    i_offset = buftype_extent*(buf_count/flat_buf->count) +
+			flat_buf->indices[b_index];
 		    new_brd_size = flat_buf->blocklens[b_index];
 		    if (size != frd_size) {
 			off += size;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_resize.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_resize.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_resize.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_resize.c	2010-11-15 15:02:47.000000000 +0100
@@ -20,7 +20,8 @@
 
     /* first aggregator performs ftruncate() */
     if (rank == fd->hints->ranklist[0]) {
-	err = ftruncate(fd->fd_sys, size);
+    ADIOI_Assert(size == (off_t) size); 
+	err = ftruncate(fd->fd_sys, (off_t)size);
     }
 
     /* bcast return value */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_seek.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_seek.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_seek.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_seek.c	2010-11-15 15:02:47.000000000 +0100
@@ -22,10 +22,12 @@
     ADIO_Offset off;
     ADIOI_Flatlist_node *flat_file;
 
-    int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype;
+    int i;
+    ADIO_Offset n_etypes_in_filetype, n_filetypes, etype_in_filetype;
     ADIO_Offset abs_off_in_filetype=0;
-    int size_in_filetype, sum;
-    int filetype_size, etype_size, filetype_is_contig;
+    ADIO_Offset size_in_filetype, sum;
+    unsigned filetype_size;
+    int etype_size, filetype_is_contig;
     MPI_Aint filetype_extent;
 
     ADIOI_UNREFERENCED_ARG(whence);
@@ -33,13 +35,13 @@
     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
     etype_size = fd->etype_size;
 
-    if (filetype_is_contig) off = fd->disp + etype_size * offset;
+    if (filetype_is_contig) off = fd->disp + (ADIO_Offset)etype_size * offset;
     else {
         flat_file = ADIOI_Flatlist;
         while (flat_file->type != fd->filetype) flat_file = flat_file->next;
 
 	MPI_Type_extent(fd->filetype, &filetype_extent);
-	MPI_Type_size(fd->filetype, &filetype_size);
+	MPI_Type_size(fd->filetype, (int*)&filetype_size);
 	if ( ! filetype_size ) {
 	    /* Since offset relative to the filetype size, we can't
 	       do compute the offset when that result is zero.
@@ -49,8 +51,8 @@
 	}
 
 	n_etypes_in_filetype = filetype_size/etype_size;
-	n_filetypes = (int) (offset / n_etypes_in_filetype);
-	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	n_filetypes = offset / n_etypes_in_filetype;
+	etype_in_filetype = offset % n_etypes_in_filetype;
 	size_in_filetype = etype_in_filetype * etype_size;
  
 	sum = 0;
@@ -64,7 +66,7 @@
 	}
 
 	/* abs. offset in bytes in the file */
-	off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent +
+	off = fd->disp + n_filetypes * filetype_extent +
                 abs_off_in_filetype;
     }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_subarray.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_subarray.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_subarray.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_subarray.c	2010-11-15 15:02:47.000000000 +0100
@@ -32,9 +32,9 @@
 			    array_of_subsizes[0],
 			    array_of_sizes[0], oldtype, &tmp1);
 	    
-	    size = array_of_sizes[0]*extent;
+	    size = (MPI_Aint)array_of_sizes[0]*extent;
 	    for (i=2; i<ndims; i++) {
-		size *= array_of_sizes[i-1];
+		size *= (MPI_Aint)array_of_sizes[i-1];
 		MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
 		MPI_Type_free(&tmp1);
 		tmp1 = tmp2;
@@ -45,8 +45,8 @@
 	disps[1] = array_of_starts[0];
 	size = 1;
 	for (i=1; i<ndims; i++) {
-	    size *= array_of_sizes[i-1];
-	    disps[1] += size*array_of_starts[i];
+	    size *= (MPI_Aint)array_of_sizes[i-1];
+	    disps[1] += size*(MPI_Aint)array_of_starts[i];
 	}  
         /* rest done below for both Fortran and C order */
     }
@@ -61,9 +61,9 @@
 			    array_of_subsizes[ndims-1],
 			    array_of_sizes[ndims-1], oldtype, &tmp1);
 	    
-	    size = array_of_sizes[ndims-1]*extent;
+	    size = (MPI_Aint)array_of_sizes[ndims-1]*extent;
 	    for (i=ndims-3; i>=0; i--) {
-		size *= array_of_sizes[i+1];
+		size *= (MPI_Aint)array_of_sizes[i+1];
 		MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
 		MPI_Type_free(&tmp1);
 		tmp1 = tmp2;
@@ -74,15 +74,15 @@
 	disps[1] = array_of_starts[ndims-1];
 	size = 1;
 	for (i=ndims-2; i>=0; i--) {
-	    size *= array_of_sizes[i+1];
-	    disps[1] += size*array_of_starts[i];
+	    size *= (MPI_Aint)array_of_sizes[i+1];
+	    disps[1] += size*(MPI_Aint)array_of_starts[i];
 	}
     }
     
     disps[1] *= extent;
     
     disps[2] = extent;
-    for (i=0; i<ndims; i++) disps[2] *= array_of_sizes[i];
+    for (i=0; i<ndims; i++) disps[2] *= (MPI_Aint)array_of_sizes[i];
     
     disps[0] = 0;
     blklens[0] = blklens[1] = blklens[2] = 1;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write.c	2010-11-15 15:02:47.000000000 +0100
@@ -10,17 +10,26 @@
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
 
 void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count, 
 			   MPI_Datatype datatype, int file_ptr_type,
 			   ADIO_Offset offset, ADIO_Status *status,
 			   int *error_code)
 {
-    int err = -1, datatype_size, len;
+    int err = -1, datatype_size;
+    ADIO_Offset len;
     static char myname[] = "ADIOI_GEN_WRITECONTIG";
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5036, 0, NULL);
+#endif
+
     MPI_Type_size(datatype, &datatype_size);
-    len = datatype_size * count;
+    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+    ADIOI_Assert(len == (unsigned int) len); /* write() below is passed len as an unsigned int */
 
     if (file_ptr_type == ADIO_INDIVIDUAL) {
 	offset = fd->fp_ind;
@@ -50,7 +59,7 @@
 #ifdef ADIOI_MPE_LOGGING
     MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
 #endif
-    err = write(fd->fd_sys, buf, len);
+    err = write(fd->fd_sys, buf, (unsigned int)len);
 #ifdef ADIOI_MPE_LOGGING
     MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
 #endif
@@ -77,4 +86,7 @@
 #endif
 
     *error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5037, 0, NULL);
+#endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c	2010-11-15 15:02:47.000000000 +0100
@@ -8,18 +8,22 @@
 #include "adio.h"
 #include "adio_extern.h"
 
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
 /* prototypes of functions used for collective writes only. */
 static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
                          datatype, int nprocs, int myrank,
 			 ADIOI_Access
                          *others_req, ADIO_Offset *offset_list,
-                         int *len_list, int contig_access_count, ADIO_Offset
+                         ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
                          min_st_offset, ADIO_Offset fd_size,
                          ADIO_Offset *fd_start, ADIO_Offset *fd_end,
                          int *buf_idx, int *error_code);
 static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
                          ADIOI_Flatlist_node *flat_buf, ADIO_Offset 
-                         *offset_list, int *len_list, int *send_size, 
+                         *offset_list, ADIO_Offset *len_list, int *send_size, 
                          int *recv_size, ADIO_Offset off, int size,
                          int *count, int *start_pos, int *partial_recv, 
                          int *sent_to_proc, int nprocs, 
@@ -33,7 +37,7 @@
                          MPI_Aint buftype_extent, int *buf_idx, int *error_code);
 static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, ADIO_Offset
@@ -42,7 +46,7 @@
                            int *send_buf_idx, int *curr_to_proc, 
                            int *done_to_proc, int iter, 
                            MPI_Aint buftype_extent);
-static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, 
+void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, 
                       ADIO_Offset *srt_off, int *srt_len, int *start_pos,
                       int nprocs, int nprocs_recv, int total_elements);
 
@@ -72,9 +76,15 @@
     ADIO_Offset orig_fp, start_offset, end_offset, fd_size, min_st_offset, off;
     ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
 	*fd_end = NULL, *end_offsets = NULL;
-    int *buf_idx = NULL, *len_list = NULL;
+    int *buf_idx = NULL;
+    ADIO_Offset *len_list = NULL;
     int old_error, tmp_error;
 
+    if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) { 
+	ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype, 
+			file_ptr_type, offset, status, error_code);
+	return;
+    }
 
     MPI_Comm_size(fd->comm, &nprocs);
     MPI_Comm_rank(fd->comm, &myrank);
@@ -136,7 +146,7 @@
 
         if (buftype_is_contig && filetype_is_contig) {
             if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
-                off = fd->disp + (fd->etype_size) * offset;
+                off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
                 ADIO_WriteContig(fd, buf, count, datatype,
 				 ADIO_EXPLICIT_OFFSET,
 				 off, status, error_code);
@@ -156,7 +166,9 @@
 
     ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
 			    nprocs_for_coll, &min_st_offset,
-			    &fd_start, &fd_end, &fd_size);   
+			    &fd_start, &fd_end, 
+			    fd->hints->min_fdomain_size, &fd_size,
+			    fd->hints->striping_unit);   
 
 
 /* calculate what portions of the access requests of this process are
@@ -225,6 +237,9 @@
 #ifdef ADIOI_MPE_LOGGING
     MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL );
 #endif
+#ifdef AGGREGATION_PROFILE
+	MPE_Log_event (5012, 0, NULL);
+#endif
 
     if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) )
 	    *error_code = old_error;
@@ -264,6 +279,9 @@
 #endif
 
     fd->fp_sys_posn = -1;   /* set it to null. */
+#ifdef AGGREGATION_PROFILE
+	MPE_Log_event (5013, 0, NULL);
+#endif
 }
 
 
@@ -276,9 +294,8 @@
 				 int myrank,
 				 ADIOI_Access
 				 *others_req, ADIO_Offset *offset_list,
-				 int *len_list, int contig_access_count,
-				 ADIO_Offset
-				 min_st_offset, ADIO_Offset fd_size,
+				 ADIO_Offset *len_list, int contig_access_count,
+				 ADIO_Offset min_st_offset, ADIO_Offset fd_size,
 				 ADIO_Offset *fd_start, ADIO_Offset *fd_end,
 				 int *buf_idx, int *error_code)
 {
@@ -291,7 +308,9 @@
    array to a file, where each local array is 8Mbytes, requiring
    at least another 8Mbytes of temp space is unacceptable. */
 
-    int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig;
+    /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+    ADIO_Offset size=0;
+    int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
     ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
     char *write_buf=NULL;
     int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
@@ -312,7 +331,7 @@
    That gives the no. of communication phases as well. */
 
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-    MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, 
+    ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value, 
                  &info_flag);
     coll_bufsize = atoi(value);
     ADIOI_Free(value);
@@ -421,7 +440,7 @@
 
 	for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;
 
-	size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done)); 
+	size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done); 
 
 	for (i=0; i < nprocs; i++) {
 	    if (others_req[i].count) {
@@ -445,12 +464,14 @@
 		    }
 		    if (req_off < off + size) {
 			count[i]++;
+      ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
 			MPI_Address(write_buf+req_off-off, 
                                &(others_req[i].mem_ptrs[j]));
-			recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size - 
-						  req_off, req_len));
+      ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
+			recv_size[i] += (int)(ADIOI_MIN(off + size - req_off, 
+                                      (unsigned)req_len));
 
-			if (off+size-req_off < req_len)
+			if (off+size-req_off < (unsigned)req_len)
 			{
 			    partial_recv[i] = (int) (off + size - req_off);
 
@@ -494,7 +515,8 @@
 	    if (count[i]) flag = 1;
 
 	if (flag) {
-	    ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
+      ADIOI_Assert(size == (int)size);
+	    ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, 
                         off, &status, error_code);
 	    if (*error_code != MPI_SUCCESS) return;
 	}
@@ -537,7 +559,7 @@
  */
 static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
 				  ADIOI_Flatlist_node *flat_buf, ADIO_Offset 
-				  *offset_list, int *len_list, int *send_size, 
+				  *offset_list, ADIO_Offset *len_list, int *send_size, 
 				  int *recv_size, ADIO_Offset off, int size,
 				  int *count, int *start_pos,
 				  int *partial_recv,
@@ -558,7 +580,7 @@
     MPI_Request *requests, *send_req;
     MPI_Datatype *recv_types;
     MPI_Status *statuses, status;
-    int *srt_len, sum, sum_recv;
+    int *srt_len, sum;
     ADIO_Offset *srt_off;
     static char myname[] = "ADIOI_W_EXCHANGE_DATA";
 
@@ -617,26 +639,27 @@
         }
     ADIOI_Free(tmp_len);
 
-/* check if there are any holes */
+    /* check if there are any holes. If yes, must do read-modify-write.
+     * holes can be in three places.  'middle' is what you'd expect: the
+     * processes are operating on noncontiguous data.  But holes can also show
+     * up at the beginning or end of the file domain (see John Bent ROMIO REQ
+     * #835). Missing these holes would result in us writing more data than
+     * received by everyone else. */
+
     *hole = 0;
-    for (i=0; i<sum-1; i++)
-	if (srt_off[i]+srt_len[i] < srt_off[i+1]) {
-	    *hole = 1;
-	    break;
-	}
-    /* In some cases (see John Bent ROMIO REQ # 835), an odd interaction
-     * between aggregation, nominally contiguous regions, and cb_buffer_size
-     * should be handled with a read-modify-write (otherwise we will write out
-     * more data than we receive from everyone else (inclusive), so override
-     * hole detection
-     */
-    if (*hole == 0) {
-        sum_recv=0;
-        for (i=0; i<nprocs; i++) {
-	   sum_recv += recv_size[i];
-	   sum_recv += partial_recv[i];
+    if (off != srt_off[0]) /* hole at the front */
+        *hole = 1;
+    else { /* coalesce the sorted offset-length pairs */
+        for (i=1; i<sum; i++) {
+            if (srt_off[i] <= srt_off[0] + srt_len[0]) {
+		int new_len = srt_off[i] + srt_len[i] - srt_off[0];
+		if (new_len > srt_len[0]) srt_len[0] = new_len;
+	    }
+            else
+                break;
         }
-        if (size > sum_recv) *hole = 1;
+        if (i < sum || size != srt_len[0]) /* hole in middle or end */
+            *hole = 1;
     }
 
     ADIOI_Free(srt_off);
@@ -687,6 +710,9 @@
 /* post sends. if buftype_is_contig, data can be directly sent from
    user buf at location given by buf_idx. else use send_buf. */
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5032, 0, NULL);
+#endif
     if (buftype_is_contig) {
 	j = 0;
 	for (i=0; i < nprocs; i++) 
@@ -761,6 +787,9 @@
         MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
 #endif
 
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5033, 0, NULL);
+#endif
     ADIOI_Free(statuses);
     ADIOI_Free(requests);
     if (!buftype_is_contig && nprocs_send) {
@@ -770,7 +799,6 @@
     }
 }
 
-
 #define ADIOI_BUF_INCR \
 { \
     while (buf_incr) { \
@@ -784,7 +812,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
             flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
         } \
         buf_incr -= size_in_buf; \
@@ -796,6 +824,8 @@
 { \
     while (size) { \
         size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+  ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+  ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
         memcpy(&(send_buf[p][send_buf_idx[p]]), \
                ((char *) buf) + user_buf_idx, size_in_buf); \
         send_buf_idx[p] += size_in_buf; \
@@ -808,7 +838,7 @@
                 n_buftypes++; \
             } \
             user_buf_idx = flat_buf->indices[flat_buf_idx] + \
-                              n_buftypes*buftype_extent; \
+                              (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
             flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
         } \
         size -= size_in_buf; \
@@ -819,9 +849,11 @@
 
 
 
+
+
 static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
                            *flat_buf, char **send_buf, ADIO_Offset 
-                           *offset_list, int *len_list, int *send_size, 
+                           *offset_list, ADIO_Offset *len_list, int *send_size, 
                            MPI_Request *requests, int *sent_to_proc, 
                            int nprocs, int myrank, 
                            int contig_access_count, 
@@ -833,8 +865,9 @@
 {
 /* this function is only called if buftype is not contig */
 
-    int i, p, flat_buf_idx, size;
-    int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+    int i, p, flat_buf_idx;
+    ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+    int jj, n_buftypes;
     ADIO_Offset off, len, rem_len, user_buf_idx;
 
 /*  curr_to_proc[p] = amount of data sent to proc. p that has already
@@ -861,7 +894,7 @@
 
     for (i=0; i<contig_access_count; i++) { 
 	off     = offset_list[i];
-	rem_len = (ADIO_Offset) len_list[i];
+	rem_len = len_list[i];
 
 	/*this request may span the file domains of more than one process*/
 	while (rem_len != 0) {
@@ -881,17 +914,20 @@
 	    if (send_buf_idx[p] < send_size[p]) {
 		if (curr_to_proc[p]+len > done_to_proc[p]) {
 		    if (done_to_proc[p] > curr_to_proc[p]) {
-			size = (int)ADIOI_MIN(curr_to_proc[p] + len - 
+			size = ADIOI_MIN(curr_to_proc[p] + len - 
                                 done_to_proc[p], send_size[p]-send_buf_idx[p]);
 			buf_incr = done_to_proc[p] - curr_to_proc[p];
 			ADIOI_BUF_INCR
-		        buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+      ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+		        buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+      ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
 			curr_to_proc[p] = done_to_proc[p] + size;
 		        ADIOI_BUF_COPY
 		    }
 		    else {
-			size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
-			buf_incr = (int)len;
+			size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+			buf_incr = len;
+      ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
 			curr_to_proc[p] += size;
 			ADIOI_BUF_COPY
 		    }
@@ -902,13 +938,14 @@
 		    }
 		}
 		else {
-		    curr_to_proc[p] += (int)len;
-		    buf_incr = (int)len;
+        ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
+		    curr_to_proc[p] += len;
+		    buf_incr = len;
 		    ADIOI_BUF_INCR
 		}
 	    }
 	    else {
-		buf_incr = (int)len;
+		buf_incr = len;
 		ADIOI_BUF_INCR
             }
 	    off     += len;
@@ -921,7 +958,7 @@
 
 
 
-static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, 
+void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count, 
 		      ADIO_Offset *srt_off, int *srt_len, int *start_pos,
 		      int nprocs, int nprocs_recv, int total_elements)
 {
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c	2010-11-15 15:02:47.000000000 +0100
@@ -27,16 +27,18 @@
 /* offset is in units of etype relative to the filetype. */
 
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
-    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
+    int j, k, err=-1, st_index=0;
+    ADIO_Offset fwr_size=0, bwr_size, new_bwr_size, new_fwr_size, i_offset, num;
+    unsigned bufsize; 
+    int n_etypes_in_filetype;
+    ADIO_Offset n_filetypes, etype_in_filetype, size, sum;
+    ADIO_Offset abs_off_in_filetype=0, size_in_filetype;
     int filetype_size, etype_size, buftype_size;
     MPI_Aint filetype_extent, buftype_extent, indx;
     int buf_count, buftype_is_contig, filetype_is_contig;
     ADIO_Offset off, disp;
-    int flag, new_bwr_size, new_fwr_size, err_flag=0;
-    static char myname[] = "ADIOI_PVFS_WRITESTRIDED";
+    int flag, err_flag=0;
+    static char myname[] = "ADIOI_NOLOCK_WRITESTRIDED";
 #ifdef IO_DEBUG
     int rank,nprocs;
 #endif
@@ -70,6 +72,7 @@
     MPI_Type_extent(datatype, &buftype_extent);
     etype_size = fd->etype_size;
     
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
     if (!buftype_is_contig && filetype_is_contig) {
@@ -100,6 +103,7 @@
 	 * is also handled.
 	 */
 	for (j=0; j<count; j++) {
+    int i;
 	    for (i=0; i<flat_buf->count; i++) {
 		if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
 		    /* there is data in the buffer; write out the buffer so far */
@@ -134,12 +138,14 @@
 				    rank, nprocs, off, 
 				    flat_buf->blocklens[i]);
 #endif
+        ADIOI_Assert(flat_buf->blocklens[i] == (unsigned)flat_buf->blocklens[i]);
+        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]) == (ADIO_Offset)((MPIR_Upint)buf + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]));
 #ifdef ADIOI_MPE_LOGGING
 		    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
 #endif
 		    err = write(fd->fd_sys,
-				     ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
-				     flat_buf->blocklens[i]);
+				     ((char *) buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i],
+				     (unsigned)flat_buf->blocklens[i]);
 #ifdef ADIOI_MPE_LOGGING
 		    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
 #endif
@@ -206,14 +212,15 @@
             n_filetypes = -1;
             flag = 0;
             while (!flag) {
+                int i;
                 n_filetypes++;
                 for (i=0; i<flat_file->count; i++) {
                     if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
+                        n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i] 
                             >= offset) {
                         st_index = i;
                         fwr_size = disp + flat_file->indices[i] + 
-                                (ADIO_Offset) n_filetypes*filetype_extent
+                                n_filetypes*(ADIO_Offset)filetype_extent
                                  + flat_file->blocklens[i] - offset;
                         flag = 1;
                         break;
@@ -222,9 +229,10 @@
             }
 	}
 	else {
+            int i;
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -240,7 +248,7 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+            offset = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype;
 	}
 
 	if (buftype_is_contig && !filetype_is_contig) {
@@ -248,11 +256,11 @@
 /* contiguous in memory, noncontiguous in file. should be the most
    common case. */
 
-	    i = 0;
+	    i_offset = 0;
 	    j = st_index;
 	    off = offset;
 	    fwr_size = ADIOI_MIN(fwr_size, bufsize);
-	    while (i < bufsize) {
+	    while (i_offset < bufsize) {
                 if (fwr_size) { 
                     /* TYPE_UB and TYPE_LB can result in 
                        fwr_size = 0. save system call in such cases */ 
@@ -271,16 +279,16 @@
 #ifdef ADIOI_MPE_LOGGING
 		    MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
 #endif
-		    err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);
+		    err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);
 #ifdef ADIOI_MPE_LOGGING
 		    MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
 #endif
 		    if (err == -1) err_flag = 1;
 		}
-		i += fwr_size;
+		i_offset += fwr_size;
 
                 if (off + fwr_size < disp + flat_file->indices[j] +
-                   flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+                   flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                        off += fwr_size;
                 /* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by fwr_size. */
@@ -291,8 +299,8 @@
 			n_filetypes++;
 		    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
-		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+                                        n_filetypes*(ADIO_Offset)filetype_extent;
+		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 		}
 	    }
 	}
@@ -327,6 +335,8 @@
 #ifdef ADIOI_MPE_LOGGING 
 		    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
 #endif
+                    ADIOI_Assert(size == (size_t) size);
+                    ADIOI_Assert(off == (off_t) off);
 		    err = write(fd->fd_sys, ((char *) buf) + indx, size);
 #ifdef ADIOI_MPE_LOGGING
 		    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
@@ -346,7 +356,7 @@
                     }
 
                     off = disp + flat_file->indices[j] + 
-                                   (ADIO_Offset) n_filetypes*filetype_extent;
+                                   n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_fwr_size = flat_file->blocklens[j];
 		    if (size != bwr_size) {
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_str.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_str.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_str.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_str.c	2010-11-15 15:02:47.000000000 +0100
@@ -24,7 +24,7 @@
            } \
         } \
 	writebuf_off = req_off; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
 	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
 	ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                  ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
@@ -36,7 +36,8 @@
 	    return; \
 	} \
     } \
-    write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
     memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
     while (write_sz != req_len) { \
         ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
@@ -52,7 +53,7 @@
         req_len -= write_sz; \
         userbuf_off += write_sz; \
         writebuf_off += writebuf_len; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
 	if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
         ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                   ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
@@ -84,9 +85,10 @@
             return; \
         } \
 	writebuf_off = req_off; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
     } \
-    write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
     memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
     while (write_sz != req_len) { \
         ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
@@ -101,40 +103,41 @@
         req_len -= write_sz; \
         userbuf_off += write_sz; \
         writebuf_off += writebuf_len; \
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
         write_sz = ADIOI_MIN(req_len, writebuf_len); \
         memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
     } \
 }
-
-
 void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
                        MPI_Datatype datatype, int file_ptr_type,
                        ADIO_Offset offset, ADIO_Status *status, int
                        *error_code)
 {
+
 /* offset is in units of etype relative to the filetype. */
 
     ADIOI_Flatlist_node *flat_buf, *flat_file;
-    int i, j, k, bwr_size, fwr_size=0, st_index=0;
-    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
+    ADIO_Offset i_offset, sum, size_in_filetype;
+    int i, j, k, st_index=0;
+    int n_etypes_in_filetype;
+    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
     ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len;
+    int filetype_size, etype_size, buftype_size;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
     ADIO_Offset userbuf_off;
     ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
     char *writebuf;
-    int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
+    unsigned bufsize, writebuf_len, max_bufsize, write_sz;
     ADIO_Status status1;
-    int new_bwr_size, new_fwr_size, max_bufsize;
+    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
     static char myname[] = "ADIOI_GEN_WriteStrided";
 
     if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
     	/* if user has disabled data sieving on reads, use naive
 	 * approach instead.
 	 */
+
 	ADIOI_GEN_WriteStrided_naive(fd, 
 				    buf,
 				    count,
@@ -146,6 +149,7 @@
     	return;
     }
 
+
     *error_code = MPI_SUCCESS;  /* changed below if error */
 
     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -162,6 +166,7 @@
     MPI_Type_extent(datatype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
 /* get max_bufsize from the info object. */
@@ -177,26 +182,28 @@
 	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
         off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-                 fd->disp + etype_size * offset;
+                 fd->disp + (ADIO_Offset)etype_size * offset;
 
 	start_off = off;
 	end_offset = off + bufsize - 1;
         writebuf_off = off;
         writebuf = (char *) ADIOI_Malloc(max_bufsize);
-        writebuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1));
+        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1));
 
 /* if atomicity is true, lock the region to be accessed */
 	if (fd->atomicity) 
 	    ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 
         for (j=0; j<count; j++) 
-            for (i=0; i<flat_buf->count; i++) {
-                userbuf_off = j*buftype_extent + flat_buf->indices[i];
-		req_off = off;
-		req_len = flat_buf->blocklens[i];
-		ADIOI_BUFFERED_WRITE_WITHOUT_READ
-                off += flat_buf->blocklens[i];
-            }
+        {
+              for (i=0; i<flat_buf->count; i++) {
+                  userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+      req_off = off;
+      req_len = flat_buf->blocklens[i];
+      ADIOI_BUFFERED_WRITE_WITHOUT_READ
+                  off += flat_buf->blocklens[i];
+              }
+        }
 
         /* write the buffer out finally */
         ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
@@ -220,29 +227,36 @@
 	disp = fd->disp;
 
 	if (file_ptr_type == ADIO_INDIVIDUAL) {
-	    offset = fd->fp_ind; /* in bytes */
-	    n_filetypes = -1;
-	    flag = 0;
-	    while (!flag) {
-                n_filetypes++;
-		for (i=0; i<flat_file->count; i++) {
-		    if (disp + flat_file->indices[i] + 
-                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
-                            >= offset) {
-			st_index = i;
-			fwr_size = (int) (disp + flat_file->indices[i] + 
-			        (ADIO_Offset) n_filetypes*filetype_extent
-			         + flat_file->blocklens[i] - offset);
-			flag = 1;
-			break;
-		    }
-		}
-	    }
-	}
+	/* Wei-keng reworked type processing to be a bit more efficient: the starting filetype instance is computed by division instead of a linear scan */
+            offset       = fd->fp_ind - disp;
+            n_filetypes  = (offset - flat_file->indices[0]) / filetype_extent;
+            offset      -= (ADIO_Offset)n_filetypes * filetype_extent;
+            /* now offset is local to this extent */
+
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* fwr_size is from offset to the end of block i */
+                if (dist == 0) {
+                    i++;
+                    offset   = flat_file->indices[i];
+                    fwr_size = flat_file->blocklens[i];
+                    break;
+                }
+                if (dist > 0) {
+                    fwr_size = dist;
+                    break;
+                }
+            }
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+        }
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -258,32 +272,65 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	    offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+		    abs_off_in_filetype;
 	}
 
 	start_off = offset;
 
+        /* Wei-keng Liao: the write request lies within a single contiguous flat_file block */
+	/* this could happen, for example, with subarray types that are
+	 * actually fairly contiguous */
+        if (buftype_is_contig && bufsize <= fwr_size) {
+            ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+                             offset, status, error_code);
+
+	    if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte 
+		 * that can be accessed in the fileview. */
+                fd->fp_ind = offset + bufsize;
+                if (bufsize == fwr_size) {
+                    do {
+                        st_index++;
+                        if (st_index == flat_file->count) {
+                            st_index = 0;
+                            n_filetypes++;
+                        }
+                    } while (flat_file->blocklens[st_index] == 0);
+                    fd->fp_ind = disp + flat_file->indices[st_index]
+                               + (ADIO_Offset)n_filetypes*filetype_extent;
+                }
+            }
+	    fd->fp_sys_posn = -1;   /* set it to null. */ 
+#ifdef HAVE_STATUS_SET_BYTES
+	    MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif 
+            return;
+        }
+
        /* Calculate end_offset, the last byte-offset that will be accessed.
          e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
 
 	st_fwr_size = fwr_size;
 	st_n_filetypes = n_filetypes;
-	i = 0;
+	i_offset = 0;
 	j = st_index;
 	off = offset;
 	fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
-	while (i < bufsize) {
-	    i += fwr_size;
+	while (i_offset < bufsize) {
+	    i_offset += fwr_size;
 	    end_offset = off + fwr_size - 1;
 
-	    if (j < (flat_file->count - 1)) j++;
-	    else {
-		j = 0;
-		n_filetypes++;
-	    }
+            j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+                j = (j+1) % flat_file->count;
+                n_filetypes += (j == 0) ? 1 : 0;
+            }
 
-	    off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
-	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+	    off = disp + flat_file->indices[j] + 
+		    n_filetypes*(ADIO_Offset)filetype_extent;
+	    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
 	}
 
 /* if atomicity is true, lock the region to be accessed */
@@ -300,39 +347,41 @@
 /* contiguous in memory, noncontiguous in file. should be the most
    common case. */
 
-	    i = 0;
+	    i_offset = 0;
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
 	    fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
-	    while (i < bufsize) {
+	    while (i_offset < bufsize) {
                 if (fwr_size) { 
                     /* TYPE_UB and TYPE_LB can result in 
                        fwr_size = 0. save system call in such cases */ 
 		    /* lseek(fd->fd_sys, off, SEEK_SET);
-		    err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
+		    err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
 
 		    req_off = off;
 		    req_len = fwr_size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_WRITE
 		}
-		i += fwr_size;
+		i_offset += fwr_size;
 
 		if (off + fwr_size < disp + flat_file->indices[j] +
-	           flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+	           flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
 		       off += fwr_size;
 		/* did not reach end of contiguous block in filetype.
                    no more I/O needed. off is incremented by fwr_size. */
 		else {
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
-		    }
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
 		    off = disp + flat_file->indices[j] + 
-                                        (ADIO_Offset) n_filetypes*filetype_extent;
-		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+                                    n_filetypes*(ADIO_Offset)filetype_extent;
+		    fwr_size = ADIOI_MIN(flat_file->blocklens[j], 
+				    bufsize-i_offset);
 		}
 	    }
 	}
@@ -344,7 +393,7 @@
 	    while (flat_buf->type != datatype) flat_buf = flat_buf->next;
 
 	    k = num = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    j = st_index;
 	    off = offset;
 	    n_filetypes = st_n_filetypes;
@@ -355,11 +404,11 @@
 		size = ADIOI_MIN(fwr_size, bwr_size);
 		if (size) {
 		    /* lseek(fd->fd_sys, off, SEEK_SET);
-		    err = write(fd->fd_sys, ((char *) buf) + i, size); */
+		    err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
 
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 		    ADIOI_BUFFERED_WRITE
 		}
 
@@ -368,18 +417,19 @@
 
 		if (size == fwr_size) {
 /* reached end of contiguous block in file */
-		    if (j < (flat_file->count - 1)) j++;
-		    else {
-			j = 0;
-			n_filetypes++;
+ 		    j = (j+1) % flat_file->count;
+ 		    n_filetypes += (j == 0) ? 1 : 0;
+ 		    while (flat_file->blocklens[j]==0) {
+ 			j = (j+1) % flat_file->count;
+ 			n_filetypes += (j == 0) ? 1 : 0;
 		    }
 
 		    off = disp + flat_file->indices[j] + 
-                                              (ADIO_Offset) n_filetypes*filetype_extent;
+                                      n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_fwr_size = flat_file->blocklens[j];
 		    if (size != bwr_size) {
-			i += size;
+			i_offset += size;
 			new_bwr_size -= size;
 		    }
 		}
@@ -389,8 +439,8 @@
 
 		    k = (k + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
-			flat_buf->indices[k]); 
+		    i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+			flat_buf->indices[k]; 
 		    new_bwr_size = flat_buf->blocklens[k];
 		    if (size != fwr_size) {
 			off += size;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c	2010-11-15 15:02:47.000000000 +0100
@@ -17,11 +17,13 @@
 
     ADIOI_Flatlist_node *flat_buf, *flat_file;
     /* bwr == buffer write; fwr == file write */
-    int bwr_size, fwr_size=0, b_index;
-    int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, buftype_size, req_len;
+    ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype; 
+    int b_index;
+    unsigned bufsize; 
+    int n_etypes_in_filetype;
+    ADIO_Offset size, n_filetypes, etype_in_filetype;
+    ADIO_Offset abs_off_in_filetype=0, req_len;
+    int filetype_size, etype_size, buftype_size;
     MPI_Aint filetype_extent, buftype_extent; 
     int buf_count, buftype_is_contig, filetype_is_contig;
     ADIO_Offset userbuf_off;
@@ -44,6 +46,7 @@
     MPI_Type_extent(buftype, &buftype_extent);
     etype_size = fd->etype_size;
 
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
     bufsize = buftype_size * count;
 
     /* contiguous in buftype and filetype is handled elsewhere */
@@ -57,14 +60,13 @@
 	while (flat_buf->type != buftype) flat_buf = flat_buf->next;
 
         off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : 
-              fd->disp + etype_size * offset;
+              fd->disp + (ADIO_Offset)etype_size * offset;
 
 	start_off = off;
 	end_offset = off + bufsize - 1;
 
 	/* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -74,14 +76,16 @@
 	 */
         for (b_count=0; b_count < count; b_count++) {
             for (b_index=0; b_index < flat_buf->count; b_index++) {
-                userbuf_off = b_count*buftype_extent + 
+                userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent + 
 		              flat_buf->indices[b_index];
 		req_off = off;
 		req_len = flat_buf->blocklens[b_index];
 
+    ADIOI_Assert(req_len == (int) req_len);
+    ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
 		ADIO_WriteContig(fd, 
 				(char *) buf + userbuf_off,
-				req_len, 
+				(int)req_len, 
 				MPI_BYTE, 
 		    		ADIO_EXPLICIT_OFFSET,
 				req_off,
@@ -94,8 +98,7 @@
             }
 	}
 
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -105,7 +108,8 @@
     }
 
     else {  /* noncontiguous in file */
-    	int f_index, st_fwr_size, st_index = 0, st_n_filetypes;
+    	int f_index, st_index = 0;
+      ADIO_Offset st_fwr_size, st_n_filetypes;
 	int flag;
 
         /* First we're going to calculate a set of values for use in all
@@ -135,15 +139,15 @@
                 n_filetypes++;
 		for (f_index=0; f_index < flat_file->count; f_index++) {
 		    if (disp + flat_file->indices[f_index] + 
-                       (ADIO_Offset) n_filetypes*filetype_extent + 
+                       n_filetypes*(ADIO_Offset)filetype_extent + 
 		       flat_file->blocklens[f_index] >= start_off) 
 		    {
 		    	/* this block contains our starting position */
 
 			st_index = f_index;
-			fwr_size = (int) (disp + flat_file->indices[f_index] + 
-		 	           (ADIO_Offset) n_filetypes*filetype_extent + 
-				   flat_file->blocklens[f_index] - start_off);
+			fwr_size = disp + flat_file->indices[f_index] + 
+		 	           n_filetypes*(ADIO_Offset)filetype_extent + 
+				   flat_file->blocklens[f_index] - start_off;
 			flag = 1;
 			break;
 		    }
@@ -152,8 +156,8 @@
 	}
 	else {
 	    n_etypes_in_filetype = filetype_size/etype_size;
-	    n_filetypes = (int) (offset / n_etypes_in_filetype);
-	    etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	    n_filetypes = offset / n_etypes_in_filetype;
+	    etype_in_filetype = offset % n_etypes_in_filetype;
 	    size_in_filetype = etype_in_filetype * etype_size;
  
 	    sum = 0;
@@ -170,7 +174,7 @@
 	    }
 
 	    /* abs. offset in bytes in the file */
-	    start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent + 
+	    start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent + 
 	    	        abs_off_in_filetype;
 	}
 
@@ -199,9 +203,9 @@
 	    }
 
 	    off = disp + flat_file->indices[f_index] + 
-	          (ADIO_Offset) n_filetypes*filetype_extent;
+	          n_filetypes*(ADIO_Offset)filetype_extent;
 	    fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], 
-	                         bufsize-(int)userbuf_off);
+	                         bufsize-(unsigned)userbuf_off);
 	}
 
 	/* End of calculations.  At this point the following values have
@@ -214,8 +218,7 @@
 	 */
 
 	/* if atomicity is true, lock (exclusive) the region to be accessed */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
@@ -239,9 +242,11 @@
 		    req_off = off;
 		    req_len = fwr_size;
 
+        ADIOI_Assert(req_len == (int) req_len);
+        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
 		    ADIO_WriteContig(fd, 
 				    (char *) buf + userbuf_off,
-				    req_len, 
+				    (int)req_len, 
 				    MPI_BYTE, 
 				    ADIO_EXPLICIT_OFFSET,
 				    req_off,
@@ -253,7 +258,7 @@
 
                 if (off + fwr_size < disp + flat_file->indices[f_index] +
                    flat_file->blocklens[f_index] + 
-		   (ADIO_Offset) n_filetypes*filetype_extent)
+		   n_filetypes*(ADIO_Offset)filetype_extent)
 		{
 		    /* important that this value be correct, as it is
 		     * used to set the offset in the fd near the end of
@@ -271,14 +276,14 @@
 			n_filetypes++;
 		    }
 		    off = disp + flat_file->indices[f_index] + 
-                          (ADIO_Offset) n_filetypes*filetype_extent;
+                          n_filetypes*(ADIO_Offset)filetype_extent;
 		    fwr_size = ADIOI_MIN(flat_file->blocklens[f_index], 
-		                         bufsize-(int)userbuf_off);
+		                         bufsize-(unsigned)userbuf_off);
 		}
 	    }
 	}
 	else {
-	    int i, tmp_bufsize = 0;
+	    ADIO_Offset i_offset, tmp_bufsize = 0;
 	    /* noncontiguous in memory as well as in file */
 
 	    ADIOI_Flatten_datatype(buftype);
@@ -286,7 +291,7 @@
 	    while (flat_buf->type != buftype) flat_buf = flat_buf->next;
 
 	    b_index = buf_count = 0;
-	    i = (int) (flat_buf->indices[0]);
+	    i_offset = flat_buf->indices[0];
 	    f_index = st_index;
 	    off = start_off;
 	    n_filetypes = st_n_filetypes;
@@ -295,17 +300,19 @@
 
 	    /* while we haven't read size * count bytes, keep going */
 	    while (tmp_bufsize < bufsize) {
-    		int new_bwr_size = bwr_size, new_fwr_size = fwr_size;
+    		ADIO_Offset new_bwr_size = bwr_size, new_fwr_size = fwr_size;
 
 		size = ADIOI_MIN(fwr_size, bwr_size);
 		if (size) {
 		    req_off = off;
 		    req_len = size;
-		    userbuf_off = i;
+		    userbuf_off = i_offset;
 
+        ADIOI_Assert(req_len == (int) req_len);
+        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
 		    ADIO_WriteContig(fd, 
 				    (char *) buf + userbuf_off,
-				    req_len, 
+				    (int)req_len, 
 				    MPI_BYTE, 
 				    ADIO_EXPLICIT_OFFSET,
 				    req_off,
@@ -323,11 +330,11 @@
 		    }
 
 		    off = disp + flat_file->indices[f_index] + 
-                          (ADIO_Offset) n_filetypes*filetype_extent;
+                          n_filetypes*(ADIO_Offset)filetype_extent;
 
 		    new_fwr_size = flat_file->blocklens[f_index];
 		    if (size != bwr_size) {
-			i += size;
+			i_offset += size;
 			new_bwr_size -= size;
 		    }
 		}
@@ -337,8 +344,8 @@
 
 		    b_index = (b_index + 1)%flat_buf->count;
 		    buf_count++;
-		    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
-			flat_buf->indices[b_index]);
+		    i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+			flat_buf->indices[b_index];
 		    new_bwr_size = flat_buf->blocklens[b_index];
 		    if (size != fwr_size) {
 			off += size;
@@ -352,8 +359,7 @@
 	}
 
 	/* unlock the file region if we locked it */
-        if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && 
-	   (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+        if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
 	{
             ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
 	}
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/byte_offset.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/byte_offset.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/byte_offset.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/byte_offset.c	2010-11-15 15:02:47.000000000 +0100
@@ -14,10 +14,10 @@
 void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp)
 {
     ADIOI_Flatlist_node *flat_file;
-    int i, sum, n_etypes_in_filetype, size_in_filetype;
-    int n_filetypes, etype_in_filetype;
-    ADIO_Offset abs_off_in_filetype=0;
-    int filetype_size, etype_size, filetype_is_contig;
+    int i;
+    ADIO_Offset n_filetypes, etype_in_filetype, sum, abs_off_in_filetype=0, size_in_filetype;
+    unsigned n_etypes_in_filetype, filetype_size, etype_size;
+    int filetype_is_contig;
     MPI_Aint filetype_extent;
 
     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
@@ -29,10 +29,10 @@
         flat_file = ADIOI_Flatlist;
         while (flat_file->type != fd->filetype) flat_file = flat_file->next;
 
-	MPI_Type_size(fd->filetype, &filetype_size);
+	MPI_Type_size(fd->filetype, (int*)&filetype_size);
 	n_etypes_in_filetype = filetype_size/etype_size;
-	n_filetypes = (int) (offset / n_etypes_in_filetype);
-	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+	n_filetypes = offset / n_etypes_in_filetype;
+	etype_in_filetype = offset % n_etypes_in_filetype;
 	size_in_filetype = etype_in_filetype * etype_size;
  
 	sum = 0;
@@ -47,6 +47,6 @@
 
 	/* abs. offset in bytes in the file */
 	MPI_Type_extent(fd->filetype, &filetype_extent);
-	*disp = fd->disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+	*disp = fd->disp + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent + abs_off_in_filetype;
     }
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/cb_config_list.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/cb_config_list.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/cb_config_list.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/cb_config_list.c	2010-11-15 15:02:47.000000000 +0100
@@ -83,7 +83,7 @@
      * FS-INDEP. */
     value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
     ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", fd->hints->cb_nodes);
-    MPI_Info_set(fd->info, "cb_nodes", value);
+    ADIOI_Info_set(fd->info, "cb_nodes", value);
     ADIOI_Free(value);
 
     return 0;
@@ -115,10 +115,19 @@
 	MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, 
 			  (MPI_Delete_function *) ADIOI_cb_delete_name_array,
 			  &cb_config_list_keyval, NULL);
+	/* Need a hook so we can cleanup in Finalize */
+	MPI_Attr_put(MPI_COMM_SELF, cb_config_list_keyval, NULL);
     }
     else {
 	MPI_Attr_get(comm, cb_config_list_keyval, (void *) &array, &found);
-	if (found) {
+	/* see above: we put a cb_config_list_keyval with NULL array on
+	 * COMM_SELF so we can clean it up on exit.  So it's not enough
+	 * to find the keyval. we also need a non-null array (every mpi
+	 * program will have at least one element in the array --
+	 * itself).  Not doing this confuses the shared file pointers
+	 * routines.  I know it is ugly but I can't figure out a better
+	 * way... */
+	if (found && (array != NULL)) {
 	    *arrayp = array;
 	    return 0;
 	}
@@ -362,7 +371,7 @@
 /* ADIOI_cb_copy_name_array() - attribute copy routine
  */
 int ADIOI_cb_copy_name_array(MPI_Comm comm, 
-		       int *keyval, 
+		       int keyval, 
 		       void *extra, 
 		       void *attr_in,
 		       void **attr_out, 
@@ -371,11 +380,11 @@
     ADIO_cb_name_array array;
 
     ADIOI_UNREFERENCED_ARG(comm);
-    ADIOI_UNREFERENCED_ARG(keyval);
+    ADIOI_UNREFERENCED_ARG(keyval); 
     ADIOI_UNREFERENCED_ARG(extra);
 
     array = (ADIO_cb_name_array) attr_in;
-    array->refct++;
+    if (array != NULL) array->refct++;
 
     *attr_out = attr_in;
     *flag = 1; /* make a copy in the new communicator */
@@ -386,17 +395,18 @@
 /* ADIOI_cb_delete_name_array() - attribute destructor
  */
 int ADIOI_cb_delete_name_array(MPI_Comm comm, 
-			 int *keyval, 
+			 int keyval, 
 			 void *attr_val, 
 			 void *extra)
 {
     ADIO_cb_name_array array;
 
     ADIOI_UNREFERENCED_ARG(comm);
-    ADIOI_UNREFERENCED_ARG(keyval);
     ADIOI_UNREFERENCED_ARG(extra);
 
     array = (ADIO_cb_name_array) attr_val;
+    if (array == NULL)
+	    goto fn_exit;
     array->refct--;
 
     if (array->refct <= 0) {
@@ -411,7 +421,8 @@
 	if (array->names != NULL) ADIOI_Free(array->names);
 	ADIOI_Free(array);
     }
-
+fn_exit:
+    MPI_Keyval_free(&keyval);
     return MPI_SUCCESS;
 }
 
@@ -679,19 +690,32 @@
  *
  * Returns a token of types defined at top of this file.
  */
+#ifdef ROMIO_BGL
+/* On BlueGene, the ',' character shows up in get_processor_name, so we have to
+ * use a different delimiter */
+#define COLON ':'
+#define COMMA ';'
+#define DELIMS ":;"
+#else 
+/* these tokens work for every other platform */
+#define COLON ':'
+#define COMMA ','
+#define DELIMS ":,"
+#endif
+
 static int cb_config_list_lex(void)
 {
     int slen;
 
     if (*token_ptr == '\0') return AGG_EOS;
 
-    slen = (int)strcspn(token_ptr, ":,");
+    slen = (int)strcspn(token_ptr, DELIMS);
 
-    if (*token_ptr == ':') {
+    if (*token_ptr == COLON) {
 	token_ptr++;
 	return AGG_COLON;
     }
-    if (*token_ptr == ',') {
+    if (*token_ptr == COMMA) {
 	token_ptr++;
 	return AGG_COMMA;
     }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/eof_offset.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/eof_offset.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/eof_offset.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/eof_offset.c	2010-11-15 15:02:47.000000000 +0100
@@ -13,9 +13,10 @@
 
 void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset)
 {
-    int error_code, filetype_is_contig, etype_size, filetype_size;
-    ADIO_Offset fsize, disp, sum=0, size_in_file;
-    int n_filetypes, flag, i, rem;
+    unsigned filetype_size;
+    int error_code, filetype_is_contig, etype_size;
+    ADIO_Offset fsize, disp, sum=0, size_in_file, n_filetypes, rem;
+    int flag, i;
     ADIO_Fcntl_t *fcntl_struct;
     MPI_Aint filetype_extent;
     ADIOI_Flatlist_node *flat_file;
@@ -43,7 +44,7 @@
 	while (flat_file->type != fd->filetype) 
 	    flat_file = flat_file->next;
 	
-	MPI_Type_size(fd->filetype, &filetype_size);
+	MPI_Type_size(fd->filetype, (int*)&filetype_size);
 	MPI_Type_extent(fd->filetype, &filetype_extent);
 
 	disp = fd->disp;
@@ -55,14 +56,14 @@
 	    for (i=0; i<flat_file->count; i++) {
 		sum += flat_file->blocklens[i];
 		if (disp + flat_file->indices[i] + 
-		    (ADIO_Offset) n_filetypes*filetype_extent + 
+		    n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + 
 		       flat_file->blocklens[i] >= fsize) {
 		    if (disp + flat_file->indices[i] + 
-			   (ADIO_Offset) n_filetypes*filetype_extent >= fsize)
+			   n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent >= fsize)
 			sum -= flat_file->blocklens[i];
 		    else {
-			rem = (int) (disp + flat_file->indices[i] + 
-				(ADIO_Offset) n_filetypes*filetype_extent
+			rem = (disp + flat_file->indices[i] + 
+				n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent
 				+ flat_file->blocklens[i] - fsize);
 			sum -= rem;
 		    }
@@ -71,7 +72,7 @@
 		}
 	    }
 	}
-	size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum;
+	size_in_file = n_filetypes*(ADIO_Offset)filetype_size + sum;
 	*eof_offset = (size_in_file+etype_size-1)/etype_size; /* ceiling division */
     }
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/flatten.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/flatten.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/flatten.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/flatten.c	2010-11-15 15:03:31.000000000 +0100
@@ -1,8 +1,5 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- 
- *  vim: ts=8 sts=4 sw=4 noexpandtab 
- *
- *   $Id: flatten.c,v 1.24 2006/07/05 20:40:13 robl Exp $
- *
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
  *   Copyright (C) 1997 University of Chicago. 
  *   See COPYRIGHT notice in top-level directory.
  */
@@ -12,14 +9,15 @@
 /* #ifdef MPISGI
 #include "mpisgi2.h"
 #endif */
+#ifdef ROMIO_INSIDE_MPICH2
+#include "mpid_datatype.h"
+#endif
 
-void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type);
-void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat,
-			     int old_type_start,
-			     int old_type_end,
-			     int new_type_start,
-			     ADIO_Offset offset_adjustment);
+#ifdef USE_DBG_LOGGING
+  #define FLATTEN_DEBUG 1
+#endif
 
+void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type);
 /* flatten datatype and add it to Flatlist */
 void ADIOI_Flatten_datatype(MPI_Datatype datatype)
 {
@@ -29,16 +27,25 @@
     int curr_index=0, is_contig;
     ADIOI_Flatlist_node *flat, *prev=0;
 
+#ifdef ROMIO_INSIDE_MPICH2
+  if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */
+#endif
     /* check if necessary to flatten. */
  
     /* is it entirely contiguous? */
     ADIOI_Datatype_iscontig(datatype, &is_contig);
+  #ifdef FLATTEN_DEBUG 
+  DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig);
+  #endif
     if (is_contig) return;
 
     /* has it already been flattened? */
     flat = ADIOI_Flatlist;
     while (flat) {
 	if (flat->type == datatype) {
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
+      #endif
 		return;
 	}
 	else {
@@ -58,39 +65,42 @@
     flat->indices = NULL;
 
     flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
-#if 0
-    FPRINTF(stderr, "cur_idx = %d\n", curr_index);
+#ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %#X, cur_idx = %#X\n",flat->count,curr_index);
 #endif
-/*    FPRINTF(stderr, "%d\n", flat->count);*/
+/*    DBG_FPRINTF(stderr, "%d\n", flat->count);*/
 
     if (flat->count) {
-	flat->blocklens = (int *) ADIOI_Malloc(flat->count * sizeof(int));
-	flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * \
-						  sizeof(ADIO_Offset));
+	flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
+	flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
     }
 	
     curr_index = 0;
 #ifdef HAVE_MPIR_TYPE_FLATTEN
     flatten_idx = (MPI_Aint) flat->count;
     MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx);
+  #ifdef FLATTEN_DEBUG 
+  DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n");
+  #endif
 #else
     ADIOI_Flatten(datatype, flat, 0, &curr_index);
+  #ifdef FLATTEN_DEBUG 
+  DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
+  #endif
 
     ADIOI_Optimize_flattened(flat);
 #endif
 /* debug */
-#if 0
+#ifdef FLATTEN_DEBUG
     {
 	int i;
-	FPRINTF(stderr, "blens: ");
-	for (i=0; i<flat->count; i++) 
-	    FPRINTF(stderr, "%d ", flat->blocklens[i]);
-	FPRINTF(stderr, "\n\n");
-	FPRINTF(stderr, "indices: ");
 	for (i=0; i<flat->count; i++) 
-	    FPRINTF(stderr, "%ld ", (long) flat->indices[i]);
-	FPRINTF(stderr, "\n\n");
-    }
+      DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n",
+              i,
+              flat->blocklens[i],
+              flat->indices[i]
+             );
+  }
 #endif
 
 }
@@ -103,22 +113,46 @@
 		  ADIO_Offset st_offset, int *curr_index)  
 {
     int i, j, k, m, n, num, basic_num, prev_index;
-    int top_count, combiner, old_combiner, old_is_contig;
-    int old_size, nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
-    MPI_Aint old_extent;
+    int combiner, old_combiner, old_is_contig;
+    int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset top_count;
+    /* By using unsigned we avoid >2G integer arithmetic problems */
+    unsigned old_size;
+    MPI_Aint old_extent;/* Assume extents are non-negative */
     int *ints;
-    MPI_Aint *adds;
+    MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
     MPI_Datatype *types;
-
     MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
     ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
     adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
     types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
     MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);
 
+  #ifdef FLATTEN_DEBUG 
+  DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
+  DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes);
+  for(i=0; i< nints; ++i)
+  {
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: ints[%d]=%#X\n",i,ints[i]);
+  }
+  for(i=0; i< nadds; ++i)
+  {
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: adds[%d]="MPI_AINT_FMT_HEX_SPEC"\n",i,adds[i]);
+  }
+  for(i=0; i< ntypes; ++i)
+  {
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: types[%d]=%#llX\n",i,(unsigned long long)(unsigned long)types[i]);
+  }
+  if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */
+  #endif
     switch (combiner) {
 #ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
     case MPI_COMBINER_DUP:
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n");
+    #endif
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
         ADIOI_Datatype_iscontig(types[0], &old_is_contig);
@@ -131,6 +165,9 @@
         {
 	    int dims = ints[0];
 	    MPI_Datatype stype;
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
+      #endif
 
 	    ADIO_Type_create_subarray(dims,
 				      &ints[1],        /* sizes */
@@ -149,6 +186,9 @@
 	{
 	    int dims = ints[2];
 	    MPI_Datatype dtype;
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
+      #endif
 
 	    ADIO_Type_create_darray(ints[0],         /* size */
 				    ints[1],         /* rank */
@@ -160,12 +200,23 @@
 				    ints[4*dims+3],  /* order */
 				    types[0],
 				    &dtype);
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY <ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
+              0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
+      #endif
 	    ADIOI_Flatten(dtype, flat, st_offset, curr_index);
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
+              0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
+      #endif
 	    MPI_Type_free(&dtype);
 	}
 	break;
 #endif
     case MPI_COMBINER_CONTIGUOUS:
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -179,8 +230,11 @@
 /* simplest case, made up of basic or contiguous types */
 	    j = *curr_index;
 	    flat->indices[j] = st_offset;
-	    MPI_Type_size(types[0], &old_size);
+	    MPI_Type_size(types[0], (int*)&old_size);
 	    flat->blocklens[j] = top_count * old_size;
+      #ifdef FLATTEN_DEBUG 
+      DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
+      #endif
 	    (*curr_index)++;
 	}
 	else {
@@ -192,8 +246,11 @@
 	    MPI_Type_extent(types[0], &old_extent);
 	    for (m=1; m<top_count; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j] = flat->indices[j-num] + old_extent;
+		    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
+          #ifdef FLATTEN_DEBUG 
+          DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
+          #endif
 		    j++;
 		}
 	    }
@@ -202,6 +259,9 @@
 	break;
 
     case MPI_COMBINER_VECTOR: 
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -213,19 +273,24 @@
 
 	if (prev_index == *curr_index) {
 /* simplest case, vector of basic or contiguous types */
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset blocklength = ints[1], stride = ints[2];
 	    j = *curr_index;
 	    flat->indices[j] = st_offset;
-	    MPI_Type_size(types[0], &old_size);
-	    flat->blocklens[j] = ints[1] * old_size;
+	    MPI_Type_size(types[0], (int*)&old_size);
+	    flat->blocklens[j] = blocklength * old_size;
 	    for (i=j+1; i<j+top_count; i++) {
-		flat->indices[i] = flat->indices[i-1] + 
-		    (unsigned) ints[2] * (unsigned) old_size;
+		flat->indices[i] = flat->indices[i-1] + stride * old_size;
 		flat->blocklens[i] = flat->blocklens[j];
 	    }
 	    *curr_index = i;
 	}
 	else {
 /* vector of noncontiguous derived types */
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset blocklength = ints[1], stride = ints[2];
 
 	    j = *curr_index;
 	    num = *curr_index - prev_index;
@@ -233,9 +298,9 @@
 /* The noncontiguous types have to be replicated blocklen times
    and then strided. Replicate the first one. */
 	    MPI_Type_extent(types[0], &old_extent);
-	    for (m=1; m<ints[1]; m++) {
+	    for (m=1; m<blocklength; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j] = flat->indices[j-num] + old_extent;
+		    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -246,8 +311,7 @@
 	    num = *curr_index - prev_index;
 	    for (i=1; i<top_count; i++) {
  		for (m=0; m<num; m++) {
-		   flat->indices[j] =  flat->indices[j-num] + ints[2]
-		       *old_extent;
+		   flat->indices[j] =  flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		   flat->blocklens[j] = flat->blocklens[j-num];
 		   j++;
 		}
@@ -258,6 +322,9 @@
 
     case MPI_COMBINER_HVECTOR: 
     case MPI_COMBINER_HVECTOR_INTEGER: 
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -269,10 +336,13 @@
 
 	if (prev_index == *curr_index) {
 /* simplest case, vector of basic or contiguous types */
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset blocklength = ints[1];
 	    j = *curr_index;
 	    flat->indices[j] = st_offset;
-	    MPI_Type_size(types[0], &old_size);
-	    flat->blocklens[j] = ints[1] * old_size;
+	    MPI_Type_size(types[0], (int*)&old_size);
+	    flat->blocklens[j] = blocklength * old_size;
 	    for (i=j+1; i<j+top_count; i++) {
 		flat->indices[i] = flat->indices[i-1] + adds[0];
 		flat->blocklens[i] = flat->blocklens[j];
@@ -281,6 +351,9 @@
 	}
 	else {
 /* vector of noncontiguous derived types */
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset blocklength = ints[1];
 
 	    j = *curr_index;
 	    num = *curr_index - prev_index;
@@ -288,9 +361,9 @@
 /* The noncontiguous types have to be replicated blocklen times
    and then strided. Replicate the first one. */
 	    MPI_Type_extent(types[0], &old_extent);
-	    for (m=1; m<ints[1]; m++) {
+	    for (m=1; m<blocklength; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j] = flat->indices[j-num] + old_extent;
+		    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -311,6 +384,9 @@
 	break;
 
     case MPI_COMBINER_INDEXED: 
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -319,15 +395,23 @@
 
 	prev_index = *curr_index;
 	if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
-	    ADIOI_Flatten(types[0], flat,
-			 st_offset+ints[top_count+1]*old_extent, curr_index);
+  {
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset stride = ints[top_count+1];
+        ADIOI_Flatten(types[0], flat,
+         st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
+  }
 
 	if (prev_index == *curr_index) {
 /* simplest case, indexed type made up of basic or contiguous types */
 	    j = *curr_index;
 	    for (i=j; i<j+top_count; i++) {
-		flat->indices[i] = st_offset + ints[top_count+1+i-j]*old_extent;
-		flat->blocklens[i] = (int) (ints[1+i-j]*old_extent);
+    /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+    ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j];
+		flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
+		flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 	    }
 	    *curr_index = i;
 	}
@@ -342,7 +426,7 @@
    and then strided. Replicate the first one. */
 	    for (m=1; m<ints[1]; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j] = flat->indices[j-num] + old_extent;
+		    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -354,15 +438,17 @@
 		num = *curr_index - prev_index;
 		prev_index = *curr_index;
 		for (m=0; m<basic_num; m++) {
-		    flat->indices[j] = flat->indices[j-num] + 
-                        (ints[top_count+1+i]-ints[top_count+i])*old_extent;
+      /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+      ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i];
+		    flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
 		*curr_index = j;
 		for (m=1; m<ints[1+i]; m++) {
                     for (k=0; k<basic_num; k++) {
-                        flat->indices[j] = flat->indices[j-basic_num] + old_extent;
+                        flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                         flat->blocklens[j] = flat->blocklens[j-basic_num];
                         j++;
                     }
@@ -373,6 +459,9 @@
 	break;
 
     case MPI_COMBINER_INDEXED_BLOCK:
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -381,15 +470,23 @@
 
 	prev_index = *curr_index;
 	if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
-	    ADIOI_Flatten(types[0], flat,
-			 st_offset+ints[1+1]*old_extent, curr_index);
+  {
+      /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+      ADIO_Offset stride = ints[1+1];
+        ADIOI_Flatten(types[0], flat,
+         st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
+  }
 
 	if (prev_index == *curr_index) {
 /* simplest case, indexed type made up of basic or contiguous types */
 	    j = *curr_index;
 	    for (i=j; i<j+top_count; i++) {
-		flat->indices[i]   = st_offset + ints[1+1+i-j]*old_extent;
-		flat->blocklens[i] = (int) (ints[1]*old_extent);
+      /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+      ADIO_Offset blocklength = ints[1], stride = ints[1+1+i-j];
+		flat->indices[i]   = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
+		flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 	    }
 	    *curr_index = i;
 	}
@@ -403,7 +500,7 @@
    and then strided. Replicate the first one. */
 	    for (m=1; m<ints[1]; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j]   = flat->indices[j-num] + old_extent;
+		    flat->indices[j]   = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -414,7 +511,10 @@
 	    num = *curr_index - prev_index;
 	    for (i=1; i<top_count; i++) {
 		for (m=0; m<num; m++) {
-		    flat->indices[j]   = flat->indices[j-num] + (ints[2+i]-ints[1+i])*old_extent;
+      /* By using ADIO_Offset we preserve +/- sign and 
+         avoid >2G integer arithmetic problems */
+      ADIO_Offset stride = ints[2+i]-ints[1+i];
+		    flat->indices[j]   = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -425,6 +525,9 @@
 
     case MPI_COMBINER_HINDEXED: 
     case MPI_COMBINER_HINDEXED_INTEGER:
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
+    #endif
 	top_count = ints[0];
         MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
 			      &old_ntypes, &old_combiner); 
@@ -432,15 +535,20 @@
 
 	prev_index = *curr_index;
 	if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
-	    ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); 
+  {
+        ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index); 
+  }
 
 	if (prev_index == *curr_index) {
 /* simplest case, indexed type made up of basic or contiguous types */
 	    j = *curr_index;
-	    MPI_Type_size(types[0], &old_size);
+	    MPI_Type_size(types[0], (int*)&old_size);
 	    for (i=j; i<j+top_count; i++) {
+        /* By using ADIO_Offset we preserve +/- sign and 
+           avoid >2G integer arithmetic problems */
+        ADIO_Offset blocklength = ints[1+i-j];
 		flat->indices[i] = st_offset + adds[i-j];
-		flat->blocklens[i] = ints[1+i-j]*old_size;
+		flat->blocklens[i] = blocklength*old_size;
 	    }
 	    *curr_index = i;
 	}
@@ -456,7 +564,7 @@
 	    MPI_Type_extent(types[0], &old_extent);
 	    for (m=1; m<ints[1]; m++) {
 		for (i=0; i<num; i++) {
-		    flat->indices[j] = flat->indices[j-num] + old_extent;
+		    flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 		    flat->blocklens[j] = flat->blocklens[j-num];
 		    j++;
 		}
@@ -475,7 +583,7 @@
 		*curr_index = j;
 		for (m=1; m<ints[1+i]; m++) {
                     for (k=0; k<basic_num; k++) {
-                        flat->indices[j] = flat->indices[j-basic_num] + old_extent;
+                        flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
                         flat->blocklens[j] = flat->blocklens[j-basic_num];
 		    j++;
                     }
@@ -487,6 +595,9 @@
 
     case MPI_COMBINER_STRUCT: 
     case MPI_COMBINER_STRUCT_INTEGER: 
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
+    #endif
 	top_count = ints[0];
 	for (n=0; n<top_count; n++) {
 	    MPI_Type_get_envelope(types[n], &old_nints, &old_nadds,
@@ -499,10 +610,16 @@
 
 	    if (prev_index == *curr_index) {
 /* simplest case, current type is basic or contiguous types */
+        /* By using ADIO_Offset we preserve +/- sign and 
+           avoid >2G integer arithmetic problems */
+        ADIO_Offset blocklength = ints[1+n];
 		j = *curr_index;
 		flat->indices[j] = st_offset + adds[n];
-		MPI_Type_size(types[n], &old_size);
-		flat->blocklens[j] = ints[1+n] * old_size;
+		MPI_Type_size(types[n], (int*)&old_size);
+		flat->blocklens[j] = blocklength * old_size;
+        #ifdef FLATTEN_DEBUG 
+        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",n,adds[n],j, flat->indices[j], j, flat->blocklens[j]);
+        #endif
 		(*curr_index)++;
 	    }
 	    else {
@@ -515,8 +632,11 @@
 		MPI_Type_extent(types[n], &old_extent);
 		for (m=1; m<ints[1+n]; m++) {
 		    for (i=0; i<num; i++) {
-			flat->indices[j] = flat->indices[j-num] + old_extent;
+			flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
 			flat->blocklens[j] = flat->blocklens[j-num];
+            #ifdef FLATTEN_DEBUG 
+            DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple old_extent "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",old_extent,j, flat->indices[j], j, flat->blocklens[j]);
+            #endif
 			j++;
 		    }
 		}
@@ -525,9 +645,63 @@
 	}
  	break;
 
+    case MPI_COMBINER_RESIZED: 
+    #ifdef FLATTEN_DEBUG 
+    DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
+    #endif
+
+    /* This is done similar to a type_struct with an lb, datatype, ub */
+
+    /* handle the Lb */
+	j = *curr_index;
+	flat->indices[j] = st_offset + adds[0];
+	flat->blocklens[j] = 0;
+
+        #ifdef FLATTEN_DEBUG 
+        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
+        #endif
+
+	(*curr_index)++;
+
+	/* handle the datatype */
+
+	MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
+			      &old_ntypes, &old_combiner); 
+	ADIOI_Datatype_iscontig(types[0], &old_is_contig);
+
+	if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
+	    ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
+	}
+	else {
+            /* current type is basic or contiguous */
+	    j = *curr_index;
+	    flat->indices[j] = st_offset;
+	    MPI_Type_size(types[0], (int*)&old_size);
+	    flat->blocklens[j] = old_size;
+
+            #ifdef FLATTEN_DEBUG 
+	    DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
+            #endif
+
+	    (*curr_index)++;
+	}
+
+	/* take care of the extent as a UB */
+	j = *curr_index;
+	flat->indices[j] = st_offset + adds[0] + adds[1];
+	flat->blocklens[j] = 0;
+
+        #ifdef FLATTEN_DEBUG 
+        DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]);
+        #endif
+
+	(*curr_index)++;
+
+ 	break;
+
     default:
 	/* TODO: FIXME (requires changing prototypes to return errors...) */
-	FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
+	DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
@@ -545,6 +719,10 @@
     ADIOI_Free(adds);
     ADIOI_Free(types);
 
+  #ifdef FLATTEN_DEBUG 
+  DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
+  #endif
+
 }
 
 /********************************************************/
@@ -569,7 +747,7 @@
     int top_count, combiner, old_combiner, old_is_contig;
     int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
     int *ints;
-    MPI_Aint *adds;
+    MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
     MPI_Datatype *types;
 
     MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
@@ -789,9 +967,32 @@
 	    }
 	}
 	break;
+
+    case MPI_COMBINER_RESIZED: 
+	/* treat it as a struct with lb, type, ub */
+
+	/* add 2 for lb and ub */
+	(*curr_index) += 2;
+	count += 2;
+
+	/* add for datatype */ 
+	MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
+                                  &old_ntypes, &old_combiner); 
+	ADIOI_Datatype_iscontig(types[0], &old_is_contig);
+
+	if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
+	    count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
+	}
+	else {
+        /* basic or contiguous type */
+	    count++;
+	    (*curr_index)++;
+	}
+	break;
+
     default:
 	/* TODO: FIXME */
-	FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
+	DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
@@ -812,6 +1013,47 @@
 #endif /* HAVE_MPIR_TYPE_GET_CONTIG_BLOCKS */
 }
 
+/* removezeros() makes a second pass over the flattened type, knocking out
+ * zero-length blocks but leaving the first and last entries alone (they
+ * mark LB and UB).  Lists with two or fewer entries are left untouched. */
+
+static void removezeros(ADIOI_Flatlist_node *flat_type)
+{
+    int i,j,opt_blocks;
+    ADIO_Offset *opt_blocklens;
+    ADIO_Offset *opt_indices;
+
+    opt_blocks = 2; /* first and last entries (LB and UB) are always kept */
+    for (i=1; i < flat_type->count -1; i++) {
+        if(flat_type->blocklens[i] != 0)
+	    opt_blocks++;
+    }
+    /* nothing to remove; ">=" also keeps a list of < 2 entries from growing */
+    if (opt_blocks >= flat_type->count) return;
+    opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset));
+    opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset));
+
+   /* fill in new blocklists, keeping first and last no matter what  */
+    opt_blocklens[0] = flat_type->blocklens[0];
+    opt_indices[0] = flat_type->indices[0];
+    j = 1; /* always two entries: one for LB and UB  ([0] and [j])*/
+    for (i=1; i< flat_type->count -1; i++) {
+	if( flat_type->blocklens[i] != 0) {
+		opt_indices[j] = flat_type->indices[i];
+		opt_blocklens[j] = flat_type->blocklens[i];
+		j++;
+	}
+    }
+    opt_indices[j] = flat_type->indices[flat_type->count -1];
+    opt_blocklens[j] = flat_type->blocklens[flat_type->count -1];
+
+    flat_type->count = opt_blocks;
+    ADIOI_Free(flat_type->blocklens);
+    ADIOI_Free(flat_type->indices);
+    flat_type->blocklens = opt_blocklens;
+    flat_type->indices = opt_indices;
+    return;
+}
 
 /****************************************************************/
 
@@ -822,14 +1064,14 @@
  * contiguous operations).
  *
  * NOTE: a further optimization would be to remove zero length blocks. However,
- * we do not do this as parts of the code use the presence of zero length
- * blocks to indicate UB and LB.  
+ * the first and last blocks must remain, because a zero-length first or last
+ * block marks the LB or UB.
  *
  */
 void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
 {
     int i, j, opt_blocks;
-    int *opt_blocklens;
+    ADIO_Offset *opt_blocklens;
     ADIO_Offset *opt_indices;
 
     opt_blocks = 1;
@@ -844,7 +1086,7 @@
     /* if we can't reduce the number of blocks, quit now */
     if (opt_blocks == flat_type->count) return;
 
-    opt_blocklens = (int *) ADIOI_Malloc(opt_blocks * sizeof(int));
+    opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset));
     opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset));
 
     /* fill in new blocklists */
@@ -866,6 +1108,7 @@
     ADIOI_Free(flat_type->indices);
     flat_type->blocklens = opt_blocklens;
     flat_type->indices = opt_indices;
+    removezeros(flat_type);
     return;
 }
 
@@ -885,26 +1128,3 @@
 	ADIOI_Free(flat);
     }
 }
-
-/* ADIOI_Flatten_copy_type()
- * flat - pointer to flatlist node holding offset and lengths
- * start - starting index of src type in arrays
- * end - one larger than ending index of src type (makes loop clean)
- * offset_adjustment - amount to add to "indices" (offset) component
- *                     of each off/len pair copied
- */
-void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat,
-			     int old_type_start,
-			     int old_type_end,
-			     int new_type_start,
-			     ADIO_Offset offset_adjustment)
-{
-    int i, out_index = new_type_start;
-
-    for (i=old_type_start; i < old_type_end; i++) {
-	flat->indices[out_index]   = flat->indices[i] + offset_adjustment;
-	flat->blocklens[out_index] = flat->blocklens[i];
-	out_index++;
-    }
-}
-
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c	2010-11-15 15:02:47.000000000 +0100
@@ -7,6 +7,7 @@
 
 #include "adio.h"
 #include "adio_extern.h"
+#include "adioi.h"
 
 /* returns the current position of the individual file pointer
    in etype units relative to the current view. */
@@ -14,10 +15,11 @@
 void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
 {
     ADIOI_Flatlist_node *flat_file;
-    int i, n_filetypes, flag, frd_size;
-    int filetype_size, etype_size, filetype_is_contig;
+    int i, flag;
+    unsigned filetype_size;
+    int etype_size, filetype_is_contig;
     MPI_Aint filetype_extent;
-    ADIO_Offset disp, byte_offset, sum=0, size_in_file;
+    ADIO_Offset disp, byte_offset, sum=0, size_in_file, n_filetypes, frd_size;
     
     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
     etype_size = fd->etype_size;
@@ -28,7 +30,7 @@
         flat_file = ADIOI_Flatlist;
         while (flat_file->type != fd->filetype) flat_file = flat_file->next;
 
-	MPI_Type_size(fd->filetype, &filetype_size);
+	MPI_Type_size(fd->filetype, (int*)&filetype_size);
 	MPI_Type_extent(fd->filetype, &filetype_extent);
 
 	disp = fd->disp;
@@ -41,18 +43,18 @@
 	    for (i=0; i<flat_file->count; i++) {
 		sum += flat_file->blocklens[i];
 		if (disp + flat_file->indices[i] + 
-	     	    (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i] 
+	     	    n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i] 
 		    >= byte_offset) {
-		    frd_size = (int) (disp + flat_file->indices[i] + 
-			(ADIO_Offset) n_filetypes*filetype_extent
-			+ flat_file->blocklens[i] - byte_offset);
+		    frd_size = disp + flat_file->indices[i] + 
+			n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent
+			+ flat_file->blocklens[i] - byte_offset;
 		    sum -= frd_size;
 		    flag = 1;
 		    break;
 		}
 	    }
 	}
-	size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum;
+	size_in_file = n_filetypes * (ADIO_Offset)filetype_size + sum;
 	*offset = size_in_file/etype_size;
     }
 }
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common: heap-sort.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/iscontig.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/iscontig.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/iscontig.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/iscontig.c	2010-11-15 15:02:47.000000000 +0100
@@ -60,7 +60,8 @@
 
 #elif defined(OMPI_BUILDING) && OMPI_BUILDING
 
-/* This function is included in Open MPI source code */
+/* void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) is defined
+ * and implemented in OpenMPI itself */
 
 #else
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/lock.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/lock.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/lock.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/lock.c	2010-11-15 15:03:31.000000000 +0100
@@ -93,7 +93,7 @@
 int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence,
 	     ADIO_Offset len) 
 {
-    int err, error_code;
+    int err, error_code, err_count = 0, sav_errno;
     struct flock lock;
 
     if (len == 0) return MPI_SUCCESS;
@@ -120,16 +120,42 @@
     lock.l_len	  = len;
 #endif
 
+    sav_errno = errno; /* save previous errno in case we recover from retryable errors */
     errno = 0;
     do {
 	err = fcntl(fd, cmd, &lock);
-    } while (err && (errno == EINTR));
+#ifdef USE_DBG_LOGGING
+      {
+        int dbg_errno = errno; /* stdio calls below may overwrite errno */
+        if (err && ((errno == EINTR) || (errno == EINPROGRESS)) &&
+            ((err_count < 5) || (err_count > 9995)))
+        {
+            fprintf(stderr, "File locking failed in ADIOI_Set_lock(fd %#X,cmd %s/%#X,type %s/%#X,whence %#X) with return value %#X and errno %#X.  Retry (%d).\n",
+                    fd,
+                    ((cmd == F_GETLK   )? "F_GETLK" :
+                    ((cmd == F_SETLK   )? "F_SETLK" :
+                    ((cmd == F_SETLKW  )? "F_SETLKW" : "UNEXPECTED"))),
+                    cmd, 
+                    ((type == F_RDLCK   )? "F_RDLCK" :
+                    ((type == F_WRLCK   )? "F_WRLCK" :
+                    ((type == F_UNLCK   )? "F_UNLOCK" : "UNEXPECTED"))),
+                    type, 
+                    whence, err, errno, err_count);
+          errno = dbg_errno; /* make perror describe the fcntl failure */
+          perror("ADIOI_Set_lock:");
+          fprintf(stderr,"ADIOI_Set_lock:offset %#llx, length %#llx\n",(unsigned long long)offset, (unsigned long long)len);
+        }
+        errno = dbg_errno; /* the retry test below must see fcntl's errno */
+      }
+#endif
+    } while (err && ((errno == EINTR) || ((errno == EINPROGRESS) && (++err_count < 10000))));
 
     if (err && (errno != EBADF)) {
 	/* FIXME: This should use the error message system, 
 	   especially for MPICH2 */
 	FPRINTF(stderr, "File locking failed in ADIOI_Set_lock(fd %X,cmd %s/%X,type %s/%X,whence %X) with return value %X and errno %X.\n"
-                  "If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n",
+                  "- If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n"
+                  "- If the file system is LUSTRE, ensure that the directory is mounted with the 'flock' option.\n",
           fd,
           ((cmd == F_GETLK   )? "F_GETLK" :
           ((cmd == F_SETLK   )? "F_SETLK" :
@@ -145,6 +171,9 @@
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
+    if(!err)             /* report fcntl failure errno's (EBADF), otherwise */
+      errno = sav_errno; /* restore previous errno in case we recovered from retryable errors */
+
     error_code = (err == 0) ? MPI_SUCCESS : MPI_ERR_UNKNOWN;
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/Makefile.am	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/Makefile.am	2010-11-15 15:03:31.000000000 +0100
@@ -26,24 +26,32 @@
 noinst_LTLIBRARIES = libadio_common.la
 libadio_common_la_SOURCES = \
         ad_aggregate.c \
+        ad_aggregate_new.c \
         ad_close.c \
+        ad_coll_build_req_new.c \
+        ad_coll_exch_new.c \
         ad_darray.c \
         ad_delete.c \
         ad_done.c \
         ad_done_fake.c \
         ad_end.c \
+        ad_features.c \
         ad_fcntl.c \
         ad_flush.c \
         ad_fstype.c \
         ad_get_sh_fp.c \
         ad_hints.c \
         ad_init.c \
+        ad_io_coll.c \
         ad_iopen.c \
         ad_iread.c \
         ad_iread_fake.c \
         ad_iwrite.c \
         ad_iwrite_fake.c \
         ad_open.c \
+        ad_opencoll.c \
+        ad_opencoll_failsafe.c \
+        ad_opencoll_scalable.c \
         ad_prealloc.c \
         ad_read.c \
         ad_read_coll.c \
@@ -69,6 +77,7 @@
         flatten.c \
         get_fp_posn.c \
         greq_fns.c \
+        heap-sort.c \
         iscontig.c \
         lock.c \
         malloc.c \
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/malloc.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/malloc.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/malloc.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/malloc.c	2010-11-15 15:02:47.000000000 +0100
@@ -14,16 +14,11 @@
    Later on, add some tracing and error checking, similar to 
    MPID_trmalloc. */
 
-/* can't include adio.h here, because of the macro, so 
- * include romioconf.h to make sure config-time defines get included */
-
-#include "romioconf.h"
+#include "adio.h"
 #include "mpi.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include "mpipr.h"
-/* Open MPI: This seemes to have been missing */
-#include "adio.h"
 
 #ifdef HAVE_MALLOC_H
 #include <malloc.h>
@@ -35,66 +30,84 @@
 /* style: allow:calloc:1 sig:0 */
 /* style: allow:realloc:1 sig:0 */
 
-
 #define FPRINTF fprintf
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname);
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname);
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname);
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname);
 
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname)
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname);
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname);
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname);
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname);
+
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname)
 {
     void *new;
 
 #ifdef ROMIO_XFS
     new = (void *) memalign(XFS_MEMALIGN, size);
 #else
+#ifdef HAVE_MPIU_FUNCS
+    new = (void *) MPIU_Malloc(size);
+#else
     new = (void *) malloc(size);
 #endif
+#endif
     if (!new) {
 	FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno);
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
-
+    DBG_FPRINTF(stderr, "ADIOI_Malloc %s:<%d> %p (%#zX)\n", fname, lineno, new, size);
     return new;
 }
 
 
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname)
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname)
 {
     void *new;
 
+#ifdef HAVE_MPIU_FUNCS
+    new = (void *) MPIU_Calloc(nelem, elsize);
+#else
     new = (void *) calloc(nelem, elsize);
+#endif
     if (!new) {
 	FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno);
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
-
+    DBG_FPRINTF(stderr, "ADIOI_Calloc %s:<%d> %p\n", fname, lineno, new);
     return new;
 }
 
 
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname)
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname)
 {
     void *new;
 
+#ifdef HAVE_MPIU_FUNCS
+    new = (void *) MPIU_Realloc(ptr, size);
+#else
     new = (void *) realloc(ptr, size);
+#endif
     if (!new) {
 	FPRINTF(stderr, "realloc failed in file %s, line %d\n", fname, lineno);
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
+    DBG_FPRINTF(stderr, "ADIOI_Realloc %s:<%d> %p\n", fname, lineno, new);
     return new;
 }
 
 
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname)
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname)
 {
+    DBG_FPRINTF(stderr, "ADIOI_Free %s:<%d> %p\n", fname, lineno, ptr);
     if (!ptr) {
 	FPRINTF(stderr, "Attempt to free null pointer in file %s, line %d\n", fname, lineno);
 	MPI_Abort(MPI_COMM_WORLD, 1);
     }
 
+#ifdef HAVE_MPIU_FUNCS
+    MPIU_Free(ptr);
+#else
     free(ptr);
+#endif
 }
 
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/common/system_hints.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/system_hints.c
--- ompi-trunk/ompi/mca/io/romio/romio/adio/common/system_hints.c	2010-11-16 09:16:18.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/common/system_hints.c	2010-11-15 15:02:47.000000000 +0100
@@ -40,6 +40,28 @@
 #define ROMIO_HINT_DEFAULT_CFG "/etc/romio-hints"
 #define ROMIO_HINT_ENV_VAR "ROMIO_HINTS"
 
+/* Forward declaration carrying the attribute list; this should suppress
+ * "unused" warnings on GCC. */
+static void dump_keys(MPI_Info info) ATTRIBUTE((unused, used));
+
+/* Debugging aid intended to be called by hand from a source debugger:
+ * prints each key of 'info' and its value to stdout, one pair per line. */
+static void dump_keys(MPI_Info info)
+{
+    char name[MPI_MAX_INFO_KEY];
+    char text[MPI_MAX_INFO_VAL];
+    int total, found;
+    int idx = 0;
+
+    MPI_Info_get_nkeys(info, &total);
+    while (idx < total) {
+	MPI_Info_get_nthkey(info, idx, name);
+	ADIOI_Info_get(info, name, MPI_MAX_INFO_VAL-1, text, &found);
+	printf("key = %s, value = %s\n", name, text);
+	idx++;
+    }
+}
+
 /* if user set the environment variable, use its value to find the
  * file-of-hints.  Otherwise, we'll look for the default config file.  i.e. let
  * the user override systemwide hint processing */
@@ -71,7 +93,7 @@
 static int file_to_info(int fd, MPI_Info info)
 {
     char *buffer, *token, *key, *val, *garbage;
-    char *pos1, *pos2;
+    char *pos1=NULL, *pos2=NULL;
     int flag, ret;
     char dummy;
     struct stat statbuf;
@@ -79,7 +101,7 @@
     /* assumption: config files will be small (less than 1MB) */
     fstat(fd, &statbuf);
     /* add 1 to size to make room for NULL termination */
-    buffer = (char *)calloc(statbuf.st_size + 1, sizeof (char));
+    buffer = (char *)ADIOI_Calloc(statbuf.st_size + 1, sizeof (char));
     if (buffer == NULL) return -1;
 
     ret = read(fd, buffer, statbuf.st_size);
@@ -104,11 +126,11 @@
 #endif
 	/* don't actually care what the value is. only want to know if key
 	 * exists: we leave it alone if so*/
-	MPI_Info_get(info, key, 0, &dummy, &flag);
+	ADIOI_Info_get(info, key, 1, &dummy, &flag);
 	if (flag == 1) continue;
-	MPI_Info_set(info, key, val);
+	ADIOI_Info_set(info, key, val);
     } while ((token = strtok_r(NULL, "\n", &pos1)) != NULL);
-    free(buffer);
+    ADIOI_Free(buffer);
     return 0;
 }
 
@@ -127,23 +149,42 @@
     close(hintfd);
 }
 
-/* OMPI: Commented out so that we don't get compiler warnings */
-#if 0
-/* debug function: a routine I want in the library to make my life easier when
- * using a source debugger. please ignore any "defined but not used" warnings
- */
-static void dump_keys(MPI_Info info) {
-    int i, nkeys, flag;
-    char key[MPI_MAX_INFO_KEY];
-    char value[MPI_MAX_INFO_VAL];
+/* given 'info', incorporate any hints in 'sysinfo' that are not already set
+ * into 'new_info'.  '*new_info' is MPI_INFO_NULL when 'info' is null and
+ * 'sysinfo' has no keys; otherwise the caller must free it later. */
+void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo,
+	MPI_Info *new_info)
+{
+    int i, nkeys_sysinfo, flag=0; /* flag is tested even when info is NULL */
+    char  val[MPI_MAX_INFO_VAL], key[MPI_MAX_INFO_KEY];
 
-    MPI_Info_get_nkeys(info, &nkeys);
+    if (sysinfo == MPI_INFO_NULL)
+	nkeys_sysinfo = 0;
+    else
+	MPI_Info_get_nkeys(sysinfo, &nkeys_sysinfo);
+
+    /* short-circuit: return immediately if no hints to process */
+    if (info == MPI_INFO_NULL && nkeys_sysinfo == 0)  {
+	*new_info = MPI_INFO_NULL;
+	return;
+    }
 
-    for (i=0; i<nkeys; i++) {
-	MPI_Info_get_nthkey(info, i, key);
-	MPI_Info_get(info, key, MPI_MAX_INFO_VAL-1, value, &flag);
-	printf("key = %s, value = %s\n", key, value);
+    if (info == MPI_INFO_NULL) 
+	MPI_Info_create(new_info);
+    else
+	MPI_Info_dup(info, new_info);
+
+    for (i=0; i<nkeys_sysinfo; i++) {
+	MPI_Info_get_nthkey(sysinfo, i, key);
+	/* don't care about the value, just want to know if hint set already*/
+	if (info != MPI_INFO_NULL) ADIOI_Info_get(info, key, 1, val, &flag); 
+	if (flag == 1) continue;  /* skip any hints already set by user */
+	ADIOI_Info_get(sysinfo, key, MPI_MAX_INFO_VAL-1, val, &flag);
+	ADIOI_Info_set(*new_info, key, val);
+	flag = 0;
     }
+
     return;
 }
-#endif
+
+
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio_cb_config_list.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio_cb_config_list.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio_cb_config_list.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio_cb_config_list.h	2010-11-15 15:02:47.000000000 +0100
@@ -17,10 +17,10 @@
 
 int ADIOI_cb_gather_name_array(MPI_Comm comm, MPI_Comm dupcomm, 
 			       ADIO_cb_name_array *arrayp);
-int ADIOI_cb_copy_name_array(MPI_Comm comm, int *keyval, void *extra, 
+int ADIOI_cb_copy_name_array(MPI_Comm comm, int keyval, void *extra, 
 			     void *attr_in,
 			     void **attr_out, int *flag);
-int ADIOI_cb_delete_name_array(MPI_Comm comm, int *keyval, void *attr_val, 
+int ADIOI_cb_delete_name_array(MPI_Comm comm, int keyval, void *attr_val, 
 			       void *extra);
 int ADIOI_cb_config_list_parse(char *config_list, ADIO_cb_name_array array, 
 			       int ranklist[], int cb_nodes);
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio_extern.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio_extern.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio_extern.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio_extern.h	2010-11-15 15:02:47.000000000 +0100
@@ -23,3 +23,5 @@
 #endif
 
 extern MPI_Errhandler ADIOI_DFLT_ERR_HANDLER;
+
+extern MPI_Info ADIOI_syshints;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adio.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adio.h	2010-11-15 15:03:31.000000000 +0100
@@ -123,16 +123,6 @@
 /* Open MPI: end of section of ignored stuff */
 #endif
 
-#ifndef SX4
-#   define MPI_AINT MPI_LONG    /* may need to change this later */
-#else
-#   if (defined(_SX) && !defined(_LONG64))
-#       define MPI_AINT MPI_LONG_LONG_INT
-#   else
-#       define MPI_AINT MPI_LONG
-#   endif
-#endif
-
 #define ADIO_Status MPI_Status   
 
 #ifndef MPIO_INCLUDE
@@ -216,7 +206,6 @@
     unsigned d_mem;          /* data buffer memory alignment */
     unsigned d_miniosz;      /* min xfer size, xfer size multiple,
                                 and file seek offset alignment */
-    unsigned d_maxiosz;      /* max xfer size */
     ADIO_Offset fp_ind;      /* individual file pointer in MPI-IO (in bytes)*/
     ADIO_Offset fp_sys_posn; /* current location of the system file-pointer
                                 in bytes */
@@ -224,6 +213,7 @@
     MPI_Comm comm;           /* communicator indicating who called open */
     MPI_Comm agg_comm;      /* deferred open: aggregators who called open */
     int is_open;	    /* deferred open: 0: not open yet 1: is open */
+    int is_agg;              /* bool: if I am an aggregator */
     char *filename;          
     int file_system;         /* type of file system */
     int access_mode;         /* Access mode (sequential, append, etc.) */
@@ -249,6 +239,11 @@
     int fortran_handle;     /* handle for Fortran interface if needed */
     MPI_Errhandler err_handler;
     void *fs_ptr;            /* file-system specific information */
+
+    /* Two phase collective I/O support */
+    ADIO_Offset *file_realm_st_offs; /* file realm starting offsets */
+    MPI_Datatype *file_realm_types;  /* file realm datatypes */
+    int my_cb_nodes_index; /* my index into cb_config_list. -1 if N/A */
 } ADIOI_FileD;
 
 typedef struct ADIOI_FileD *ADIO_File;
@@ -303,6 +298,7 @@
 #define ADIO_LUSTRE              163   /* Lustre */
 #define ADIO_BGL                 164   /* IBM BGL */
 #define ADIO_BGLOCKLESS          165   /* IBM BGL (lock-free) */
+#define ADIO_ZOIDFS              167   /* ZoidFS: the I/O forwarding fs */
 
 #define ADIO_SEEK_SET            SEEK_SET
 #define ADIO_SEEK_CUR            SEEK_CUR
@@ -312,6 +308,13 @@
 #define ADIO_FCNTL_SET_DISKSPACE 188
 #define ADIO_FCNTL_GET_FSIZE     200
 
+/* file system feature tests */
+#define ADIO_LOCKS               300
+#define ADIO_SHARED_FP           301
+#define ADIO_ATOMIC_MODE         302
+#define ADIO_DATA_SIEVING_WRITES 303
+#define ADIO_SCALABLE_OPEN       304
+
 /* for default file permissions */
 #define ADIO_PERM_NULL           -1
 
@@ -330,6 +333,7 @@
 		   int access_mode, ADIO_Offset disp, MPI_Datatype etype, 
 		   MPI_Datatype filetype, 
 		   MPI_Info info, int perm, int *error_code);
+void ADIOI_OpenColl(ADIO_File fd, int rank, int access_mode, int *error_code);
 void ADIO_ImmediateOpen(ADIO_File fd, int *error_code);
 void ADIO_Close(ADIO_File fd, int *error_code);
 void ADIO_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype,
@@ -398,6 +402,7 @@
 void ADIO_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
 void ADIO_Set_view(ADIO_File fd, ADIO_Offset disp, MPI_Datatype etype, 
 		MPI_Datatype filetype, MPI_Info info,  int *error_code);
+int  ADIO_Feature(ADIO_File fd, int flag);
 
 /* functions to help deal with the array datatypes */
 int ADIO_Type_create_subarray(int ndims,
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h	2010-11-15 15:02:47.000000000 +0100
@@ -1,6 +1,5 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*  $Id: adioi_errmsg.h,v 1.5 2005/05/23 23:27:49 rross Exp $
- *
+/*
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_error.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_error.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_error.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_error.h	2010-11-15 15:02:47.000000000 +0100
@@ -1,6 +1,5 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*  $Id: adioi_error.h,v 1.12 2006/01/05 23:53:58 robl Exp $
- *
+/*
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
@@ -33,6 +32,17 @@
     goto fn_exit;                                               \
 }
 
+/* Fail with "**iobadcount" when count*datatype_size, computed at the
+ * arguments' own width, differs from the same product in ADIO_Offset. */
+#define MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code)         \
+if ((count)*(datatype_size) != (ADIO_Offset)(unsigned)(count)*(ADIO_Offset)(unsigned)(datatype_size)) {	\
+    error_code = MPIO_Err_create_code(MPI_SUCCESS,		\
+				      MPIR_ERR_RECOVERABLE, myname, __LINE__,	\
+				      MPI_ERR_ARG, "**iobadcount", 0);	\
+    error_code = MPIO_Err_return_file(fh, error_code);		\
+    goto fn_exit;                                               \
+}
+
 #define MPIO_CHECK_DATATYPE(fh, datatype, myname, error_code)   \
 if (datatype == MPI_DATATYPE_NULL) {				\
     error_code = MPIO_Err_create_code(MPI_SUCCESS,		\
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h	2010-11-15 15:02:47.000000000 +0100
@@ -89,4 +89,9 @@
 extern struct ADIOI_Fns_struct ADIO_GRIDFTP_operations;
 #endif
 
+#ifdef ROMIO_ZOIDFS
+/* prototypes are in adio/ad_zoidfs/ad_zoidfs.h */
+extern struct ADIOI_Fns_struct ADIO_ZOIDFS_operations;
+#endif
+
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/adioi.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/adioi.h	2010-11-15 15:03:31.000000000 +0100
@@ -36,12 +36,18 @@
     int cb_write;
     int cb_nodes;
     int cb_buffer_size;
+    int cb_pfr;
+    int cb_fr_type;
+    int cb_fr_alignment;
+    int cb_ds_threshold;
+    int cb_alltoall;
     int ds_read;
     int ds_write;
     int no_indep_rw;
     int ind_rd_buffer_size;
     int ind_wr_buffer_size;
     int deferred_open;
+    int min_fdomain_size;
     char *cb_config_list;
     int *ranklist;
     union {
@@ -51,20 +57,27 @@
 	    } pvfs;
 	    struct {
 		    int debugmask;
+		    int posix_read;
+		    int posix_write;
+		    int listio_read;
+		    int listio_write;
+		    int dtype_read;
+		    int dtype_write;
 	    } pvfs2;
+            struct {
+                    int start_iodevice;
+                    int co_ratio;
+                    int coll_threshold;
+                    int ds_in_coll;
+            } lustre;
+		struct {
+			unsigned read_chunk_sz; /* chunk size for direct reads */
+			unsigned write_chunk_sz; /* chunk size for direct writes */
+		} xfs;
     } fs_hints;
 
 };
 
-#if 0
-typedef int MPI_Datarep_conversion_function(void *userbuf,
-					    MPI_Datatype datatype,
-					    int count,
-					    void *filebuf,
-					    MPI_Offset position,
-					    void *extra_state);
-#endif
-
 typedef struct ADIOI_Datarep {
     char *name;
     void *state;
@@ -92,8 +105,7 @@
 typedef struct ADIOI_Fl_node {  
     MPI_Datatype type;
     int count;                   /* no. of contiguous blocks */
-    int *blocklens;              /* array of contiguous block lengths (bytes)*/
-    /* may need to make it ADIO_Offset *blocklens */
+    ADIO_Offset *blocklens;      /* array of contiguous block lengths (bytes)*/
     ADIO_Offset *indices;        /* array of byte offsets of each block */
     struct ADIOI_Fl_node *next;  /* pointer to next node */
 } ADIOI_Flatlist_node;
@@ -126,6 +138,8 @@
 
 struct ADIOI_Fns_struct {
     void (*ADIOI_xxx_Open) (ADIO_File fd, int *error_code);
+    void (*ADIOI_xxx_OpenColl) (ADIO_File fd, int rank, 
+		    int access_mode, int *error_code);
     void (*ADIOI_xxx_ReadContig) (ADIO_File fd, void *buf, int count, 
                    MPI_Datatype datatype, int file_ptr_type, 
                    ADIO_Offset offset, ADIO_Status *status, int *error_code);
@@ -174,6 +188,7 @@
     void (*ADIOI_xxx_Flush) (ADIO_File fd, int *error_code); 
     void (*ADIOI_xxx_Resize) (ADIO_File fd, ADIO_Offset size, int *error_code);
     void (*ADIOI_xxx_Delete) (char *filename, int *error_code);
+    int  (*ADIOI_xxx_Feature) (ADIO_File fd, int flag);
 };
 
 /* optypes for ADIO_RequestD */
@@ -203,6 +218,9 @@
 
 /* some of the ADIO functions are macro-replaced */
 
+#define ADIOI_OpenColl(fd, rank, access_mode, error_code) \
+	(*(fd->fns->ADIOI_xxx_OpenColl))(fd, rank, access_mode, error_code)
+
 #define ADIO_ReadContig(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
         (*(fd->fns->ADIOI_xxx_ReadContig))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)
 
@@ -269,19 +287,31 @@
 #define ADIO_SetInfo(fd, users_info, error_code) \
         (*(fd->fns->ADIOI_xxx_SetInfo))(fd, users_info, error_code)
 
+#define ADIO_Feature(fd, flag) \
+	(*(fd->fns->ADIOI_xxx_Feature))(fd, flag)
+
 
 /* structure for storing access info of this process's request 
    from the file domain of other processes, and vice-versa. used 
    as array of structures indexed by process number. */
 typedef struct {
     ADIO_Offset *offsets;   /* array of offsets */
-    int *lens;              /* array of lengths */
+    int *lens;              /* array of lengths */ 
+    /* consider aints or offsets for lens? Seems to be used as in-memory
+       buffer lengths, so it should be < 2G and ok as an int          */
     MPI_Aint *mem_ptrs;     /* array of pointers. used in the read/write
 			       phase to indicate where the data
 			       is stored in memory */
     int count;             /* size of above arrays */
 } ADIOI_Access;
 
+/* structure for storing generic offset/length pairs.  used to describe
+   file realms among other things */
+typedef struct {
+    ADIO_Offset *offsets; /* array of offsets */
+    int *lens;           /* array of lengths */
+    int count;            /* size of above arrays */
+} ADIOI_Offlen;
 
 /* prototypes for ADIO internal functions */
 
@@ -292,21 +322,29 @@
 void ADIOI_Delete_flattened(MPI_Datatype datatype);
 int ADIOI_Count_contiguous_blocks(MPI_Datatype type, int *curr_index);
 void ADIOI_Complete_async(int *error_code);
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname);
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname);
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname);
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname);
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname);
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname);
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname);
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname);
 void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag);
 void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset);
 void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset);
 void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset,
 			   ADIO_Offset *disp);
 void ADIOI_process_system_hints(MPI_Info info);
+void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo, 
+		MPI_Info *new_info);
 
 
 void ADIOI_GEN_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
 		     int *error_code);
 void ADIOI_GEN_Flush(ADIO_File fd, int *error_code);
+void ADIOI_GEN_OpenColl(ADIO_File fd, int rank, 
+		int access_mode, int *error_code);
+void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank, 
+		int access_mode, int *error_code);
+void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank, 
+		int access_mode, int *error_code);
 void ADIOI_GEN_Delete(char *filename, int *error_code);
 void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count, 
 			  MPI_Datatype datatype, int file_ptr_type,
@@ -347,6 +385,8 @@
 		ADIO_Status *status);
 int ADIOI_GEN_aio_query_fn(void *extra_state, ADIO_Status *status);
 int ADIOI_GEN_aio_free_fn(void *extra_state);
+int ADIOI_GEN_Feature(ADIO_File fd, int feature);
+
 void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
                        MPI_Datatype buftype, int file_ptr_type,
                        ADIO_Offset offset, ADIO_Status *status, int
@@ -373,7 +413,7 @@
                        *error_code);
 void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
 			    datatype, int file_ptr_type, ADIO_Offset 
-			    offset, ADIO_Offset **offset_list_ptr, int
+			    offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
 			    **len_list_ptr, ADIO_Offset *start_offset_ptr,
 			    ADIO_Offset *end_offset_ptr, int
 			   *contig_access_count_ptr);
@@ -381,7 +421,9 @@
 			     *end_offsets, int nprocs, int nprocs_for_coll,
 			     ADIO_Offset *min_st_offset_ptr,
 			     ADIO_Offset **fd_start_ptr, ADIO_Offset 
-			     **fd_end_ptr, ADIO_Offset *fd_size_ptr);
+			     **fd_end_ptr, int min_fd_size, 
+			     ADIO_Offset *fd_size_ptr,
+			     int striping_unit);
 int ADIOI_Calc_aggregator(ADIO_File fd,
                                  ADIO_Offset off,
                                  ADIO_Offset min_off,
@@ -390,7 +432,7 @@
                                  ADIO_Offset *fd_start,
                                  ADIO_Offset *fd_end);
 void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, 
-			    int *len_list, int
+			    ADIO_Offset *len_list, int
 			    contig_access_count, ADIO_Offset 
 			    min_st_offset, ADIO_Offset *fd_start,
 			    ADIO_Offset *fd_end, ADIO_Offset fd_size,
@@ -405,6 +447,107 @@
 				int nprocs, int myrank,
 				int *count_others_req_procs_ptr,
 				ADIOI_Access **others_req_ptr);  
+
+/* KC && AC - New Collective I/O internals*/
+
+#define TEMP_OFF 0
+#define REAL_OFF 1
+#define MAX_OFF_TYPE 2
+
+/* Communication Tags */
+#define DATA_TAG 30
+#define AMT_TAG 31
+
+/* cb_fr_type user size is non-zero */
+#define ADIOI_FR_AAR 0
+#define ADIOI_FR_FSZ -1
+#define ADIOI_FR_USR_REALMS -2
+
+typedef struct flatten_state
+{
+    ADIO_Offset abs_off;
+    ADIO_Offset cur_sz;
+    ADIO_Offset idx;
+    ADIO_Offset cur_reg_off;
+} flatten_state;
+
+typedef struct view_state
+{
+    ADIO_Offset fp_ind;    /* file view params*/
+    ADIO_Offset disp;      /* file view params*/
+    ADIO_Offset byte_off;
+    ADIO_Offset sz;
+    ADIO_Offset ext;       /* preserved extent from MPI_Type_extent */
+    ADIO_Offset type_sz;
+
+    /* Current state */
+    flatten_state cur_state;
+    /* Scratch state for counting up ol pairs */
+    flatten_state tmp_state;
+
+    /* Preprocessed data amount and ol pairs */
+    ADIO_Offset pre_sz;
+    int pre_ol_ct;
+    MPI_Aint *pre_disp_arr;
+    int *pre_blk_arr;
+    
+    ADIOI_Flatlist_node *flat_type_p;
+} view_state;
+
+void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype,
+			int file_ptr_type, ADIO_Offset offset,
+			ADIO_Offset *st_offset, ADIO_Offset *end_offset);
+int ADIOI_Agg_idx (int rank, ADIO_File fd);
+void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset,
+			     ADIO_Offset max_end_offset);
+void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count,
+		      MPI_Datatype datatype, int file_ptr_type,
+		      ADIO_Offset offset, MPI_Datatype custom_ftype,
+		      int rdwr, ADIO_Status *status, int
+		      *error_code);
+void ADIOI_IOStridedColl(ADIO_File fd, void *buf, int count, int rdwr,
+                       MPI_Datatype datatype, int file_ptr_type,
+                       ADIO_Offset offset, ADIO_Status *status, int
+                       *error_code);
+void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p);
+ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type);
+void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
+			   ADIO_File fd, int count,
+			   MPI_Datatype datatype, ADIO_Offset off,
+			   view_state *my_mem_view_state_arr,
+			   view_state *agg_file_view_state_arr,
+			   view_state *client_file_view_state_arr);
+int ADIOI_init_view_state(int file_ptr_type,
+		    int nprocs, 
+		    view_state *view_state_arr,
+		    int op_type);
+int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
+			 view_state *client_file_view_state_arr,
+			 MPI_Datatype *client_comm_dtype_arr,
+			 ADIO_Offset *client_comm_sz_arr,
+			 ADIO_Offset *agg_dtype_offset_p,
+			 MPI_Datatype *agg_dtype_p);
+int ADIOI_Build_client_reqs(ADIO_File fd, 
+			    int nprocs,
+			    view_state *my_mem_view_state_arr,
+			    view_state *agg_file_view_state_arr,
+			    ADIO_Offset *agg_comm_sz_arr,
+			    MPI_Datatype *agg_comm_dtype_arr);
+int ADIOI_Build_client_pre_req(ADIO_File fd,
+                               int agg_rank,
+			       int agg_idx,
+                               view_state *my_mem_view_state_p,
+                               view_state *agg_file_view_state_p,
+                               ADIO_Offset max_pre_req_sz,
+                               int max_ol_ct);
+int ADIOI_Build_client_req(ADIO_File fd,
+			   int agg_rank,
+			   int agg_idx,
+			   view_state *my_mem_view_state_p,
+			   view_state *agg_file_view_state_p,
+			   ADIO_Offset agg_comm_sz,
+			   MPI_Datatype *agg_comm_dtype_p);
+
 ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, 
 				     int whence, int *error_code);
 void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
@@ -585,6 +728,23 @@
 int ADIOI_Strnapp( char *, const char *, size_t );
 char *ADIOI_Strdup( const char * );
 
+/* the current MPI standard is not const-correct, and modern compilers warn
+ * about the following sort of code:
+ *
+ *   MPI_Info_set(info, "key", "val");
+ *
+ * reminding us that "key" and "val" are const.  We use the following macros to
+ * cast away the const and suppress the warning. */
+#define ADIOI_Info_set(info_,key_str_,val_) \
+    MPI_Info_set((info_),(char*)(key_str_),(char*)(val_))
+#define ADIOI_Info_get(info_,key_str_,val_len_,val_,flag_) \
+    MPI_Info_get((info_),(char*)(key_str_),(val_len_),(val_),(flag_))
+#define ADIOI_Info_get_valuelen(info_,key_str_,val_len_,flag_) \
+    MPI_Info_get_valuelen((info_),(char*)(key_str_),(val_len_),(flag_))
+#define ADIOI_Info_delete(info_,key_str_) \
+    MPI_Info_delete((info_),(char*)(key_str_))
+
+
 /* Provide a fallback snprintf for systems that do not have one */
 /* Define attribute as empty if it has no definition */
 #ifndef ATTRIBUTE
@@ -644,7 +804,55 @@
 int  ADIOI_MPE_unlock_b;
 int  ADIOI_MPE_postwrite_a;
 int  ADIOI_MPE_postwrite_b;
-#endif
+int  ADIOI_MPE_openinternal_a;
+int  ADIOI_MPE_openinternal_b;
+int  ADIOI_MPE_stat_a;
+int  ADIOI_MPE_stat_b;
+#endif
+
+#ifdef ROMIO_INSIDE_MPICH2
+/* Assert that this MPI_Aint value can be cast to a ptr value without problem.*/
+/* Basic idea is the value should be unchanged after casting
+   (no loss of (meaningful) high order bytes in 8 byte MPI_Aint
+      to (possible) 4 byte ptr cast)                              */
+/* Should work even on 64bit or old 32bit configs                 */
+  /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and
+         MPI_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
+  #include "mpiimpl.h"
+
+  #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(MPIR_Pint)
+  /* The next two casts are only used when you don't want sign extension
+     when casting a (possible 4 byte) aint to a (8 byte) long long or offset */
+  #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
+  #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG
 
+  #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) MPID_Ensure_Aint_fits_in_pointer(aint_value)
+  #define ADIOI_Assert MPIU_Assert
+#else /* not inside MPICH2: plain C fallbacks for the definitions above */
+  #include <assert.h>
+  #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)
+  #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
+  #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG
+  #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value)
+  #define ADIOI_Assert assert
+  #define MPIR_Upint unsigned long /* was unsigned int (narrower than a pointer on LP64); NOTE(review): still 32-bit on LLP64 */
+  #define MPIU_THREADPRIV_DECL
+#endif
+
+#ifdef USE_DBG_LOGGING    /* TODO: fix dependency on mpich? */
+/* DBGT_FPRINTF terse level printing (single if/else statement: safe under an unbraced if/else) */
+#define DBGT_FPRINTF if (!((void)(MPIU_DBG_SELECTED(ROMIO,VERBOSE) && fprintf(stderr,"%s:%d:",__FILE__,__LINE__)), \
+MPIU_DBG_SELECTED(ROMIO,TERSE))) {} else fprintf
+/* DBG_FPRINTF default (typical level) printing; file:line prefix is still emitted only at VERBOSE */
+#define DBG_FPRINTF if (!((void)(MPIU_DBG_SELECTED(ROMIO,VERBOSE) && fprintf(stderr,"%s:%d:",__FILE__,__LINE__)), \
+MPIU_DBG_SELECTED(ROMIO,TYPICAL))) {} else fprintf
+/* DBGV_FPRINTF verbose level printing */
+#define DBGV_FPRINTF if (!((void)(MPIU_DBG_SELECTED(ROMIO,VERBOSE) && fprintf(stderr,"%s:%d:",__FILE__,__LINE__)), \
+ MPIU_DBG_SELECTED(ROMIO,VERBOSE))) {} else fprintf
+#else /* compile it out; "if (1) {} else" keeps a caller's own else from binding to the macro's if */
+#define DBGT_FPRINTF if (1) {} else fprintf
+#define DBG_FPRINTF if (1) {} else fprintf
+#define DBGV_FPRINTF if (1) {} else fprintf
+#endif
 #endif
 
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include: heap-sort.h
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/Makefile.am	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/Makefile.am	2010-11-15 15:03:31.000000000 +0100
@@ -27,6 +27,7 @@
         adioi_error.h \
         adioi_fs_proto.h \
         adioi.h \
+        heap-sort.h \
         mpio_error.h \
         mpipr.h \
         mpiu_greq.h \
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/mpio_error.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/mpio_error.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/mpio_error.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/mpio_error.h	2010-11-15 15:02:47.000000000 +0100
@@ -1,6 +1,5 @@
 /* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*  $Id: mpio_error.h,v 1.6 2005/05/23 23:27:50 rross Exp $
- *
+/*
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/adio/include/mpipr.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/mpipr.h
--- ompi-trunk/ompi/mca/io/romio/romio/adio/include/mpipr.h	2010-11-16 09:16:06.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include/mpipr.h	2010-11-15 15:03:31.000000000 +0100
@@ -24,6 +24,8 @@
 #define MPI_Alltoall PMPI_Alltoall
 #undef MPI_Alltoallv
 #define MPI_Alltoallv PMPI_Alltoallv
+#undef MPI_Alltoallw
+#define MPI_Alltoallw PMPI_Alltoallw
 #undef MPI_Attr_delete
 #define MPI_Attr_delete PMPI_Attr_delete
 #undef MPI_Attr_get
@@ -150,26 +152,6 @@
 #define MPI_Group_union PMPI_Group_union
 #undef MPI_Ibsend
 #define MPI_Ibsend PMPI_Ibsend
-#if 0
-#undef MPI_Info_create
-#define MPI_Info_create PMPI_Info_create
-#undef MPI_Info_delete
-#define MPI_Info_delete PMPI_Info_delete
-#undef MPI_Info_dup
-#define MPI_Info_dup PMPI_Info_dup
-#undef MPI_Info_free
-#define MPI_Info_free PMPI_Info_free
-#undef MPI_Info_get
-#define MPI_Info_get PMPI_Info_get
-#undef MPI_Info_get_nkeys
-#define MPI_Info_get_nkeys PMPI_Info_get_nkeys
-#undef MPI_Info_get_nthkey
-#define MPI_Info_get_nthkey PMPI_Info_get_nthkey
-#undef MPI_Info_get_valuelen
-#define MPI_Info_get_valuelen PMPI_Info_get_valuelen
-#undef MPI_Info_set
-#define MPI_Info_set PMPI_Info_set
-#endif /* only conditionally set the info */
 #undef MPI_Init
 #define MPI_Init PMPI_Init
 #undef MPI_Initialized
@@ -306,13 +288,10 @@
 #define MPI_Waitany PMPI_Waitany
 #undef MPI_Waitsome
 #define MPI_Waitsome PMPI_Waitsome
-/* Open MPI: these functions are not supposed to be profiled */
-#if 0
 #undef MPI_Wtick
 #define MPI_Wtick PMPI_Wtick
 #undef MPI_Wtime
 #define MPI_Wtime PMPI_Wtime
-#endif
 
 /* commented out because these could be macros themselves, as in MPICH 
 #undef MPI_Type_c2f
@@ -392,4 +371,13 @@
 #define MPI_File_f2c PMPI_File_f2c
 #endif
 
+#undef MPI_Type_get_attr
+#define MPI_Type_get_attr PMPI_Type_get_attr
+#undef MPI_Type_set_attr
+#define MPI_Type_set_attr PMPI_Type_set_attr
+#undef MPI_Comm_set_attr
+#define MPI_Comm_set_attr PMPI_Comm_set_attr
+#undef MPI_Type_create_keyval
+#define MPI_Type_create_keyval PMPI_Type_create_keyval
+
 #endif
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/adio/include: romioconf.h.in
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/darray_support.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/darray_support.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/darray_support.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/darray_support.c	2010-11-15 15:02:47.000000000 +0100
@@ -34,7 +34,7 @@
     int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
     MPI_Aint *st_offsets, orig_extent, disps[3];
 
-    PMPI_Type_extent(oldtype, &orig_extent);
+    MPI_Type_extent(oldtype, &orig_extent);
 
 /* calculate position in Cartesian grid as MPI would (row-major
    ordering) */
@@ -78,7 +78,7 @@
 				 st_offsets+i); 
 		break;
 	    }
-	    if (i) PMPI_Type_free(&type_old);
+	    if (i) MPI_Type_free(&type_old);
 	    type_old = type_new;
 	}
 
@@ -116,7 +116,7 @@
                            type_old, &type_new, st_offsets+i); 
 		break;
 	    }
-	    if (i != ndims-1) PMPI_Type_free(&type_old);
+	    if (i != ndims-1) MPI_Type_free(&type_old);
 	    type_old = type_new;
 	}
 
@@ -140,9 +140,9 @@
     types[1] = type_new;
     types[2] = MPI_UB;
     
-    PMPI_Type_struct(3, blklens, disps, types, newtype);
+    MPI_Type_struct(3, blklens, disps, types, newtype);
 
-    PMPI_Type_free(&type_new);
+    MPI_Type_free(&type_new);
     DLOOP_Free(st_offsets);
     DLOOP_Free(coords);
     return MPI_SUCCESS;
@@ -187,18 +187,18 @@
     stride = orig_extent;
     if (order == MPI_ORDER_FORTRAN) {
 	if (dim == 0) 
-	    PMPI_Type_contiguous(mysize, type_old, type_new);
+	    MPI_Type_contiguous(mysize, type_old, type_new);
 	else {
 	    for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
-	    PMPI_Type_hvector(mysize, 1, stride, type_old, type_new);
+	    MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
 	}
     }
     else {
 	if (dim == ndims-1) 
-	    PMPI_Type_contiguous(mysize, type_old, type_new);
+	    MPI_Type_contiguous(mysize, type_old, type_new);
 	else {
 	    for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
-	    PMPI_Type_hvector(mysize, 1, stride, type_old, type_new);
+	    MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
 	}
 
     }
@@ -252,7 +252,7 @@
 	for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
     else for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
 
-    PMPI_Type_hvector(count, blksize, stride, type_old, type_new);
+    MPI_Type_hvector(count, blksize, stride, type_old, type_new);
 
     if (rem) {
 	/* if the last block is of size less than blksize, include
@@ -265,9 +265,9 @@
 	blklens[0] = 1;
 	blklens[1] = rem;
 
-	PMPI_Type_struct(2, blklens, disps, types, &type_tmp);
+	MPI_Type_struct(2, blklens, disps, types, &type_tmp);
 
-	PMPI_Type_free(type_new);
+	MPI_Type_free(type_new);
 	*type_new = type_tmp;
     }
 
@@ -282,8 +282,8 @@
         types[2] = MPI_UB;
         disps[2] = orig_extent * array_of_gsizes[dim];
         blklens[0] = blklens[1] = blklens[2] = 1;
-        PMPI_Type_struct(3, blklens, disps, types, &type_tmp);
-        PMPI_Type_free(type_new);
+        MPI_Type_struct(3, blklens, disps, types, &type_tmp);
+        MPI_Type_free(type_new);
         *type_new = type_tmp;
 
         *st_offset = 0;  /* set it to 0 because it is taken care of in
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c	2010-11-15 15:02:47.000000000 +0100
@@ -38,7 +38,7 @@
     MPI_Aint stride;
     MPI_Aint *disps;
 
-    PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
+    MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
 
     /* some named types do need dataloops; handle separately. */
     if (combiner == MPI_COMBINER_NAMED) {
@@ -93,7 +93,7 @@
      * note: in the struct case below we'll handle any additional
      *       types "below" the current one.
      */
-    PMPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3,
+    MPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3,
 			   &type0_combiner);
     if (type0_combiner != MPI_COMBINER_NAMED)
     {
@@ -228,7 +228,7 @@
 	case MPI_COMBINER_STRUCT:
 	    for (i = 1; i < ints[0]; i++) {
 		int type_combiner;
-		PMPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3,
+		MPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3,
 				       &type_combiner);
 
 		if (type_combiner != MPI_COMBINER_NAMED) {
@@ -288,7 +288,7 @@
 					    dldepth_p,
 					    flag);
 	    
-	    PMPI_Type_free(&tmptype);
+	    MPI_Type_free(&tmptype);
 	    break;
 	case MPI_COMBINER_DARRAY:
 	    ndims = ints[2];
@@ -309,7 +309,7 @@
 					    dldepth_p,
 					    flag);
 
-	    PMPI_Type_free(&tmptype);
+	    MPI_Type_free(&tmptype);
 	    break;
 	case MPI_COMBINER_F90_REAL:
 	case MPI_COMBINER_F90_COMPLEX:
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h	2010-11-15 15:03:31.000000000 +0100
@@ -80,16 +80,4 @@
 					MPI_Datatype oldtype, 
 					MPI_Datatype *newtype);
 
-#if 0
-/* Helper functions for accessing datatype contents */
-void PREPEND_PREFIX(Type_access_contents)(MPI_Datatype type,
-					  int **ints_p,
-					  MPI_Aint **aints_p,
-					  MPI_Datatype **types_p);
-void PREPEND_PREFIX(Type_release_contents)(MPI_Datatype type,
-					   int **ints_p,
-					   MPI_Aint **aints_p,
-					   MPI_Datatype **types_p);
-#endif
-
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h	2010-11-15 15:03:31.000000000 +0100
@@ -204,9 +204,6 @@
   this union, 'count', allows quick access to the shared 'count' field in the
   five dataloop structure.
 . extent - The extent of the dataloop
-#if 0
-- handle     - handle for the corresponding 'MPI_Datatype'.
-#endif
 
   Module:
   Datatype
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c	2010-11-15 15:02:47.000000000 +0100
@@ -80,7 +80,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     if (!attrflag) {
@@ -107,7 +107,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
     
     if (!attrflag) {
@@ -131,7 +131,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
     
     if (!attrflag) {
@@ -163,7 +163,7 @@
     int mpi_errno, attrflag;
     int nr_ints, nr_aints, nr_types, combiner;
 
-    mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+    mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
 				       &nr_types, &combiner);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
@@ -183,7 +183,7 @@
 	MPIO_Segment  *segp;
 	MPI_Offset     bytes;
 
-	mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp,
+	mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp,
 				      &attrflag);
 	DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 	if (!attrflag) {
@@ -231,7 +231,7 @@
     int mpi_errno;
     int nr_ints, nr_aints, nr_types, combiner;
 
-    mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+    mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
 				       &nr_types, &combiner);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
@@ -273,7 +273,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_PTR))
@@ -293,7 +293,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_SIZE))
@@ -313,7 +313,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_DEPTH))
@@ -333,7 +333,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
     if (!attrflag) {
 	dtp = MPIO_Datatype_allocate(type);
@@ -355,7 +355,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
     if (!attrflag) {
 	dtp = MPIO_Datatype_allocate(type);
@@ -375,7 +375,7 @@
 	MPIO_Datatype_initialize();
     }
 
-    mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+    mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
     if (!attrflag) {
 	dtp = MPIO_Datatype_allocate(type);
@@ -390,7 +390,7 @@
 {
     int nr_ints, nr_aints, nr_types, combiner;
 
-    PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
+    MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
     if (combiner != MPI_COMBINER_NAMED ||
 	type == MPI_FLOAT_INT ||
 	type == MPI_DOUBLE_INT ||
@@ -409,20 +409,20 @@
     DLOOP_Assert(MPIO_Datatype_keyval == MPI_KEYVAL_INVALID);
 
     /* create keyval for dataloop storage */
-    mpi_errno = PMPI_Type_create_keyval(MPIO_Datatype_copy_attr_function,
+    mpi_errno = MPI_Type_create_keyval(MPIO_Datatype_copy_attr_function,
 				       MPIO_Datatype_delete_attr_function,
 				       &MPIO_Datatype_keyval,
 				       NULL);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     /* create keyval to hook to COMM_WORLD for finalize */
-    mpi_errno = PMPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,
+    mpi_errno = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,
 					MPIO_Datatype_finalize,
 					&MPIO_Datatype_finalize_keyval,
 					NULL);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
-    mpi_errno = PMPI_Comm_set_attr(MPI_COMM_WORLD,
+    mpi_errno = MPI_Comm_set_attr(MPI_COMM_WORLD,
 				   MPIO_Datatype_finalize_keyval,
 				   NULL);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
@@ -444,10 +444,10 @@
     DLOOP_Assert(MPIO_Datatype_keyval != MPI_KEYVAL_INVALID);
 
     /* remove keyvals */
-    mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_keyval);
+    mpi_errno = MPI_Type_free_keyval(&MPIO_Datatype_keyval);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
-    mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_finalize_keyval);
+    mpi_errno = MPI_Comm_free_keyval(&MPIO_Datatype_finalize_keyval); /* communicator keyval: created with MPI_Comm_create_keyval */
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     printf("freed keyvals\n");
@@ -468,7 +468,7 @@
     dtp->dloop_size  = -1;
     dtp->dloop_depth = -1;
     
-    mpi_errno = PMPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp);
+    mpi_errno = MPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
     printf("allocated attr struct\n");
@@ -496,13 +496,13 @@
 	int size;
 	MPI_Aint lb, extent, true_lb, true_extent;
 	
-	mpi_errno = PMPI_Type_size(type, &size);
+	mpi_errno = MPI_Type_size(type, &size);
 	DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 	
-	mpi_errno = PMPI_Type_get_extent(type, &lb, &extent);
+	mpi_errno = MPI_Type_get_extent(type, &lb, &extent);
 	DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 	
-	mpi_errno = PMPI_Type_get_true_extent(type, &true_lb, &true_extent); 
+	mpi_errno = MPI_Type_get_true_extent(type, &true_lb, &true_extent); 
 
 	dtp->size        = (MPI_Offset) size;
 	dtp->extent      = (MPI_Offset) extent;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h	2010-11-15 15:02:47.000000000 +0100
@@ -12,8 +12,12 @@
 #include <stdlib.h>
 
 /* romioconf.h must be included *before* mpi.h to avoid some redeclarations */
+#ifdef HAVE_MPITYPEDEFS_H
 #include "mpitypedefs.h"
+#endif
+#ifdef HAVE_MPICHCONF_H
 #include "mpichconf.h"
+#endif
 #include "romioconf.h"
 
 #include <mpi.h>
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c	2010-11-15 15:03:31.000000000 +0100
@@ -548,16 +548,7 @@
      *       DLOOP_Count i and DLOOP_Offset size would need to be
      *       declared above.
      */
-#if 0
-    last_loc = rel_off * offsetarray[0] + blockarray[0] * el_size;
-    for (i=1; i < count; i++) {
-	if (last_loc == rel_off + offsetarray[i]) new_blk_count--;
-
-	last_loc = rel_off + offsetarray[i] + blockarray[i] * el_size;
-    }
-#else
     last_loc = rel_off + offsetarray[count-1] + blockarray[count-1] * el_size;
-#endif
 
     paramp->last_loc = last_loc;
     paramp->count += new_blk_count;
@@ -690,19 +681,6 @@
     DLOOP_Handle_get_size_macro(el_type, el_size);
     blocks_left = *blocks_p;
 
-#if 0
-    MPIU_DBG_MSG_FMT(DATATYPE,VERBOSE,(MPIU_DBG_FDEST,
-	     "\t[vector to vec: do=%d, dp=%x, len=%d, ind=%d, ct=%d, blksz=%d, str=%d, blks=%d]\n",
-		    (unsigned) rel_off,
-		    (unsigned) (MPI_Aint)bufp,
-		    paramp->u.pack_vector.length,
-		    paramp->u.pack_vector.index,
-		    count,
-		    blksz,
-		    stride,
-		    (int) *blocks_p));
-#endif
-
     for (i=0; i < count && blocks_left > 0; i++) {
 	int last_idx;
 	char *last_end = NULL;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c	2010-11-15 15:02:47.000000000 +0100
@@ -21,23 +21,23 @@
     int i, blklens[3];
     MPI_Datatype tmp1, tmp2, types[3];
 
-    PMPI_Type_extent(oldtype, &extent);
+    MPI_Type_extent(oldtype, &extent);
 
     if (order == MPI_ORDER_FORTRAN) {
 	/* dimension 0 changes fastest */
 	if (ndims == 1) {
-	    PMPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
+	    MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
 	}
 	else {
-	    PMPI_Type_vector(array_of_subsizes[1],
+	    MPI_Type_vector(array_of_subsizes[1],
 			     array_of_subsizes[0],
 			     array_of_sizes[0], oldtype, &tmp1);
 	    
 	    size = array_of_sizes[0]*extent;
 	    for (i=2; i<ndims; i++) {
 		size *= array_of_sizes[i-1];
-		PMPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
-		PMPI_Type_free(&tmp1);
+		MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
+		MPI_Type_free(&tmp1);
 		tmp1 = tmp2;
 	    }
 	}
@@ -55,18 +55,18 @@
     else /* order == MPI_ORDER_C */ {
 	/* dimension ndims-1 changes fastest */
 	if (ndims == 1) {
-	    PMPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
+	    MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
 	}
 	else {
-	    PMPI_Type_vector(array_of_subsizes[ndims-2],
+	    MPI_Type_vector(array_of_subsizes[ndims-2],
 			     array_of_subsizes[ndims-1],
 			     array_of_sizes[ndims-1], oldtype, &tmp1);
 	    
 	    size = array_of_sizes[ndims-1]*extent;
 	    for (i=ndims-3; i>=0; i--) {
 		size *= array_of_sizes[i+1];
-		PMPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
-		PMPI_Type_free(&tmp1);
+		MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
+		MPI_Type_free(&tmp1);
 		tmp1 = tmp2;
 	    }
 	}
@@ -91,9 +91,9 @@
     types[1] = tmp1;
     types[2] = MPI_UB;
     
-    PMPI_Type_struct(3, blklens, disps, types, newtype);
+    MPI_Type_struct(3, blklens, disps, types, newtype);
 
-    PMPI_Type_free(&tmp1);
+    MPI_Type_free(&tmp1);
 
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c
--- ompi-trunk/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c	2010-11-16 09:15:32.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c	2010-11-15 15:03:31.000000000 +0100
@@ -162,7 +162,7 @@
     int ndims;
     MPI_Datatype tmptype;
 
-    mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+    mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
 				       &nr_types, &combiner);
     DLOOP_Assert(mpi_errno == MPI_SUCCESS);
 
@@ -170,8 +170,8 @@
 	int mpisize;
 	MPI_Aint mpiextent;
 
-	PMPI_Type_size(type, &mpisize);
-	PMPI_Type_extent(type, &mpiextent);
+	MPI_Type_size(type, &mpisize);
+	MPI_Type_extent(type, &mpiextent);
 	tfp->size    = (DLOOP_Offset) mpisize;
 	tfp->lb      = 0;
 	tfp->ub      = (DLOOP_Offset) mpiextent;
@@ -369,7 +369,7 @@
 						  types[0],
 						  &tmptype);
 	    PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
-	    PMPI_Type_free(&tmptype);
+	    MPI_Type_free(&tmptype);
 	    break;
 	case MPI_COMBINER_DARRAY:
 	    ndims = ints[2];
@@ -386,7 +386,7 @@
 						&tmptype);
 
 	    PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
-	    PMPI_Type_free(&tmptype);
+	    MPI_Type_free(&tmptype);
 	    break;
 	case MPI_COMBINER_F90_REAL:
 	case MPI_COMBINER_F90_COMPLEX:
@@ -437,7 +437,7 @@
 	/* skip zero blocklength elements */
 	if (ints[i+1] == 0) continue;
 
-	PMPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types,
+	MPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types,
 			       &combiner);
 
 	/* opt: could just inline assignments for combiner == NAMED case */
@@ -530,10 +530,6 @@
 	}
     }
 
-#if 0
-    printf("size = %d, extent = %d\n", (int) tmp_size, (int) tmp_extent);
-#endif
-
     tfp->size    = tmp_size;
     tfp->lb      = min_lb;
     tfp->ub      = max_ub;
@@ -576,7 +572,7 @@
     if (type == MPI_LB || type == MPI_UB)
 	return 0;
 
-    PMPI_Type_size(type, &alignsize);
+    MPI_Type_size(type, &alignsize);
 
     switch(type)
     {
@@ -882,46 +878,3 @@
     if (padding_varies_by_pos) return 1;
     else                       return 0;
 }
-
-#if 0
-/* from MPICH2 PAC_C_DOUBLE_ALIGNMENT_EXCEPTION test:
- *
- * Other tests assume that there is potentially a maximum alignment
- * and that if there is no maximum alignment, or a type is smaller than
- * that value, then we align on the size of the value, with the exception
- * of the "position-based alignment" rules we test for separately.
- * 
- * It turns out that these assumptions have fallen short in at least one
- * case, on MacBook Pros, where doubles are aligned on 4-byte boundaries
- * even when long doubles are aligned on 16-byte boundaries. So this test
- * is here specifically to handle this case.
- * 
- * Return value is 4 or 0.
-*/
-static int double_align_exception()
-{
-    struct { char a; double b; } char_double;
-    struct { double b; char a; } double_char;
-    int extent1, extent2, align_4 = 0;
-
-    extent1 = sizeof(char_double);
-    extent2 = sizeof(double_char);
-
-    /* we're interested in the largest value, will let separate test
-     * deal with position-based issues.
-     */
-    if (extent1 < extent2) extent1 = extent2;
-    if ((sizeof(double) == 8) && (extent1 % 8) != 0) {
-       if (extent1 % 4 == 0) {
-#ifdef HAVE_MAX_FP_ALIGNMENT
-          if (HAVE_MAX_FP_ALIGNMENT >= 8) align_4 = 1;
-#else
-          align_4 = 1;
-#endif
-       }
-    }
-
-    if (align_4) return 4;
-    else         return 0;
-}
-#endif
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_am.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_atomic.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_attr_alias.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_bugfix.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_cache.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_cc.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_cxx.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_f77.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_f77new.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_fc.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_libs.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_make.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_mpi.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_romio.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_runlog.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_shl.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_subcfg.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: aclocal_util.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: ax_prefix_config_h.m4
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/confdb: ax_tls.m4
Only in ompi-trunk/ompi/mca/io/romio/romio/confdb: .placeholder
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/.config_params NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/.config_params
--- ompi-trunk/ompi/mca/io/romio/romio/.config_params	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/.config_params	2010-11-15 15:11:10.000000000 +0100
@@ -35,4 +35,5 @@
 __hp_mpi
 __cray_mpi
 __lam_mpi
+__Darwin
 __open_mpi
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/configure.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/configure.in
--- ompi-trunk/ompi/mca/io/romio/romio/configure.in	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/configure.in	2010-11-19 10:32:06.000000000 +0100
@@ -3,7 +3,6 @@
 # autoconf --localdir=../confdb configure.in
 # (or wherever the confdb is)
 #
-
 # Open MPI: Modifications to this file were done on an "let's do the
 # minimum possible" basis, not so that we can skip on the work or
 # provide any less functionality, but more from a perspective that we
@@ -33,7 +32,6 @@
 fi
 
 AC_CONFIG_HEADER(adio/include/romioconf.h)
-
 # Open MPI: added AH_TOP
 AH_TOP([#include "romioconf-undefs.h"])
 
@@ -106,7 +104,6 @@
 MPI_FARRAY7=""
 DEFS=""
 ROMIO_LFLAGS=""
-ROMIO_LIBLIST=""
 ROMIO_TCFLAGS=""
 ROMIO_TCPPFLAGS=""
 ROMIO_TFFLAGS=""
@@ -143,7 +140,7 @@
 #
 have_aio=no
 #
-known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre bgl bglockless"
+known_filesystems="nfs ufs pfs pvfs pvfs2 testfs xfs panfs gridftp lustre bgl bglockless zoidfs"
 # Open MPI: added "open_mpi_mpi"
 known_mpi_impls="mpich2_mpi mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi open_mpi_mpi"
 #
@@ -202,6 +199,9 @@
 dnl scripts.
 AC_CONFIG_AUX_DIR(confdb)
 
+# Check if Make is working
+PAC_PROG_MAKE
+
 # Open MPI: Init automake
 AM_INIT_AUTOMAKE(io-romio, 1.0.0, 'no')
 
@@ -479,13 +479,13 @@
     ;;
 esac
 
-AC_PROG_CC
+PAC_PROG_CC
 
 if test "$NOF77" != 1 ; then
     # Grrr.   The autoconf test for F77 will abort the configure
     # if no compiler is found.  We'd prefer to simply turn off
     # support for Fortran, and/or give a more informative message.
-    AC_PROG_F77
+    PAC_PROG_F77
 fi
 if test "$CC" = "gcc" -a -z "$C_DEBUG_FLAG" ; then
      C_DEBUG_FLAG="-g -O -Wall -Wstrict-prototypes -Wmissing-prototypes"
@@ -523,7 +523,7 @@
     fi
     AC_DEFINE(AIX,1,[Define for AIX])
 # assume long long exists.
-    longlongsize=${longlongsize:-8} 
+    ac_cv_sizeof_long_long=${ac_cv_sizeof_long_long:-8} 
     MPI_OFFSET_KIND1="      INTEGER MPI_OFFSET_KIND"
     MPI_OFFSET_KIND2="      PARAMETER (MPI_OFFSET_KIND=8)"
     MPI_OFFSET_KIND_VAL=8
@@ -547,7 +547,7 @@
 #
 if test -n "$arch_freebsd" || test -n "$arch_LINUX" || test -n "$arch_LINUX_ALPHA" || test -n "$arch_netbsd" || test -n "$arch_openbsd" ; then
     if test -n "$arch_freebsd" || test -n "$arch_netbsd" || test -n "$arch_openbsd"; then
-        longlongsize=${longlongsize:-0}
+        ac_cv_sizeof_long_long=${ac_cv_sizeof_long_long:-0}
 	# printf doesn't work properly and no integer*8 as far as I can tell
     fi
     # Find the CPP before the header check
@@ -634,7 +634,7 @@
     MPI_OFFSET_KIND1="      INTEGER MPI_OFFSET_KIND"
     MPI_OFFSET_KIND2="      PARAMETER (MPI_OFFSET_KIND=8)"
     MPI_OFFSET_KIND_VAL=8
-    ROMIO_LIBLIST="$ROMIO_LIBLIST -laio"
+    LIBS="$LIBS -laio"
 fi    
 #
 if test -n "$arch_CRAY" ; then
@@ -719,7 +719,7 @@
                 mpi_mpich=1
             fi            
 	fi
-        #OMPI: Bad for automake: RANLIB=":"
+         #OMPI: Bad for automake: RANLIB=":"
         AC_DEFINE(AIO_SIGNOTIFY_NONE,1,[Define if no signotify])
 	if test $cputype -ge 5000 ; then
             MIPS=4
@@ -783,12 +783,29 @@
 # Header files
 # Find the CPP before the header check
 AC_PROG_CPP
-AC_CHECK_HEADERS(unistd.h fcntl.h malloc.h stddef.h)
+AC_CHECK_HEADERS([unistd.h fcntl.h malloc.h stddef.h sys/types.h])
 #
-CROSS_SIZEOF_INT=${CROSS_SIZEOF_INT:-0}
-CROSS_SIZEOF_VOID_P=${CROSS_SIZEOF_VOID_P:-0}
-AC_CHECK_SIZEOF(int,$CROSS_SIZEOF_INT)
-AC_CHECK_SIZEOF(void *,$CROSS_SIZEOF_VOID_P)
+
+# When compiling ROMIO on Darwin with _POSIX_C_SOURCE defined (such as when
+# using --enable-strict in MPICH2), sys/types.h does not define u_short and
+# friends unless _DARWIN_C_SOURCE is also defined (see compat(5) on a Darwin
+# box).  This would normally be fine, except sys/stat.h defines struct stat to
+# use u_long, so strict compiles fail.  One option is to also compile with
+# _DARWIN_C_SOURCE, but this disables much of the strictness that is intended
+# by _POSIX_C_SOURCE.  Instead we just define our own types if they are not
+# provided by the system.  This isn't quite as safe as typedef'ing the
+# replacement types, but it will apply to later configure tests, which is
+# important.
+AC_CHECK_TYPE([u_char],[],[AC_DEFINE_UNQUOTED([u_char],[unsigned char],[Define to "unsigned char" if sys/types.h does not define.])])
+AC_CHECK_TYPE([u_short],[],[AC_DEFINE_UNQUOTED([u_short],[unsigned short],[Define to "unsigned short" if sys/types.h does not define.])])
+AC_CHECK_TYPE([u_int],[],[AC_DEFINE_UNQUOTED([u_int],[unsigned int],[Define to "unsigned int" if sys/types.h does not define.])])
+AC_CHECK_TYPE([u_long],[],[AC_DEFINE_UNQUOTED([u_long],[unsigned long],[Define to "unsigned long" if sys/types.h does not define.])])
+
+# must come _after_ the above checks for u_char/u_short/u_int/u_long
+AC_CHECK_HEADERS([sys/attr.h])
+
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(void *)
 AC_CACHE_CHECK([for int large enough for pointers],
 pac_cv_int_hold_pointer,[
 if test "$ac_cv_sizeof_int" = "0" -o \
@@ -805,14 +822,10 @@
     dnl Switch to a conforming name (start with HAVE or USE)
     AC_DEFINE(HAVE_INT_LT_POINTER,1,[Define if int smaller than pointer])
 fi
-#
-dnl The original ROMIO configure used a set of complex tests here; this
-dnl is a partial reworking using the autoconf2 sizeof macros, and allowing
-dnl for the standardized CROSS_xxx varaibles for cross-compilation environments
+
 # LL is the printf-style format name for output of a MPI_Offset.
 # We have to match this to the type that we use for MPI_Offset.
-CROSS_SIZEOF_LONG_LONG=${CROSS_SIZEOF_LONG_LONG:-0}
-AC_CHECK_SIZEOF(long long,$CROSS_SIZEOF_LONG_LONG)
+AC_CHECK_SIZEOF(long long)
 if test "$ac_cv_sizeof_long_long" != 0 ; then
     if test "$ac_cv_sizeof_long_long" = "8" ; then
        AC_DEFINE(HAVE_LONG_LONG_64,1,[Define if long long is 64 bits])
@@ -849,7 +862,7 @@
 
 
 #
-if test -n "$longlongsize"; then
+if test -n "$ac_cv_sizeof_long_long"; then
    if test $WITHIN_KNOWN_MPI_IMPL = no ; then
        PAC_MPI_LONG_LONG_INT
    else
@@ -1132,7 +1145,6 @@
 		# --with-file-system (or both)
 		CFLAGS="$CFLAGS $( $PVFS2_CONFIG --cflags)"
 		LIBS="$LIBS $( $PVFS2_CONFIG --libs)"
-		ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS"
 		FILE_SYSTEM="pvfs2 $FILE_SYSTEM"
 		file_system_pvfs2=1
 	fi
@@ -1185,8 +1197,20 @@
 if test -n "$file_system_testfs"; then
     AC_DEFINE(ROMIO_TESTFS,1,[Define for ROMIO with TESTFS])
 fi
+#
+# Verify presence of lustre/lustre_user.h
+#
 if test -n "$file_system_lustre"; then
-    AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE])
+    AC_CHECK_HEADERS(lustre/lustre_user.h,
+        AC_DEFINE(ROMIO_LUSTRE,1,[Define for ROMIO with LUSTRE]),
+        AC_MSG_ERROR([LUSTRE support requested but cannot find lustre/lustre_user.h header file]),
+        [
+        #include <unistd.h>
+        #ifdef __linux__
+        #include <linux/types.h>
+        #endif
+        ]
+    )
 fi
 
 if test -n "$file_system_xfs"; then
@@ -1271,6 +1295,13 @@
 
 
 
+if test -n "$file_system_zoidfs"; then  # ZoidFS requested: zoidfs.h is mandatory
+	AC_CHECK_HEADERS([zoidfs.h],
+		[AC_DEFINE(ROMIO_ZOIDFS,1,[Define for ROMIO with ZoidFS])],
+		[AC_MSG_ERROR([ZoidFS support requested but cannot find zoidfs.h header file])]
+	)
+fi
+
 #
 # Verify presence of pvfs2.h
 #
@@ -1285,8 +1316,8 @@
 
 # layout change after pvfs-2.6.3:
 if test -n "$file_system_pvfs2"; then
-    AC_COMPILE_IFELSE(
-       [  
+    AC_COMPILE_IFELSE([
+        AC_LANG_SOURCE([
 #include <stdlib.h>
 #include "pvfs2.h"
           int main(int argc, char **argv) { 
@@ -1294,7 +1325,7 @@
 	      PVFS_sys_attr attr;
 	      PVFS_sys_create(NULL, ref, attr, NULL, NULL, NULL, NULL); 
 	  return 0; }
-       ],
+       ])],
        , AC_DEFINE(HAVE_PVFS2_CREATE_WITHOUT_LAYOUT, 1, 
        		[Define if PVFS_sys_create does not have layout parameter])
        )
@@ -1309,15 +1340,10 @@
     SYSDEP_INC=-I${prefix}/include
 else
     SYSDEP_INC=
-#
+
 # Check for presence and characteristics of async. I/O calls if
 # not disabled.
-#
-# Q: Do we need to list some "other libs" to get things to link?
-#
-save_libs=$LIBS
-LIBS=
-#
+
 # Some systems need pthreads to get AIO to work.  However, we don't want
 # to add pthreads just because it is there, as that can cause problems
 # with some implementations of pthreads and compilers (e.g., gcc version 3
@@ -1326,59 +1352,16 @@
 # *not linked* with pthreads.
 #
 if test "x$disable_aio" = "xno" ; then
-    foundPTHREAD=no
-
-    # Do we have aio_write in aio or rt?
-    saveLIBS=$LIBS
-    LIBS=
-    AC_SEARCH_LIBS(aio_write,aio rt,foundAIO=yes,foundAIO=no)
-    if test "$foundAIO" = yes ; then
-        AIOLIBS=$LIBS
-        LIBS="$saveLIBS $LIBS"
-    else
-        LIBS="$saveLIBS"
-    fi
-
-    # If not, try finding pthread_create first, and if found, try the
-    # test again.
-    if test "$foundAIO" = no ; then 
-        saveLIBS=$LIBS
-	LIBS=
-        AC_SEARCH_LIBS(pthread_create,pthread,foundPTHREAD=yes,
-	foundPTHREAD=no)
+    AC_SEARCH_LIBS(aio_write,aio rt,aio_write_found=yes,aio_write_found=no)
+    if test "$aio_write_found" = no ; then
+        # If not found, try finding pthread_create first, and if
+        # found, try the test again.
+        AC_SEARCH_LIBS(pthread_create,pthread,foundPTHREAD=yes,foundPTHREAD=no)
         if test "$foundPTHREAD" = yes ; then
-            AC_SEARCH_LIBS(aio_write,aio rt,foundAIO=yes,foundAIO=no)
-	    
-	    if test "$foundAIO" = yes ; then
-	        AIO_LIBS=$LIBS
-		LIBS="$saveLIBS $LIBS"
-	    else
-                LIBS=$saveLIBS
-            fi
-        else
-	    LIBS=$saveLIBS
+            AC_SEARCH_LIBS(aio_write,aio rt,aio_write_found=yes,aio_write_found=no)
         fi
     fi
-
-
-    if test "$foundAIO" = yes ; then
-        ROMIO_LIBLIST="$ROMIO_LIBLIST $AIOLIBS"
-        MPI_LIB="$MPI_LIB $AIOLIBS"
-	aio_write_found=yes
-    fi
- 
-dnl    AC_SEARCH_LIBS(pthread_create,pthread,
-dnl        ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS"
-dnl        MPI_LIB="$MPI_LIB $LIBS"
-dnl        )
-dnl    LIBS=
-dnl    AC_SEARCH_LIBS(aio_write,aio rt,
-dnl        ROMIO_LIBLIST="$ROMIO_LIBLIST $LIBS"
-dnl        MPI_LIB="$MPI_LIB $LIBS"
-dnl        aio_write_found=yes
-dnl    )
 fi
-LIBS=$save_libs
 
 if test "x$disable_aio" = "xno" -a -n "$aio_write_found" ; then
     AC_CHECK_HEADERS(signal.h)
@@ -1415,14 +1398,13 @@
     # The test is the following: if not cross compiling, try to run a 
     # program that includes a *reference* to aio_write but does not call it
     # If the libraries are not set up correctly, then this will fail.
-    save_libs=$LIBS
-    LIBS="$LIBS $ROMIO_LIBLIST"
+
     AC_MSG_CHECKING([whether aio routines can be used])
     # Include aio.h and the aiocb struct (since we'll need these to 
     # actually use the aio_write interface).  Note that this will 
     # fail for some pre-POSIX implementations of the aio interface 
     # (an old IBM interface needs an fd argument as well)
-    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
+    AC_TRY_RUN([
 #include <sys/types.h>
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -1433,22 +1415,24 @@
 #ifdef HAVE_SYS_AIO_H
 #include <sys/aio.h>
 #endif
-],[
+	int main(int argc, char **argv)
+	{
             struct aiocb *aiocbp;
-	    aio_write(aiocbp);
+	    if (argc > 10) aio_write(aiocbp);
 	    return 0;
-])],
-	[aio_runs=yes
-	 AC_MSG_RESULT(yes)],
-	[aio_runs=no
-	 AC_MSG_RESULT(no)]
-	[aio_runs=no
-	 AC_MSG_RESULT(no: aio routines disabled when cross compiling)]
+	}
+	],
+	aio_runs=yes
+	AC_MSG_RESULT(yes),
+	aio_runs=no
+	AC_MSG_RESULT(no),
+	aio_runs=no
+	AC_MSG_RESULT(no: aio routines disabled when cross compiling)
     )
     if test "$aio_runs" != "no" ; then
 	AC_DEFINE(ROMIO_HAVE_WORKING_AIO, 1, Define if AIO calls seem to work)
     fi
-
+    
     # now about that old IBM interface...
     # modern AIO interfaces have the file descriptor in the aiocb structure,
     # and will set ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES.  Old IBM
@@ -1460,7 +1444,7 @@
     # aio_read correctly
 
     AC_MSG_CHECKING([for obsolete two-argument aio_write])
-    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
+    AC_TRY_RUN([
 #include <sys/types.h>
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -1471,18 +1455,20 @@
 #ifdef HAVE_SYS_AIO_H
 #include <sys/aio.h>
 #endif
-],[
+	int main(int argc, char **argv)
+	{
 		int fd;
 		struct aiocb *aiocbp;
-		aio_write(fd, aiocbp);
+		if (argc > 10) aio_write(fd, aiocbp);
 		return 0;
-])],
-	[aio_two_arg_write=yes
-	 AC_MSG_RESULT(yes)],
-	[aio_two_arg_write=no 
-	 AC_MSG_RESULT(no)]
-	[aio_two_arg_write=no
-	 AC_MSG_RESULT(no: cannot test when cross-compiling)]
+	}
+	],
+	aio_two_arg_write=yes
+	AC_MSG_RESULT(yes),
+	aio_two_arg_write=no 
+	AC_MSG_RESULT(no),
+	aio_two_arg_write=no
+	AC_MSG_RESULT(no: cannot test when cross-compiling)
     )
 
     if test "$aio_two_arg_write" != "no" -a "$aio_runs" != "yes" ; then
@@ -1491,7 +1477,7 @@
     fi
 
     AC_MSG_CHECKING([for obsolete two-argument aio_suspend])
-    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
+    AC_TRY_RUN([
 #include <sys/types.h>
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
@@ -1502,25 +1488,25 @@
 #ifdef HAVE_SYS_AIO_H
 #include <sys/aio.h>
 #endif
-],[
+	int main(int argc, char **argv)
+	{
 		struct aiocb *aiocbp;
-		aio_suspend(1, &aiocbp);
+		if (argc > 10) aio_suspend(1, &aiocbp);
 		return 0;
-])],
-	[aio_two_arg_suspend=yes
-	 AC_MSG_RESULT(yes)],
-	[aio_two_arg_suspend=no 
-	 AC_MSG_RESULT(no)]
-	[aio_two_arg_suspend=no
-	 AC_MSG_RESULT(no: cannot test when cross compiling)]
+	}
+	],
+	aio_two_arg_suspend=yes
+	AC_MSG_RESULT(yes),
+	aio_two_arg_suspend=no 
+	AC_MSG_RESULT(no),
+	aio_two_arg_suspend=no
+	AC_MSG_RESULT(no: cannot test when cross compiling)
     )
 
     if test "$aio_two_arg_suspend" != "no" -a "$aio_runs" != "yes" ; then
 	AC_DEFINE(ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS, 1, Define if aio_suspend needs two arguments)
     fi
 
-    LIBS=$save_libs
-
     AC_MSG_CHECKING([for aio_fildes member of aiocb structure])
     AC_TRY_COMPILE([
 #ifdef HAVE_SIGNAL_H
@@ -1646,7 +1632,6 @@
 # Check for statfs (many) and specifically f_fstypename field (BSD)
 #
 AC_CHECK_HEADERS(sys/vfs.h sys/param.h sys/mount.h sys/statvfs.h)
-AC_CHECK_FUNCS([statfs])
 AC_MSG_CHECKING([whether struct statfs properly defined])
 AC_TRY_COMPILE([
 #ifdef HAVE_SYS_VFS_H
@@ -1698,7 +1683,7 @@
 #
 AC_CHECK_HEADERS(sys/stat.h sys/types.h unistd.h)
 AC_CHECK_FUNCS(stat,
-    [AC_DEFINE(HAVE_STAT, 1, Define if stat function is present)
+    AC_DEFINE(HAVE_STAT, 1, Define if stat function is present)
     AC_MSG_CHECKING([for st_fstype member of stat structure])
     AC_TRY_COMPILE([
 #ifdef HAVE_SYS_TYPES_H
@@ -1719,14 +1704,14 @@
 	AC_DEFINE(ROMIO_HAVE_STRUCT_STAT_WITH_ST_FSTYPE, 1, Define if struct stat has a st_fstype member),
 	AC_MSG_RESULT(no)
     )
-])
+)
 
 #
 # Check for statvfs and f_basetype field (Solaris, Irix, AIX, etc.)
 #
 AC_CHECK_HEADERS(sys/types.h sys/statvfs.h sys/vfs.h)
 AC_CHECK_FUNCS(statvfs,
-    [AC_DEFINE(HAVE_STATVFS, 1, Define if statvfs function is present)
+    AC_DEFINE(HAVE_STATVFS, 1, Define if statvfs function is present)
     AC_MSG_CHECKING([for f_basetype member of statvfs structure])
     AC_TRY_COMPILE([
 #ifdef HAVE_SYS_TYPES_H
@@ -1747,7 +1732,7 @@
 	AC_DEFINE(ROMIO_HAVE_STRUCT_STATVFS_WITH_F_BASETYPE, 1, defined if struct statvfs has a f_basetype member),
 	AC_MSG_RESULT(no)
     )
-])
+)
 
 #
 # Check for large file support.  Make sure that we can use the off64_t 
@@ -2002,9 +1987,8 @@
    # Turn off the building of the Fortran interface and the Info routines
    EXTRA_DIRS=""
    AC_DEFINE(HAVE_STATUS_SET_BYTES,1,[Define if status_set_bytes available])
-   DEFINE_HAVE_MPI_GREQUEST="#define HAVE_MPI_GREQUEST"
-   # Add the MPICH2_INCLUDE_FLAGS to CPPFLAGS
-   CPPFLAGS="$CPPFLAGS $MPICH2_INCLUDE_FLAGS"
+   DEFINE_HAVE_MPI_GREQUEST="#define HAVE_MPI_GREQUEST 1"
+   AC_DEFINE(HAVE_MPIU_FUNCS,1,[Define if MPICH2 memory tracing macros defined])
 fi
 #
 #
@@ -2014,6 +1998,7 @@
 if test $WITHIN_KNOWN_MPI_IMPL = no ; then
    PAC_TEST_MPIR_STATUS_SET_BYTES
    PAC_TEST_MPI_GREQUEST
+   PAC_TEST_MPIU_FUNCS
    AC_DEFINE(PRINT_ERR_MSG,1,[Define for printing error messages])
 fi
 #
@@ -2054,6 +2039,11 @@
     # Do we need to declare ftruncate?
     PAC_FUNC_NEEDS_DECL([#include <unistd.h>],ftruncate)
 fi
+
+AC_CHECK_FUNCS(lseek64)
+if test "$ac_cv_func_lseek64" = "yes" ; then
+    PAC_FUNC_NEEDS_DECL([#include <unistd.h>],lseek64)
+fi
 #
 # Create the directory lists for the Makefile
 FILE_SYS_DIRS=""
@@ -2149,7 +2139,7 @@
 # Open MPI - AM doesn't want the following:
 #   VPATH, CC, CPPFLAGS, CFLAGS, AR, RANLIB, F77, MAKE
 #VPATH='VPATH = .:${srcdir}'
-#AC_SUBST(VPATH)
+#AC_SUBST(VPATH)#
 AC_SUBST(ARCH)
 AC_SUBST(FILE_SYSTEM)
 #AC_SUBST(CC)
@@ -2208,7 +2198,6 @@
 AC_SUBST(TEST_F77)
 AC_SUBST(ROMIO_INCLUDE)
 AC_SUBST(ROMIO_LFLAGS)
-AC_SUBST(ROMIO_LIBLIST)
 AC_SUBST(ROMIO_TCFLAGS)
 AC_SUBST(ROMIO_TCPPFLAGS)
 AC_SUBST(ROMIO_TFFLAGS)
@@ -2268,6 +2257,7 @@
     adio/ad_testfs/Makefile
     adio/ad_ufs/Makefile
     adio/ad_xfs/Makefile
+    adio/ad_zoidfs/Makefile
     adio/common/Makefile
     adio/include/Makefile
 
@@ -2309,6 +2299,7 @@
 # adio/ad_piofs/Makefile: old and no longer used
 AC_OUTPUT
 #
+#
 # Open MPI - don't need to remove this...
 # rm -f *.o
 
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/doc: makepubpage.sh
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/doc: pubs.bib
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/doc/users-guide.tex NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/doc/users-guide.tex
--- ompi-trunk/ompi/mca/io/romio/romio/doc/users-guide.tex	2010-11-16 09:15:57.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/doc/users-guide.tex	2010-11-15 15:02:47.000000000 +0100
@@ -21,11 +21,11 @@
 
 \rule{1.75in}{.01in} \\
 
-\vskip 1.3 in
+\vskip 1.3in 
 {\Large\bf Users Guide for ROMIO: A High-Performance, \\ [1ex]
 Portable MPI-IO Implementation} \\ [4ex]
 by \\ [2ex]
-{\large\it Rajeev Thakur, Robert Ross, Ewing Lusk, and William Gropp}
+{\large\it Rajeev Thakur, Robert Ross, Ewing Lusk, William Gropp, Robert Latham}
 \vspace{1in}
 
 Mathematics and Computer Science Division
@@ -36,7 +36,7 @@
 
 
 \vspace{1.4in}
-Revised May 2004
+Revised May 2004, November 2007, April 2010
 
 \end{center}
 
@@ -215,8 +215,13 @@
 \subsection{Hints}
 \label{sec:hints}
 
+If ROMIO doesn't understand a hint, or if the value is invalid, the hint
+will be ignored. The values of hints being used by ROMIO for a file
+can be obtained at any time via {\tt MPI\_File\_get\_info}.
+
 The following hints control the data sieving optimization and are
 applicable to all file system types:
+
 \begin{itemize}
 \item \texttt{ind\_rd\_buffer\_size} -- Controls the size (in bytes) of the
 intermediate buffer used by ROMIO when performing data sieving during
@@ -370,12 +375,15 @@
 striping\_factor - 1.
 \end{itemize}
 
-Also for PFS:
+\subsubsection{Hints for PFS}
+\label{sec:hints_pfs}
 \begin{itemize}
 \item \texttt{pfs\_svr\_buf} -- Turns on PFS server buffering.  Valid
 values are \texttt{true} and \texttt{false}.  Default is \texttt{false}.
 \end{itemize}
 
+\subsubsection{Hints for XFS}
+\label{sec:hints_xfs}
 For XFS control is provided for the direct I/O optimization:
 \begin{itemize}
 \item \texttt{direct\_read} -- Controls direct I/O for reads.  Valid
@@ -384,6 +392,9 @@
 values are \texttt{true} and \texttt{false}.  Default is \texttt{false}.
 \end{itemize}
 
+\subsubsection{Hints for PVFS (v1)}
+\label{sec:hints_oldpvfs}
+
 For PVFS control is provided for the use of the listio interface.  This
 interface to PVFS allows for a collection of noncontiguous regions to be
 requested (for reading or writing) with a single operation.  This can result
@@ -400,9 +411,126 @@
 Default is \texttt{disable}.
 \end{itemize}
 
-If ROMIO doesn't understand a hint, or if the value is invalid, the hint
-will be ignored. The values of hints being used by ROMIO for a file
-can be obtained at any time via {\tt MPI\_File\_get\_info}.
+\subsubsection{Hints for PVFS (v2)}
+\label{sec:hints_pvfs}
+
+The PVFS v2 file system has many tuning parameters. 
+\begin{itemize}
+\item dtype i/o
+\end{itemize}
+
+\subsubsection{Hints for Lustre}
+
+\begin{itemize}
+\item \texttt{romio\_lustre\_co\_ratio}
+
+In the stripe-contiguous I/O pattern, each OST is accessed by a group of
+I/O clients.  CO stands for \emph{C}lient/\emph{O}ST ratio, i.e., the maximum
+number of I/O clients for each OST.
+The default is CO=1.
+
+\item \texttt{romio\_lustre\_coll\_threshold}
+
+ROMIO will not perform collective I/O if this hint is set and the I/O request
+size is bigger than this value.  When the request size is large, the
+collective communication overhead increases and the benefits of collective
+I/O become limited.  A value of 0 means always perform collective I/O.
+
+\item \texttt{romio\_lustre\_cb\_ds\_threshold}
+
+ROMIO can optimize collective I/O with a version of data sieving.  If the I/O
+request is smaller than this hint's value, though, ROMIO will not try to apply
+the data sieving optimization.
+
+\item \texttt{romio\_lustre\_ds\_in\_coll}
+
+By default, collective I/O applies read-modify-write to deal with
+non-contiguous data.  However, this introduces some overhead (I/O operations and
+locking).  The Lustre developers have run tests where data sieving showed bad
+collective write performance for some kinds of workloads.  So, to avoid this,
+we define the \texttt{romio\_lustre\_ds\_in\_coll} hint to disable the read-modify-write
+step in collective I/O.  This optimization is distinct from the one in
+independent I/O (controlled by \texttt{romio\_ds\_read} and 
+\texttt{romio\_ds\_write}).
+
+\end{itemize}
+
+\subsubsection{Hints for PANFS (Panasas)}
+
+PanFS allows users to specify the layout of a file at file-creation time.
+Layout information includes the number of StorageBlades (SB) across which the
+data is stored, the number of SBs across which a parity stripe is written, and
+the number of consecutive stripes that are placed on the same set of SBs.   The
+\texttt{panfs\_layout\_*} hints are only used if supplied at file-creation
+time.
+\begin{itemize}
+
+\item \texttt{panfs\_layout\_type} Specifies the layout of a file: 2 = RAID0,
+3 = RAID5 Parity Stripes.
+
+\item \texttt{panfs\_layout\_stripe\_unit} The size of the stripe unit
+in bytes
+
+\item \texttt{panfs\_layout\_total\_num\_comps} The total number of
+StorageBlades a file is striped across.
+
+\item \texttt{panfs\_layout\_parity\_stripe\_width}  If the layout type is
+RAID5 Parity Stripes, this hint specifies the number of StorageBlades in a
+parity stripe.
+
+\item \texttt{panfs\_layout\_parity\_stripe\_depth} If the layout type is RAID5
+Parity Stripes, this hint specifies the number of contiguous parity stripes
+written across the same set of SBs.
+
+\item \texttt{panfs\_layout\_visit\_policy} If the layout type is RAID5 Parity
+Stripes, this hint specifies the policy used to determine the parity stripe a
+given file offset is written to: 1 = Round Robin.
+\end{itemize}
+
+PanFS supports the ``concurrent write'' (CW) mode, where groups of
+cooperating clients can disable the PanFS consistency mechanisms and use
+their own consistency protocol.  Clients participating in concurrent
+write mode use application specific information to improve performance
+while maintaining file consistency.  All clients accessing the file(s)
+must enable concurrent write mode.  If any client does not enable
+concurrent write mode, then the PanFS consistency protocol will be
+invoked.  Once a file is opened in CW mode on a machine, attempts to
+open the file in non-CW mode will fail with EACCES.  If a file is already
+opened in non-CW mode, attempts to open the file in CW mode will fail
+with EACCES.  The following hint is used to enable concurrent write
+mode.
+
+\begin{itemize}
+\item \texttt{panfs\_concurrent\_write} If set to 1 at file open time,
+the file is opened using the PanFS concurrent write mode flag.
+Concurrent write mode is not a persistent attribute of the file.
+\end{itemize}
+
+Below is an example PanFS layout using the following parameters:
+\begin{verbatim}
+ 
+  - panfs_layout_type                = 3
+  - panfs_layout_total_num_comps     = 100
+  - panfs_layout_parity_stripe_width = 10
+  - panfs_layout_parity_stripe_depth = 8
+  - panfs_layout_visit_policy        = 1
+
+   Parity Stripe Group 1     Parity Stripe Group 2  . . . Parity Stripe Group 10
+  ----------------------    ----------------------        --------------------
+   SB1    SB2  ... SB10     SB11    SB12 ...  SB20  ...   SB91   SB92 ... SB100
+  -----------------------   -----------------------       ---------------------
+   D1     D2   ...  D10      D91    D92  ...  D100        D181   D182  ... D190
+   D11    D12       D20      D101   D102      D110        D191   D192      D193
+   D21    D22       D30      . . .                        . . .
+   D31    D32       D40
+   D41    D42       D50
+   D51    D52       D60
+   D61    D62       D70
+   D71    D72       D80
+   D81    D82       D90      D171   D172      D180        D261   D262   D270
+   D271   D272      D273     . . .                        . . .
+   ...
+\end{verbatim}
 
 \subsubsection{Systemwide Hints}
 \label{sec:system_hints}
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/include/mpiof.h.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/include/mpiof.h.in
--- ompi-trunk/ompi/mca/io/romio/romio/include/mpiof.h.in	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/include/mpiof.h.in	2010-11-15 16:42:05.000000000 +0100
@@ -1,6 +1,4 @@
 ! 
-!     $Id: mpiof.h.in,v 1.3 1999/08/06 18:33:09 thakur Exp $    
-! 
 !     Copyright (C) 1997 University of Chicago. 
 !     See COPYRIGHT notice in top-level directory.
 !
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/include/mpio.h.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/include/mpio.h.in
--- ompi-trunk/ompi/mca/io/romio/romio/include/mpio.h.in	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/include/mpio.h.in	2010-11-15 15:58:10.000000000 +0100
@@ -11,7 +11,6 @@
 #define MPIO_INCLUDE
 
 #include "mpi.h"
-
 /* Open MPI: We need to rename almost all of these functions, as well
    a the types to be names that conform to the prefix rule */
 #include "io_romio_conv.h"
@@ -266,10 +265,10 @@
 MPI_Fint MPI_File_c2f(MPI_File);
 #endif
 
-
-/* The foll. functions are required, because an MPIO_Request object
-   is currently used for nonblocking I/O. These functions will go away
-   after generalized requests are implemented. */
+#ifndef HAVE_MPI_GREQUEST
+/* The following functions are required if generalized requests are not
+   available, because in that case, an MPIO_Request object
+   is currently used for nonblocking I/O. */
 int MPIO_Test(MPIO_Request *, int *, MPI_Status *);
 int MPIO_Wait(MPIO_Request *, MPI_Status *);
 int MPIO_Testall(int, MPIO_Request *, int *, MPI_Status *);
@@ -281,7 +280,7 @@
 
 MPI_Fint MPIO_Request_c2f(MPIO_Request);
 MPIO_Request MPIO_Request_f2c(MPI_Fint);
-
+#endif /* HAVE_MPI_GREQUEST */
 
 /* info functions if not defined in the MPI implementation */
 #ifndef HAVE_MPI_INFO
@@ -441,9 +440,10 @@
 MPI_File PMPI_File_f2c(MPI_Fint);
 MPI_Fint PMPI_File_c2f(MPI_File);
 
-/* The foll. functions are required, because an MPIO_Request object
-   is currently used for nonblocking I/O. These functions will go away
-   after generalized requests are implemented. */
+#ifndef HAVE_MPI_GREQUEST
+/* The following functions are required if generalized requests are not
+   available, because in that case, an MPIO_Request object
+   is currently used for nonblocking I/O. */
 int PMPIO_Test(MPIO_Request *, int *, MPI_Status *);
 int PMPIO_Wait(MPIO_Request *, MPI_Status *);
 int PMPIO_Testall(int, MPIO_Request *, int *, MPI_Status *);
@@ -454,7 +454,7 @@
 int PMPIO_Testsome(int, MPIO_Request *, int *, int *, MPI_Status *);
 MPI_Fint PMPIO_Request_c2f(MPIO_Request);
 MPIO_Request PMPIO_Request_f2c(MPI_Fint);
-
+#endif /* HAVE_MPI_GREQUEST */
 
 /* info functions if not defined in the MPI implementation */
 #ifndef HAVE_MPI_INFO
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/localdefs.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/localdefs.in
--- ompi-trunk/ompi/mca/io/romio/romio/localdefs.in	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/localdefs.in	2010-11-15 16:41:03.000000000 +0100
@@ -1,4 +1,4 @@
 #! /bin/sh
-LIBS="$LIBS @ROMIO_LIBLIST@"
+LIBS="@LIBS@"
 MPI_OFFSET_TYPE="@MPI_OFFSET_TYPE@"
 FORTRAN_MPI_OFFSET="@FORTRAN_MPI_OFFSET@"
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_c2f.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_close.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_delete.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_f2c.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_amode.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_atomicity.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_byte_offset.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_group.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_info.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_position.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_position_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_size.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_type_extent.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_get_view.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iread.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iread_at.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iread_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iwrite.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iwrite_at.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_iwrite_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_open.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_preallocate.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_all.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_all_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_all_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_at.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_at_all.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_at_all_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_at_all_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_ordered.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_ordered_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_ordered_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_read_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_seek.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_seek_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_set_atomicity.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_set_info.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_set_size.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_set_view.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_sync.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_all.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_all_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_all_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_at.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_at_all.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_at_all_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_at_all_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_ordered.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_ordered_begin.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_ordered_end.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_File_write_shared.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_c2f.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_create.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_delete.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_dup.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_f2c.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_free.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_get.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_get_nkeys.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_get_nthkey.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_get_valuelen.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Info_set.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPIO_Request_c2f.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPIO_Request_f2c.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPIO_Test.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPIO_Wait.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Type_create_darray.3
Only in ompi-trunk/ompi/mca/io/romio/romio/man/man3: MPI_Type_create_subarray.3
Only in ompi-trunk/ompi/mca/io/romio/romio/mpi2-other/array/fortran: Makefile.in
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c	2010-11-16 09:15:24.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi2-other/info/info_create.c	2010-11-15 15:02:47.000000000 +0100
@@ -23,8 +23,6 @@
 #include "mpioprof.h"
 #endif
 
-extern int ADIO_Init_keyval;
-
 /*@
     MPI_Info_create - Creates a new info object
 
@@ -35,33 +33,10 @@
 @*/
 int MPI_Info_create(MPI_Info *info)
 {
-    int flag, error_code;
-
-    /* first check if ADIO has been initialized. If not, initialize it */
-    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {
-
-   /* check if MPI itself has been initialized. If not, flag an error.
-   Can't initialize it here, because don't know argc, argv */
-        MPI_Initialized(&flag);
-        if (!flag) {
-            FPRINTF(stderr, "Error: MPI_Init() must be called before using MPI_Info_create\n");
-            MPI_Abort(MPI_COMM_WORLD, 1);
-        }
-
-        MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
-                          (void *) 0);  
-
-   /* put a dummy attribute on MPI_COMM_WORLD, because we want the delete
-   function to be called when MPI_COMM_WORLD is freed. Hopefully the
-   MPI library frees MPI_COMM_WORLD when MPI_Finalize is called,
-   though the standard does not mandate this. */
-
-        MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);
-
-/* initialize ADIO */
+    int error_code;
 
-        ADIO_Init( (int *)0, (char ***)0, &error_code);
-    }
+    MPIR_MPIOInit(&error_code);
+    if (error_code != MPI_SUCCESS) goto fn_exit;
 
     *info = (MPI_Info) ADIOI_Malloc(sizeof(struct MPIR_Info));
     (*info)->cookie = MPIR_INFO_COOKIE;
@@ -71,5 +46,6 @@
     /* this is the first structure in this linked list. it is 
        always kept empty. new (key,value) pairs are added after it. */
 
+fn_exit:
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/close.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/close.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/close.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/close.c	2010-11-15 15:03:31.000000000 +0100
@@ -42,8 +42,7 @@
     HPMP_IO_WSTART(fl_xmpi, BLKMPIFILECLOSE, TRDTBLOCK, *fh);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(*mpi_fh);
 
@@ -51,15 +50,15 @@
     MPIO_CHECK_FILE_HANDLE(fh, myname, error_code);
     /* --END ERROR HANDLING-- */
 
-    if (((fh)->file_system != ADIO_PIOFS) &&
-	((fh)->file_system != ADIO_PVFS) &&
-	((fh)->file_system != ADIO_PVFS2) &&
-	((fh)->file_system != ADIO_GRIDFTP))
+    if (ADIO_Feature(fh, ADIO_SHARED_FP)) 
     {
 	ADIOI_Free((fh)->shared_fp_fname);
         /* need a barrier because the file containing the shared file
         pointer is opened with COMM_SELF. We don't want it to be
 	deleted while others are still accessing it. */ 
+	/* FIXME: It is wrong to use MPI_Barrier; the user could choose to
+	   re-implement MPI_Barrier in an unexpected way.  Either use 
+	   MPIR_Barrier_impl as in MPICH2 or PMPI_Barrier */
         MPI_Barrier((fh)->comm);
 	if ((fh)->shared_fp_fd != ADIO_FILE_NULL) {
 	    MPI_File *mpi_fh_shared = &(fh->shared_fp_fd);
@@ -71,6 +70,14 @@
 	}
     }
 
+    /* Because ROMIO expects the MPI library to provide error handler management
+     * routines, but the MPI library never participates in MPI_File_close, we
+     * have to somehow inform the MPI library that we no longer hold a reference
+     * to any user-defined error handler.  We do this by setting the errhandler
+     * at this point to MPI_ERRORS_RETURN. */
+    error_code = PMPI_File_set_errhandler(*mpi_fh, MPI_ERRORS_RETURN);
+    if (error_code != MPI_SUCCESS) goto fn_fail;
+
     ADIO_Close(fh, &error_code);
     MPIO_File_free(mpi_fh);
     /* --BEGIN ERROR HANDLING-- */
@@ -81,13 +88,11 @@
     HPMP_IO_WEND(fl_xmpi);
 #endif /* MPI_hpux */
 
-    MPIR_Nest_decr();
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIR_Nest_decr();
     error_code = MPIO_Err_return_file(fh, error_code);
     goto fn_exit;
     /* --END ERROR HANDLING-- */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/delete.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/delete.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/delete.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/delete.c	2010-11-15 15:03:31.000000000 +0100
@@ -23,8 +23,6 @@
 #include "mpioprof.h"
 #endif
 
-extern int ADIO_Init_keyval;
-
 /*@
     MPI_File_delete - Deletes a file
 
@@ -36,10 +34,9 @@
 @*/
 int MPI_File_delete(char *filename, MPI_Info info)
 {
-    int flag, error_code, file_system;
+    int error_code, file_system;
     char *tmp;
     ADIOI_Fns *fsops;
-    static char myname[] = "MPI_FILE_DELETE";
 #ifdef MPI_hpux
     int fl_xmpi;
   
@@ -49,37 +46,10 @@
 
     MPIU_UNREFERENCED_ARG(info);
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
-
-    /* first check if ADIO has been initialized. If not, initialize it */
-    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {
-        MPI_Initialized(&flag);
-
-	/* --BEGIN ERROR HANDLING-- */
-        if (!flag) {
-	    error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
-					      myname, __LINE__, MPI_ERR_OTHER, 
-					      "**initialized", 0);
-	    error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);
-	    goto fn_exit;
-	}
-	/* --END ERROR HANDLING-- */
-
-        MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
-                          (void *) 0);  
-
-	/* put a dummy attribute on MPI_COMM_WORLD, because we want the delete
-	   function to be called when MPI_COMM_WORLD is freed. Hopefully the
-	   MPI library frees MPI_COMM_WORLD when MPI_Finalize is called,
-	   though the standard does not mandate this. */
-
-        MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);
-
-	/* initialize ADIO */
-        ADIO_Init( (int *)0, (char ***)0, &error_code);
-    }
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
+    MPIR_MPIOInit(&error_code);
+    if (error_code != MPI_SUCCESS) goto fn_exit;
 
     /* resolve file system type from file name; this is a collective call */
     ADIO_ResolveFileType(MPI_COMM_SELF, filename, &file_system, &fsops, 
@@ -118,7 +88,6 @@
 #endif /* MPI_hpux */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fortran/get_extentf.c	2010-11-15 15:02:47.000000000 +0100
@@ -105,7 +105,7 @@
     datatype_c = MPI_Type_f2c(*datatype);
 
     *ierr = MPI_File_get_type_extent(fh_c,datatype_c, &extent_c);
-    *extent = (MPI_Fint) extent_c;
+    *(MPI_Aint*)extent = extent_c; /* Have to assume it's really an MPI_Aint?*/
 }
 
 #else
@@ -121,6 +121,6 @@
     
     fh_c = MPI_File_f2c(*fh);
     *ierr = MPI_File_get_type_extent(fh_c,*datatype, &extent_c);
-    *extent = (MPI_Fint) extent_c;
+    *(MPI_Aint*)extent = extent_c; /* Have to assume it's really an MPI_Aint?*/
 }
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fortran/Makefile.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fortran/Makefile.in
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fortran/Makefile.in	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fortran/Makefile.in	2010-11-15 16:38:28.000000000 +0100
@@ -6,12 +6,13 @@
 CC_SHL      = @CC_SHL@
 SHLIBNAME   = @SHLIBNAME@
 
-INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I../../include -I${srcdir}/../../adio/include -I../../adio/include -I${srcdir}/..
-CFLAGS      = -DMPIO_FORTRAN_SRC @CFLAGS@ $(MPIOPROFILE) $(INCLUDE_DIR)
+INCLUDES    = -I@MPI_INCLUDE_DIR@ -I../../include -I${srcdir}/../../adio/include -I../../adio/include -I${srcdir}/..
+CFLAGS      = -DMPIO_FORTRAN_SRC @CFLAGS@ $(MPIOPROFILE) 
 
 top_builddir  = @master_topbuild_dir@
 LIBTOOL       = @LIBTOOL@
-C_COMPILE_SHL = $(CC_SHL)
+C_COMPILE       = $(CC) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS)
+C_COMPILE_SHL   = $(CC_SHL) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS)
 
 @VPATH@
 
@@ -65,10 +66,21 @@
 .SUFFIXES: $(SUFFIXES) .p .sp .lo
 
 .c.o:
-	$(CC) $(CFLAGS) -c $<
+	@if [ "x$(VERBOSE)" != "x1" ] ; then \
+	  echo "  CC              $<" ; \
+	else \
+	  echo $(C_COMPILE) -c $< ; \
+	fi
+	@$(C_COMPILE) -c $<
 
 .c.lo:
-	$(C_COMPILE_SHL) $(CFLAGS) -c $< -o _s$*.o
+	@if [ "x$(VERBOSE)" != "x1" ] ; then \
+	  echo "  CC              $<" ; \
+	else \
+	  echo $(C_COMPILE_SHL) -c $< -o _s$*.o ; \
+	  echo mv -f _s$*.o $*.lo ; \
+	fi
+	@$(C_COMPILE_SHL) -c $< -o _s$*.o
 	@mv -f _s$*.o $*.lo
 
 .c.p:
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fsync.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fsync.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/fsync.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/fsync.c	2010-11-15 15:03:31.000000000 +0100
@@ -43,8 +43,7 @@
     HPMP_IO_START(fl_xmpi, BLKMPIFILESYNC, TRDTBLOCK, fh,
 		  MPI_DATATYPE_NULL, -1);
 #endif /* MPI_hpux */
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
     /* --BEGIN ERROR HANDLING-- */
@@ -71,7 +70,6 @@
 #endif /* MPI_hpux */
  
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_bytoff.c	2010-11-15 15:03:31.000000000 +0100
@@ -46,8 +46,6 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_BYTE_OFFSET";
 
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -68,7 +66,6 @@
     ADIOI_Get_byte_offset(fh, offset, disp);
 
 fn_exit:
-    MPIR_Nest_decr();
 
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_errh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_errh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_errh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_errh.c	2010-11-15 15:02:47.000000000 +0100
@@ -40,8 +40,9 @@
     int error_code = MPI_SUCCESS;
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_ERRHANDLER";
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (mpi_fh == MPI_FILE_NULL) {
 	*errhandler = ADIOI_DFLT_ERR_HANDLER;
@@ -63,6 +64,6 @@
     }
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_extent.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_extent.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_extent.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_extent.c	2010-11-15 15:03:31.000000000 +0100
@@ -42,8 +42,6 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_TYPE_EXTENT";
 
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -56,6 +54,5 @@
     error_code = MPI_Type_extent(datatype, extent);
 
 fn_exit:
-    MPIR_Nest_decr();
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_group.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_group.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_group.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_group.c	2010-11-15 15:03:31.000000000 +0100
@@ -41,8 +41,7 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_GROUP";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -58,7 +57,6 @@
     error_code = MPI_Comm_group(fh->comm, group);
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_info.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_info.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_info.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_info.c	2010-11-15 15:03:31.000000000 +0100
@@ -40,8 +40,7 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_INFO";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -56,7 +55,6 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return  error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_posn.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_posn.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_posn.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_posn.c	2010-11-15 15:03:31.000000000 +0100
@@ -43,8 +43,6 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_POSITION";
 
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -55,7 +53,5 @@
     ADIOI_Get_position(fh, offset);
 
 fn_exit:
-    MPIR_Nest_decr();
-
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_posn_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -41,8 +41,6 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_GET_POSITION_SHARED";
 
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -60,7 +58,5 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_size.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_size.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_size.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_size.c	2010-11-15 15:03:31.000000000 +0100
@@ -47,8 +47,6 @@
 		  MPI_DATATYPE_NULL, -1);
 #endif /* MPI_hpux */
 
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -71,7 +69,5 @@
 #endif /* MPI_hpux */
 
 fn_exit:
-    MPIR_Nest_decr();
-
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_view.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_view.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/get_view.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/get_view.c	2010-11-15 15:03:31.000000000 +0100
@@ -52,9 +52,7 @@
     int i, j, k, combiner;
     MPI_Datatype copy_etype, copy_filetype;
 
-
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -77,15 +75,16 @@
     MPI_Type_get_envelope(fh->etype, &i, &j, &k, &combiner);
     if (combiner == MPI_COMBINER_NAMED) *etype = fh->etype;
     else {
-        MPIR_Nest_incr();
+	/* FIXME: It is wrong to use MPI_Type_contiguous; the user could choose to
+	   re-implement MPI_Type_contiguous in an unexpected way.  Either use 
+	   MPIR_Type_contiguous_impl as in MPICH2 or PMPI_Type_contiguous */
         MPI_Type_contiguous(1, fh->etype, &copy_etype);
-        MPIR_Nest_decr();
 
-        MPIR_Nest_incr();
+	/* FIXME: Ditto for MPI_Type_commit - use NMPI or PMPI */
         MPI_Type_commit(&copy_etype);
-        MPIR_Nest_decr();
         *etype = copy_etype;
     }
+    /* FIXME: Ditto for MPI_Type_xxx - use NMPI or PMPI */
     MPI_Type_get_envelope(fh->filetype, &i, &j, &k, &combiner);
     if (combiner == MPI_COMBINER_NAMED) *filetype = fh->filetype;
     else {
@@ -96,8 +95,7 @@
     }
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/Makefile.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/Makefile.in
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/Makefile.in	2010-11-16 09:15:38.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/Makefile.in	2010-11-15 16:38:36.000000000 +0100
@@ -6,12 +6,17 @@
 CC_SHL      = @CC_SHL@
 SHLIBNAME   = @SHLIBNAME@
 
-INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I../../../include -I${srcdir}/../../../mpi-io -I${srcdir}/../../../adio/include -I../../../adio/include -I${srcdir}/../../../../../../src/include  -I../../../../../../src/include 
-CFLAGS      = -DHAVE_MPI_INFO_SRC @CPPFLAGS@ @CFLAGS@ $(MPIOPROFILE) $(INCLUDE_DIR)
+#DEFS            = @DEFS@ -I. -I${srcdir}
+CPPFLAGS        = @CPPFLAGS@
+INCLUDES = -I@MPI_INCLUDE_DIR@ -I../../../include -I${srcdir}/../../../mpi-io -I${srcdir}/../../../adio/include -I../../../adio/include -I${srcdir}/../../../../../../src/include  -I../../../../../../src/include 
+#CFLAGS      = -DHAVE_MPI_INFO_SRC @CPPFLAGS@ @CFLAGS@ $(MPIOPROFILE) $(INCLUDES)
+CFLAGS          = -DHAVE_MPI_INFO_SRC @CFLAGS@ $(MPIOPROFILE)
 
 top_builddir  = @master_topbuild_dir@
 LIBTOOL       = @LIBTOOL@
-C_COMPILE_SHL = $(CC_SHL)
+C_COMPILE       = $(CC) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS)
+C_COMPILE_SHL   = $(CC_SHL) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS)
+#C_COMPILE_SHL = $(CC_SHL) 
 
 @VPATH@
 
@@ -24,20 +29,42 @@
 
 .SUFFIXES: $(SUFFIXES) .p .lo
 
+#	$(CC) $(CFLAGS) -c $<
 .c.o:
-	$(CC) $(CFLAGS) -c $<
+	@if [ "x$(VERBOSE)" != "x1" ] ; then \
+	  echo "  CC              $<" ; \
+	else \
+	  echo $(C_COMPILE) -c $< ; \
+	fi
+	@$(C_COMPILE) -c $<
+#	$(C_COMPILE_SHL) $(CFLAGS) -c $< -o _s$*.o
+#	@mv -f _s$*.o $*.lo
 .c.lo:
-	$(C_COMPILE_SHL) $(CFLAGS) -c $< -o _s$*.o
+	@if [ "x$(VERBOSE)" != "x1" ] ; then \
+	  echo "  CC              $<" ; \
+	else \
+	  echo $(C_COMPILE_SHL) -c $< -o _s$*.o ; \
+	  echo mv -f _s$*.o $*.lo ; \
+	fi
+	@$(C_COMPILE_SHL) -c $< -o _s$*.o
 	@mv -f _s$*.o $*.lo
 
+#	$(CC) $(CFLAGS) -c $< -o _$*.o
 .c.p:
-	@cp $(srcdir)/$*.c _$*.c
-	$(CC) $(CFLAGS) -c _$*.c
-	@rm -f _$*.c
+	@if [ "x$(VERBOSE)" != "x1" ] ; then \
+	  echo "  CC              $<" ; \
+	else \
+	  echo $(C_COMPILE) -c $< -o _$*.o ; \
+	fi
+	@$(C_COMPILE) -c $< -o _$*.o
 
 profile: 
 	sleep 1
 
+coverage:
+	-@for file in  ${MPIO_OBJECTS:.o=.c} ; do \
+		gcov -b -f $$file ; done
+
 $(LIBNAME): $(MPIO_OBJECTS)
 	$(AR) $(LIBNAME) $(MPIO_OBJECTS)
 	$(RANLIB) $(LIBNAME)  
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c	2010-11-16 09:15:38.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/glue/mpich2/mpio_err.c	2010-11-15 15:02:47.000000000 +0100
@@ -74,7 +74,7 @@
        kind = 1: errors return
        kind = 2: errors call function
     */
-    if (e == MPI_ERRORS_RETURN || !e) {
+    if (e == MPI_ERRORS_RETURN || e == MPIR_ERRORS_THROW_EXCEPTIONS || !e) {
 	/* FIXME: This is a hack in case no error handler was set */
 	kind = 1;
 	c_errhandler = 0;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/ioreq_c2f.c	2010-11-15 15:02:47.000000000 +0100
@@ -47,14 +47,16 @@
     return (MPI_Fint) request;
 #else
     int i;
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-
+    /* We can make this test outside of the ALLFUNC mutex because it does
+       not access any shared data */
     if ((request <= (MPIO_Request) 0) || (request->cookie != ADIOI_REQ_COOKIE))
     {
-	    MPIU_THREAD_SINGLE_CS_EXIT("io"); 
 	    return (MPI_Fint) 0;
     }
+
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
     if (!ADIOI_Reqtable) {
 	ADIOI_Reqtable_max = 1024;
 	ADIOI_Reqtable = (MPIO_Request *)
@@ -73,7 +75,7 @@
     ADIOI_Reqtable_ptr++;
     ADIOI_Reqtable[ADIOI_Reqtable_ptr] = request;
 
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return (MPI_Fint) ADIOI_Reqtable_ptr;
 #endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/ioreq_f2c.c	2010-11-15 15:02:47.000000000 +0100
@@ -41,13 +41,15 @@
 #else
 MPIO_Request MPIO_Request_f2c(MPI_Fint request)
 {
+    int error_code;
+    static char myname[] = "MPIO_REQUEST_F2C";
+    MPIU_THREADPRIV_DECL;
+
 #ifndef INT_LT_POINTER
     return (MPIO_Request) request;
 #else
-    int error_code;
-    static char myname[] = "MPIO_REQUEST_F2C";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
     
     if (!request) {
 	return MPIO_REQUEST_NULL;
@@ -63,7 +65,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return ADIOI_Reqtable[request];
 #endif
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestall.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestall.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestall.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestall.c	2010-11-15 15:03:31.000000000 +0100
@@ -32,13 +32,11 @@
 		 MPI_Status statuses[])
 {
     int done, i, err; 
+    MPIU_THREADPRIV_DECL;
 
-
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
     if (count == 1)  {
-            MPIR_Nest_decr();
 	    err = MPIO_Test( requests, flag, statuses );
-            MPIR_Nest_decr();
 	    goto fn_exit;
     }
 
@@ -50,9 +48,7 @@
     done = 1;
     for (i=0; i<count; i++) {
       if (requests[i] != MPIO_REQUEST_NULL) {
-        MPIR_Nest_incr();
 	err = MPIO_Test( &requests[i], flag, &statuses[i] );
-        MPIR_Nest_decr();
 	if (!*flag) done = 0;
 	if (err) goto fn_exit;
       }
@@ -75,7 +71,7 @@
 
     err = MPI_SUCCESS;
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return err;
 }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestany.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestany.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestany.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestany.c	2010-11-15 15:03:31.000000000 +0100
@@ -32,13 +32,12 @@
 		 int *flag, MPI_Status *status)
 {
     int i, err; 
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (count == 1) {
-        MPIR_Nest_incr();
 	err = MPIO_Test( requests, flag, status );
-    	MPIR_Nest_decr();
 	if (!err) *index = 0;
 	goto fn_exit;
     }
@@ -67,9 +66,7 @@
     err = MPI_SUCCESS;
     for (i=0; i<count; i++) {
       if (requests[i] != MPIO_REQUEST_NULL) {
-        MPIR_Nest_incr();
 	err = MPIO_Test( &requests[i], flag, status );
-        MPIR_Nest_decr();
 	if (*flag) {
 	  if (!err) *index = i;
 	  break;
@@ -79,6 +76,6 @@
 
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return err;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotest.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotest.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotest.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotest.c	2010-11-15 15:02:47.000000000 +0100
@@ -47,6 +47,7 @@
 {
     int error_code;
     static char myname[] = "MPIO_TEST";
+    MPIU_THREADPRIV_DECL;
 #ifdef MPI_hpux
     int fl_xmpi;
 
@@ -55,7 +56,7 @@
     }
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (*request == MPIO_REQUEST_NULL) {
 	    error_code = MPI_SUCCESS;
@@ -88,7 +89,7 @@
 #endif /* MPI_hpux */
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestsome.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestsome.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iotestsome.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iotestsome.c	2010-11-15 15:03:31.000000000 +0100
@@ -33,13 +33,12 @@
 {
     int i, err; 
     int flag;
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (count == 1) {
-    	MPIR_Nest_incr();
 	err = MPIO_Test( requests, &flag, statuses );
-    	MPIR_Nest_decr();
 	if (!err) {
 	    if (flag) {
 		indices[0] = 0;
@@ -68,9 +67,7 @@
     *outcount = 0;
     for (i=0; i<count; i++) {
       if (requests[i] != MPIO_REQUEST_NULL) {
-    	MPIR_Nest_incr();
 	err = MPIO_Test( &requests[i], &flag, statuses );
-    	MPIR_Nest_decr();
 	if (flag) {
 	  if (!err) {
 	      indices[0] = i;
@@ -84,6 +81,6 @@
 
 fn_exit:
 
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return err;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitall.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitall.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitall.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitall.c	2010-11-15 15:03:31.000000000 +0100
@@ -31,13 +31,12 @@
 int MPIO_Waitall( int count, MPIO_Request requests[], MPI_Status statuses[] )
 {
     int notdone, i, flag, err; 
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (count == 1)  {
-    	    MPIR_Nest_incr();
 	    err = MPIO_Wait(requests, statuses);
-    	    MPIR_Nest_decr();
 	    goto fn_exit;
     }
     
@@ -46,9 +45,7 @@
 	notdone = 0;
 	for (i=0; i<count; i++) {
 	    if (requests[i] != MPIO_REQUEST_NULL) {
-    		MPIR_Nest_incr();
 		err = MPIO_Test( &requests[i], &flag, &statuses[i] );
-    		MPIR_Nest_decr();
 		if (!flag) notdone = 1;
 		if (err) goto fn_exit;
 	    }
@@ -71,7 +68,7 @@
     err = MPI_SUCCESS;
 fn_exit:
 
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return err;
 }
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitany.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitany.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitany.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitany.c	2010-11-15 15:03:31.000000000 +0100
@@ -32,13 +32,12 @@
 		 MPI_Status *status)
 {
     int i, flag, err; 
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (count == 1) {
-	MPIR_Nest_incr();
 	err = MPIO_Wait( requests, status );
-    	MPIR_Nest_decr();
 	if (!err) *index = 0;
 	goto fn_exit;
     }
@@ -79,7 +78,7 @@
     } while (flag == 0);
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return err;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowait.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowait.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowait.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowait.c	2010-11-15 15:03:31.000000000 +0100
@@ -46,6 +46,7 @@
 {
     int error_code;
     static char myname[] = "MPIO_WAIT";
+    MPIU_THREADPRIV_DECL;
 
 #ifdef MPI_hpux
     int fl_xmpi;
@@ -55,8 +56,7 @@
     }
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (*request == MPIO_REQUEST_NULL) {
 	    error_code = MPI_SUCCESS;
@@ -90,8 +90,7 @@
 #endif /* MPI_hpux */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitsome.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitsome.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iowaitsome.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iowaitsome.c	2010-11-15 15:03:31.000000000 +0100
@@ -32,13 +32,12 @@
 		  int indices[], MPI_Status *statuses)
 {
     int i, flag, err; 
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (count == 1) {
-    	MPIR_Nest_incr();
 	err = MPIO_Wait( requests, statuses );
-    	MPIR_Nest_decr();
 	if (!err) {
 	    *outcount = 1;
 	    indices[0] = 0;
@@ -77,6 +76,6 @@
     } while (*outcount == 0);
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return err;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iread.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iread.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iread.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iread.c	2010-11-15 15:03:31.000000000 +0100
@@ -52,8 +52,7 @@
 		  count);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     error_code = MPIOI_File_iread(mpi_fh, (MPI_Offset) 0, ADIO_INDIVIDUAL,
 				  buf, count, datatype, myname, request);
@@ -66,7 +65,7 @@
 #ifdef MPI_hpux
     HPMP_IO_END(fl_xmpi, mpi_fh, datatype, count);
 #endif /* MPI_hpux */
-
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
@@ -89,8 +88,6 @@
     ADIO_Offset off;
     MPI_Offset nbytes=0;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -113,6 +110,7 @@
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_READABLE(fh, myname, error_code);
     MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -137,9 +135,7 @@
         else {
             /* to maintain strict atomicity semantics with other concurrent
               operations, lock (exclusive) and call blocking routine */
-            if ((fh->file_system != ADIO_PIOFS) && 
-              (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)
-	      && (fh->file_system != ADIO_PVFS2))
+	    if (ADIO_Feature(fh, ADIO_LOCKS))
 	    {
                 ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize);
 	    }
@@ -147,9 +143,7 @@
             ADIO_ReadContig(fh, buf, count, datatype, file_ptr_type, 
 			    off, &status, &error_code);
 
-            if ((fh->file_system != ADIO_PIOFS) && 
-               (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)
-	       && (fh->file_system != ADIO_PVFS2))
+	    if (ADIO_Feature(fh, ADIO_LOCKS)) 
 	    {
                 ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize);
 	    }
@@ -163,8 +157,6 @@
 			   offset, request, &error_code); 
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iread_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iread_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iread_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iread_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -52,8 +52,7 @@
     ADIO_Offset off, shared_fp;
     MPI_Offset nbytes=0;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -68,6 +67,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -131,8 +131,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iwrite.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iwrite.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iwrite.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iwrite.c	2010-11-15 15:03:31.000000000 +0100
@@ -45,9 +45,7 @@
 		    MPI_Datatype datatype, MPI_Request *request)
 {
     int error_code=MPI_SUCCESS;
-
     static char myname[] = "MPI_FILE_IWRITE";
-
 #ifdef MPI_hpux
     int fl_xmpi;
 
@@ -55,8 +53,7 @@
 		  count);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     error_code = MPIOI_File_iwrite(mpi_fh, (MPI_Offset) 0, ADIO_INDIVIDUAL,
 				   buf, count, datatype, myname, request);
@@ -69,6 +66,7 @@
 #ifdef MPI_hpux
     HPMP_IO_END(fl_xmpi, mpi_fh, datatype, count);
 #endif /* MPI_hpux */
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
@@ -91,9 +89,6 @@
     ADIO_File fh;
     MPI_Offset nbytes=0;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
-
     fh = MPIO_File_resolve(mpi_fh);
 
     /* --BEGIN ERROR HANDLING-- */
@@ -116,6 +111,7 @@
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_WRITABLE(fh, myname, error_code);
     MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -140,9 +136,7 @@
 	else {
             /* to maintain strict atomicity semantics with other concurrent
               operations, lock (exclusive) and call blocking routine */
-            if ((fh->file_system != ADIO_PIOFS) && 
-              (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)
-	      && (fh->file_system != ADIO_PVFS2))
+	    if (ADIO_Feature(fh, ADIO_LOCKS) )
 	    {
                 ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize);
 	    }
@@ -150,9 +144,7 @@
             ADIO_WriteContig(fh, buf, count, datatype, file_ptr_type, off, 
 			     &status, &error_code);  
 
-            if ((fh->file_system != ADIO_PIOFS) && 
-               (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)
-	       && (fh->file_system != ADIO_PVFS2))
+	    if (ADIO_Feature(fh, ADIO_LOCKS) )
 	    {
                 ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize);
 	    }
@@ -168,9 +160,6 @@
 			   offset, request, &error_code);
     }
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
-
     return error_code;
 }
 #endif
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/iwrite_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -51,8 +51,7 @@
     ADIO_Offset off, shared_fp;
     static char myname[] = "MPI_FILE_IWRITE_SHARED";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -67,6 +66,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -110,8 +110,7 @@
 			   shared_fp, request, &error_code); 
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/Makefile.am NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/Makefile.am
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/Makefile.am	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/Makefile.am	2010-11-15 15:03:31.000000000 +0100
@@ -89,6 +89,7 @@
         write_ord.c \
         write_orde.c \
         write_sh.c \
-	register_datarep.c \
-	mpiu_greq.c \
-	mpich2_fileutil.c
+        register_datarep.c \
+        mpiu_greq.c \
+        mpich2_fileutil.c \
+        mpir-mpioinit.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpioimpl.h	2010-11-15 15:03:31.000000000 +0100
@@ -21,27 +21,13 @@
 #include "mpiimpl.h"
 #include "mpiimplthread.h"
 
-/* Use the routine versions of the nest macros, to avoid requiring 
-   access to the MPIR_Process and MPIR_Thread structures */
-#ifdef MPIR_Nest_incr
-#undef MPIR_Nest_incr
-#undef MPIR_Nest_decr
-#endif
-
-void MPIR_Nest_incr_export(void);
-void MPIR_Nest_decr_export(void);
-#define MPIR_Nest_incr MPIR_Nest_incr_export
-#define MPIR_Nest_decr MPIR_Nest_decr_export
-
 #else /* not ROMIO_INSIDE_MPICH2 */
 /* Any MPI implementation that wishes to follow the thread-safety and
    error reporting features provided by MPICH2 must implement these 
    four functions.  Defining these as empty should not change the behavior 
    of correct programs */
-#define MPIU_THREAD_SINGLE_CS_ENTER(_msg)
-#define MPIU_THREAD_SINGLE_CS_EXIT(_msg)
-#define MPIR_Nest_incr()
-#define MPIR_Nest_decr()
+#define MPIU_THREAD_CS_ENTER(x,y)
+#define MPIU_THREAD_CS_EXIT(x,y)
 #ifdef HAVE_WINDOWS_H
 #define MPIU_UNREFERENCED_ARG(a) a
 #else
@@ -60,6 +46,10 @@
 
 MPI_Delete_function ADIOI_End_call;
 
+/* common initialization routine */
+void MPIR_MPIOInit(int * error_code);
+
+
 #include "mpioprof.h"
 
 #ifdef MPI_hpux
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpioprof.h NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpioprof.h
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpioprof.h	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpioprof.h	2010-11-15 15:03:31.000000000 +0100
@@ -19,7 +19,7 @@
  * modification to all the files in the mpi-io directory.
  */
 #if 0
-#ifdef MPIO_BUILD_PROFILING 
+#ifdef MPIO_BUILD_PROFILING
 
 #undef MPI_File_open
 #define MPI_File_open PMPI_File_open
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io: mpir-mpioinit.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/mpiu_greq.c	2010-11-15 15:03:31.000000000 +0100
@@ -27,9 +27,7 @@
     status->MPI_ERROR = foo;
 
     /* and let Test|Wait know we weren't canceled */
-    MPIR_Nest_incr();
     MPI_Status_set_cancelled(status, 0);
-    MPIR_Nest_decr();
 
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/open.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/open.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/open.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/open.c	2010-11-15 15:03:31.000000000 +0100
@@ -47,15 +47,13 @@
     MPI_Comm dupcomm;
     ADIOI_Fns *fsops;
     static char myname[] = "MPI_FILE_OPEN";
-
 #ifdef MPI_hpux
     int fl_xmpi;
 
     HPMP_IO_OPEN_START(fl_xmpi, comm);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     /* --BEGIN ERROR HANDLING-- */
     if (comm == MPI_COMM_NULL)
@@ -118,34 +116,8 @@
 */
 
 /* check if ADIO has been initialized. If not, initialize it */
-    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {
-	MPI_Initialized(&flag);
-
-	/* --BEGIN ERROR HANDLING-- */
-	if (!flag) {
-	    error_code = MPIO_Err_create_code(MPI_SUCCESS,
-					      MPIR_ERR_RECOVERABLE,
-					      myname, __LINE__, MPI_ERR_OTHER,
-					      "**initialized", 0);
-	    goto fn_fail;
-	}
-	/* --END ERROR HANDLING-- */
-
-	MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
-			  (void *) 0);  
-
-/* put a dummy attribute on MPI_COMM_WORLD, because we want the delete
-   function to be called when MPI_COMM_WORLD is freed. Hopefully the
-   MPI library frees MPI_COMM_WORLD when MPI_Finalize is called,
-   though the standard does not mandate this. */
-
-	MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);
-
-/* initialize ADIO */
-
-	ADIO_Init( (int *)0, (char ***)0, &error_code);
-    }
-
+    MPIR_MPIOInit(&error_code);
+    if (error_code != MPI_SUCCESS) goto fn_fail;
 
     file_system = -1;
 
@@ -161,24 +133,6 @@
 	goto fn_fail;
     }
 
-    /* Test for invalid flags in amode.
-     *
-     * eventually we should allow the ADIO implementations to test for 
-     * invalid flags through some functional interface rather than having
-     *  these tests here. -- Rob, 06/06/2001
-     */
-    if (((file_system == ADIO_PIOFS) ||
-	 (file_system == ADIO_PVFS) ||
-	 (file_system == ADIO_PVFS2) ||
-	 (file_system == ADIO_GRIDFTP)) && 
-        (amode & MPI_MODE_SEQUENTIAL))
-    {
-	error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
-					  myname, __LINE__,
-					  MPI_ERR_UNSUPPORTED_OPERATION, 
-					  "**iosequnsupported", 0);
-	goto fn_fail;
-    }
     /* --END ERROR HANDLING-- */
 
     /* strip off prefix if there is one, but only skip prefixes
@@ -202,13 +156,24 @@
     }
     /* --END ERROR HANDLING-- */
 
+    /* if MPI_MODE_SEQUENTIAL requested, file systems cannot do explicit offset
+     * or independent file pointer accesses, leaving not much else aside from
+     * shared file pointer accesses. */
+    if ( !ADIO_Feature((*fh), ADIO_SHARED_FP) && (amode & MPI_MODE_SEQUENTIAL)) 
+    {
+        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
+			                  myname, __LINE__, 
+					  MPI_ERR_UNSUPPORTED_OPERATION,
+					  "**iosequnsupported", 0);
+	ADIO_Close(*fh, &error_code);
+	goto fn_fail;
+    }
+
     /* determine name of file that will hold the shared file pointer */
     /* can't support shared file pointers on a file system that doesn't
        support file locking. */
-    if ((error_code == MPI_SUCCESS) && ((*fh)->file_system != ADIO_PIOFS)
-          && ((*fh)->file_system != ADIO_PVFS) 
-	  && ((*fh)->file_system != ADIO_PVFS2)
-	  && ((*fh)->file_system != ADIO_GRIDFTP) ){
+    if ((error_code == MPI_SUCCESS) && 
+		    ADIO_Feature((*fh), ADIO_SHARED_FP)) {
 	MPI_Comm_rank(dupcomm, &rank);
 	ADIOI_Shfp_fname(*fh, rank);
 
@@ -226,14 +191,11 @@
     HPMP_IO_OPEN_END(fl_xmpi, *fh, comm);
 #endif /* MPI_hpux */
 
-    MPIR_Nest_decr();
-
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 fn_fail:
     /* --BEGIN ERROR HANDLING-- */
-    MPIR_Nest_decr();
     error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);
     goto fn_exit;
     /* --END ERROR HANDLING-- */
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/prealloc.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/prealloc.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/prealloc.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/prealloc.c	2010-11-15 15:03:31.000000000 +0100
@@ -46,8 +46,7 @@
 		  fh, MPI_DATATYPE_NULL, -1);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -74,7 +73,7 @@
     }
     /* --END ERROR HANDLING-- */
 
-    if (size == 0) return MPI_SUCCESS;
+    if (size == 0) goto fn_exit;
 
     ADIOI_TEST_DEFERRED(fh, myname, &error_code);
 
@@ -97,8 +96,7 @@
 
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     /* TODO: bcast result? */
     if (!mynod) return error_code;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_allb.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_allb.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_allb.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_allb.c	2010-11-15 15:03:31.000000000 +0100
@@ -62,8 +62,7 @@
     int error_code, datatype_size;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -96,6 +95,7 @@
 	error_code = MPIO_Err_return_file(fh, error_code);
 	goto fn_exit;
     }
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     fh->split_coll_count = 1;
@@ -109,8 +109,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_all.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_all.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_all.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_all.c	2010-11-15 15:03:31.000000000 +0100
@@ -75,8 +75,7 @@
     int error_code, datatype_size;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -101,6 +100,7 @@
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_READABLE(fh, myname, error_code);
     MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIO_ReadStridedColl(fh, buf, count, datatype, file_ptr_type,
@@ -112,8 +112,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_alle.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_alle.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_alle.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_alle.c	2010-11-15 15:03:31.000000000 +0100
@@ -58,8 +58,7 @@
 
     MPIU_UNREFERENCED_ARG(buf);
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -82,8 +81,7 @@
     fh->split_coll_count = 0;
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read.c	2010-11-15 15:03:31.000000000 +0100
@@ -75,8 +75,7 @@
     ADIO_File fh;
     ADIO_Offset off;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -96,6 +95,11 @@
     /* --END ERROR HANDLING-- */
 
     MPI_Type_size(datatype, &datatype_size);
+
+    /* --BEGIN ERROR HANDLING-- */
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
+    /* --END ERROR HANDLING-- */
+
     if (count*datatype_size == 0)
     {
 #ifdef HAVE_STATUS_SET_BYTES
@@ -128,22 +132,15 @@
 	}
 
         /* if atomic mode requested, lock (exclusive) the region, because
-           there could be a concurrent noncontiguous request. Locking doesn't
-           work on PIOFS and PVFS, and on NFS it is done in the
-           ADIO_ReadContig.
+           there could be a concurrent noncontiguous request.
 	 */
-
-        if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && 
-            (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) && 
-	   	 (fh->file_system != ADIO_PVFS2))
+        if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS))
             ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize);
 
 	ADIO_ReadContig(fh, buf, count, datatype, file_ptr_type,
 			off, status, &error_code); 
 
-        if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && 
-            (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) &&
-	    	(fh->file_system != ADIO_PVFS2))
+        if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS))
             ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize);
     }
     else
@@ -159,8 +156,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_ordb.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_ordb.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_ordb.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_ordb.c	2010-11-15 15:03:31.000000000 +0100
@@ -45,8 +45,7 @@
     ADIO_File fh;
     static char myname[] = "MPI_FILE_READ_ORDERED_BEGIN";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -72,6 +71,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_TEST_DEFERRED(fh, myname, &error_code);
@@ -107,8 +107,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_ord.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_ord.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_ord.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_ord.c	2010-11-15 15:03:31.000000000 +0100
@@ -48,8 +48,7 @@
     ADIO_Offset shared_fp=0;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -64,6 +63,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_TEST_DEFERRED(fh, "MPI_File_read_ordered", &error_code);
@@ -99,8 +99,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     /* FIXME: Check for error code from ReadStridedColl? */
     return error_code;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_orde.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_orde.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_orde.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_orde.c	2010-11-15 15:02:47.000000000 +0100
@@ -43,7 +43,7 @@
 
     MPIU_UNREFERENCED_ARG(buf);
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -67,7 +67,7 @@
     fh->split_coll_count = 0;
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/read_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/read_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -48,8 +48,7 @@
     ADIO_Offset off, shared_fp;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -60,6 +59,11 @@
     /* --END ERROR HANDLING-- */
 
     MPI_Type_size(datatype, &datatype_size);
+
+    /* --BEGIN ERROR HANDLING-- */
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
+    /* --END ERROR HANDLING-- */
+
     if (count*datatype_size == 0)
     {
 #ifdef HAVE_STATUS_SET_BYTES
@@ -124,8 +128,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/register_datarep.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/register_datarep.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/register_datarep.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/register_datarep.c	2010-11-15 15:02:47.000000000 +0100
@@ -23,8 +23,6 @@
 #include "mpioprof.h"
 #endif
 
-extern int ADIO_Init_keyval;
-
 /*@
   MPI_Register_datarep - Register functions for user-defined data 
                          representations
@@ -56,11 +54,11 @@
 			 MPI_Datarep_extent_function *extent_fn,
 			 void *state)
 {
-    int error_code, flag;
+    int error_code;
     ADIOI_Datarep *datarep;
     static char myname[] = "MPI_REGISTER_DATAREP";
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     /* --BEGIN ERROR HANDLING-- */
     /* check datarep name (use strlen instead of strnlen because
@@ -79,33 +77,8 @@
     }
     /* --END ERROR HANDLING-- */
 
-    /* first check if ADIO has been initialized. If not, initialize it */
-    if (ADIO_Init_keyval == MPI_KEYVAL_INVALID) {
-        MPI_Initialized(&flag);
-
-	/* --BEGIN ERROR HANDLING-- */
-        if (!flag) {
-	    error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
-					      myname, __LINE__, MPI_ERR_OTHER, 
-					      "**initialized", 0);
-	    error_code = MPIO_Err_return_file(MPI_FILE_NULL, error_code);
-	    goto fn_exit;
-	}
-	/* --END ERROR HANDLING-- */
-
-        MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_End_call, &ADIO_Init_keyval,
-                          (void *) 0);  
-
-	/* put a dummy attribute on MPI_COMM_WORLD, because we want the delete
-	   function to be called when MPI_COMM_WORLD is freed. Hopefully the
-	   MPI library frees MPI_COMM_WORLD when MPI_Finalize is called,
-	   though the standard does not mandate this. */
-
-        MPI_Attr_put(MPI_COMM_WORLD, ADIO_Init_keyval, (void *) 0);
-
-	/* initialize ADIO */
-        ADIO_Init( (int *)0, (char ***)0, &error_code);
-    }
+    MPIR_MPIOInit(&error_code);
+    if (error_code != MPI_SUCCESS) goto fn_exit;
 
     /* --BEGIN ERROR HANDLING-- */
     /* check datarep isn't already registered */
@@ -156,7 +129,7 @@
     error_code = MPI_SUCCESS;
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/seek.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/seek.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/seek.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/seek.c	2010-11-15 15:03:31.000000000 +0100
@@ -47,8 +47,7 @@
     HPMP_IO_START(fl_xmpi, BLKMPIFILESEEK, TRDTBLOCK, fh, MPI_DATATYPE_NULL, -1);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -133,7 +132,6 @@
     error_code = MPI_SUCCESS;
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/seek_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/seek_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/seek_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/seek_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -39,8 +39,7 @@
     MPI_Offset curr_offset, eof_offset, tmp_offset;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -177,8 +176,7 @@
     error_code = MPI_SUCCESS;
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_atom.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_atom.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_atom.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_atom.c	2010-11-15 15:03:31.000000000 +0100
@@ -39,8 +39,7 @@
     ADIO_Fcntl_t *fcntl_struct;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -85,7 +84,6 @@
     ADIOI_Free(fcntl_struct);
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_errh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_errh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_errh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_errh.c	2010-11-15 15:02:47.000000000 +0100
@@ -38,8 +38,9 @@
     int error_code = MPI_SUCCESS;
     static char myname[] = "MPI_FILE_SET_ERRHANDLER";
     ADIO_File fh;
+    MPIU_THREADPRIV_DECL;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     if (mpi_fh == MPI_FILE_NULL) {
 	ADIOI_DFLT_ERR_HANDLER = errhandler;
@@ -68,6 +69,6 @@
     }
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_info.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_info.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_info.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_info.c	2010-11-15 15:03:31.000000000 +0100
@@ -38,8 +38,7 @@
     static char myname[] = "MPI_FILE_SET_INFO";
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -57,8 +56,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_size.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_size.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_size.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_size.c	2010-11-15 15:03:31.000000000 +0100
@@ -46,8 +46,7 @@
 		  MPI_DATATYPE_NULL, -1);
 #endif /* MPI_hpux */
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -92,8 +91,7 @@
 #endif /* MPI_hpux */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_view.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_view.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/set_view.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/set_view.c	2010-11-15 15:03:31.000000000 +0100
@@ -44,8 +44,7 @@
     ADIO_Offset shared_fp, byte_off;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -145,9 +144,7 @@
     /* --END ERROR HANDLING-- */
 
     /* reset shared file pointer to zero */
-    if ((fh->file_system != ADIO_PIOFS) &&
-	(fh->file_system != ADIO_PVFS) &&
-	(fh->file_system != ADIO_PVFS2) && 
+    if (ADIO_Feature(fh, ADIO_SHARED_FP) && 
         (fh->shared_fp_fd != ADIO_FILE_NULL))
     {
 	/* only one process needs to set it to zero, but I don't want to 
@@ -166,16 +163,13 @@
 	/* --END ERROR HANDLING-- */
     }
 
-    if ((fh->file_system != ADIO_PIOFS) &&
-	(fh->file_system != ADIO_PVFS) &&
-	(fh->file_system != ADIO_PVFS2 ))
+    if (ADIO_Feature(fh, ADIO_SHARED_FP))
     {
 	MPI_Barrier(fh->comm); /* for above to work correctly */
     }
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_allb.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_allb.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_allb.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_allb.c	2010-11-15 15:03:31.000000000 +0100
@@ -61,8 +61,7 @@
     int error_code, datatype_size;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -96,6 +95,7 @@
     MPI_Type_size(datatype, &datatype_size);
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     fh->split_datatype = datatype;
@@ -108,8 +108,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_all.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_all.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_all.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_all.c	2010-11-15 15:03:31.000000000 +0100
@@ -75,8 +75,7 @@
     int error_code, datatype_size;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -101,6 +100,7 @@
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_WRITABLE(fh, myname, error_code);
     MPIO_CHECK_NOT_SEQUENTIAL_MODE(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIO_WriteStridedColl(fh, buf, count, datatype, file_ptr_type,
@@ -112,8 +112,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_alle.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_alle.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_alle.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_alle.c	2010-11-15 15:03:31.000000000 +0100
@@ -57,8 +57,7 @@
 
     MPIU_UNREFERENCED_ARG(buf);
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -87,8 +86,7 @@
     error_code = MPI_SUCCESS;
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write.c	2010-11-15 15:03:31.000000000 +0100
@@ -75,8 +75,7 @@
     ADIO_Offset off;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -96,6 +95,11 @@
     /* --END ERROR HANDLING-- */
 
     MPI_Type_size(datatype, &datatype_size);
+
+    /* --BEGIN ERROR HANDLING-- */
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
+    /* --END ERROR HANDLING-- */
+
     if (count*datatype_size == 0)
     {
 #ifdef HAVE_STATUS_SET_BYTES
@@ -133,9 +137,7 @@
            ADIO_WriteContig.
 	 */
 
-        if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && 
-            (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS) &&
-	    	(fh->file_system != ADIO_PVFS2))
+        if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS))
 	{
             ADIOI_WRITE_LOCK(fh, off, SEEK_SET, bufsize);
 	}
@@ -143,9 +145,7 @@
 	ADIO_WriteContig(fh, buf, count, datatype, file_ptr_type,
 		     off, status, &error_code); 
 
-        if ((fh->atomicity) && (fh->file_system != ADIO_PIOFS) && 
-            (fh->file_system != ADIO_NFS) && (fh->file_system != ADIO_PVFS)&&
-	    	(fh->file_system != ADIO_PVFS2))
+        if ((fh->atomicity) && ADIO_Feature(fh, ADIO_LOCKS))
 	{
             ADIOI_UNLOCK(fh, off, SEEK_SET, bufsize);
 	}
@@ -163,8 +163,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_ordb.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_ordb.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_ordb.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_ordb.c	2010-11-15 15:03:31.000000000 +0100
@@ -45,8 +45,7 @@
     ADIO_Offset shared_fp;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -71,6 +70,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_TEST_DEFERRED(fh, myname, &error_code);
@@ -109,8 +109,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     /* FIXME: Check for error code from WriteStridedColl? */
     return error_code;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_ord.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_ord.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_ord.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_ord.c	2010-11-15 15:03:31.000000000 +0100
@@ -48,8 +48,7 @@
     ADIO_Offset shared_fp;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -64,6 +63,7 @@
     /* --BEGIN ERROR HANDLING-- */
     MPIO_CHECK_INTEGRAL_ETYPE(fh, count, datatype_size, myname, error_code);
     MPIO_CHECK_FS_SUPPORTS_SHARED(fh, myname, error_code);
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
     /* --END ERROR HANDLING-- */
 
     ADIOI_TEST_DEFERRED(fh, myname, &error_code);
@@ -102,8 +102,7 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
 
     /* FIXME: Check for error code from WriteStridedColl? */
     return error_code;
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_orde.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_orde.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_orde.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_orde.c	2010-11-15 15:02:47.000000000 +0100
@@ -43,7 +43,7 @@
 
     MPIU_UNREFERENCED_ARG(buf);
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -68,6 +68,6 @@
 
 
 fn_exit:
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return MPI_SUCCESS;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_sh.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_sh.c
--- ompi-trunk/ompi/mca/io/romio/romio/mpi-io/write_sh.c	2010-11-16 09:15:42.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/mpi-io/write_sh.c	2010-11-15 15:03:31.000000000 +0100
@@ -48,8 +48,7 @@
     ADIO_Offset off, shared_fp;
     ADIO_File fh;
 
-    MPIU_THREAD_SINGLE_CS_ENTER("io");
-    MPIR_Nest_incr();
+    MPIU_THREAD_CS_ENTER(ALLFUNC,);
 
     fh = MPIO_File_resolve(mpi_fh);
 
@@ -60,6 +59,11 @@
     /* --END ERROR HANDLING-- */
 
     MPI_Type_size(datatype, &datatype_size);
+
+    /* --BEGIN ERROR HANDLING-- */
+    MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code);
+    /* --END ERROR HANDLING-- */
+
     if (count*datatype_size == 0) {
 #ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
@@ -124,7 +128,6 @@
     /* --END ERROR HANDLING-- */
 
 fn_exit:
-    MPIR_Nest_decr();
-    MPIU_THREAD_SINGLE_CS_EXIT("io");
+    MPIU_THREAD_CS_EXIT(ALLFUNC,);
     return error_code;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/aggregation1.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/aggregation1.c
--- ompi-trunk/ompi/mca/io/romio/romio/test/aggregation1.c	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/aggregation1.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,3 +1,9 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*  
+ *  (C) 2007 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
 /* Test case from John Bent (ROMIO req #835)
  * Aggregation code was not handling certain access patterns when collective
  * buffering forced */
@@ -57,6 +63,7 @@
             MPI_Info_get( info, key, 1024, value, &dummy_int ); 
             printf( "%s\n", value );
         }
+	MPI_Info_free(&info);
     }
     MPI_Barrier( MPI_COMM_WORLD );
 }
@@ -163,6 +170,7 @@
     if( (mpi_ret = MPI_File_close( &rfh ) ) != MPI_SUCCESS ) {
         fatal_error( mpi_ret, NULL, "close for read" );
     }
+    free(verify_buf);
 
 }
 
@@ -244,8 +252,9 @@
                 corrupt_blocks, nproc * NUM_OBJS );
 	}
     }
+    MPI_Info_free(&info);
 
     MPI_Finalize();
-
+    free(prog);
     exit( 0 );
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/aggregation2.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/aggregation2.c
--- ompi-trunk/ompi/mca/io/romio/romio/test/aggregation2.c	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/aggregation2.c	2010-11-15 15:02:47.000000000 +0100
@@ -1,3 +1,9 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*  
+ *  (C) 2007 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
 /* Look for regressions in aggregator code.  A more simple access pattern than
  * aggregation1 */
 
@@ -76,6 +82,7 @@
 	    fprintf( stdout, " No Errors\n" );
 	}
     }
+    MPI_Info_free(&info);
     MPI_Finalize();
 
     return 0;
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test: big_extents.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/coll_test.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/coll_test.c
--- ompi-trunk/ompi/mca/io/romio/romio/test/coll_test.c	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/coll_test.c	2010-11-15 15:03:31.000000000 +0100
@@ -125,20 +125,6 @@
     }
 /* end of initialization */
 
-#if 0 
-    /* XXX: make the romio testcases handle more than one command line 
-     * argument.. like --aggregation  */
-    
-    /* for deferred open: hint stuff */
-    MPI_Info_create(&info);
-    MPI_Info_set(info, "romio_no_indep_rw", "true");
-    MPI_Info_set(info, "romio_cb_read", "enable");
-    MPI_Info_set(info, "romio_cb_write", "enable");
-    MPI_Info_set(info, "cb_nodes", "1");
-    MPI_Info_set(info, "cb_config_list", "schwinn.mcs.anl.gov:1");
-#endif
-
-
     /* write the array to the file */
     errcode = MPI_File_open(MPI_COMM_WORLD, filename, 
 		    MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);
@@ -152,6 +138,31 @@
     errcode = MPI_File_close(&fh);
     if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close");
 
+    if (!mynod) {
+        /* wkl suggests potential for false " No Errors" if both read 
+	 * and write use the same file view */
+        /* solution: rank 0 reads entire file and checks write values */
+	errcode = MPI_File_open(MPI_COMM_SELF, filename, 
+			MPI_MODE_RDONLY, info, &fh);
+        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open");
+
+        readbuf = (int *) malloc(array_size * sizeof(int));
+        errcode = MPI_File_read(fh, readbuf, array_size, MPI_INT, &status);
+        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read");
+
+        errcode = MPI_File_close(&fh);
+        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close");
+
+        for (i=0; i<array_size; i++)
+            if (readbuf[i] != i) {
+                errs++;
+                fprintf(stderr, "Error: write integer %d but read %d\n", 
+				i,readbuf[i]);
+                break;
+            }
+        free(readbuf);
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
 
     /* now read it back */
     readbuf = (int *) malloc(bufcount * sizeof(int));
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/file_info.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/file_info.c
--- ompi-trunk/ompi/mca/io/romio/romio/test/file_info.c	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/file_info.c	2010-11-15 15:03:31.000000000 +0100
@@ -3,15 +3,32 @@
  *  (C) 2001 by Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
+
+/* Change for BG/L made by Hao Yu, yuh@us.ibm.com
+ */
+
 #include "mpi.h"
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 
+/* this test wants to compare the hints it gets from a file with a set of
+ * default hints.  These hints are specific to the MPI-IO implementation, so
+ * pick one of the following profiles to use */
+
+#   define DFLT_CB_BUFFER_SIZE     16777216
+#   define DFLT_IND_RD_BUFFER_SIZE 4194304
+#   define DFLT_IND_WR_BUFFER_SIZE 524288
+#   define DFLT_ROMIO_CB_READ      "automatic"
+#   define DFLT_ROMIO_CB_WRITE     "automatic"
 /* #undef INFO_DEBUG */
 
-/* Set verbose to 0 only if you want no information about any failure */
-static int verbose = 1;
+/* Test will print out information about unexpected hint keys or values that
+ * differ from the default.  Since this is often interesting but rarely an
+ * error, default will be to increment error count for true error conditions
+ * but not print out these "interesting" non-error cases. */
+
+static int verbose = 0;
 
 int main(int argc, char **argv)
 {
@@ -74,35 +91,28 @@
 	    /* no check */
 	}
 	else if (!strcmp("cb_buffer_size", key)) {
-	    if (atoi(value) != 16777216) {
+	    if (atoi(value) != DFLT_CB_BUFFER_SIZE) {
 		errs++;
 		if (verbose) fprintf(stderr, "cb_buffer_size is %d; should be %d\n",
-				     atoi(value), 16777216);
+				     atoi(value), DFLT_CB_BUFFER_SIZE);
 	    }
 	}
 	else if (!strcmp("romio_cb_read", key)) {
-	    if (strcmp("automatic", value)) {
+	    if (strcmp(DFLT_ROMIO_CB_READ, value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n",
-				     value, "automatic");
+				     value, DFLT_ROMIO_CB_READ);
 	    }
 	}
 	else if (!strcmp("romio_cb_write", key)) {
-	    if (strcmp("automatic", value)) {
+	    if (strcmp(DFLT_ROMIO_CB_WRITE, value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n",
-				     value, "automatic");
+				     value, DFLT_ROMIO_CB_WRITE);
 	    }
 	}
 	else if (!strcmp("cb_nodes", key)) {
 	    /* unreliable test -- just ignore value */
-#if 0
-	    if (atoi(value) != 1) {
-		errs++;
-		if (verbose) fprintf(stderr, "cb_nodes is %d; should be %d\n", atoi(value),
-				     1);
-	    }
-#endif
 	}
 	else if (!strcmp("romio_no_indep_rw", key)) {
 	    if (strcmp("false", value)) {
@@ -112,17 +122,17 @@
 	    }
 	}
 	else if (!strcmp("ind_rd_buffer_size", key)) {
-	    if (atoi(value) != 4194304) {
+	    if (atoi(value) != DFLT_IND_RD_BUFFER_SIZE) {
 		errs++;
 		if (verbose) fprintf(stderr, "ind_rd_buffer_size is %d; should be %d\n",
-				     atoi(value), 4194304);
+				     atoi(value), DFLT_IND_RD_BUFFER_SIZE);
 	    }
 	}
 	else if (!strcmp("ind_wr_buffer_size", key)) {
-	    if (atoi(value) != 524288) {
+	    if (atoi(value) != DFLT_IND_WR_BUFFER_SIZE) {
 		errs++;
 		if (verbose) fprintf(stderr, "ind_wr_buffer_size is %d; should be %d\n",
-				     atoi(value), 524288);
+				     atoi(value), DFLT_IND_WR_BUFFER_SIZE);
 	    }
 	}
 	else if (!strcmp("romio_ds_read", key)) {
@@ -134,20 +144,26 @@
 	}
 	else if (!strcmp("romio_ds_write", key)) {
 	    /* Unreliable test -- value is file system dependent.  Ignore. */
-#if 0
-	    if (strcmp("automatic", value)) {
-		errs++;
-		if (verbose) fprintf(stderr, "romio_ds_write is set to %s; should be %s\n",
-				     value, "automatic");
-	    }
-#endif
 	}
 	else if (!strcmp("cb_config_list", key)) {
+#ifndef SKIP_CB_CONFIG_LIST_TEST
 	    if (strcmp("*:1", value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n",
 				     value, "*:1");
 	    }
+#endif
+	}
+	/* don't care about the defaults for these keys */
+	else if (!strcmp("romio_cb_pfr", key)) {
+	}
+	else if (!strcmp("romio_cb_fr_types", key)) {
+	}
+	else if (!strcmp("romio_cb_fr_alignment", key)) {
+	}
+	else if (!strcmp("romio_cb_ds_threshold", key)) {
+	}
+	else if (!strcmp("romio_cb_alltoall", key)) {
 	}
 	else {
 	    if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key);
@@ -202,8 +218,10 @@
     /* the striping unit in bytes */
     MPI_Info_set(info, "striping_unit", "131072");
 
+#ifndef SKIP_CB_CONFIG_LIST_TEST
     /* set the cb_config_list so we'll get deterministic cb_nodes output */
     MPI_Info_set(info, "cb_config_list", "*:*");
+#endif
 
     /* the I/O device number from which to start striping the file.
        accepted only if 0 <= value < default_striping_factor; 
@@ -251,17 +269,17 @@
 	    }
 	}
 	else if (!strcmp("romio_cb_read", key)) {
-	    if (strcmp("automatic", value)) {
+	    if (strcmp(DFLT_ROMIO_CB_READ, value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "romio_cb_read is set to %s; should be %s\n",
-				     value, "automatic");
+				     value, DFLT_ROMIO_CB_READ);
 	    }
 	}
 	else if (!strcmp("romio_cb_write", key)) {
-	    if (strcmp("automatic", value)) {
+	    if (strcmp(DFLT_ROMIO_CB_WRITE, value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "romio_cb_write is set to %s; should be %s\n",
-				     value, "automatic");
+				     value, DFLT_ROMIO_CB_WRITE);
 	    }
 	}
 	else if (!strcmp("cb_nodes", key)) {
@@ -301,21 +319,52 @@
 	}
 	else if (!strcmp("romio_ds_write", key)) {
 	    /* Unreliable test -- value is file system dependent.  Ignore. */
-#if 0
-	    if (strcmp("automatic", value)) {
-		errs++;
-		if (verbose) fprintf(stderr, "romio_ds_write is set to %s; should be %s\n",
-				     value, "automatic");
-	    }
-#endif
 	}
 	else if (!strcmp("cb_config_list", key)) {
+#ifndef SKIP_CB_CONFIG_LIST_TEST
 	    if (strcmp("*:*", value)) {
 		errs++;
 		if (verbose) fprintf(stderr, "cb_config_list is set to %s; should be %s\n",
 				     value, "*:*");
 	    }
+#endif
+	}
+	else if (!strcmp("romio_cb_pfr", key)) { /* value is checked against "disable" */
+   	    if(strcmp("disable", value)) {
+		errs++;
+		if (verbose) fprintf(stderr, "romio_cb_pfr is set to %s; should be %s\n",
+				     value, "disable"); /* report the value actually compared above */
+	    }
+	}
+	else if (!strcmp("romio_cb_fr_types", key)) {
+   	    if(strcmp("aar", value)) {
+		errs++;
+		if (verbose) fprintf(stderr, "romio_cb_fr_types is set to %s; should be %s\n",
+				     value, "aar");
+	    }
+	}
+	else if (!strcmp("romio_cb_fr_alignment", key)) {
+   	    if(strcmp("1", value)) {
+		errs++;
+		if (verbose) fprintf(stderr, "romio_cb_fr_alignment is set to %s; should be %s\n",
+				     value, "1");
+	    }
+	}
+	else if (!strcmp("romio_cb_ds_threshold", key)) {
+   	    if(strcmp("0", value)) {
+		errs++;
+		if (verbose) fprintf(stderr, "romio_cb_ds_threshold is set to %s; should be %s\n",
+				     value, "0");
+	    }
+	}
+	else if (!strcmp("romio_cb_alltoall", key)) {
+   	    if(strcmp("automatic", value)) {
+		errs++;
+		if (verbose) fprintf(stderr, "romio_cb_alltoall is set to %s; should be %s\n",
+				     value, "automatic");
+	    }
 	}
+
 	else {
 	    if (verbose) fprintf(stderr, "unexpected key %s (not counted as an error)\n", key);
 	}
@@ -334,12 +383,3 @@
     MPI_Finalize();
     return 0;
 }
-
-
-
-
-
-
-
-
-
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test: hindexed.c
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/Makefile.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/Makefile.in
--- ompi-trunk/ompi/mca/io/romio/romio/test/Makefile.in	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/Makefile.in	2010-11-19 09:25:30.000000000 +0100
@@ -3,13 +3,13 @@
 INCLUDE_DIR = @ROMIO_INCLUDE@
 # because := is not universally avalible, we have to play games to use the
 # user-specified LDFLAGS and OUR_LIBS env. variables (if set)
-OUR_LIBS = @TEST_LIBNAME@ @MPI_LIB@ @ROMIO_LIBLIST@ ${LDFLAGS} ${LIBS}
+OUR_LIBS = @TEST_LIBNAME@ @MPI_LIB@ ${LDFLAGS} ${LIBS}
 USER_CFLAGS = @CPPFLAGS@ @USER_CFLAGS@ $(INCLUDE_DIR)
 USER_FFLAGS = @CPPFLAGS@ @USER_FFLAGS@ $(INCLUDE_DIR)
 CTESTS = simple perf async coll_test coll_perf misc file_info excl large_array \
      atomicity noncontig i_noncontig noncontig_coll split_coll shared_fp \
      large_file psimple error status noncontig_coll2 aggregation1 aggregation2 \
-     async-multiple ordered_fp
+     async-multiple ordered_fp hindexed
 FTESTS = fcoll_test fperf fmisc pfcoll_test 
 srcdir=@srcdir@
 
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/noncontig_coll2.c NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/noncontig_coll2.c
--- ompi-trunk/ompi/mca/io/romio/romio/test/noncontig_coll2.c	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/noncontig_coll2.c	2010-11-15 15:02:47.000000000 +0100
@@ -25,8 +25,6 @@
 int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, 
 		char *msg, int verbose); 
 
-static int cb_config_list_keyval = MPI_KEYVAL_INVALID;
-
 #define ADIOI_Free free
 #define ADIOI_Malloc malloc
 #define FPRINTF fprintf
@@ -40,16 +38,6 @@
 typedef struct ADIO_cb_name_arrayD *ADIO_cb_name_array;
 
 void handle_error(int errcode, char *str);
-int cb_copy_name_array(MPI_Comm comm, 
-		       int *keyval, 
-		       void *extra, 
-		       void *attr_in,
-		       void **attr_out, 
-		       int *flag);
-int cb_delete_name_array(MPI_Comm comm, 
-			 int *keyval, 
-			 void *attr_val, 
-			 void *extra);
 int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp);
 void default_str(int mynod, int len, ADIO_cb_name_array array, char *dest);
 void reverse_str(int mynod, int len, ADIO_cb_name_array array, char *dest);
@@ -65,51 +53,7 @@
 	fprintf(stderr, "%s: %s\n", str, msg);
 	MPI_Abort(MPI_COMM_WORLD, 1);
 }
- /* cb_copy_name_array() - attribute copy routine
- */
-int cb_copy_name_array(MPI_Comm comm, 
-		       int *keyval, 
-		       void *extra, 
-		       void *attr_in,
-		       void **attr_out, 
-		       int *flag)
-{
-    ADIO_cb_name_array array;
-
-    array = (ADIO_cb_name_array) attr_in;
-    array->refct++;
-
-    *attr_out = attr_in;
-    *flag = 1; /* make a copy in the new communicator */
-    
-    return MPI_SUCCESS;
-}
-
-/* cb_delete_name_array() - attribute destructor
- */
-int cb_delete_name_array(MPI_Comm comm, 
-			 int *keyval, 
-			 void *attr_val, 
-			 void *extra)
-{
-    int i;
-    ADIO_cb_name_array array;
-
-    array = (ADIO_cb_name_array) attr_val;
-    array->refct--;
-
-    if (array->refct <= 0) {
-	/* time to free the structures (names, array of ptrs to names, struct) 
-	 */
-	for (i=0; i < array->namect; i++) {
-	    ADIOI_Free(array->names[i]);
-	}
-	if (array->names != NULL) ADIOI_Free(array->names);
-	ADIOI_Free(array);
-    }
-
-    return MPI_SUCCESS;
-}   
+   
 
 /* cb_gather_name_array() - gather a list of processor names from all processes
  *                          in a communicator and store them on rank 0.
@@ -125,24 +69,16 @@
  */
 int cb_gather_name_array(MPI_Comm comm, ADIO_cb_name_array *arrayp)
 {
+	/* this is copied from ROMIO, but since this test is for correctness,
+	 * not performance, note that we have removed the parts where ROMIO
+	 * uses a keyval to cache the name array.  We'll just rebuild it if we
+	 * need to */
+
     char my_procname[MPI_MAX_PROCESSOR_NAME], **procname = 0;
     int *procname_len = NULL, my_procname_len, *disp = NULL, i;
-    int commsize, commrank, found;
+    int commsize, commrank;
     ADIO_cb_name_array array = NULL;
 
-    if (cb_config_list_keyval == MPI_KEYVAL_INVALID) {
-	MPI_Keyval_create((MPI_Copy_function *) cb_copy_name_array, 
-			  (MPI_Delete_function *) cb_delete_name_array,
-			  &cb_config_list_keyval, NULL);
-    }
-    else {
-	MPI_Attr_get(comm, cb_config_list_keyval, (void *) &array, &found);
-	if (found) {
-	    *arrayp = array;
-	    return 0;
-	}
-    }
-
     MPI_Comm_size(comm, &commsize);
     MPI_Comm_rank(comm, &commrank);
 
@@ -240,11 +176,6 @@
 #endif
     }
 
-    /* store the attribute; we want to store SOMETHING on all processes
-     * so that they can all tell if we have gone through this procedure 
-     * or not for the given communicator.
-     */
-    MPI_Attr_put(comm, cb_config_list_keyval, array);
     *arrayp = array;
     return 0;
 }
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/test/runtests.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/runtests.in
--- ompi-trunk/ompi/mca/io/romio/romio/test/runtests.in	2010-11-16 09:15:22.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/test/runtests.in	2010-11-15 16:18:04.000000000 +0100
@@ -273,14 +273,21 @@
 $mpirun -np 4 ./noncontig_coll2 -fname $FILENAME
 # CheckOutput noncontig_coll2
 CleanExe noncontig_coll2
+MakeExe aggregation1
 echo '**** Testing aggregation1 ****'
-$mpirun -np 4 ./aggregation1 -h -fname $FILENAME
+$mpirun -np 4 ./aggregation1 -h -f $FILENAME
 # CheckOutput aggregation1
 CleanExe aggregation1
+MakeExe aggregation2
 echo '**** Testing aggregation2 ****'
 $mpirun -np 4 ./aggregation2 $FILENAME
 # CheckOutput aggregation2
 CleanExe aggregation2
+MakeExe hindexed
+echo '**** Testing hindexed ****'
+$mpirun -np 4 ./hindexed $FILENAME
+# CheckOutput hindexed
+CleanExe hindexed
 #
 #echo '**** Testing write_all_test (run 1)****'
 #$mpirun -np 4 ./write_all_test -nzp 2 -zplace 2 -nzw 2 -naw 2 -size 100 \
Only in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/: test-internal
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/util/romioinstall.in NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/util/romioinstall.in
--- ompi-trunk/ompi/mca/io/romio/romio/util/romioinstall.in	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/util/romioinstall.in	2010-11-15 16:42:54.000000000 +0100
@@ -28,9 +28,10 @@
 #sysconfdir=@sysconfdir@
 libdir=@libdir@
 #sharedlib_dir=@sharedlib_dir@
+# datarootdir required as of autoconf 2.60; set it before mandir, whose value expands ${datarootdir}
 datarootdir=@datarootdir@
 mandir=@mandir@
 htmldir=@htmldir@
 #datadir=@datadir@
 docdir=@docdir@
 ## Location of sources
diff -u -r -x .svn ompi-trunk/ompi/mca/io/romio/romio/util/tarch NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/util/tarch
--- ompi-trunk/ompi/mca/io/romio/romio/util/tarch	2010-11-16 09:16:36.000000000 +0100
+++ NEW-ROMIO-FOR-OPENMPI/ompi/mca/io/romio/romio/util/tarch	2010-11-15 15:02:47.000000000 +0100
@@ -53,20 +53,20 @@
    #   LARCH4=`expr "$LARCH" : "\(....\)"`
    #   LARCH6=`expr "$LARCH" : "\(......\)"`
    case $LARCH in
-        SUPER-UX) FARCH=SX4; break ;;
-	AIX|RIOS) FARCH=rs6000; break ;;
-	HP-UX) 
+       SUPER-UX) FARCH=SX4; break ;;
+       AIX|RIOS) FARCH=rs6000; break ;;
+       HP-UX) 
  	if [ -a /dev/kmem ] ; then
  	    FARCH=hpux ;
  	else
  	    FARCH=sppux ;
  	fi
         break ;;
-	IRIX64|IRIX) FARCH=$LARCH ; break ;;
-	Linux)  FARCH=LINUX ; break ;;
-	i586|i486|i86pc)
+       IRIX64|IRIX) FARCH=$LARCH ; break ;;
+       Linux)  FARCH=LINUX ; break ;;
+       i586|i486|i86pc)
 	    GARCH=$LARCH ;;
-	sun4*)
+       sun4*)
 	Version=`$UNAME -r`
         # In "improving" SunOS, the useful feature of "substr" was withdrawn 
         # from expr.  Can't let the users have life too easy, can we?  This 
@@ -81,21 +81,27 @@
 	    FARCH=sun4
 	fi
 	break ;;
-	hp9000*|hp7000*)
+       hp9000*|hp7000*)
  	if [ -a /dev/kmem ] ; then
  	    FARCH=hpux ;
  	else
  	    FARCH=sppux ;
  	fi
  	break ;;
-	mips|dec-5000)   FARCH=dec5000 ; break ;;
-        IP12|iris-4d)    GARCH=IRIX ;;
-        cray|CRAY*)      GARCH=CRAY ;;
+       Darwin*)
+	   FARCH=Darwin
+	   break ;;
+
+       mips|dec-5000)   FARCH=dec5000 ; break ;;
         next)            FARCH=NeXT ; break ;;	
 	KSR1|KSR2)       FARCH=ksr ; break ;;	
         FreeBSD)         FARCH=freebsd ; break ;;
         OpenBSD)         FARCH=openbsd ; break ;;
         NetBSD)          FARCH=netbsd ; break ;;
+
+       # The following are guesses
+        IP12|iris-4d)    GARCH=IRIX ;;
+        cray|CRAY*)      GARCH=CRAY ;;
 	i386)            GARCH=ipsc2 ;;
 	ULTRIX|RISC)     GARCH=dec5000 ;;
    esac
