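Dear all, please find attached a (trivial) patch to MPI_Dims_create(). When computing the prime factors of nnodes, it is sufficient to check for primes less than or equal to sqrt(nnodes), provided that any cofactor greater than 1 that remains after dividing out those primes is itself recorded as a prime factor (e.g. 14 = 2 * 7, where 7 > sqrt(14)).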
This was not much of a problem in the past, but now that Tier 0 systems are capable of running O(10^6) MPI processes, the difference in execution time is on the order of seconds (e.g. 8.86s vs. 0.04s on my notebook, with nnodes = 10^6). Best -Andreas PS: oh, and the patch removes some trailing whitespace. Yuck. :-) -- ========================================================== Andreas Schäfer HPC and Grid Computing Chair of Computer Science 3 Friedrich-Alexander-Universität Erlangen-Nürnberg, Germany +49 9131 85-27910 PGP/GPG key via keyserver http://www.libgeodecomp.org ========================================================== (\___/) (+'.'+) (")_(") This is Bunny. Copy and paste Bunny into your signature to help him gain world domination!
Index: ompi/mpi/c/dims_create.c
===================================================================
--- ompi/mpi/c/dims_create.c (revision 29976)
+++ ompi/mpi/c/dims_create.c (working copy)
@@ -5,19 +5,23 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
- * reserved.
+ * reserved.
+ * Copyright (c) 2013 Friedrich-Alexander-Universitaet
+ * Erlangen-Nuernberg. All rights reserved.
* $COPYRIGHT$
- *
+ *
* Additional copyrights may follow
- *
+ *
* $HEADER$
*/
+#include <math.h>
+
#include "ompi_config.h"
#include "ompi/mpi/c/bindings.h"
@@ -44,8 +48,8 @@
/*
* This is a utility function, no need to have anything in the lower
* layer for this at all
- */
-int MPI_Dims_create(int nnodes, int ndims, int dims[])
+ */
+int MPI_Dims_create(int nnodes, int ndims, int dims[])
{
int i;
int freeprocs;
@@ -66,9 +70,9 @@
return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
MPI_ERR_ARG, FUNC_NAME);
}
-
+
if (1 > ndims) {
- return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
+ return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD,
MPI_ERR_DIMS, FUNC_NAME);
}
}
@@ -109,11 +113,11 @@
}
/* Compute the relevant prime numbers for factoring */
- if (MPI_SUCCESS != (err = getprimes(freeprocs, &nprimes, &primes))) {
+ if (MPI_SUCCESS != (err = getprimes(sqrt(freeprocs), &nprimes, &primes))) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, err,
FUNC_NAME);
}
-
+
/* Factor the number of free processes */
if (MPI_SUCCESS != (err = getfactors(freeprocs, nprimes, primes, &factors))) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, err,
@@ -166,7 +170,7 @@
int f;
int *p;
int *pmin;
-
+
if (0 >= ndim) {
return MPI_ERR_DIMS;
}
@@ -181,7 +185,7 @@
for (i = 0, p = bins; i < ndim; ++i, ++p) {
*p = 1;
}
-
+
/* Loop assigning factors from the highest to the lowest */
for (j = nfactor - 1; j >= 0; --j) {
f = pfacts[j];
@@ -196,7 +200,7 @@
*pmin *= f;
}
}
-
+
/* Sort dimensions in decreasing order (O(n^2) for now) */
for (i = 0, pmin = bins; i < ndim - 1; ++i, ++pmin) {
for (j = i + 1, p = pmin + 1; j < ndim; ++j, ++p) {
@@ -228,7 +232,7 @@
int i;
int *p;
int *c;
-
+
if (0 >= nprime) {
return MPI_ERR_INTERN;
}
@@ -309,4 +313,3 @@
*pnprime = i;
return MPI_SUCCESS;
}
-
signature.asc
Description: Digital signature
