Ummm...you can't do that, Josh. You are violating the abstraction barrier rather 
badly by searching for specific IB devices down in ORTE.

Please revert this and let's talk about what you are actually trying to do.


On Nov 7, 2013, at 8:28 PM, svn-commit-mai...@open-mpi.org wrote:

> Author: jladd (Joshua Ladd)
> Date: 2013-11-07 23:28:53 EST (Thu, 07 Nov 2013)
> New Revision: 29644
> URL: https://svn.open-mpi.org/trac/ompi/changeset/29644
> 
> Log:
> Adds a check in the mindist mapper for whether or not the user asks for a 
> specific device. This patch was submitted by Elena Elkina and reviewed by Josh 
> Ladd and should be added to v1.7.4.
> 
> cmr=v1.7.4:reviewer=jladd
> 
> Text files modified: 
>   trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c |    65 
> ++++++++++++++++++++++++++++++++++++--- 
>   1 files changed, 60 insertions(+), 5 deletions(-)
> 
> Modified: trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c
> ==============================================================================
> --- trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c       Thu Nov  7 
> 23:21:05 2013        (r29643)
> +++ trunk/orte/mca/rmaps/mindist/rmaps_mindist_module.c       2013-11-07 
> 23:28:53 EST (Thu, 07 Nov 2013)      (r29644)
> @@ -47,6 +47,52 @@
>     mindist_map
> };
> 
> +static int num_devices_in_list(char *list)
> +{
> +    int count = 0;
> +    list = strtok(list, ",");
> +    while (NULL != list) {
> +        ++count;
> +        list = strtok(NULL, ",");
> +    }
> +    return count;
> +}
> +
> +static char* get_hca_name(orte_app_context_t *app)
> +{
> +    int found_ind = -1;
> +    char** env = app->env;
> +    int i;
> +    for (i = 0; env[i]; i++) {
> +        if (strstr(env[i], "OMPI_MCA_btl_openib_if_include") != NULL) {
> +            found_ind = i;
> +            break;
> +        }
> +    }
> +    if (found_ind == -1) {
> +        for (i = 0; env[i]; i++) {
> +            if (strstr(env[i], "MXM_RDMA_PORTS") != NULL) {
> +                found_ind = i;
> +                break;
> +            }
> +        }
> +    }
> +    if (found_ind != -1) {
> +        char* start = strstr(env[found_ind], "=");
> +        if (start != NULL) {
> +            start = strdup(start+sizeof(char));
> +            if (num_devices_in_list(start) == 1) {
> +                return strtok(start, ":");
> +            }
> +            else {
> +                free(start);
> +                return NULL;
> +            }
> +        }
> +    }
> +    return NULL;
> +}
> +
> /*
>  * Create a round-robin mapping for the job.
>  */
> @@ -248,10 +294,17 @@
>             OBJ_CONSTRUCT(&numa_list, opal_list_t);
>             ret = opal_hwloc_get_sorted_numa_list(node->topology, 
> orte_rmaps_base.device, &numa_list);
>             if (ret > 1) {
> -                orte_show_help("help-orte-rmaps-md.txt", 
> "orte-rmaps-mindist:several-hca-devices",
> -                        true, ret, node->name);
> -                rc = ORTE_ERR_SILENT;
> -                goto error;
> +                /* check if hca device is specified via openib or mxm 
> parameter */
> +                free(orte_rmaps_base.device);
> +                orte_rmaps_base.device = get_hca_name(app);
> +                if (orte_rmaps_base.device != NULL) {
> +                    ret = opal_hwloc_get_sorted_numa_list(node->topology, 
> orte_rmaps_base.device, &numa_list);
> +                } else {
> +                    orte_show_help("help-orte-rmaps-md.txt", 
> "orte-rmaps-mindist:several-hca-devices",
> +                            true, ret, node->name);
> +                    rc = ORTE_ERR_SILENT;
> +                    goto error;
> +                }
>             } else if (ret < 0) {
>                 orte_show_help("help-orte-rmaps-md.txt", 
> "orte-rmaps-mindist:device-not-found",
>                         true, orte_rmaps_base.device, node->name);
> @@ -402,7 +455,9 @@
>         }
>         OBJ_DESTRUCT(&node_list);
>     }
> -    free(orte_rmaps_base.device);
> +    if (orte_rmaps_base.device != NULL) {
> +        free(orte_rmaps_base.device);
> +    }
>     return ORTE_SUCCESS;
> 
> error:
> _______________________________________________
> svn mailing list
> s...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn

Reply via email to