Hi,
Here is a small proposed patch that extends the current rankfile syntax to be
compliant with the orte_hosts syntax,
making it possible to claim relative hosts from the hostfile/
scheduler
by using a +n# hostname, where 0 <= # < number of allocated hosts.
Example:
cat ~/work/svn/hpc/dev/test/Rankfile/rankfile
rank 0=+n0 slot=0
rank 1=+n0 slot=1
rank 2=+n1 slot=2
rank 3=+n1 slot=1
This is for your review and blessing before I commit it to the trunk.
I would also like to ask that it be added to the 1.3 branch.
Thanks,
Lenny.
Index: orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt
===================================================================
--- orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt (revision
21529)
+++ orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt (working copy)
@@ -56,6 +56,9 @@
Please review your rank-slot assignments and your host allocation
to ensure
a proper match.
+[bad-index]
+Rankfile claimed host %s by index that is bigger than number of
allocated hosts.
+
[orte-rmaps-rf:alloc-error]
There are not enough slots available in the system to satisfy the
%d slots
that were requested by the application:
Index: orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h (working copy)
@@ -75,6 +75,7 @@
#define ORTE_RANKFILE_NEWLINE 13
#define ORTE_RANKFILE_IPV6 14
#define ORTE_RANKFILE_SLOT 15
+#define ORTE_RANKFILE_RELATIVE 16
#if defined(c_plusplus) || defined(__cplusplus)
}
Index: orte/mca/rmaps/rank_file/rmaps_rank_file.c
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file.c (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file.c (working copy)
@@ -273,11 +273,11 @@
orte_vpid_t total_procs;
opal_list_t node_list;
opal_list_item_t *item;
- orte_node_t *node, *nd;
+ orte_node_t *node, *nd, *root_node;
orte_vpid_t rank, vpid_start;
orte_std_cntr_t num_nodes, num_slots;
orte_rmaps_rank_file_map_t *rfmap;
- orte_std_cntr_t slots_per_node;
+ orte_std_cntr_t slots_per_node, relative_index, tmp_cnt;
int rc;
/* convenience def */
@@ -411,7 +411,25 @@
0 == strcmp(nd->name, rfmap->node_name)) {
node = nd;
break;
- }
+ } else if (NULL != rfmap->node_name &&
+ (('+' == rfmap->node_name[0]) &&
+ (('n' == rfmap->node_name[1]) ||
+ ('N' == rfmap->node_name[1])))) {
+
+ relative_index=atoi(strtok(rfmap->node_name,"+n"));
+ if ( relative_index >= opal_list_get_size (&node_list) || ( 0 >
relative_index)){
+ orte_show_help("help-rmaps_rank_file.txt","bad-index", true,rfmap-
>node_name);
+ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
+ return ORTE_ERR_BAD_PARAM;
+ }
+ root_node = (orte_node_t*) opal_list_get_first(&node_list);
+ for(tmp_cnt=0; tmp_cnt<relative_index; tmp_cnt++) {
+ root_node = (orte_node_t*) opal_list_get_next(root_node);
+ }
+ node = root_node;
+ break;
+ }
+
}
if (NULL == node) {
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap-
>node_name);
@@ -631,6 +649,7 @@
case ORTE_RANKFILE_IPV6:
case ORTE_RANKFILE_STRING:
case ORTE_RANKFILE_INT:
+ case ORTE_RANKFILE_RELATIVE:
if(ORTE_RANKFILE_INT == token) {
sprintf(buff,"%d", orte_rmaps_rank_file_value.ival);
value = buff;
Index: orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l (working copy)
@@ -111,6 +111,9 @@
orte_rmaps_rank_file_value.sval = yytext;
return ORTE_RANKFILE_HOSTNAME; }
+\+n[0-9]+ { orte_rmaps_rank_file_value.sval = yytext;
+ return ORTE_RANKFILE_RELATIVE; }
+
. { orte_rmaps_rank_file_value.sval = yytext;
return ORTE_RANKFILE_ERROR; }
<rankfile.patch>_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel