Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors
Hey Sasha, Here's an updated patch with the cleanup changes as we discussed. Al On Sat, 2010-01-16 at 16:28 +0200, Sasha Khapyorsky wrote: On 10:23 Fri 15 Jan , Al Chu wrote: Hi Sasha, This adds the --load-cache options to iblinkinfo and ibqueryerrors. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 10 Dec 2009 11:22:50 -0800 Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors Signed-off-by: Albert Chu ch...@llnl.gov --- infiniband-diags/man/iblinkinfo.8| 11 ++- infiniband-diags/man/ibqueryerrors.8 | 10 ++- infiniband-diags/src/iblinkinfo.c| 52 +--- infiniband-diags/src/ibqueryerrors.c | 53 ++--- 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/infiniband-diags/man/iblinkinfo.8 b/infiniband-diags/man/iblinkinfo.8 index 0f53b00..f184edf 100644 --- a/infiniband-diags/man/iblinkinfo.8 +++ b/infiniband-diags/man/iblinkinfo.8 @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric .SH SYNOPSIS .B iblinkinfo [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid --D direct_route] +-D direct_route \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values) \fB\-P ca_port\fRuse the specified ca_port for the search. .TP \fB\-R\fR (This option is obsolete and does nothing) - +.TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. 
.SH AUTHOR .TP diff --git a/infiniband-diags/man/ibqueryerrors.8 b/infiniband-diags/man/ibqueryerrors.8 index 83a2b5a..56a0d67 100644 --- a/infiniband-diags/man/ibqueryerrors.8 +++ b/infiniband-diags/man/ibqueryerrors.8 @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters .SH SYNOPSIS .B ibqueryerrors [-s err1,err2,... -c -r -C ca_name -P ca_port -G node_guid --D direct_route -d -k -K] +-D direct_route -d -k -K \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -60,6 +60,14 @@ specified the data counters will be cleared without any printed output. .TP \fB\-\-details\fR include transmit discard details .TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. +.TP \fB\-R\fR (This option is obsolete and does nothing) .SH COMMON OPTIONS diff --git a/infiniband-diags/src/iblinkinfo.c b/infiniband-diags/src/iblinkinfo.c index 21b31bb..10e3ad5 100644 --- a/infiniband-diags/src/iblinkinfo.c +++ b/infiniband-diags/src/iblinkinfo.c @@ -55,6 +55,7 @@ static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; +static char *load_cache_file = NULL; static uint64_t guid = 0; static char *guid_str = NULL; @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char *optarg) case 1: node_name_map_file = strdup(optarg); break; + case 2: + load_cache_file = strdup(optarg); + break; case 'S': guid_str = optarg; guid = (uint64_t) strtoull(guid_str, 0, 0); @@ -291,6 +295,7 @@ int main(int argc, char **argv) print additional switch settings (PktLifeTime, HoqLife, VLStallCount)}, {portguids, 'g', 0, NULL, print port guids instead of node guids}, + {load-cache, 2, 1, file, filename of ibnetdiscover cache to load}, {GNDN, 'R', 0, NULL, (This option is obsolete and 
does nothing)}, {0} @@ -317,6 +322,11 @@ int main(int argc, char **argv) mad_rpc_set_timeout(ibmad_port, ibd_timeout); node_name_map = open_node_name_map(node_name_map_file); + + if (dr_path load_cache_file) { + fprintf(stderr, Cannot specify cache and direct route path\n); + exit(1); + } Why is this limitation needed really? if (dr_path) { /* only scan part of the fabric */ @@ -334,19 +344,37 @@ int main(int argc, char **argv) guid_str); } - if (resolved = 0) - if ((fabric = ibnd_discover_fabric(ibmad_port, port_id, -
Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors
On 10:23 Fri 15 Jan , Al Chu wrote: Hi Sasha, This adds the --load-cache options to iblinkinfo and ibqueryerrors. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 10 Dec 2009 11:22:50 -0800 Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors Signed-off-by: Albert Chu ch...@llnl.gov --- infiniband-diags/man/iblinkinfo.8| 11 ++- infiniband-diags/man/ibqueryerrors.8 | 10 ++- infiniband-diags/src/iblinkinfo.c| 52 +--- infiniband-diags/src/ibqueryerrors.c | 53 ++--- 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/infiniband-diags/man/iblinkinfo.8 b/infiniband-diags/man/iblinkinfo.8 index 0f53b00..f184edf 100644 --- a/infiniband-diags/man/iblinkinfo.8 +++ b/infiniband-diags/man/iblinkinfo.8 @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric .SH SYNOPSIS .B iblinkinfo [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid --D direct_route] +-D direct_route \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values) \fB\-P ca_port\fRuse the specified ca_port for the search. .TP \fB\-R\fR (This option is obsolete and does nothing) - +.TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. .SH AUTHOR .TP diff --git a/infiniband-diags/man/ibqueryerrors.8 b/infiniband-diags/man/ibqueryerrors.8 index 83a2b5a..56a0d67 100644 --- a/infiniband-diags/man/ibqueryerrors.8 +++ b/infiniband-diags/man/ibqueryerrors.8 @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters .SH SYNOPSIS .B ibqueryerrors [-s err1,err2,... 
-c -r -C ca_name -P ca_port -G node_guid --D direct_route -d -k -K] +-D direct_route -d -k -K \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -60,6 +60,14 @@ specified the data counters will be cleared without any printed output. .TP \fB\-\-details\fR include transmit discard details .TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. +.TP \fB\-R\fR (This option is obsolete and does nothing) .SH COMMON OPTIONS diff --git a/infiniband-diags/src/iblinkinfo.c b/infiniband-diags/src/iblinkinfo.c index 21b31bb..10e3ad5 100644 --- a/infiniband-diags/src/iblinkinfo.c +++ b/infiniband-diags/src/iblinkinfo.c @@ -55,6 +55,7 @@ static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; +static char *load_cache_file = NULL; static uint64_t guid = 0; static char *guid_str = NULL; @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char *optarg) case 1: node_name_map_file = strdup(optarg); break; + case 2: + load_cache_file = strdup(optarg); + break; case 'S': guid_str = optarg; guid = (uint64_t) strtoull(guid_str, 0, 0); @@ -291,6 +295,7 @@ int main(int argc, char **argv) print additional switch settings (PktLifeTime, HoqLife, VLStallCount)}, {portguids, 'g', 0, NULL, print port guids instead of node guids}, + {load-cache, 2, 1, file, filename of ibnetdiscover cache to load}, {GNDN, 'R', 0, NULL, (This option is obsolete and does nothing)}, {0} @@ -317,6 +322,11 @@ int main(int argc, char **argv) mad_rpc_set_timeout(ibmad_port, ibd_timeout); node_name_map = open_node_name_map(node_name_map_file); + + if (dr_path load_cache_file) { + fprintf(stderr, Cannot specify cache and direct route path\n); + exit(1); + } Why is this limitation needed really? 
if (dr_path) { /* only scan part of the fabric */ @@ -334,19 +344,37 @@ int main(int argc, char **argv) guid_str); } - if (resolved = 0) - if ((fabric = ibnd_discover_fabric(ibmad_port, port_id, -hops)) == NULL) - IBWARN - (Single node discover failed; attempting full scan\n); - - if (!fabric) - if ((fabric = -
Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors
Hey Sasha, answers inlined below On Sat, 2010-01-16 at 16:28 +0200, Sasha Khapyorsky wrote: On 10:23 Fri 15 Jan , Al Chu wrote: Hi Sasha, This adds the --load-cache options to iblinkinfo and ibqueryerrors. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 10 Dec 2009 11:22:50 -0800 Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors Signed-off-by: Albert Chu ch...@llnl.gov --- infiniband-diags/man/iblinkinfo.8| 11 ++- infiniband-diags/man/ibqueryerrors.8 | 10 ++- infiniband-diags/src/iblinkinfo.c| 52 +--- infiniband-diags/src/ibqueryerrors.c | 53 ++--- 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/infiniband-diags/man/iblinkinfo.8 b/infiniband-diags/man/iblinkinfo.8 index 0f53b00..f184edf 100644 --- a/infiniband-diags/man/iblinkinfo.8 +++ b/infiniband-diags/man/iblinkinfo.8 @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric .SH SYNOPSIS .B iblinkinfo [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid --D direct_route] +-D direct_route \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values) \fB\-P ca_port\fRuse the specified ca_port for the search. .TP \fB\-R\fR (This option is obsolete and does nothing) - +.TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. 
.SH AUTHOR .TP diff --git a/infiniband-diags/man/ibqueryerrors.8 b/infiniband-diags/man/ibqueryerrors.8 index 83a2b5a..56a0d67 100644 --- a/infiniband-diags/man/ibqueryerrors.8 +++ b/infiniband-diags/man/ibqueryerrors.8 @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters .SH SYNOPSIS .B ibqueryerrors [-s err1,err2,... -c -r -C ca_name -P ca_port -G node_guid --D direct_route -d -k -K] +-D direct_route -d -k -K \-\-load\-cache filename] .SH DESCRIPTION .PP @@ -60,6 +60,14 @@ specified the data counters will be cleared without any printed output. .TP \fB\-\-details\fR include transmit discard details .TP +\fB\-\-load\-cache\fR filename +Load and use the cached ibnetdiscover data stored in the specified +filename. May be useful for outputting and learning about other +fabrics or a previous state of a fabric. Cannot be used if user +specifies a directo route path. See +.B ibnetdiscover +for information on caching ibnetdiscover output. +.TP \fB\-R\fR (This option is obsolete and does nothing) .SH COMMON OPTIONS diff --git a/infiniband-diags/src/iblinkinfo.c b/infiniband-diags/src/iblinkinfo.c index 21b31bb..10e3ad5 100644 --- a/infiniband-diags/src/iblinkinfo.c +++ b/infiniband-diags/src/iblinkinfo.c @@ -55,6 +55,7 @@ static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; +static char *load_cache_file = NULL; static uint64_t guid = 0; static char *guid_str = NULL; @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char *optarg) case 1: node_name_map_file = strdup(optarg); break; + case 2: + load_cache_file = strdup(optarg); + break; case 'S': guid_str = optarg; guid = (uint64_t) strtoull(guid_str, 0, 0); @@ -291,6 +295,7 @@ int main(int argc, char **argv) print additional switch settings (PktLifeTime, HoqLife, VLStallCount)}, {portguids, 'g', 0, NULL, print port guids instead of node guids}, + {load-cache, 2, 1, file, filename of ibnetdiscover cache to load}, {GNDN, 'R', 0, NULL, (This option is obsolete and 
does nothing)}, {0} @@ -317,6 +322,11 @@ int main(int argc, char **argv) mad_rpc_set_timeout(ibmad_port, ibd_timeout); node_name_map = open_node_name_map(node_name_map_file); + + if (dr_path load_cache_file) { + fprintf(stderr, Cannot specify cache and direct route path\n); + exit(1); + } Why is this limitation needed really? I spoke to Ira about it a while ago. I think what we decided was that while technically you can do a DR path, because you can load a cache from anywhere in the cluster, you won't know if the DR path is legal or correct at point A vs. point B. In contrast, the node_guid input is valid from anywhere on the cluster. I suppose we