Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors

2010-01-18 Thread Al Chu
Hey Sasha,

Here's an updated patch with the cleanup changes as we discussed.

Al

On Sat, 2010-01-16 at 16:28 +0200, Sasha Khapyorsky wrote:
 On 10:23 Fri 15 Jan , Al Chu wrote:
  Hi Sasha,
  
  This adds the --load-cache options to iblinkinfo and ibqueryerrors.
  
  Al
  
  -- 
  Albert Chu
  ch...@llnl.gov
  Computer Scientist
  High Performance Systems Division
  Lawrence Livermore National Laboratory
 
  From: Albert Chu ch...@llnl.gov
  Date: Thu, 10 Dec 2009 11:22:50 -0800
  Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors
  
  
  Signed-off-by: Albert Chu ch...@llnl.gov
  ---
   infiniband-diags/man/iblinkinfo.8|   11 ++-
   infiniband-diags/man/ibqueryerrors.8 |   10 ++-
   infiniband-diags/src/iblinkinfo.c|   52 
  +---
   infiniband-diags/src/ibqueryerrors.c |   53 
  ++---
   4 files changed, 99 insertions(+), 27 deletions(-)
  
  diff --git a/infiniband-diags/man/iblinkinfo.8 
  b/infiniband-diags/man/iblinkinfo.8
  index 0f53b00..f184edf 100644
  --- a/infiniband-diags/man/iblinkinfo.8
  +++ b/infiniband-diags/man/iblinkinfo.8
  @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric
   .SH SYNOPSIS
   .B iblinkinfo
  [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid
  --D direct_route]
  +-D direct_route \-\-load\-cache filename]
   
   .SH DESCRIPTION
   .PP
  @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values)
   \fB\-P ca_port\fRuse the specified ca_port for the search.
   .TP
   \fB\-R\fR (This option is obsolete and does nothing)
  -
  +.TP
  +\fB\-\-load\-cache\fR filename
  +Load and use the cached ibnetdiscover data stored in the specified
  +filename.  May be useful for outputting and learning about other
  +fabrics or a previous state of a fabric.  Cannot be used if user
  +specifies a direct route path.  See
  +.B ibnetdiscover
  +for information on caching ibnetdiscover output.
   
   .SH AUTHOR
   .TP
  diff --git a/infiniband-diags/man/ibqueryerrors.8 
  b/infiniband-diags/man/ibqueryerrors.8
  index 83a2b5a..56a0d67 100644
  --- a/infiniband-diags/man/ibqueryerrors.8
  +++ b/infiniband-diags/man/ibqueryerrors.8
  @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters
   .SH SYNOPSIS
   .B ibqueryerrors
   [-s err1,err2,... -c -r -C ca_name -P ca_port -G node_guid
  --D direct_route -d -k -K]
  +-D direct_route -d -k -K \-\-load\-cache filename]
   
   .SH DESCRIPTION
   .PP
  @@ -60,6 +60,14 @@ specified the data counters will be cleared without any 
  printed output.
   .TP
   \fB\-\-details\fR include transmit discard details
   .TP
  +\fB\-\-load\-cache\fR filename
  +Load and use the cached ibnetdiscover data stored in the specified
  +filename.  May be useful for outputting and learning about other
  +fabrics or a previous state of a fabric.  Cannot be used if user
  +specifies a direct route path.  See
  +.B ibnetdiscover
  +for information on caching ibnetdiscover output.
  +.TP
   \fB\-R\fR  (This option is obsolete and does nothing)
   
   .SH COMMON OPTIONS
  diff --git a/infiniband-diags/src/iblinkinfo.c 
  b/infiniband-diags/src/iblinkinfo.c
  index 21b31bb..10e3ad5 100644
  --- a/infiniband-diags/src/iblinkinfo.c
  +++ b/infiniband-diags/src/iblinkinfo.c
  @@ -55,6 +55,7 @@
   
   static char *node_name_map_file = NULL;
   static nn_map_t *node_name_map = NULL;
  +static char *load_cache_file = NULL;
   
   static uint64_t guid = 0;
   static char *guid_str = NULL;
  @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char 
  *optarg)
  case 1:
  node_name_map_file = strdup(optarg);
  break;
  +   case 2:
  +   load_cache_file = strdup(optarg);
  +   break;
  case 'S':
  guid_str = optarg;
  guid = (uint64_t) strtoull(guid_str, 0, 0);
  @@ -291,6 +295,7 @@ int main(int argc, char **argv)
   print additional switch settings (PktLifeTime, HoqLife, 
  VLStallCount)},
  {portguids, 'g', 0, NULL,
   print port guids instead of node guids},
  +   {"load-cache", 2, 1, "file", "filename of ibnetdiscover cache 
  to load"},
  {GNDN, 'R', 0, NULL,
   (This option is obsolete and does nothing)},
  {0}
  @@ -317,6 +322,11 @@ int main(int argc, char **argv)
  mad_rpc_set_timeout(ibmad_port, ibd_timeout);
   
  node_name_map = open_node_name_map(node_name_map_file);
  +   
  +   if (dr_path && load_cache_file) {
  +   fprintf(stderr, "Cannot specify cache and direct route path\n");
  +   exit(1);
  +   }
 
 Why is this limitation needed really?
 
   
  if (dr_path) {
  /* only scan part of the fabric */
  @@ -334,19 +344,37 @@ int main(int argc, char **argv)
 guid_str);
  }
   
  -   if (resolved >= 0)
  -   if ((fabric = ibnd_discover_fabric(ibmad_port, &port_id,
  -  

Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors

2010-01-16 Thread Sasha Khapyorsky
On 10:23 Fri 15 Jan , Al Chu wrote:
 Hi Sasha,
 
 This adds the --load-cache options to iblinkinfo and ibqueryerrors.
 
 Al
 
 -- 
 Albert Chu
 ch...@llnl.gov
 Computer Scientist
 High Performance Systems Division
 Lawrence Livermore National Laboratory

 From: Albert Chu ch...@llnl.gov
 Date: Thu, 10 Dec 2009 11:22:50 -0800
 Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors
 
 
 Signed-off-by: Albert Chu ch...@llnl.gov
 ---
  infiniband-diags/man/iblinkinfo.8|   11 ++-
  infiniband-diags/man/ibqueryerrors.8 |   10 ++-
  infiniband-diags/src/iblinkinfo.c|   52 +---
  infiniband-diags/src/ibqueryerrors.c |   53 ++---
  4 files changed, 99 insertions(+), 27 deletions(-)
 
 diff --git a/infiniband-diags/man/iblinkinfo.8 
 b/infiniband-diags/man/iblinkinfo.8
 index 0f53b00..f184edf 100644
 --- a/infiniband-diags/man/iblinkinfo.8
 +++ b/infiniband-diags/man/iblinkinfo.8
 @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric
  .SH SYNOPSIS
  .B iblinkinfo
 [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid
 --D direct_route]
 +-D direct_route \-\-load\-cache filename]
  
  .SH DESCRIPTION
  .PP
 @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values)
  \fB\-P ca_port\fRuse the specified ca_port for the search.
  .TP
  \fB\-R\fR (This option is obsolete and does nothing)
 -
 +.TP
 +\fB\-\-load\-cache\fR filename
 +Load and use the cached ibnetdiscover data stored in the specified
 +filename.  May be useful for outputting and learning about other
 +fabrics or a previous state of a fabric.  Cannot be used if user
 +specifies a direct route path.  See
 +.B ibnetdiscover
 +for information on caching ibnetdiscover output.
  
  .SH AUTHOR
  .TP
 diff --git a/infiniband-diags/man/ibqueryerrors.8 
 b/infiniband-diags/man/ibqueryerrors.8
 index 83a2b5a..56a0d67 100644
 --- a/infiniband-diags/man/ibqueryerrors.8
 +++ b/infiniband-diags/man/ibqueryerrors.8
 @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters
  .SH SYNOPSIS
  .B ibqueryerrors
  [-s err1,err2,... -c -r -C ca_name -P ca_port -G node_guid
 --D direct_route -d -k -K]
 +-D direct_route -d -k -K \-\-load\-cache filename]
  
  .SH DESCRIPTION
  .PP
 @@ -60,6 +60,14 @@ specified the data counters will be cleared without any 
 printed output.
  .TP
  \fB\-\-details\fR include transmit discard details
  .TP
 +\fB\-\-load\-cache\fR filename
 +Load and use the cached ibnetdiscover data stored in the specified
 +filename.  May be useful for outputting and learning about other
 +fabrics or a previous state of a fabric.  Cannot be used if user
 +specifies a direct route path.  See
 +.B ibnetdiscover
 +for information on caching ibnetdiscover output.
 +.TP
  \fB\-R\fR  (This option is obsolete and does nothing)
  
  .SH COMMON OPTIONS
 diff --git a/infiniband-diags/src/iblinkinfo.c 
 b/infiniband-diags/src/iblinkinfo.c
 index 21b31bb..10e3ad5 100644
 --- a/infiniband-diags/src/iblinkinfo.c
 +++ b/infiniband-diags/src/iblinkinfo.c
 @@ -55,6 +55,7 @@
  
  static char *node_name_map_file = NULL;
  static nn_map_t *node_name_map = NULL;
 +static char *load_cache_file = NULL;
  
  static uint64_t guid = 0;
  static char *guid_str = NULL;
 @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char 
 *optarg)
   case 1:
   node_name_map_file = strdup(optarg);
   break;
 + case 2:
 + load_cache_file = strdup(optarg);
 + break;
   case 'S':
   guid_str = optarg;
   guid = (uint64_t) strtoull(guid_str, 0, 0);
 @@ -291,6 +295,7 @@ int main(int argc, char **argv)
print additional switch settings (PktLifeTime, HoqLife, 
 VLStallCount)},
   {portguids, 'g', 0, NULL,
print port guids instead of node guids},
 + {"load-cache", 2, 1, "file", "filename of ibnetdiscover cache 
 to load"},
   {GNDN, 'R', 0, NULL,
(This option is obsolete and does nothing)},
   {0}
 @@ -317,6 +322,11 @@ int main(int argc, char **argv)
   mad_rpc_set_timeout(ibmad_port, ibd_timeout);
  
   node_name_map = open_node_name_map(node_name_map_file);
 + 
 + if (dr_path && load_cache_file) {
 + fprintf(stderr, "Cannot specify cache and direct route path\n");
 + exit(1);
 + }

Why is this limitation needed really?

  
   if (dr_path) {
   /* only scan part of the fabric */
 @@ -334,19 +344,37 @@ int main(int argc, char **argv)
  guid_str);
   }
  
 - if (resolved >= 0)
 - if ((fabric = ibnd_discover_fabric(ibmad_port, &port_id,
 -hops)) == NULL)
 - IBWARN
 - ("Single node discover failed; attempting full 
 scan\n");
 -
 - if (!fabric)
 - if ((fabric =
 -   

Re: [infiniband-diags] [UPDATED PATCH] [3/3] support --load-cache in iblinkinfo and ibqueryerrors

2010-01-16 Thread Al Chu
Hey Sasha,

answers inlined below

On Sat, 2010-01-16 at 16:28 +0200, Sasha Khapyorsky wrote:
 On 10:23 Fri 15 Jan , Al Chu wrote:
  Hi Sasha,
  
  This adds the --load-cache options to iblinkinfo and ibqueryerrors.
  
  Al
  
  -- 
  Albert Chu
  ch...@llnl.gov
  Computer Scientist
  High Performance Systems Division
  Lawrence Livermore National Laboratory
 
  From: Albert Chu ch...@llnl.gov
  Date: Thu, 10 Dec 2009 11:22:50 -0800
  Subject: [PATCH] support --load-cache in iblinkinfo and ibqueryerrors
  
  
  Signed-off-by: Albert Chu ch...@llnl.gov
  ---
   infiniband-diags/man/iblinkinfo.8|   11 ++-
   infiniband-diags/man/ibqueryerrors.8 |   10 ++-
   infiniband-diags/src/iblinkinfo.c|   52 
  +---
   infiniband-diags/src/ibqueryerrors.c |   53 
  ++---
   4 files changed, 99 insertions(+), 27 deletions(-)
  
  diff --git a/infiniband-diags/man/iblinkinfo.8 
  b/infiniband-diags/man/iblinkinfo.8
  index 0f53b00..f184edf 100644
  --- a/infiniband-diags/man/iblinkinfo.8
  +++ b/infiniband-diags/man/iblinkinfo.8
  @@ -6,7 +6,7 @@ iblinkinfo \- report link info for all links in the fabric
   .SH SYNOPSIS
   .B iblinkinfo
  [-hcdl -C ca_name -P ca_port -v lt,hoq,vlstall -S guid
  --D direct_route]
  +-D direct_route \-\-load\-cache filename]
   
   .SH DESCRIPTION
   .PP
  @@ -42,7 +42,14 @@ Print port capabilities (enabled and supported values)
   \fB\-P ca_port\fRuse the specified ca_port for the search.
   .TP
   \fB\-R\fR (This option is obsolete and does nothing)
  -
  +.TP
  +\fB\-\-load\-cache\fR filename
  +Load and use the cached ibnetdiscover data stored in the specified
  +filename.  May be useful for outputting and learning about other
  +fabrics or a previous state of a fabric.  Cannot be used if user
  +specifies a direct route path.  See
  +.B ibnetdiscover
  +for information on caching ibnetdiscover output.
   
   .SH AUTHOR
   .TP
  diff --git a/infiniband-diags/man/ibqueryerrors.8 
  b/infiniband-diags/man/ibqueryerrors.8
  index 83a2b5a..56a0d67 100644
  --- a/infiniband-diags/man/ibqueryerrors.8
  +++ b/infiniband-diags/man/ibqueryerrors.8
  @@ -6,7 +6,7 @@ ibqueryerrors \- query and report non-zero IB port counters
   .SH SYNOPSIS
   .B ibqueryerrors
   [-s err1,err2,... -c -r -C ca_name -P ca_port -G node_guid
  --D direct_route -d -k -K]
  +-D direct_route -d -k -K \-\-load\-cache filename]
   
   .SH DESCRIPTION
   .PP
  @@ -60,6 +60,14 @@ specified the data counters will be cleared without any 
  printed output.
   .TP
   \fB\-\-details\fR include transmit discard details
   .TP
  +\fB\-\-load\-cache\fR filename
  +Load and use the cached ibnetdiscover data stored in the specified
  +filename.  May be useful for outputting and learning about other
  +fabrics or a previous state of a fabric.  Cannot be used if user
  +specifies a direct route path.  See
  +.B ibnetdiscover
  +for information on caching ibnetdiscover output.
  +.TP
   \fB\-R\fR  (This option is obsolete and does nothing)
   
   .SH COMMON OPTIONS
  diff --git a/infiniband-diags/src/iblinkinfo.c 
  b/infiniband-diags/src/iblinkinfo.c
  index 21b31bb..10e3ad5 100644
  --- a/infiniband-diags/src/iblinkinfo.c
  +++ b/infiniband-diags/src/iblinkinfo.c
  @@ -55,6 +55,7 @@
   
   static char *node_name_map_file = NULL;
   static nn_map_t *node_name_map = NULL;
  +static char *load_cache_file = NULL;
   
   static uint64_t guid = 0;
   static char *guid_str = NULL;
  @@ -230,6 +231,9 @@ static int process_opt(void *context, int ch, char 
  *optarg)
  case 1:
  node_name_map_file = strdup(optarg);
  break;
  +   case 2:
  +   load_cache_file = strdup(optarg);
  +   break;
  case 'S':
  guid_str = optarg;
  guid = (uint64_t) strtoull(guid_str, 0, 0);
  @@ -291,6 +295,7 @@ int main(int argc, char **argv)
   print additional switch settings (PktLifeTime, HoqLife, 
  VLStallCount)},
  {portguids, 'g', 0, NULL,
   print port guids instead of node guids},
  +   {"load-cache", 2, 1, "file", "filename of ibnetdiscover cache 
  to load"},
  {GNDN, 'R', 0, NULL,
   (This option is obsolete and does nothing)},
  {0}
  @@ -317,6 +322,11 @@ int main(int argc, char **argv)
  mad_rpc_set_timeout(ibmad_port, ibd_timeout);
   
  node_name_map = open_node_name_map(node_name_map_file);
  +   
  +   if (dr_path && load_cache_file) {
  +   fprintf(stderr, "Cannot specify cache and direct route path\n");
  +   exit(1);
  +   }
 
 Why is this limitation needed really?

I spoke to Ira about it a while ago.  I think what we decided was that
while technically you can use a DR path, because you can load a cache
from anywhere in the cluster, you won't know whether the DR path is legal
or correct at point A vs. point B.  In contrast, the node_guid input is
valid anywhere you are on the cluster.

I suppose we