[PATCH] opensm: Multicast root switch calculation

2009-11-02 Thread Slava Strebkov
Proposed new algorithm for calculating the root switch of the multicast
spanning tree. Only edge switches (those connected to hosts) and switches
that are multicast members themselves are involved in the root calculation.
This gives an improvement, especially on large fabrics, since the number of
switches is usually much smaller than the number of ports sharing the same
mcast group.

Signed-off-by: Slava Strebkov sla...@voltaire.com
---
 opensm/include/opensm/osm_switch.h |   14 -
 opensm/opensm/osm_mcast_mgr.c  |  132 
 2 files changed, 132 insertions(+), 14 deletions(-)

diff --git a/opensm/include/opensm/osm_switch.h 
b/opensm/include/opensm/osm_switch.h
index 655491d..6204b37 100644
--- a/opensm/include/opensm/osm_switch.h
+++ b/opensm/include/opensm/osm_switch.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  *
@@ -109,6 +109,9 @@ typedef struct osm_switch {
unsigned endport_links;
unsigned need_update;
void *priv;
+   cl_map_item_t mcast_item;
+   uint32_t num_of_mcm;
+   uint8_t is_mc_member;
 } osm_switch_t;
 /*
 * FIELDS
@@ -151,6 +154,15 @@ typedef struct osm_switch {
 *  When set indicates that switch was probably reset, so
 *  fwd tables and rest cached data should be flushed
 *
+*  mcast_item
+*  map item for switch in building mcast tree
+*
+*  num_of_mcm
+*  number of mcast members(ports) connected to switch
+*
+*  is_mc_member
+*  whether switch is a mcast member itself
+*
 * SEE ALSO
 *  Switch object
 */
diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
index 0ee689c..c9c93a2 100644
--- a/opensm/opensm/osm_mcast_mgr.c
+++ b/opensm/opensm/osm_mcast_mgr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
@@ -203,25 +203,132 @@ static float osm_mcast_mgr_compute_max_hops(osm_sm_t * 
sm,
return (float)max_hops;
 }
 
+static void mcast_mgr_build_switch_map(osm_sm_t * sm,
+   const osm_mgrp_t * p_mgrp,
+   cl_qmap_t *p_mcast_member_sw_tbl)
+{
+   osm_switch_t*remote_sw;
+   const osm_mcm_port_t *p_mcm_port;
+   const cl_qmap_t *p_mcm_tbl;
+   osm_port_t *p_port;
+   ib_net64_t  port_guid;
+   osm_physp_t *p_physp_remote;
+   osm_node_t *remote_node;
+
+   OSM_LOG_ENTER(sm-p_log);
+
+   cl_qmap_init(p_mcast_member_sw_tbl);
+   p_mcm_tbl = p_mgrp-mcm_port_tbl;
+   for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
+   p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
+   p_mcm_port = (osm_mcm_port_t *) 
cl_qmap_next(p_mcm_port-map_item)) {
+   p_port = (osm_port_t*)osm_get_port_by_guid(sm-p_subn,
+   ib_gid_get_guid(p_mcm_port-port_gid));
+   if (!p_port)
+   continue;
+   if (osm_node_get_type(p_port-p_node) == IB_NODE_TYPE_CA) {
+   p_physp_remote = osm_physp_get_remote(p_port-p_physp);
+   remote_node = osm_physp_get_node_ptr(p_physp_remote);
+   }
+   else {
+   /* for switches - remote switch would be the switch 
itself*/
+   remote_node = osm_physp_get_node_ptr( p_port-p_physp);
+   }
+   /* get the remote switch of the mcmember */
+   remote_sw = remote_node-sw;
+   port_guid = osm_node_get_node_guid(remote_node);
+   if (cl_qmap_get(p_mcast_member_sw_tbl, port_guid) ==
+   cl_qmap_end(p_mcast_member_sw_tbl)) {
+   /* insert switch to table */
+   cl_qmap_insert(p_mcast_member_sw_tbl, port_guid, 
remote_sw-mcast_item);
+   /* New element in the table */
+   if (osm_node_get_type(p_port-p_node) == 
IB_NODE_TYPE_CA) {
+   /* for HCA update the MC count on the remote 
switch */
+   remote_sw-num_of_mcm++;
+   }
+   else
+   remote_sw-is_mc_member = 1; /* the switch is 
MC memeber */
+   }
+   }
+   OSM_LOG_EXIT(sm-p_log);
+}
+
+static void mcast_mgr_destroy_switch_map(osm_sm_t * sm,
+   cl_qmap_t *p_mcast_member_sw_tbl)
+{
+   cl_map_item_t *p_item;
+  

Re: [PATCH] infiniband-diags/saquery: Report SA MAD Class specific status.

2009-11-02 Thread Eli Dorfman (Voltaire)
Sasha Khapyorsky wrote:
 On 10:09 Sun 01 Nov , Eli Dorfman (Voltaire) wrote:
 Report SA MAD Class specific status.

 Fixes wrong error report for SA query status.
 
 I agree with patch, but one comment is below.
 
 Signed-off-by: Eli Dorfman e...@voltaire.com
 ---
  infiniband-diags/src/saquery.c |   41 
 ---
  1 files changed, 37 insertions(+), 4 deletions(-)

 diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c
 index 6c44b63..71823d5 100644
 --- a/infiniband-diags/src/saquery.c
 +++ b/infiniband-diags/src/saquery.c
 @@ -124,6 +124,41 @@ int requested_lid_flag = 0;
  uint64_t requested_guid = 0;
  int requested_guid_flag = 0;
  
 +#define SA_ERR_UNKNOWN IB_SA_MAD_STATUS_PRIO_SUGGESTED
 +
 +const char *ib_sa_error_str[] = {
 +SA_NO_ERROR,
 +SA_ERR_NO_RESOURCES,
 +SA_ERR_REQ_INVALID,
 +SA_ERR_NO_RECORDS,
 +SA_ERR_TOO_MANY_RECORDS,
 +SA_ERR_REQ_INVALID_GID,
 +SA_ERR_REQ_INSUFFICIENT_COMPONENTS,
 +SA_ERR_REQ_DENIED,
 +SA_ERR_STATUS_PRIO_SUGGESTED,
 +SA_ERR_UNKNOWN
 +};
 +
 +static inline const char *ib_sa_err_str(IN uint8_t status)
 +{
 +if (status  SA_ERR_UNKNOWN)
 +status = SA_ERR_UNKNOWN;
 +return (ib_sa_error_str[status]);
 +}
 +
 +static inline void report_err(int status)
 +{
 +int st = status  0xff;
 +
 +if (st)
 +fprintf(stderr, ERROR: Query result returned: %s (0x%x)\n,
 +ib_get_err_str(st), status);
 +st = status  8;
 +if (st)
 +fprintf(stderr, ERROR: Query result returned: %s (0x%x)\n,
 +ib_sa_err_str(st), status);
 
 Such two identical messages with different error strings seems
 confusing to me. Wouldn't it be better to merge it in a single line,
 like:
 
   ERROR: Query result returned 0x: SM blah1 , SA blah2
 
 (or similar), with making each part optional.

I agree.
Is it possible according to the spec to have both SM and SA? (I don't think so)

Eli

 
 Sasha
 
 +}
 +
  static int sa_query(struct bind_handle *h, uint8_t method,
  uint16_t attr, uint32_t mod, uint64_t comp_mask,
  uint64_t sm_key, void *data)
 @@ -794,8 +829,7 @@ static int get_any_records(bind_handle_t h,
  }
  
  if (result.status != IB_SUCCESS) {
 -fprintf(stderr, Query result returned: %s\n,
 -ib_get_err_str(result.status));
 +report_err(result.status);
  return result.status;
  }
  
 @@ -1009,8 +1043,7 @@ static int get_print_class_port_info(bind_handle_t h)
  return ret;
  }
  if (result.status != IB_SUCCESS) {
 -fprintf(stderr, ERROR: Query result returned: %s\n,
 -ib_get_err_str(result.status));
 +report_err(result.status);
  return (result.status);
  }
  dump_results(result, dump_class_port_info);
 -- 
 1.5.5


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] infiniband-diags/saquery: Report SA MAD Class specific status.

2009-11-02 Thread Eli Dorfman (Voltaire)
Report SA MAD Class specific status.

In addition to SM status.

Signed-off-by: Eli Dorfman e...@voltaire.com
---
 infiniband-diags/src/saquery.c |   45 ---
 1 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c
index 6c44b63..9495cd9 100644
--- a/infiniband-diags/src/saquery.c
+++ b/infiniband-diags/src/saquery.c
@@ -124,6 +124,45 @@ int requested_lid_flag = 0;
 uint64_t requested_guid = 0;
 int requested_guid_flag = 0;
 
+#define SA_ERR_UNKNOWN IB_SA_MAD_STATUS_PRIO_SUGGESTED
+
+const char *ib_sa_error_str[] = {
+   SA_NO_ERROR,
+   SA_ERR_NO_RESOURCES,
+   SA_ERR_REQ_INVALID,
+   SA_ERR_NO_RECORDS,
+   SA_ERR_TOO_MANY_RECORDS,
+   SA_ERR_REQ_INVALID_GID,
+   SA_ERR_REQ_INSUFFICIENT_COMPONENTS,
+   SA_ERR_REQ_DENIED,
+   SA_ERR_STATUS_PRIO_SUGGESTED,
+   SA_ERR_UNKNOWN
+};
+
+static inline const char *ib_sa_err_str(IN uint8_t status)
+{
+   if (status  SA_ERR_UNKNOWN)
+   status = SA_ERR_UNKNOWN;
+   return (ib_sa_error_str[status]);
+}
+
+static inline void report_err(int status)
+{
+   int st = status  0xff;
+   char sm_err_str[64] = { 0 };
+   char sa_err_str[64] = { 0 };
+
+   if (st)
+   sprintf(sm_err_str,  SM(%s), ib_get_err_str(st));
+
+   st = status  8;
+   if (st)
+   sprintf(sa_err_str,  SA(%s), ib_sa_err_str(st));
+
+   fprintf(stderr, ERROR: Query result returned 0x%04x, %s%s\n, 
+   status, sm_err_str, sa_err_str);
+}
+
 static int sa_query(struct bind_handle *h, uint8_t method,
uint16_t attr, uint32_t mod, uint64_t comp_mask,
uint64_t sm_key, void *data)
@@ -794,8 +833,7 @@ static int get_any_records(bind_handle_t h,
}
 
if (result.status != IB_SUCCESS) {
-   fprintf(stderr, Query result returned: %s\n,
-   ib_get_err_str(result.status));
+   report_err(result.status);
return result.status;
}
 
@@ -1009,8 +1047,7 @@ static int get_print_class_port_info(bind_handle_t h)
return ret;
}
if (result.status != IB_SUCCESS) {
-   fprintf(stderr, ERROR: Query result returned: %s\n,
-   ib_get_err_str(result.status));
+   report_err(result.status);
return (result.status);
}
dump_results(result, dump_class_port_info);
-- 
1.5.5

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] opensm SA DB dump/restore: SA DB persistence

2009-11-02 Thread Yevgeny Kliteynik
Hi Sasha,

OSM can dump SA DB, but it is done every heavy sweep, and
only when running with high verbosity, which cannot be used
in production.
OSM can also load SA DB from file, but then it also stays
in this static SA configuration.

The following patch series improves SA DB persistence:

1/3: Added option to load SA DB only on the first master
 heavy sweep. After that SA goes to the usual (dynamic)
 mode of operation
2/3: Added option to dump SA DB on every sweep (both light
 and heavy), regardless of the verbosity level.
3/3: Optimize SA DB dumping: added 'dirty' flag to the SA
 struct to denote that SA DB was modified since the
 last dump. SA DB will be dumped only if the DB is dirty.

-- Yevgeny

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] opensm SA DB dump/restore: added option to load SA DB once

2009-11-02 Thread Yevgeny Kliteynik
Added option to load SA DB once: 'sa_db_load_once'.
This will cause OSM to load SA DB once during first master
heavy sweep, and then OSM will move to the usual SA mode.

The option is not exposed through OSM command line,
but only through options file.

Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il
---
 opensm/include/opensm/osm_subnet.h |5 +
 opensm/opensm/osm_sa.c |   20 +++-
 opensm/opensm/osm_subnet.c |7 +++
 3 files changed, 31 insertions(+), 1 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h 
b/opensm/include/opensm/osm_subnet.h
index 0302f91..871a833 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -200,6 +200,7 @@ typedef struct osm_subn_opt {
char *ids_guid_file;
char *guid_routing_order_file;
char *sa_db_file;
+   boolean_t sa_db_load_once;
boolean_t do_mesh_analysis;
boolean_t exit_on_fatal;
boolean_t honor_guid2lid_file;
@@ -411,6 +412,10 @@ typedef struct osm_subn_opt {
 *  sa_db_file
 *  Name of the SA database file.
 *
+*  sa_db_load_once
+*  When TRUE causes sa_db_file to be loaded only at the
+*  first master sweep.
+*
 *  exit_on_fatal
 *  If TRUE (default) - SM will exit on fatal subnet initialization
 *  issues.
diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c
index 4988dec..a5eb796 100644
--- a/opensm/opensm/osm_sa.c
+++ b/opensm/opensm/osm_sa.c
@@ -924,6 +924,12 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm)
return 0;
}

+   if (p_osm-subn.opt.sa_db_load_once  
!p_osm-subn.first_time_master_sweep) {
+   OSM_LOG(p_osm-log, OSM_LOG_VERBOSE,
+   Not first sweep - skip SA DB restore\n);
+   return 0;
+   }
+
file = fopen(file_name, r);
if (!file) {
OSM_LOG(p_osm-log, OSM_LOG_ERROR | OSM_LOG_SYS, ERR 4C02: 
@@ -932,6 +938,10 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm)
return -1;
}

+   OSM_LOG(p_osm-log, OSM_LOG_VERBOSE,
+   Restoring SA DB from file \'%s\'\n,
+   file_name);
+
lineno = 0;

while (fgets(line, sizeof(line) - 1, file) != NULL) {
@@ -1108,7 +1118,15 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm)
}
}

-   if (!rereg_clients)
+   /*
+* If restoring SA DB is required only once, SM should go
+* into the usual mode right after that, which means that
+* client re-registration should be required even after
+* the restore - there is a chance that OSM died right after
+* some MCMember joined MCast group, and his membership
+* didn't make it into the SA DB file.
+*/
+   if (!p_osm-subn.opt.sa_db_load_once  !rereg_clients)
p_osm-subn.opt.no_clients_rereg = TRUE;

 _error:
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index dde83e1..e9cfe9c 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -348,6 +348,7 @@ static const opt_rec_t opt_tbl[] = {
{ ids_guid_file, OPT_OFFSET(ids_guid_file), opts_parse_charp, NULL, 0 
},
{ guid_routing_order_file, OPT_OFFSET(guid_routing_order_file), 
opts_parse_charp, NULL, 0 },
{ sa_db_file, OPT_OFFSET(sa_db_file), opts_parse_charp, NULL, 0 },
+   { sa_db_load_once, OPT_OFFSET(sa_db_load_once), opts_parse_boolean, 
NULL, 1 },
{ do_mesh_analysis, OPT_OFFSET(do_mesh_analysis), opts_parse_boolean, 
NULL, 1 },
{ exit_on_fatal, OPT_OFFSET(exit_on_fatal), opts_parse_boolean, NULL, 
1 },
{ honor_guid2lid_file, OPT_OFFSET(honor_guid2lid_file), 
opts_parse_boolean, NULL, 1 },
@@ -766,6 +767,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt)
p_opt-ids_guid_file = NULL;
p_opt-guid_routing_order_file = NULL;
p_opt-sa_db_file = NULL;
+   p_opt-sa_db_load_once = FALSE;
p_opt-do_mesh_analysis = FALSE;
p_opt-exit_on_fatal = TRUE;
p_opt-enable_quirks = FALSE;
@@ -1478,6 +1480,11 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * 
p_opts)
p_opts-sa_db_file ? p_opts-sa_db_file : null_str);

fprintf(out,
+   # If TRUE causes SA database to be loaded only at\n
+   # the first master sweep\nsa_db_load_once %s\n\n,
+   p_opts-sa_db_load_once ? TRUE : FALSE);
+
+   fprintf(out,
#\n# HANDOVER - MULTIPLE SMs OPTIONS\n#\n
# SM priority used for deciding who is the master\n
# Range goes from 0 (lowest priority) to 15 (highest).\n
-- 
1.5.1.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] opensm SA DB dump/restore: added option to dump SA DB on every sweep

2009-11-02 Thread Yevgeny Kliteynik
Added option to dump SA DB at every sweep (both heavy
and light): 'sa_db_dump'.
The option is not exposed through OSM command line,
but only through options file.

Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il
---
 opensm/include/opensm/osm_subnet.h |5 +
 opensm/opensm/osm_state_mgr.c  |5 -
 opensm/opensm/osm_subnet.c |8 
 3 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h 
b/opensm/include/opensm/osm_subnet.h
index 871a833..7bc59f8 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -201,6 +201,7 @@ typedef struct osm_subn_opt {
char *guid_routing_order_file;
char *sa_db_file;
boolean_t sa_db_load_once;
+   boolean_t sa_db_dump;
boolean_t do_mesh_analysis;
boolean_t exit_on_fatal;
boolean_t honor_guid2lid_file;
@@ -416,6 +417,10 @@ typedef struct osm_subn_opt {
 *  When TRUE causes sa_db_file to be loaded only at the
 *  first master sweep.
 *
+*  sa_db_dump
+*  When TRUE causes OpenSM to dump SA DB at the end of every
+*  light sweep regardless the current verbosity level.
+*
 *  exit_on_fatal
 *  If TRUE (default) - SM will exit on fatal subnet initialization
 *  issues.
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index 315c83e..ef88de4 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -1090,6 +1090,8 @@ static void do_sweep(osm_sm_t * sm)
if (wait_for_pending_transactions(sm-p_subn-p_osm-stats))
return;
if (!sm-p_subn-force_heavy_sweep) {
+   if (sm-p_subn-opt.sa_db_dump)
+   osm_sa_db_file_dump(sm-p_subn-p_osm);
OSM_LOG_MSG_BOX(sm-p_log, OSM_LOG_VERBOSE,
LIGHT SWEEP COMPLETE);
return;
@@ -1354,7 +1356,8 @@ _repeat_discovery:
state_mgr_up_msg(sm);
sm-p_subn-first_time_master_sweep = FALSE;

-   if (osm_log_is_active(sm-p_log, OSM_LOG_VERBOSE))
+   if (osm_log_is_active(sm-p_log, OSM_LOG_VERBOSE) ||
+   sm-p_subn-opt.sa_db_dump)
osm_sa_db_file_dump(sm-p_subn-p_osm);
}

diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index e9cfe9c..8f35a57 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -349,6 +349,7 @@ static const opt_rec_t opt_tbl[] = {
{ guid_routing_order_file, OPT_OFFSET(guid_routing_order_file), 
opts_parse_charp, NULL, 0 },
{ sa_db_file, OPT_OFFSET(sa_db_file), opts_parse_charp, NULL, 0 },
{ sa_db_load_once, OPT_OFFSET(sa_db_load_once), opts_parse_boolean, 
NULL, 1 },
+   { sa_db_dump, OPT_OFFSET(sa_db_dump), opts_parse_boolean, NULL, 1 },
{ do_mesh_analysis, OPT_OFFSET(do_mesh_analysis), opts_parse_boolean, 
NULL, 1 },
{ exit_on_fatal, OPT_OFFSET(exit_on_fatal), opts_parse_boolean, NULL, 
1 },
{ honor_guid2lid_file, OPT_OFFSET(honor_guid2lid_file), 
opts_parse_boolean, NULL, 1 },
@@ -768,6 +769,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt)
p_opt-guid_routing_order_file = NULL;
p_opt-sa_db_file = NULL;
p_opt-sa_db_load_once = FALSE;
+   p_opt-sa_db_dump = FALSE;
p_opt-do_mesh_analysis = FALSE;
p_opt-exit_on_fatal = TRUE;
p_opt-enable_quirks = FALSE;
@@ -1485,6 +1487,12 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * 
p_opts)
p_opts-sa_db_load_once ? TRUE : FALSE);

fprintf(out,
+   # If TRUE causes OpenSM to dump SA database at the end of\n
+   # every light sweep, regardless of the verbosity level\n
+   sa_db_dump %s\n\n,
+   p_opts-sa_db_dump ? TRUE : FALSE);
+
+   fprintf(out,
#\n# HANDOVER - MULTIPLE SMs OPTIONS\n#\n
# SM priority used for deciding who is the master\n
# Range goes from 0 (lowest priority) to 15 (highest).\n
-- 
1.5.1.4

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Return single PathRecord for SubnAdmGet when SGID and/or DGID

2009-11-02 Thread Sasha Khapyorsky
Hi Eli,

On 13:40 Fri 30 Oct , Eli Dorfman wrote:
 From: Eli Dorfman e...@voltaire.com

Please add descriptive change log. It is hard (for me) to just remember
an issue in all details.

 
 Signed-off-by: Eli Dorfman e...@voltaire.com
 ---
  opensm/opensm/osm_sa_path_record.c |   38 ---
  1 files changed, 26 insertions(+), 12 deletions(-)
 
 diff --git a/opensm/opensm/osm_sa_path_record.c 
 b/opensm/opensm/osm_sa_path_record.c
 index f36eb46..0c6621b 100644
 --- a/opensm/opensm/osm_sa_path_record.c
 +++ b/opensm/opensm/osm_sa_path_record.c
 @@ -890,7 +890,7 @@ Exit:
  
  /**
   **/
 -static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
 +static int pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
  IN const osm_madw_t * p_madw,
  IN const osm_port_t * p_req_port,
  IN const osm_port_t * p_src_port,
 @@ -908,7 +908,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
   uint16_t dest_lid_max_ho;
   uint16_t src_lid_ho;
   uint16_t dest_lid_ho;
 - uint32_t path_num;
 + uint32_t path_num = 0;

It is reinitialized later as: 

path_num = cl_qlist_count(p_list);

, one of them is not needed.

   uint8_t preference;
   uintn_t iterations;
   uintn_t src_offset;
 @@ -1019,7 +1019,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
  Preferred paths come first in OpenSM
*/
   preference = 0;
 - path_num = 0;
 + path_num = cl_qlist_count(p_list);

Is this correct?

In this way pr_rcv_get_port_pair_paths() will return a total number of
PRs collected in previous calls too (not for just specific
source/destination call). No?

  
   /* If SubnAdmGet, assume NumbPaths 1 (1.2 erratum) */
   if (p_sa_mad-method != IB_MAD_METHOD_GET)
 @@ -,6 +,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa,
  
  Exit:
   OSM_LOG_EXIT(sa-p_log);
 + return path_num;
  }
  
  /**
 @@ -1314,6 +1315,8 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
 const osm_madw_t * p_madw,
   const cl_qmap_t *p_tbl;
   const osm_port_t *p_dest_port;
   const osm_port_t *p_src_port;
 + const ib_sa_mad_t *p_sa_mad;
 + int   num_paths = 0;
  
   OSM_LOG_ENTER(sa-p_log);
  
 @@ -1326,14 +1329,17 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
 const osm_madw_t * p_madw,
  any check to determine the reversability of the paths.
*/
   p_tbl = sa-p_subn-port_guid_tbl;
 + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
  
   p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl);
   while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
   p_src_port = (osm_port_t *) cl_qmap_head(p_tbl);
   while (p_src_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
 - pr_rcv_get_port_pair_paths(sa, p_madw, requester_port,
 -p_src_port, p_dest_port,
 -p_dgid, comp_mask, p_list);
 + num_paths += pr_rcv_get_port_pair_paths(sa, p_madw, 
 requester_port,
 + p_src_port, 
 p_dest_port,
 + p_dgid, 
 comp_mask, p_list);
 + if (p_sa_mad-method == IB_MAD_METHOD_GET  num_paths 
  1)
 + return;

So it will return with num_paths > 1. Then wouldn't an error (too many
records) be generated by osm_sa_respond() (just similar to as it is
now)? I guess so.

So shouldn't here be something like:

if (p_sa_mad->method == IB_MAD_METHOD_GET &&
cl_qlist_count(p_list) >= 1)
break;

(, and then you don't need to bother with num_paths in
pr_rcv_get_port_pair_paths())?

  
   p_src_port =
   (osm_port_t *) cl_qmap_next(p_src_port-map_item);
 @@ -1358,6 +1364,8 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
 const osm_madw_t * p_madw,
  {
   const cl_qmap_t *p_tbl;
   const osm_port_t *p_port;
 + const ib_sa_mad_t *p_sa_mad;
 + int   num_paths = 0;
  
   OSM_LOG_ENTER(sa-p_log);
  
 @@ -1367,6 +1375,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
 const osm_madw_t * p_madw,
  need to special case that one.
*/
   p_tbl = sa-p_subn-port_guid_tbl;
 + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
  
   if (p_src_port) {
   /*
 @@ -1374,9 +1383,11 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
 const osm_madw_t * p_madw,
*/
   p_port = (osm_port_t *) cl_qmap_head(p_tbl);

Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define

2009-11-02 Thread Hal Rosenstock
On Thu, Oct 22, 2009 at 11:01 AM, Sasha Khapyorsky sas...@voltaire.com wrote:
 On 08:50 Wed 21 Oct     , Hal Rosenstock wrote:

 Also, cosmetic change to OSM_LOG message

 Please don't mix.

 I used Unicast Cache (with upper case starting letters) as for a name.

A nit but any reason for that one message being inconsistent (in terms
of case) with the other log messages there ?

-- Hal



 Signed-off-by: Hal Rosenstock hal.rosenst...@gmail.com

 Applied with changes. Thanks.

 Sasha
 --
 To unsubscribe from this list: send the line unsubscribe linux-rdma in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] opensm: Return single PathRecord for SubnAdmGet with DGID/SGID wild carded

2009-11-02 Thread elid
Return single PathRecord for SubnAdmGet with DGID/SGID wildcarded

Instead of iterating over all ports in the fabric and returning an error 
(TOO_MANY_RECORDS),
when SGID and/or DGID are wild carded return only single PathRecord.

Signed-off-by: Eli Dorfman e...@voltaire.com
---
 opensm/opensm/osm_sa_path_record.c |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_sa_path_record.c 
b/opensm/opensm/osm_sa_path_record.c
index c2ef8c5..b3e1072 100644
--- a/opensm/opensm/osm_sa_path_record.c
+++ b/opensm/opensm/osm_sa_path_record.c
@@ -1313,6 +1313,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
const cl_qmap_t *p_tbl;
const osm_port_t *p_dest_port;
const osm_port_t *p_src_port;
+   const ib_sa_mad_t *p_sa_mad;
 
OSM_LOG_ENTER(sa-p_log);
 
@@ -1325,6 +1326,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
   any check to determine the reversability of the paths.
 */
p_tbl = sa-p_subn-port_guid_tbl;
+   p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
 
p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl);
while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) {
@@ -1333,6 +1335,9 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
pr_rcv_get_port_pair_paths(sa, p_madw, requester_port,
   p_src_port, p_dest_port,
   p_dgid, comp_mask, p_list);
+   if (p_sa_mad-method == IB_MAD_METHOD_GET  
+   cl_qlist_count(p_list)  0)
+   goto Exit;
 
p_src_port =
(osm_port_t *) cl_qmap_next(p_src_port-map_item);
@@ -1342,6 +1347,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
(osm_port_t *) cl_qmap_next(p_dest_port-map_item);
}
 
+Exit:
OSM_LOG_EXIT(sa-p_log);
 }
 
@@ -1357,6 +1363,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
 {
const cl_qmap_t *p_tbl;
const osm_port_t *p_port;
+   const ib_sa_mad_t *p_sa_mad;
 
OSM_LOG_ENTER(sa-p_log);
 
@@ -1366,6 +1373,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
   need to special case that one.
 */
p_tbl = sa-p_subn-port_guid_tbl;
+   p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
 
if (p_src_port) {
/*
@@ -1376,6 +1384,9 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
pr_rcv_get_port_pair_paths(sa, p_madw, requester_port,
   p_src_port, p_port, p_dgid,
   comp_mask, p_list);
+   if (p_sa_mad-method == IB_MAD_METHOD_GET  
+   cl_qlist_count(p_list)  0)
+   break;
p_port = (osm_port_t *) cl_qmap_next(p_port-map_item);
}
} else {
@@ -1387,6 +1398,9 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN 
const osm_madw_t * p_madw,
pr_rcv_get_port_pair_paths(sa, p_madw, requester_port,
   p_port, p_dest_port, p_dgid,
   comp_mask, p_list);
+   if (p_sa_mad-method == IB_MAD_METHOD_GET  
+   cl_qlist_count(p_list)  0)
+   break;
p_port = (osm_port_t *) cl_qmap_next(p_port-map_item);
}
}
-- 
1.5.5

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define

2009-11-02 Thread Sasha Khapyorsky
On 11:31 Mon 02 Nov , Hal Rosenstock wrote:
 
 A nit but any reason for that one message being inconsistent (in terms
 of case) with the other log messages there ?

Other messages are debug printouts and cannot be compared.

Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] infiniband-diags/perfquery.c: Fix extended counter reset mask

2009-11-02 Thread Sasha Khapyorsky
On 11:23 Mon 02 Nov , Hal Rosenstock wrote:
 On Mon, Nov 2, 2009 at 9:03 AM, Sasha Khapyorsky sas...@voltaire.com wrote:
  On 18:16 Fri 30 Oct     , Hal Rosenstock wrote:
 
  Because a PMA implementation which follows the IBA recommendation will
  reject this
 
  Hmm, where did you find such recommendation where stated that if
  reserved bits are not '0' the request should be rejected (I would rather
  expect ignoring of those values at all)?
 
 I sent a subsequent email indicating there was an errata on this and
 that these changes are needed.

Let me understand correctly. Do you mean that proposed patch is not
needed?

Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] infiniband-diags/perfquery.c: Fix extended counter reset mask

2009-11-02 Thread Hal Rosenstock
On Mon, Nov 2, 2009 at 12:13 PM, Sasha Khapyorsky sas...@voltaire.com wrote:
 On 11:23 Mon 02 Nov     , Hal Rosenstock wrote:
 On Mon, Nov 2, 2009 at 9:03 AM, Sasha Khapyorsky sas...@voltaire.com wrote:
  On 18:16 Fri 30 Oct     , Hal Rosenstock wrote:
 
  Because a PMA implementation which follows the IBA recommendation will
  reject this
 
  Hmm, where did you find such recommendation where stated that if
  reserved bits are not '0' the request should be rejected (I would rather
  expect ignoring of those values at all)?

 I sent a subsequent email indicating there was an errata on this and
 that these changes are needed.

 Let me understand correctly. Do you mean that proposed patch is not
 needed?

Yes, it's not strictly required and you typically nix those sorts of things.

-- Hal


 Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define

2009-11-02 Thread Hal Rosenstock
On Mon, Nov 2, 2009 at 12:01 PM, Sasha Khapyorsky sas...@voltaire.com wrote:
 On 11:31 Mon 02 Nov     , Hal Rosenstock wrote:

 A nit but any reason for that one message being inconsistent (in terms
 of case) with the other log messages there ?

 Other messages are debug printouts and cannot be compared.

That seems pretty arbitrary to me. I don't think that's the case with
different levels of printing elsewhere.

-- Hal


 Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] librdmacm/mckey: enforce local binding for unmapped multicast addresses

2009-11-02 Thread Sean Hefty
Sean, using unmapped multicast addresses I see that a different broadcast group
is
created by the SM such that mckey doesn't manage to join the ipv4 broadcast
group

$ ./mckey -M ff12:401b::0:0:0:: -b 10.10.5.62 -p 0x2

Unmapped multicast groups only support the case where the SA has created the
group with the MGID undefined.  The MGID must be in this format:

0xff1 scope 0xA01B

(see figure 196 on page 928 of the spec).  The kernel checks for this specific
address format to see if it needs to convert the address or not.  (The original
feature request for this came from the US national labs, which wanted the
ability to create a group and get back a unique group ID.)

the latter sets the lower 32 bits for this mgid, any idea what can be  done
here?

I thought about this, and once support for AF_IB is added, then the format of
the address becomes clear and the full range of unmapped addresses becomes
available.

I'll add your patch into my git tree - thanks.

- Sean

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] opensm: Return single PathRecord for SubnAdmGet with DGID/SGID wild carded

2009-11-02 Thread Sasha Khapyorsky
On 17:59 Mon 02 Nov , elid wrote:
 Return single PathRecord for SubnAdmGet with DGID/SGID wildcarded
 
 Instead of iterating over all ports in the fabric and returning an error 
 (TOO_MANY_RECORDS),
 when SGID and/or DGID are wild carded return only single PathRecord.
 
 Signed-off-by: Eli Dorfman e...@voltaire.com

Applied. Thanks.

Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] opensm/mcast_tbl: set max_mlid_ho as actually configured mlid

2009-11-02 Thread Sasha Khapyorsky
On 12:18 Mon 02 Nov , Hal Rosenstock wrote:
 
  Yet another (likely even more efficient) approach would be memset()ing
  MFTs in realloc function above requested mlid_offset, then we will be
  able to remove osm_mcast_tbl_clear_mlid() completely.
 
 Isn't mlid clearing done on a per mlid basis rather than based on
 above some mlid (offset) ? Also and perhaps more significantly, an
 mlid can be removed in the middle of a range of mlids. So I don't see
 how clear_mlid can be removed.

Yes, correct, we cannot remove clear_mlid completely.

Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] opensm/mcast_tbl: set max_mlid_ho as actually configured mlid

2009-11-02 Thread Sasha Khapyorsky
On 12:44 Mon 02 Nov , Hal Rosenstock wrote:
 
 clear_mlid could be implemented with memset rather than loop.

Yes, memset() would be better.

Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Can we get daily digests back?

2009-11-02 Thread John Russo
Is there any way to recreate the roll-up digests that we used to get on this 
mailing list?
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] librdmacm/mckey: enforce local binding for unmapped multicast addresses

2009-11-02 Thread Jason Gunthorpe
On Sun, Nov 01, 2009 at 11:31:22AM +0200, Or Gerlitz wrote:

 Sean, using unmapped multicast addresses I see that a different broadcast 
 group is
 created by the SM such that mckey doesn't manage to join the ipv4 broadcast 
 group
 
 $ ./mckey -M ff12:401b::0:0:0:: -b 10.10.5.62 -p 0x2
 
 mckey: joined dgid: ff12:401b::: mlid c00b sl 0

Erm, I'm not sure what is going on by the time things get to the SA,
but the above output shows this kernel bug. The joined DGID for that
AF_INET6 address should be FF12:601b:::

The AF_INET6 -M argument to mckey is not treated as a MGID unless it is
prefixed with FF10:A01B:: (so ugly..)

If you want to join the IPv4 all hosts group I think you need to use
-M 255.255.255.255

Your result does show that something else is wrong too, the group with
MLID 0xC00B should have been MGID ff12:401b::: like mckey reported
..

From 9f3a76deb5bfda0f8243eadfa024eb547c03f583 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe jguntho...@obsidianresearch.com
Date: Mon, 2 Nov 2009 11:23:38 -0700
Subject: [PATCH] RDMA CM: Fix AF_INET6 support in multicast joining

If joining to an AF_INET6 address we need to map the address to a MGID
in the same way as the IP stack. The old code would just fall through to
the IPv4 case and generate garbage.

Signed-off-by: Jason Gunthorpe jguntho...@obsidianresearch.com
---
 drivers/infiniband/core/cma.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

compile tested only.

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a0fa241..1e9a78a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2724,6 +2724,11 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
 0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+   } else if ((addr->sa_family == AF_INET6)) {
+   ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+   if (id_priv->id.ps == RDMA_PS_UDP)
+   mc_map[7] = 0x01;   /* Use RDMA CM signature */
+   *mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
-- 
1.5.4.2

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


QoS in local SA entity: was rdma/cm: support option to allow manually setting IB path

2009-11-02 Thread Sean Hefty
Before enhancing the rdma-cm to support the full feature set of the IB
CM, something which I personally don't see the actual need for (but I
will be happy to get educated what applications will or can migrate to
rdma-cm once this is implemented), how about trying to allow for reduced
QoS scheme also when the entity that resolved this path didn't
consult with the SA?

I think this really needs to be discussed wrt the implementation of the entity
providing the path records.

IB QoS is based on the query providing the SGID, DGID, PKEY, SID, TOS
tuple and the SA returning a SLID, DLID, SL, MTU,  QoS tuple. Now
I'd like to see how can we let the application / querying middleware to
take advantage of the knowledge on what partition it runs and use the SL
associated with the IPv4 (e.g AF_INET rdma-cm ID's) IPoIB broadcast
group. This way, one can still program a QoS scheme at the SA which is
based on partitions.

I think what's needed is a way for the SA to distribute QoS information to the
end nodes, so that the decisions can be made locally.  If someone wants some
sort of dynamic QoS management and is happy using a small cluster, then they can
disable any local SA entities and contact the SA directly.

In the case of ACM, the pkey is embedded in the MGID.  'Something' could tell
the SA to create ACM multicast groups using a specific SL for a given MGID or
pkey in the join request.  That SL would be distributed to the end nodes when
they joined their groups.

Looking at mckey, the user space code (e.g ACM), could just do rdma_bind
to an IP address of an IPoIB NIC that uses this partition and then
rdma_join to an unmapped multicast address which corresponds to the
broadcast group, take the SL and leave the group, makes sense?

The entity that provides the path records cannot depend on calling into the
librdmacm.  The dependency needs to go the other way.

- Sean

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] opensm/osm_mcast_tbl: Fix size of port mask table array

2009-11-02 Thread Hal Rosenstock

Should be IB_MCAST_POSITION_MAX + 1 rather than just IB_MCAST_POSITION_MAX

Signed-off-by: Hal Rosenstock hal.rosenst...@gmail.com
---
diff --git a/opensm/include/opensm/osm_mcast_tbl.h 
b/opensm/include/opensm/osm_mcast_tbl.h
index 6d3f083..0745b5b 100644
--- a/opensm/include/opensm/osm_mcast_tbl.h
+++ b/opensm/include/opensm/osm_mcast_tbl.h
@@ -76,7 +76,7 @@ typedef struct osm_mcast_fwdbl {
uint16_t num_entries;
uint16_t max_mlid_ho;
uint16_t mft_depth;
-   uint16_t(*p_mask_tbl)[][IB_MCAST_POSITION_MAX];
+   uint16_t(*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1];
 } osm_mcast_tbl_t;
 /*
 * FIELDS
@@ -106,7 +106,7 @@ typedef struct osm_mcast_fwdbl {
 *
 *  p_mask_tbl
 *  Pointer to a two dimensional array of port_masks for this 
switch.
-*  The first dimension is MLID, the second dimension is mask 
position.
+*  The first dimension is MLID offset, second dimension is mask 
position.
 *  This pointer is null for switches that do not support multicast.
 *
 * SEE ALSO
diff --git a/opensm/opensm/osm_mcast_tbl.c b/opensm/opensm/osm_mcast_tbl.c
index 818f2e0..c2c5395 100644
--- a/opensm/opensm/osm_mcast_tbl.c
+++ b/opensm/opensm/osm_mcast_tbl.c
@@ -119,7 +119,7 @@ void osm_mcast_tbl_set(IN osm_mcast_tbl_t * p_tbl, IN 
uint16_t mlid_ho,
 int osm_mcast_tbl_realloc(IN osm_mcast_tbl_t * p_tbl, IN uintn_t mlid_offset)
 {
size_t mft_depth, size;
-   uint16_t (*p_mask_tbl)[][IB_MCAST_POSITION_MAX];
+   uint16_t (*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1];
 
if (mlid_offset < p_tbl->mft_depth)
goto done;
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[infiniband-diags] [PATCH] [1/2] remove ibnd_update_node

2009-11-02 Thread Al Chu
Hey Sasha,

After talking to Ira about it, we think it's best to remove
ibnd_update_node.

A) It's not being used.
B) It's probably not implemented properly
C) Some of Ira's original plans for its use require more API functions
to be added, which of course isn't done right now.

So for now, it seems best to just remove it since it's an additional
API function that can lead to confusion.

Al

-- 
Albert Chu
ch...@llnl.gov
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From: Albert Chu ch...@llnl.gov
Date: Thu, 29 Oct 2009 18:56:32 -0700
Subject: [PATCH] remove ibnd_update_node


Signed-off-by: Albert Chu ch...@llnl.gov
---
 .../libibnetdisc/include/infiniband/ibnetdisc.h|3 -
 infiniband-diags/libibnetdisc/src/ibnetdisc.c  |   55 
 2 files changed, 0 insertions(+), 58 deletions(-)

diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h 
b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
index bb5df02..6120453 100644
--- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
+++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
@@ -172,9 +172,6 @@ MAD_EXPORT void ibnd_destroy_fabric(ibnd_fabric_t * fabric);
 MAD_EXPORT ibnd_node_t *ibnd_find_node_guid(ibnd_fabric_t * fabric,
uint64_t guid);
 MAD_EXPORT ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char 
*dr_str);
-MAD_EXPORT ibnd_node_t *ibnd_update_node(struct ibmad_port *ibmad_port,
-ibnd_fabric_t * fabric,
-ibnd_node_t * node);
 
 typedef void (*ibnd_iter_node_func_t) (ibnd_node_t * node, void *user_data);
 MAD_EXPORT void ibnd_iter_nodes(ibnd_fabric_t * fabric,
diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c 
b/infiniband-diags/libibnetdisc/src/ibnetdisc.c
index ebc45ba..ffa35e4 100644
--- a/infiniband-diags/libibnetdisc/src/ibnetdisc.c
+++ b/infiniband-diags/libibnetdisc/src/ibnetdisc.c
@@ -264,61 +264,6 @@ static int _check_ibmad_port(struct ibmad_port *ibmad_port)
return 0;
 }
 
-ibnd_node_t *ibnd_update_node(struct ibmad_port * ibmad_port,
- ibnd_fabric_t * fabric, ibnd_node_t * node)
-{
-   char portinfo_port0[IB_SMP_DATA_SIZE];
-   void *nd = node->nodedesc;
-   int p = 0;
-
-   if (_check_ibmad_port(ibmad_port) < 0)
-   return NULL;
-
-   if (!fabric) {
-   IBND_DEBUG("fabric parameter NULL\n");
-   return NULL;
-   }
-
-   if (!node) {
-   IBND_DEBUG("node parameter NULL\n");
-   return NULL;
-   }
-
-   if (query_node_info(ibmad_port, fabric, node, &(node->path_portid)))
-   return NULL;
-
-   if (!smp_query_via(nd, &(node->path_portid), IB_ATTR_NODE_DESC, 0, 0,
-  ibmad_port))
-   return NULL;
-
-   /* update all the port info's */
-   for (p = 1; p <= node->numports; p++) {
-   get_port_info(ibmad_port, fabric, node->ports[p],
- p, &(node->path_portid));
-   }
-
-   if (node->type != IB_NODE_SWITCH)
-   goto done;
-
-   if (!smp_query_via
-   (portinfo_port0, &(node->path_portid), IB_ATTR_PORT_INFO, 0, 0,
-ibmad_port))
-   return NULL;
-
-   node->smalid = mad_get_field(portinfo_port0, 0, IB_PORT_LID_F);
-   node->smalmc = mad_get_field(portinfo_port0, 0, IB_PORT_LMC_F);
-
-   if (!smp_query_via(node->switchinfo, &(node->path_portid),
-  IB_ATTR_SWITCH_INFO, 0, 0, ibmad_port))
-   node->smaenhsp0 = 0;/* assume base SP0 */
-   else
-   mad_decode_field(node->switchinfo, IB_SW_ENHANCED_PORT0_F,
-&node->smaenhsp0);
-
-done:
-   return node;
-}
-
 ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char *dr_str)
 {
int i = 0;
-- 
1.5.4.5



[infiniband-diags] [PATCH] [2/2] split out scan specific data from ibnd_node_t

2009-11-02 Thread Al Chu
Hey Sasha,

This splits out some scan specific data from ibnd_node_t that doesn't
need to be in the public struct.

Al

-- 
Albert Chu
ch...@llnl.gov
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From: Albert Chu ch...@llnl.gov
Date: Thu, 29 Oct 2009 18:59:26 -0700
Subject: [PATCH] split out scan specific data from ibnd_node_t


Signed-off-by: Albert Chu ch...@llnl.gov
---
 .../libibnetdisc/include/infiniband/ibnetdisc.h|2 -
 infiniband-diags/libibnetdisc/src/chassis.c|   18 --
 infiniband-diags/libibnetdisc/src/ibnetdisc.c  |   32 +++
 infiniband-diags/libibnetdisc/src/internal.h   |8 -
 4 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h 
b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
index 6120453..f1cb00c 100644
--- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
+++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
@@ -48,7 +48,6 @@ struct ibnd_port; /* forward declare */
 typedef struct ibnd_node {
struct ibnd_node *next; /* all node list in fabric */
 
-   ib_portid_t path_portid;/* path from from_node */
int smalid;
int smalmc;
 
@@ -81,7 +80,6 @@ typedef struct ibnd_node {
/* internal use only */
unsigned char ch_found;
struct ibnd_node *htnext;   /* hash table list */
-   struct ibnd_node *dnext;/* nodesdist next */
struct ibnd_node *type_next;/* next based on type */
 } ibnd_node_t;
 
diff --git a/infiniband-diags/libibnetdisc/src/chassis.c 
b/infiniband-diags/libibnetdisc/src/chassis.c
index 15c17d2..3bd0108 100644
--- a/infiniband-diags/libibnetdisc/src/chassis.c
+++ b/infiniband-diags/libibnetdisc/src/chassis.c
@@ -822,6 +822,7 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
int chassisnum = 0;
ibnd_chassis_t *chassis;
ibnd_chassis_t *ch, *ch_next;
+   ibnd_node_scan_t *node_scan;
 
scan->first_chassis = NULL;
scan->current_chassis = NULL;
@@ -832,16 +833,21 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* according to internal connectivity */
/* not very efficient but clear code so... */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext)
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID
 && fill_voltaire_chassis_record(node))
goto cleanup;
+   }
 
/* separate every Voltaire chassis from each other and build linked 
list of them */
/* algorithm: catch spine and find all surrounding nodes */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) != VTR_VENDOR_ID)
continue;
@@ -859,7 +865,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* now make pass on nodes for chassis which are not Voltaire */
/* grouped by common SystemImageGUID */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID)
continue;
@@ -885,7 +893,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* now, make another pass to see which nodes are part of chassis */
/* (defined as chassis->nodecount > 1) */
for (dist = 0; dist <= MAXHOPS;) {
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID)
continue;
diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c 
b/infiniband-diags/libibnetdisc/src/ibnetdisc.c
index ffa35e4..283584b 100644
--- 

Re: [infiniband-diags] [PATCH] [2/2] split out scan specific data from ibnd_node_t

2009-11-02 Thread Al Chu
Hi Sasha,

Oops.  I forgot to free the newly created memory.  Here's a new patch.

Al

On Mon, 2009-11-02 at 11:33 -0800, Al Chu wrote:
 Hey Sasha,
 
 This splits out some scan specific data from ibnd_node_t that doesn't
 need to be in the public struct.
 
 Al
 
-- 
Albert Chu
ch...@llnl.gov
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From: Albert Chu ch...@llnl.gov
Date: Thu, 29 Oct 2009 18:59:26 -0700
Subject: [PATCH] split out scan specific data from ibnd_node_t


Signed-off-by: Albert Chu ch...@llnl.gov
---
 .../libibnetdisc/include/infiniband/ibnetdisc.h|2 -
 infiniband-diags/libibnetdisc/src/chassis.c|   18 +--
 infiniband-diags/libibnetdisc/src/ibnetdisc.c  |   51 +---
 infiniband-diags/libibnetdisc/src/internal.h   |8 +++-
 4 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h 
b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
index 6120453..f1cb00c 100644
--- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
+++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h
@@ -48,7 +48,6 @@ struct ibnd_port; /* forward declare */
 typedef struct ibnd_node {
struct ibnd_node *next; /* all node list in fabric */
 
-   ib_portid_t path_portid;/* path from from_node */
int smalid;
int smalmc;
 
@@ -81,7 +80,6 @@ typedef struct ibnd_node {
/* internal use only */
unsigned char ch_found;
struct ibnd_node *htnext;   /* hash table list */
-   struct ibnd_node *dnext;/* nodesdist next */
struct ibnd_node *type_next;/* next based on type */
 } ibnd_node_t;
 
diff --git a/infiniband-diags/libibnetdisc/src/chassis.c 
b/infiniband-diags/libibnetdisc/src/chassis.c
index 15c17d2..3bd0108 100644
--- a/infiniband-diags/libibnetdisc/src/chassis.c
+++ b/infiniband-diags/libibnetdisc/src/chassis.c
@@ -822,6 +822,7 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
int chassisnum = 0;
ibnd_chassis_t *chassis;
ibnd_chassis_t *ch, *ch_next;
+   ibnd_node_scan_t *node_scan;
 
scan->first_chassis = NULL;
scan->current_chassis = NULL;
@@ -832,16 +833,21 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* according to internal connectivity */
/* not very efficient but clear code so... */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext)
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID
 && fill_voltaire_chassis_record(node))
goto cleanup;
+   }
 
/* separate every Voltaire chassis from each other and build linked 
list of them */
/* algorithm: catch spine and find all surrounding nodes */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) != VTR_VENDOR_ID)
continue;
@@ -859,7 +865,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* now make pass on nodes for chassis which are not Voltaire */
/* grouped by common SystemImageGUID */
for (dist = 0; dist <= fabric->maxhops_discovered; dist++)
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID)
continue;
@@ -885,7 +893,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan)
/* now, make another pass to see which nodes are part of chassis */
/* (defined as chassis->nodecount > 1) */
for (dist = 0; dist <= MAXHOPS;) {
-   for (node = scan->nodesdist[dist]; node; node = node->dnext) {
+   for (node_scan = scan->nodesdist[dist]; node_scan; node_scan = 
node_scan->dnext) {
+   node = node_scan->node;
+
if (mad_get_field(node->info, 0,
  IB_NODE_VENDORID_F) == VTR_VENDOR_ID)
continue;
diff --git 

Re: Crash in bonding

2009-11-02 Thread Or Gerlitz

Pradeep Satyanarayana wrote:
This crash was originally reported against Rhel5.4. However, one can recreate this crash quite easily in OFED-1.5 too. 
I understand that you get the crash when working with the RHEL5.4 
bonding driver, correct? does it happen only with IPoIB devices acting 
as the bonding slaves or also with Ethernet devices? Please note that 
with RHEL 5.4 there's no need to use the ofed provided bonding module, 
moreover, I believe that the distro provided one is more stable and 
up to date in this case. Moving forward, ofed bonding support for newish 
distributions is to be removed. Moni, any reason to support bonding/EL 
5.4 in ofed?


Or.


The steps to recreate the crash are as follows:
1. Run traffic (I used ping) on the IB interfaces through the bond master
2. ifdown ib0
3. ifdown ib1
4. modprobe -r ib_ipoib


--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html