[PATCH] opensm: Multicast root switch calculation
Proposed new algorithm for calculation of root switch for multicast spanning tree. Only edge switches(those connected to hosts) and switches - multicast members themselves are involved in root calculation. This gives improvement, especially on large fabrics, since number of switches usually much less then the number of ports, shared same mcast group. Signed-off-by: Slava Strebkov sla...@voltaire.com --- opensm/include/opensm/osm_switch.h | 14 - opensm/opensm/osm_mcast_mgr.c | 132 2 files changed, 132 insertions(+), 14 deletions(-) diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h index 655491d..6204b37 100644 --- a/opensm/include/opensm/osm_switch.h +++ b/opensm/include/opensm/osm_switch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * @@ -109,6 +109,9 @@ typedef struct osm_switch { unsigned endport_links; unsigned need_update; void *priv; + cl_map_item_t mcast_item; + uint32_t num_of_mcm; + uint8_t is_mc_member; } osm_switch_t; /* * FIELDS @@ -151,6 +154,15 @@ typedef struct osm_switch { * When set indicates that switch was probably reset, so * fwd tables and rest cached data should be flushed * +* mcast_item +* map item for switch in building mcast tree +* +* num_of_mcm +* number of mcast members(ports) connected to switch +* +* is_mc_member +* whether switch is a mcast member itself +* * SEE ALSO * Switch object */ diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c index 0ee689c..c9c93a2 100644 --- a/opensm/opensm/osm_mcast_mgr.c +++ b/opensm/opensm/osm_mcast_mgr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2009 Mellanox Technologies LTD. 
All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. @@ -203,25 +203,132 @@ static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, return (float)max_hops; } +static void mcast_mgr_build_switch_map(osm_sm_t * sm, + const osm_mgrp_t * p_mgrp, + cl_qmap_t *p_mcast_member_sw_tbl) +{ + osm_switch_t*remote_sw; + const osm_mcm_port_t *p_mcm_port; + const cl_qmap_t *p_mcm_tbl; + osm_port_t *p_port; + ib_net64_t port_guid; + osm_physp_t *p_physp_remote; + osm_node_t *remote_node; + + OSM_LOG_ENTER(sm-p_log); + + cl_qmap_init(p_mcast_member_sw_tbl); + p_mcm_tbl = p_mgrp-mcm_port_tbl; + for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl); + p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl); + p_mcm_port = (osm_mcm_port_t *) cl_qmap_next(p_mcm_port-map_item)) { + p_port = (osm_port_t*)osm_get_port_by_guid(sm-p_subn, + ib_gid_get_guid(p_mcm_port-port_gid)); + if (!p_port) + continue; + if (osm_node_get_type(p_port-p_node) == IB_NODE_TYPE_CA) { + p_physp_remote = osm_physp_get_remote(p_port-p_physp); + remote_node = osm_physp_get_node_ptr(p_physp_remote); + } + else { + /* for switches - remote switch would be the switch itself*/ + remote_node = osm_physp_get_node_ptr( p_port-p_physp); + } + /* get the remote switch of the mcmember */ + remote_sw = remote_node-sw; + port_guid = osm_node_get_node_guid(remote_node); + if (cl_qmap_get(p_mcast_member_sw_tbl, port_guid) == + cl_qmap_end(p_mcast_member_sw_tbl)) { + /* insert switch to table */ + cl_qmap_insert(p_mcast_member_sw_tbl, port_guid, remote_sw-mcast_item); + /* New element in the table */ + if (osm_node_get_type(p_port-p_node) == IB_NODE_TYPE_CA) { + /* for HCA update the MC count on the remote switch */ + remote_sw-num_of_mcm++; + } + else + remote_sw-is_mc_member = 1; /* the switch is MC memeber */ + } + } + OSM_LOG_EXIT(sm-p_log); +} + +static void mcast_mgr_destroy_switch_map(osm_sm_t * sm, + cl_qmap_t 
*p_mcast_member_sw_tbl) +{ + cl_map_item_t *p_item; +
Re: [PATCH] infiniband-diags/saquery: Report SA MAD Class specific status.
Sasha Khapyorsky wrote: On 10:09 Sun 01 Nov , Eli Dorfman (Voltaire) wrote: Report SA MAD Class specific status. Fixes wrong error report for SA query status. I agree with patch, but one comment is below. Signed-off-by: Eli Dorfman e...@voltaire.com --- infiniband-diags/src/saquery.c | 41 --- 1 files changed, 37 insertions(+), 4 deletions(-) diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index 6c44b63..71823d5 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -124,6 +124,41 @@ int requested_lid_flag = 0; uint64_t requested_guid = 0; int requested_guid_flag = 0; +#define SA_ERR_UNKNOWN IB_SA_MAD_STATUS_PRIO_SUGGESTED + +const char *ib_sa_error_str[] = { +SA_NO_ERROR, +SA_ERR_NO_RESOURCES, +SA_ERR_REQ_INVALID, +SA_ERR_NO_RECORDS, +SA_ERR_TOO_MANY_RECORDS, +SA_ERR_REQ_INVALID_GID, +SA_ERR_REQ_INSUFFICIENT_COMPONENTS, +SA_ERR_REQ_DENIED, +SA_ERR_STATUS_PRIO_SUGGESTED, +SA_ERR_UNKNOWN +}; + +static inline const char *ib_sa_err_str(IN uint8_t status) +{ +if (status SA_ERR_UNKNOWN) +status = SA_ERR_UNKNOWN; +return (ib_sa_error_str[status]); +} + +static inline void report_err(int status) +{ +int st = status 0xff; + +if (st) +fprintf(stderr, ERROR: Query result returned: %s (0x%x)\n, +ib_get_err_str(st), status); +st = status 8; +if (st) +fprintf(stderr, ERROR: Query result returned: %s (0x%x)\n, +ib_sa_err_str(st), status); Such two identical messages with different error strings seems confusing to me. Wouldn't it be better to merge it in a single line, like: ERROR: Query result returned 0x: SM blah1 , SA blah2 (or similar), with making each part optional. I agree. 
Is it possible according to the spec to have both SM and SA (i don't think so) Eli Sasha +} + static int sa_query(struct bind_handle *h, uint8_t method, uint16_t attr, uint32_t mod, uint64_t comp_mask, uint64_t sm_key, void *data) @@ -794,8 +829,7 @@ static int get_any_records(bind_handle_t h, } if (result.status != IB_SUCCESS) { -fprintf(stderr, Query result returned: %s\n, -ib_get_err_str(result.status)); +report_err(result.status); return result.status; } @@ -1009,8 +1043,7 @@ static int get_print_class_port_info(bind_handle_t h) return ret; } if (result.status != IB_SUCCESS) { -fprintf(stderr, ERROR: Query result returned: %s\n, -ib_get_err_str(result.status)); +report_err(result.status); return (result.status); } dump_results(result, dump_class_port_info); -- 1.5.5 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] infiniband-diags/saquery: Report SA MAD Class specific status.
Report SA MAD Class specific status. In addition to SM status. Signed-off-by: Eli Dorfman e...@voltaire.com --- infiniband-diags/src/saquery.c | 45 --- 1 files changed, 41 insertions(+), 4 deletions(-) diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index 6c44b63..9495cd9 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -124,6 +124,45 @@ int requested_lid_flag = 0; uint64_t requested_guid = 0; int requested_guid_flag = 0; +#define SA_ERR_UNKNOWN IB_SA_MAD_STATUS_PRIO_SUGGESTED + +const char *ib_sa_error_str[] = { + SA_NO_ERROR, + SA_ERR_NO_RESOURCES, + SA_ERR_REQ_INVALID, + SA_ERR_NO_RECORDS, + SA_ERR_TOO_MANY_RECORDS, + SA_ERR_REQ_INVALID_GID, + SA_ERR_REQ_INSUFFICIENT_COMPONENTS, + SA_ERR_REQ_DENIED, + SA_ERR_STATUS_PRIO_SUGGESTED, + SA_ERR_UNKNOWN +}; + +static inline const char *ib_sa_err_str(IN uint8_t status) +{ + if (status SA_ERR_UNKNOWN) + status = SA_ERR_UNKNOWN; + return (ib_sa_error_str[status]); +} + +static inline void report_err(int status) +{ + int st = status 0xff; + char sm_err_str[64] = { 0 }; + char sa_err_str[64] = { 0 }; + + if (st) + sprintf(sm_err_str, SM(%s), ib_get_err_str(st)); + + st = status 8; + if (st) + sprintf(sa_err_str, SA(%s), ib_sa_err_str(st)); + + fprintf(stderr, ERROR: Query result returned 0x%04x, %s%s\n, + status, sm_err_str, sa_err_str); +} + static int sa_query(struct bind_handle *h, uint8_t method, uint16_t attr, uint32_t mod, uint64_t comp_mask, uint64_t sm_key, void *data) @@ -794,8 +833,7 @@ static int get_any_records(bind_handle_t h, } if (result.status != IB_SUCCESS) { - fprintf(stderr, Query result returned: %s\n, - ib_get_err_str(result.status)); + report_err(result.status); return result.status; } @@ -1009,8 +1047,7 @@ static int get_print_class_port_info(bind_handle_t h) return ret; } if (result.status != IB_SUCCESS) { - fprintf(stderr, ERROR: Query result returned: %s\n, - ib_get_err_str(result.status)); + report_err(result.status); return 
(result.status); } dump_results(result, dump_class_port_info); -- 1.5.5 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] opensm SA DB dump/restore: SA DB persistence
Hi Sasha, OSM can dump SA DB, but it is done every heavy sweep, and only when running with high verbosity, which cannot be used in production. OSM can also load SA DB from file, but then it also stays in this static SA configuration. The following patch series improves SA DB persistence: 1/3: Added option to load SA DB only on the first master heavy sweep. After that SA goes to the usual (dynamic) mode of operation 2/3: Added option to dump SA DB on every sweep (both light and heavy), regardless of the verbosity level. 3/3: Optimize SA DB dumping: added 'dirty' flag to the SA struct to denote that SA DB was modified since the last dump. SA DB will be dumped only if the DB is dirty. -- Yevgeny -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] opensm SA DB dump/restore: added option to load SA DB once
Added option to load SA DB once: 'sa_db_load_once'. This will cause OSM to load SA DB once during first master heavy sweep, and then OSM will move to the usual SA mode. The option is not exposed through OSM command line, but only through options file. Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il --- opensm/include/opensm/osm_subnet.h |5 + opensm/opensm/osm_sa.c | 20 +++- opensm/opensm/osm_subnet.c |7 +++ 3 files changed, 31 insertions(+), 1 deletions(-) diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index 0302f91..871a833 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -200,6 +200,7 @@ typedef struct osm_subn_opt { char *ids_guid_file; char *guid_routing_order_file; char *sa_db_file; + boolean_t sa_db_load_once; boolean_t do_mesh_analysis; boolean_t exit_on_fatal; boolean_t honor_guid2lid_file; @@ -411,6 +412,10 @@ typedef struct osm_subn_opt { * sa_db_file * Name of the SA database file. * +* sa_db_load_once +* When TRUE causes sa_db_file to be loaded only at the +* first master sweep. +* * exit_on_fatal * If TRUE (default) - SM will exit on fatal subnet initialization * issues. 
diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c index 4988dec..a5eb796 100644 --- a/opensm/opensm/osm_sa.c +++ b/opensm/opensm/osm_sa.c @@ -924,6 +924,12 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm) return 0; } + if (p_osm-subn.opt.sa_db_load_once !p_osm-subn.first_time_master_sweep) { + OSM_LOG(p_osm-log, OSM_LOG_VERBOSE, + Not first sweep - skip SA DB restore\n); + return 0; + } + file = fopen(file_name, r); if (!file) { OSM_LOG(p_osm-log, OSM_LOG_ERROR | OSM_LOG_SYS, ERR 4C02: @@ -932,6 +938,10 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm) return -1; } + OSM_LOG(p_osm-log, OSM_LOG_VERBOSE, + Restoring SA DB from file \'%s\'\n, + file_name); + lineno = 0; while (fgets(line, sizeof(line) - 1, file) != NULL) { @@ -1108,7 +1118,15 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm) } } - if (!rereg_clients) + /* +* If restoring SA DB is required only once, SM should go +* into the usual mode right after that, which means that +* client re-registration should be required even after +* the restore - there is a chance that OSM died right after +* some MCMember joined MCast group, and his membership +* didn't make it into the SA DB file. 
+*/ + if (!p_osm-subn.opt.sa_db_load_once !rereg_clients) p_osm-subn.opt.no_clients_rereg = TRUE; _error: diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index dde83e1..e9cfe9c 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -348,6 +348,7 @@ static const opt_rec_t opt_tbl[] = { { ids_guid_file, OPT_OFFSET(ids_guid_file), opts_parse_charp, NULL, 0 }, { guid_routing_order_file, OPT_OFFSET(guid_routing_order_file), opts_parse_charp, NULL, 0 }, { sa_db_file, OPT_OFFSET(sa_db_file), opts_parse_charp, NULL, 0 }, + { sa_db_load_once, OPT_OFFSET(sa_db_load_once), opts_parse_boolean, NULL, 1 }, { do_mesh_analysis, OPT_OFFSET(do_mesh_analysis), opts_parse_boolean, NULL, 1 }, { exit_on_fatal, OPT_OFFSET(exit_on_fatal), opts_parse_boolean, NULL, 1 }, { honor_guid2lid_file, OPT_OFFSET(honor_guid2lid_file), opts_parse_boolean, NULL, 1 }, @@ -766,6 +767,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt) p_opt-ids_guid_file = NULL; p_opt-guid_routing_order_file = NULL; p_opt-sa_db_file = NULL; + p_opt-sa_db_load_once = FALSE; p_opt-do_mesh_analysis = FALSE; p_opt-exit_on_fatal = TRUE; p_opt-enable_quirks = FALSE; @@ -1478,6 +1480,11 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) p_opts-sa_db_file ? p_opts-sa_db_file : null_str); fprintf(out, + # If TRUE causes SA database to be loaded only at\n + # the first master sweep\nsa_db_load_once %s\n\n, + p_opts-sa_db_load_once ? TRUE : FALSE); + + fprintf(out, #\n# HANDOVER - MULTIPLE SMs OPTIONS\n#\n # SM priority used for deciding who is the master\n # Range goes from 0 (lowest priority) to 15 (highest).\n -- 1.5.1.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] opensm SA DB dump/restore: added option to dump SA DB on every sweep
Added option to dump SA DB at every sweep (both heavy and light): 'sa_db_dump'. The option is not exposed through OSM command line, but only through options file. Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il --- opensm/include/opensm/osm_subnet.h |5 + opensm/opensm/osm_state_mgr.c |5 - opensm/opensm/osm_subnet.c |8 3 files changed, 17 insertions(+), 1 deletions(-) diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index 871a833..7bc59f8 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -201,6 +201,7 @@ typedef struct osm_subn_opt { char *guid_routing_order_file; char *sa_db_file; boolean_t sa_db_load_once; + boolean_t sa_db_dump; boolean_t do_mesh_analysis; boolean_t exit_on_fatal; boolean_t honor_guid2lid_file; @@ -416,6 +417,10 @@ typedef struct osm_subn_opt { * When TRUE causes sa_db_file to be loaded only at the * first master sweep. * +* sa_db_dump +* When TRUE causes OpenSM to dump SA DB at the end of every +* light sweep regardless the current verbosity level. +* * exit_on_fatal * If TRUE (default) - SM will exit on fatal subnet initialization * issues. 
diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 315c83e..ef88de4 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -1090,6 +1090,8 @@ static void do_sweep(osm_sm_t * sm) if (wait_for_pending_transactions(sm-p_subn-p_osm-stats)) return; if (!sm-p_subn-force_heavy_sweep) { + if (sm-p_subn-opt.sa_db_dump) + osm_sa_db_file_dump(sm-p_subn-p_osm); OSM_LOG_MSG_BOX(sm-p_log, OSM_LOG_VERBOSE, LIGHT SWEEP COMPLETE); return; @@ -1354,7 +1356,8 @@ _repeat_discovery: state_mgr_up_msg(sm); sm-p_subn-first_time_master_sweep = FALSE; - if (osm_log_is_active(sm-p_log, OSM_LOG_VERBOSE)) + if (osm_log_is_active(sm-p_log, OSM_LOG_VERBOSE) || + sm-p_subn-opt.sa_db_dump) osm_sa_db_file_dump(sm-p_subn-p_osm); } diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index e9cfe9c..8f35a57 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -349,6 +349,7 @@ static const opt_rec_t opt_tbl[] = { { guid_routing_order_file, OPT_OFFSET(guid_routing_order_file), opts_parse_charp, NULL, 0 }, { sa_db_file, OPT_OFFSET(sa_db_file), opts_parse_charp, NULL, 0 }, { sa_db_load_once, OPT_OFFSET(sa_db_load_once), opts_parse_boolean, NULL, 1 }, + { sa_db_dump, OPT_OFFSET(sa_db_dump), opts_parse_boolean, NULL, 1 }, { do_mesh_analysis, OPT_OFFSET(do_mesh_analysis), opts_parse_boolean, NULL, 1 }, { exit_on_fatal, OPT_OFFSET(exit_on_fatal), opts_parse_boolean, NULL, 1 }, { honor_guid2lid_file, OPT_OFFSET(honor_guid2lid_file), opts_parse_boolean, NULL, 1 }, @@ -768,6 +769,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt) p_opt-guid_routing_order_file = NULL; p_opt-sa_db_file = NULL; p_opt-sa_db_load_once = FALSE; + p_opt-sa_db_dump = FALSE; p_opt-do_mesh_analysis = FALSE; p_opt-exit_on_fatal = TRUE; p_opt-enable_quirks = FALSE; @@ -1485,6 +1487,12 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) p_opts-sa_db_load_once ? 
TRUE : FALSE); fprintf(out, + # If TRUE causes OpenSM to dump SA database at the end of\n + # every light sweep, regardless of the verbosity level\n + sa_db_dump %s\n\n, + p_opts-sa_db_dump ? TRUE : FALSE); + + fprintf(out, #\n# HANDOVER - MULTIPLE SMs OPTIONS\n#\n # SM priority used for deciding who is the master\n # Range goes from 0 (lowest priority) to 15 (highest).\n -- 1.5.1.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Return single PathRecord for SubnAdmGet when SGID and/or DGID
Hi Eli, On 13:40 Fri 30 Oct , Eli Dorfman wrote: From: Eli Dorfman e...@voltaire.com Please add descriptive change log. It is hard (for me) to just remember an issue in all details. Signed-off-by: Eli Dorfman e...@voltaire.com --- opensm/opensm/osm_sa_path_record.c | 38 --- 1 files changed, 26 insertions(+), 12 deletions(-) diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index f36eb46..0c6621b 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -890,7 +890,7 @@ Exit: /** **/ -static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, +static int pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, IN const osm_port_t * p_req_port, IN const osm_port_t * p_src_port, @@ -908,7 +908,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, uint16_t dest_lid_max_ho; uint16_t src_lid_ho; uint16_t dest_lid_ho; - uint32_t path_num; + uint32_t path_num = 0; It is reinitialized later as: path_num = cl_qlist_count(p_list); , one of them is not needed. uint8_t preference; uintn_t iterations; uintn_t src_offset; @@ -1019,7 +1019,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, Preferred paths come first in OpenSM */ preference = 0; - path_num = 0; + path_num = cl_qlist_count(p_list); Is this correct? In this way pr_rcv_get_port_pair_paths() will return a total number of PRs collected in previous calls too (not for just specific source/destination call). No? 
/* If SubnAdmGet, assume NumbPaths 1 (1.2 erratum) */ if (p_sa_mad-method != IB_MAD_METHOD_GET) @@ -,6 +,7 @@ static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, Exit: OSM_LOG_EXIT(sa-p_log); + return path_num; } /** @@ -1314,6 +1315,8 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, const cl_qmap_t *p_tbl; const osm_port_t *p_dest_port; const osm_port_t *p_src_port; + const ib_sa_mad_t *p_sa_mad; + int num_paths = 0; OSM_LOG_ENTER(sa-p_log); @@ -1326,14 +1329,17 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, any check to determine the reversability of the paths. */ p_tbl = sa-p_subn-port_guid_tbl; + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl); while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) { p_src_port = (osm_port_t *) cl_qmap_head(p_tbl); while (p_src_port != (osm_port_t *) cl_qmap_end(p_tbl)) { - pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, -p_src_port, p_dest_port, -p_dgid, comp_mask, p_list); + num_paths += pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, + p_src_port, p_dest_port, + p_dgid, comp_mask, p_list); + if (p_sa_mad-method == IB_MAD_METHOD_GET num_paths 1) + return; So it will return with num_paths 1. Then wouldn't an error (too many records) be generated by osm_sa_respond() (just similar to as it is now)? I guess so. So shouldn't here be something like: if (p_sa_mad-method == IB_MAD_METHOD_GET cl_qlist_count(p_list) = 1) break; (, and then you don't need to bother with num_paths in pr_rcv_get_port_pair_paths())? 
p_src_port = (osm_port_t *) cl_qmap_next(p_src_port-map_item); @@ -1358,6 +1364,8 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, { const cl_qmap_t *p_tbl; const osm_port_t *p_port; + const ib_sa_mad_t *p_sa_mad; + int num_paths = 0; OSM_LOG_ENTER(sa-p_log); @@ -1367,6 +1375,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, need to special case that one. */ p_tbl = sa-p_subn-port_guid_tbl; + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); if (p_src_port) { /* @@ -1374,9 +1383,11 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, */ p_port = (osm_port_t *) cl_qmap_head(p_tbl);
Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define
On Thu, Oct 22, 2009 at 11:01 AM, Sasha Khapyorsky sas...@voltaire.com wrote: On 08:50 Wed 21 Oct , Hal Rosenstock wrote: Also, cosmetic change to OSM_LOG message Please don't mix. I used Unicast Cache (with upper case starting letters) as for a name. A nit but any reason for that one message being inconsistent (in terms of case) with the other log messages there ? -- Hal Signed-off-by: Hal Rosenstock hal.rosenst...@gmail.com Applied with changes. Thanks. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] opensm: Return single PathRecord for SubnAdmGet with DGID/SGID wild carded
Return single PathRecord for SubnAdmGet with DGID/SGID wildcarded Instead of iterating over all ports in the fabric and returning an error (TOO_MANY_RECORDS), when SGID and/or DGID are wild carded return only single PathRecord. Signed-off-by: Eli Dorfman e...@voltaire.com --- opensm/opensm/osm_sa_path_record.c | 14 ++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index c2ef8c5..b3e1072 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -1313,6 +1313,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, const cl_qmap_t *p_tbl; const osm_port_t *p_dest_port; const osm_port_t *p_src_port; + const ib_sa_mad_t *p_sa_mad; OSM_LOG_ENTER(sa-p_log); @@ -1325,6 +1326,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, any check to determine the reversability of the paths. */ p_tbl = sa-p_subn-port_guid_tbl; + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_dest_port = (osm_port_t *) cl_qmap_head(p_tbl); while (p_dest_port != (osm_port_t *) cl_qmap_end(p_tbl)) { @@ -1333,6 +1335,9 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, p_src_port, p_dest_port, p_dgid, comp_mask, p_list); + if (p_sa_mad-method == IB_MAD_METHOD_GET + cl_qlist_count(p_list) 0) + goto Exit; p_src_port = (osm_port_t *) cl_qmap_next(p_src_port-map_item); @@ -1342,6 +1347,7 @@ static void pr_rcv_process_world(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, (osm_port_t *) cl_qmap_next(p_dest_port-map_item); } +Exit: OSM_LOG_EXIT(sa-p_log); } @@ -1357,6 +1363,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, { const cl_qmap_t *p_tbl; const osm_port_t *p_port; + const ib_sa_mad_t *p_sa_mad; OSM_LOG_ENTER(sa-p_log); @@ -1366,6 +1373,7 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const 
osm_madw_t * p_madw, need to special case that one. */ p_tbl = sa-p_subn-port_guid_tbl; + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); if (p_src_port) { /* @@ -1376,6 +1384,9 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, p_src_port, p_port, p_dgid, comp_mask, p_list); + if (p_sa_mad-method == IB_MAD_METHOD_GET + cl_qlist_count(p_list) 0) + break; p_port = (osm_port_t *) cl_qmap_next(p_port-map_item); } } else { @@ -1387,6 +1398,9 @@ static void pr_rcv_process_half(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, p_port, p_dest_port, p_dgid, comp_mask, p_list); + if (p_sa_mad-method == IB_MAD_METHOD_GET + cl_qlist_count(p_list) 0) + break; p_port = (osm_port_t *) cl_qmap_next(p_port-map_item); } } -- 1.5.5 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define
On 11:31 Mon 02 Nov , Hal Rosenstock wrote: A nit but any reason for that one message being inconsistent (in terms of case) with the other log messages there ? Other messages are debug printouts and cannot be compared. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] infiniband-diags/perfquery.c: Fix extended counter reset mask
On 11:23 Mon 02 Nov , Hal Rosenstock wrote: On Mon, Nov 2, 2009 at 9:03 AM, Sasha Khapyorsky sas...@voltaire.com wrote: On 18:16 Fri 30 Oct , Hal Rosenstock wrote: Because a PMA implementation which follows the IBA recommendation will reject this Hmm, where did you find such a recommendation stating that if reserved bits are not '0' the request should be rejected (I would rather expect those values to be ignored altogether)? I sent a subsequent email indicating there was an erratum on this and that these changes are needed. Let me understand correctly. Do you mean that the proposed patch is not needed? Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] infiniband-diags/perfquery.c: Fix extended counter reset mask
On Mon, Nov 2, 2009 at 12:13 PM, Sasha Khapyorsky sas...@voltaire.com wrote: On 11:23 Mon 02 Nov , Hal Rosenstock wrote: On Mon, Nov 2, 2009 at 9:03 AM, Sasha Khapyorsky sas...@voltaire.com wrote: On 18:16 Fri 30 Oct , Hal Rosenstock wrote: Because a PMA implementation which follows the IBA recommendation will reject this Hmm, where did you find such a recommendation stating that if reserved bits are not '0' the request should be rejected (I would rather expect those values to be ignored altogether)? I sent a subsequent email indicating there was an erratum on this and that these changes are needed. Let me understand correctly. Do you mean that the proposed patch is not needed? Yes, it's not strictly required and you typically nix those sorts of things. -- Hal Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm/osm_ucast_cache.c: Eliminate unneeded define
On Mon, Nov 2, 2009 at 12:01 PM, Sasha Khapyorsky sas...@voltaire.com wrote: On 11:31 Mon 02 Nov , Hal Rosenstock wrote: A nit but any reason for that one message being inconsistent (in terms of case) with the other log messages there ? Other messages are debug printouts and cannot be compared. That seems pretty arbitrary to me. I don't think that's the case with different levels of printing elsewhere. -- Hal Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] librdmacm/mckey: enforce local binding for unmapped multicast addresses
Sean, using unmapped multicast addresses I see that a different broadcast group is created by the SM such that mckey doesn't manage to join the ipv4 broadcast group $ ./mckey -M ff12:401b::0:0:0:: -b 10.10.5.62 -p 0x2 Unmapped multicast groups only support the case where the SA has created the group with the MGID undefined. The MGID must be in this format: 0xff1 scope 0xA01B (see figure 196 on page 928 of the spec). The kernel checks for this specific address format to see if it needs to convert the address or not. (The original feature request for this came from the US national labs, which wanted the ability to create a group and get back a unique group ID.) the latter sets the lower 32 bits for this mgid, any idea what can be done here? I thought about this, and once support for AF_IB is added, then the format of the address becomes clear and the full range of unmapped addresses becomes available. I'll add your patch into my git tree - thanks. - Sean -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] opensm: Return single PathRecord for SubnAdmGet with DGID/SGID wild carded
On 17:59 Mon 02 Nov , elid wrote: Return single PathRecord for SubnAdmGet with DGID/SGID wildcarded Instead of iterating over all ports in the fabric and returning an error (TOO_MANY_RECORDS), when SGID and/or DGID are wild carded return only single PathRecord. Signed-off-by: Eli Dorfman e...@voltaire.com Applied. Thanks. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm/mcast_tbl: set max_mlid_ho as actually configured mlid
On 12:18 Mon 02 Nov , Hal Rosenstock wrote: Yet another (likely even more efficient) approach would be memset()ing MFTs in realloc function above requested mlid_offset, then we will be able to remove osm_mcast_tbl_clear_mlid() completely. Isn't mlid clearing done on a per mlid basis rather than based on above some mlid (offset) ? Also and perhaps more significantly, an mlid can be removed in the middle of a range of mlids. So I don't see how clear_mlid can be removed. Yes, correct, we cannot remove clear_mlid completely. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm/mcast_tbl: set max_mlid_ho as actually configured mlid
On 12:44 Mon 02 Nov , Hal Rosenstock wrote: clear_mlid could be implemented with memset rather than loop. Yes, memset() would be better. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Can we get daily digests back?
Is there any way to recreate the roll-up digests that we used to get on this mailing list? -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] librdmacm/mckey: enforce local binding for unmapped multicast addresses
On Sun, Nov 01, 2009 at 11:31:22AM +0200, Or Gerlitz wrote: Sean, using unmapped multicast addresses I see that a different broadcast group is created by the SM such that mckey doesn't manage to join the ipv4 broadcast group $ ./mckey -M ff12:401b::0:0:0:: -b 10.10.5.62 -p 0x2 mckey: joined dgid: ff12:401b::: mlid c00b sl 0 Erm, I'm not sure what is going on by the time things get to the SA, but the above output shows this kernel bug. The joined DGID for that AF_INET6 address should be FF12:601b::: The AF_INET6 -M argument to mckey is not treated as a MGID unless it is prefixed with FF10:A01B:: (so ugly..) If you want to join the IPv4 all hosts group I think you need to use -M 255.255.255.255 Your result does show that something else is wrong too, the group with MLID 0xC00B should have been MGID ff12:401b::: like mckey reported .. From 9f3a76deb5bfda0f8243eadfa024eb547c03f583 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe jguntho...@obsidianresearch.com Date: Mon, 2 Nov 2009 11:23:38 -0700 Subject: [PATCH] RDMA CM: Fix AF_INET6 support in multicast joining If joining to an AF_INET6 address we need to map the address to a MGID in the same way as the IP stack. The old code would just fall through to the IPv4 case and generate garbage. Signed-off-by: Jason Gunthorpe jguntho...@obsidianresearch.com --- drivers/infiniband/core/cma.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) compile tested only. diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a0fa241..1e9a78a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2724,6 +2724,11 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. 
*/ memcpy(mgid, sin6-sin6_addr, sizeof *mgid); + } else if ((addr-sa_family == AF_INET6)) { + ipv6_ib_mc_map(sin6-sin6_addr, dev_addr-broadcast, mc_map); + if (id_priv-id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + *mgid = *(union ib_gid *) (mc_map + 4); } else { ip_ib_mc_map(sin-sin_addr.s_addr, dev_addr-broadcast, mc_map); if (id_priv-id.ps == RDMA_PS_UDP) -- 1.5.4.2 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
QoS in local SA entity: was rdma/cm: support option to allow manually setting IB path
Before enhancing the rdma-cm to support the full feature set of the IB CM, something which I personally don't see the actual need for (but I will be happy to get educated what applications will or can migrate to rdma-cm once this is implemented), how about trying to allow for a reduced QoS scheme also when the entity that resolved this path didn't consult with the SA? I think this really needs to be discussed wrt the implementation of the entity providing the path records. IB QoS is based on the query providing the SGID, DGID, PKEY, SID, TOS tuple and the SA returning a SLID, DLID, SL, MTU, QoS tuple. Now I'd like to see how we can let the application / querying middleware to take advantage of the knowledge on what partition it runs and use the SL associated with the IPv4 (e.g AF_INET rdma-cm ID's) IPoIB broadcast group. This way, one can still program a QoS scheme at the SA which is based on partitions. I think what's needed is a way for the SA to distribute QoS information to the end nodes, so that the decisions can be made locally. If someone wants some sort of dynamic QoS management and is happy using a small cluster, then they can disable any local SA entities and contact the SA directly. In the case of ACM, the pkey is embedded in the MGID. 'Something' could tell the SA to create ACM multicast groups using a specific SL for a given MGID or pkey in the join request. That SL would be distributed to the end nodes when they joined their groups. Looking at mckey, the user space code (e.g ACM), could just do rdma_bind to an IP address of an IPoIB NIC that uses this partition and then rdma_join to an unmapped multicast address which corresponds to the broadcast group, take the SL and leave the group, makes sense? The entity that provides the path records cannot depend on calling into the librdmacm. The dependency needs to go the other way. 
- Sean -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] opensm/osm_mcast_tbl: Fix size of port mask table array
Should be IB_MCAST_POSITION_MAX + 1 rather than just IB_MCAST_POSITION_MAX Signed-off-by: Hal Rosenstock hal.rosenst...@gmail.com --- diff --git a/opensm/include/opensm/osm_mcast_tbl.h b/opensm/include/opensm/osm_mcast_tbl.h index 6d3f083..0745b5b 100644 --- a/opensm/include/opensm/osm_mcast_tbl.h +++ b/opensm/include/opensm/osm_mcast_tbl.h @@ -76,7 +76,7 @@ typedef struct osm_mcast_fwdbl { uint16_t num_entries; uint16_t max_mlid_ho; uint16_t mft_depth; - uint16_t(*p_mask_tbl)[][IB_MCAST_POSITION_MAX]; + uint16_t(*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1]; } osm_mcast_tbl_t; /* * FIELDS @@ -106,7 +106,7 @@ typedef struct osm_mcast_fwdbl { * * p_mask_tbl * Pointer to a two dimensional array of port_masks for this switch. -* The first dimension is MLID, the second dimension is mask position. +* The first dimension is MLID offset, second dimension is mask position. * This pointer is null for switches that do not support multicast. * * SEE ALSO diff --git a/opensm/opensm/osm_mcast_tbl.c b/opensm/opensm/osm_mcast_tbl.c index 818f2e0..c2c5395 100644 --- a/opensm/opensm/osm_mcast_tbl.c +++ b/opensm/opensm/osm_mcast_tbl.c @@ -119,7 +119,7 @@ void osm_mcast_tbl_set(IN osm_mcast_tbl_t * p_tbl, IN uint16_t mlid_ho, int osm_mcast_tbl_realloc(IN osm_mcast_tbl_t * p_tbl, IN uintn_t mlid_offset) { size_t mft_depth, size; - uint16_t (*p_mask_tbl)[][IB_MCAST_POSITION_MAX]; + uint16_t (*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1]; if (mlid_offset p_tbl-mft_depth) goto done; -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[infiniband-diags] [PATCH] [1/2] remove ibnd_update_node
Hey Sasha, After talking to Ira about it, we think it's best to remove ibnd_update_node. A) It's not being used. B) It's probably not implemented properly C) Some of Ira's original plans for its use require more API functions to be added, which of course isn't done right now. So for now, it seems best to just remove it since it's an additional API function that can lead to confusion. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 29 Oct 2009 18:56:32 -0700 Subject: [PATCH] remove ibnd_update_node Signed-off-by: Albert Chu ch...@llnl.gov --- .../libibnetdisc/include/infiniband/ibnetdisc.h|3 - infiniband-diags/libibnetdisc/src/ibnetdisc.c | 55 2 files changed, 0 insertions(+), 58 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index bb5df02..6120453 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -172,9 +172,6 @@ MAD_EXPORT void ibnd_destroy_fabric(ibnd_fabric_t * fabric); MAD_EXPORT ibnd_node_t *ibnd_find_node_guid(ibnd_fabric_t * fabric, uint64_t guid); MAD_EXPORT ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char *dr_str); -MAD_EXPORT ibnd_node_t *ibnd_update_node(struct ibmad_port *ibmad_port, -ibnd_fabric_t * fabric, -ibnd_node_t * node); typedef void (*ibnd_iter_node_func_t) (ibnd_node_t * node, void *user_data); MAD_EXPORT void ibnd_iter_nodes(ibnd_fabric_t * fabric, diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c b/infiniband-diags/libibnetdisc/src/ibnetdisc.c index ebc45ba..ffa35e4 100644 --- a/infiniband-diags/libibnetdisc/src/ibnetdisc.c +++ b/infiniband-diags/libibnetdisc/src/ibnetdisc.c @@ -264,61 +264,6 @@ static int _check_ibmad_port(struct ibmad_port *ibmad_port) return 0; } -ibnd_node_t *ibnd_update_node(struct ibmad_port * 
ibmad_port, - ibnd_fabric_t * fabric, ibnd_node_t * node) -{ - char portinfo_port0[IB_SMP_DATA_SIZE]; - void *nd = node-nodedesc; - int p = 0; - - if (_check_ibmad_port(ibmad_port) 0) - return NULL; - - if (!fabric) { - IBND_DEBUG(fabric parameter NULL\n); - return NULL; - } - - if (!node) { - IBND_DEBUG(node parameter NULL\n); - return NULL; - } - - if (query_node_info(ibmad_port, fabric, node, (node-path_portid))) - return NULL; - - if (!smp_query_via(nd, (node-path_portid), IB_ATTR_NODE_DESC, 0, 0, - ibmad_port)) - return NULL; - - /* update all the port info's */ - for (p = 1; p = node-numports; p++) { - get_port_info(ibmad_port, fabric, node-ports[p], - p, (node-path_portid)); - } - - if (node-type != IB_NODE_SWITCH) - goto done; - - if (!smp_query_via - (portinfo_port0, (node-path_portid), IB_ATTR_PORT_INFO, 0, 0, -ibmad_port)) - return NULL; - - node-smalid = mad_get_field(portinfo_port0, 0, IB_PORT_LID_F); - node-smalmc = mad_get_field(portinfo_port0, 0, IB_PORT_LMC_F); - - if (!smp_query_via(node-switchinfo, (node-path_portid), - IB_ATTR_SWITCH_INFO, 0, 0, ibmad_port)) - node-smaenhsp0 = 0;/* assume base SP0 */ - else - mad_decode_field(node-switchinfo, IB_SW_ENHANCED_PORT0_F, -node-smaenhsp0); - -done: - return node; -} - ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char *dr_str) { int i = 0; -- 1.5.4.5
[infiniband-diags] [PATCH] [2/2] split out scan specific data from ibnd_node_t
Hey Sasha, This splits out some scan specific data from ibnd_node_t that doesn't need to be in the public struct. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 29 Oct 2009 18:59:26 -0700 Subject: [PATCH] split out scan specific data from ibnd_node_t Signed-off-by: Albert Chu ch...@llnl.gov --- .../libibnetdisc/include/infiniband/ibnetdisc.h|2 - infiniband-diags/libibnetdisc/src/chassis.c| 18 -- infiniband-diags/libibnetdisc/src/ibnetdisc.c | 32 +++ infiniband-diags/libibnetdisc/src/internal.h |8 - 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index 6120453..f1cb00c 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -48,7 +48,6 @@ struct ibnd_port; /* forward declare */ typedef struct ibnd_node { struct ibnd_node *next; /* all node list in fabric */ - ib_portid_t path_portid;/* path from from_node */ int smalid; int smalmc; @@ -81,7 +80,6 @@ typedef struct ibnd_node { /* internal use only */ unsigned char ch_found; struct ibnd_node *htnext; /* hash table list */ - struct ibnd_node *dnext;/* nodesdist next */ struct ibnd_node *type_next;/* next based on type */ } ibnd_node_t; diff --git a/infiniband-diags/libibnetdisc/src/chassis.c b/infiniband-diags/libibnetdisc/src/chassis.c index 15c17d2..3bd0108 100644 --- a/infiniband-diags/libibnetdisc/src/chassis.c +++ b/infiniband-diags/libibnetdisc/src/chassis.c @@ -822,6 +822,7 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) int chassisnum = 0; ibnd_chassis_t *chassis; ibnd_chassis_t *ch, *ch_next; + ibnd_node_scan_t *node_scan; scan-first_chassis = NULL; scan-current_chassis = NULL; @@ -832,16 +833,21 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t 
*scan) /* according to internal connectivity */ /* not very efficient but clear code so... */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID fill_voltaire_chassis_record(node)) goto cleanup; + } /* separate every Voltaire chassis from each other and build linked list of them */ /* algorithm: catch spine and find all surrounding nodes */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) != VTR_VENDOR_ID) continue; @@ -859,7 +865,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) /* now make pass on nodes for chassis which are not Voltaire */ /* grouped by common SystemImageGUID */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID) continue; @@ -885,7 +893,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) /* now, make another pass to see which nodes are part of chassis */ /* (defined as chassis-nodecount 1) */ for (dist = 0; dist = MAXHOPS;) { - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID) continue; diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c b/infiniband-diags/libibnetdisc/src/ibnetdisc.c index ffa35e4..283584b 
100644 ---
Re: [infiniband-diags] [PATCH] [2/2] split out scan specific data from ibnd_node_t
Hi Sasha, Oops. I forgot to free the newly created memory. Here's a new patch. Al On Mon, 2009-11-02 at 11:33 -0800, Al Chu wrote: Hey Sasha, This splits out some scan specific data from ibnd_node_t that doesn't need to be in the public struct. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Thu, 29 Oct 2009 18:59:26 -0700 Subject: [PATCH] split out scan specific data from ibnd_node_t Signed-off-by: Albert Chu ch...@llnl.gov --- .../libibnetdisc/include/infiniband/ibnetdisc.h|2 - infiniband-diags/libibnetdisc/src/chassis.c| 18 +-- infiniband-diags/libibnetdisc/src/ibnetdisc.c | 51 +--- infiniband-diags/libibnetdisc/src/internal.h |8 +++- 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index 6120453..f1cb00c 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -48,7 +48,6 @@ struct ibnd_port; /* forward declare */ typedef struct ibnd_node { struct ibnd_node *next; /* all node list in fabric */ - ib_portid_t path_portid;/* path from from_node */ int smalid; int smalmc; @@ -81,7 +80,6 @@ typedef struct ibnd_node { /* internal use only */ unsigned char ch_found; struct ibnd_node *htnext; /* hash table list */ - struct ibnd_node *dnext;/* nodesdist next */ struct ibnd_node *type_next;/* next based on type */ } ibnd_node_t; diff --git a/infiniband-diags/libibnetdisc/src/chassis.c b/infiniband-diags/libibnetdisc/src/chassis.c index 15c17d2..3bd0108 100644 --- a/infiniband-diags/libibnetdisc/src/chassis.c +++ b/infiniband-diags/libibnetdisc/src/chassis.c @@ -822,6 +822,7 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) int chassisnum = 0; ibnd_chassis_t *chassis; ibnd_chassis_t *ch, *ch_next; + ibnd_node_scan_t 
*node_scan; scan-first_chassis = NULL; scan-current_chassis = NULL; @@ -832,16 +833,21 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) /* according to internal connectivity */ /* not very efficient but clear code so... */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID fill_voltaire_chassis_record(node)) goto cleanup; + } /* separate every Voltaire chassis from each other and build linked list of them */ /* algorithm: catch spine and find all surrounding nodes */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) != VTR_VENDOR_ID) continue; @@ -859,7 +865,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) /* now make pass on nodes for chassis which are not Voltaire */ /* grouped by common SystemImageGUID */ for (dist = 0; dist = fabric-maxhops_discovered; dist++) - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID) continue; @@ -885,7 +893,9 @@ int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *scan) /* now, make another pass to see which nodes are part of chassis */ /* (defined as chassis-nodecount 1) */ for (dist = 0; dist = MAXHOPS;) { - for (node = scan-nodesdist[dist]; node; node = node-dnext) { + for (node_scan = scan-nodesdist[dist]; node_scan; node_scan = node_scan-dnext) { + node = node_scan-node; + if (mad_get_field(node-info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID) 
continue; diff --git
Re: Crash in bonding
Pradeep Satyanarayana wrote: This crash was originally reported against Rhel5.4. However, one can recreate this crash quite easily in OFED-1.5 too. I understand that you get the crash when working with the RHEL5.4 bonding driver, correct? does it happen only with IPoIB devices acting as the bonding slaves or also with Ethernet devices? Please note that with RHEL 5.4 there's no need to use the ofed provided bonding module, moreover, I believe that the distro provided one is more stable and up to date in this case. Moving forward, ofed bonding support for newish distributions is to be removed. Moni, any reason to support bonding/EL 5.4 in ofed? Or. The steps to recreate the crash are as follows: 1. Run traffic (I used ping) on the IB interfaces through the bond master 2. ifdown ib0 3. ifdown ib1 4. modprobe -r ib_ipoib -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html