From: Ira Weiny <wei...@llnl.gov> Date: Tue, 11 May 2010 15:36:08 -0700 Subject: [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid race condition on response MADs
Specify CA/Port to use which allows parallel scanning to other operations. Signed-off-by: Ira Weiny <wei...@llnl.gov> --- .../libibnetdisc/include/infiniband/ibnetdisc.h | 15 ++-- infiniband-diags/libibnetdisc/src/ibnetdisc.c | 52 +++++++----- infiniband-diags/libibnetdisc/src/internal.h | 11 ++- infiniband-diags/libibnetdisc/src/query_smp.c | 83 ++++++++++++++++---- infiniband-diags/libibnetdisc/test/testleaks.c | 16 +--- infiniband-diags/src/iblinkinfo.c | 8 +- infiniband-diags/src/ibnetdiscover.c | 14 +--- infiniband-diags/src/ibqueryerrors.c | 11 ++- 8 files changed, 134 insertions(+), 76 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index 2735224..83d0ba7 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -134,7 +134,9 @@ typedef struct ibnd_config { unsigned show_progress; unsigned max_hops; unsigned debug; - uint8_t pad[64]; + unsigned timeout_ms; + unsigned retries; + uint8_t pad[56]; } ibnd_config_t; /** ========================================================================= @@ -166,15 +168,16 @@ typedef struct ibnd_fabric { * Initialization (fabric operations) */ -MAD_EXPORT ibnd_fabric_t *ibnd_discover_fabric(struct ibmad_port *ibmad_port, +MAD_EXPORT ibnd_fabric_t *ibnd_discover_fabric(char * ca_name, + int ca_port, ib_portid_t * from, struct ibnd_config *config); /** - * open: (required) ibmad_port object from libibmad + * ca_name: (optional) name of the CA to use + * ca_port: (optional) CA port to use * from: (optional) specify the node to start scanning from. - * If NULL start from the node we are running on. - * hops: (optional) Specify how much of the fabric to traverse. 
- * negative value == scan entire fabric + * If NULL start from the CA/CA port specified + * config: (optional) additional config options for the scan */ MAD_EXPORT void ibnd_destroy_fabric(ibnd_fabric_t * fabric); diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c b/infiniband-diags/libibnetdisc/src/ibnetdisc.c index 98801de..3c374c7 100644 --- a/infiniband-diags/libibnetdisc/src/ibnetdisc.c +++ b/infiniband-diags/libibnetdisc/src/ibnetdisc.c @@ -380,21 +380,6 @@ ibnd_node_t *ibnd_find_node_guid(ibnd_fabric_t * fabric, uint64_t guid) return NULL; } -static int _check_ibmad_port(struct ibmad_port *ibmad_port) -{ - if (!ibmad_port) { - IBND_DEBUG("ibmad_port must be specified\n"); - return -1; - } - if (mad_rpc_class_agent(ibmad_port, IB_SMI_CLASS) == -1 - || mad_rpc_class_agent(ibmad_port, IB_SMI_DIRECT_CLASS) == -1) { - IBND_DEBUG("ibmad_port must be opened with " - "IB_SMI_CLASS && IB_SMI_DIRECT_CLASS\n"); - return -1; - } - return 0; -} - ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char *dr_str) { int i = 0; @@ -462,17 +447,38 @@ void add_to_type_list(ibnd_node_t * node, ibnd_fabric_t * fabric) } } -ibnd_fabric_t *ibnd_discover_fabric(struct ibmad_port *ibmad_port, - ib_portid_t * from, struct ibnd_config *cfg) +static int set_config(struct ibnd_config *config, struct ibnd_config *cfg) { - struct ibnd_config default_config = { 0 }; + if (!config) + return (-EINVAL); + + if (cfg) + memcpy(config, cfg, sizeof(*config)); + + if (!config->max_smps) + config->max_smps = DEFAULT_MAX_SMP_ON_WIRE; + if (!config->timeout_ms) + config->timeout_ms = DEFAULT_TIMEOUT; + if (!config->retries) + config->retries = DEFAULT_RETRIES; + + return (0); +} + +ibnd_fabric_t *ibnd_discover_fabric(char * ca_name, int ca_port, + ib_portid_t * from, + struct ibnd_config *cfg) +{ + struct ibnd_config config = { 0 }; ibnd_fabric_t *fabric = NULL; ib_portid_t my_portid = { 0 }; smp_engine_t engine; ibnd_scan_t scan; - if (_check_ibmad_port(ibmad_port) < 0) + if 
(set_config(&config, cfg)) { + IBND_ERROR("Invalid ibnd_config\n"); return NULL; + } /* If not specified start from "my" port */ if (!from) @@ -488,10 +494,12 @@ ibnd_fabric_t *ibnd_discover_fabric(struct ibmad_port *ibmad_port, memset(&scan.selfportid, 0, sizeof(scan.selfportid)); scan.fabric = fabric; - scan.cfg = cfg ? cfg : &default_config; + scan.cfg = &config; - smp_engine_init(&engine, ibmad_port, &scan, cfg->max_smps ? - cfg->max_smps : DEFAULT_MAX_SMP_ON_WIRE); + if (smp_engine_init(&engine, ca_name, ca_port, &scan, &config)) { + free(fabric); + return (NULL); + } IBND_DEBUG("from %s\n", portid2str(from)); diff --git a/infiniband-diags/libibnetdisc/src/internal.h b/infiniband-diags/libibnetdisc/src/internal.h index 2cfde02..d037a60 100644 --- a/infiniband-diags/libibnetdisc/src/internal.h +++ b/infiniband-diags/libibnetdisc/src/internal.h @@ -54,6 +54,8 @@ #define MAXHOPS 63 #define DEFAULT_MAX_SMP_ON_WIRE 2 +#define DEFAULT_TIMEOUT 1000 +#define DEFAULT_RETRIES 3 typedef struct ibnd_scan { ib_portid_t selfportid; @@ -76,16 +78,19 @@ struct ibnd_smp { struct smp_engine { struct ibmad_port *ibmad_port; + int umad_fd; + int smi_agent; + int smi_dir_agent; ibnd_smp_t *smp_queue_head; ibnd_smp_t *smp_queue_tail; void *user_data; cl_qmap_t smps_on_wire; - int max_smps_on_wire; + struct ibnd_config *cfg; unsigned total_smps; }; -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, - void *user_data, int max_smps_on_wire); +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port, + void *user_data, ibnd_config_t *cfg); int issue_smp(smp_engine_t * engine, ib_portid_t * portid, unsigned attrid, unsigned mod, smp_comp_cb_t cb, void *cb_data); int process_mads(smp_engine_t * engine); diff --git a/infiniband-diags/libibnetdisc/src/query_smp.c b/infiniband-diags/libibnetdisc/src/query_smp.c index 7234844..4dbfa0d 100644 --- a/infiniband-diags/libibnetdisc/src/query_smp.c +++ b/infiniband-diags/libibnetdisc/src/query_smp.c @@ -61,25 
+61,32 @@ static ibnd_smp_t *get_smp(smp_engine_t * engine) return rc; } -static int send_smp(ibnd_smp_t * smp, struct ibmad_port *srcport) +static int send_smp(ibnd_smp_t * smp, smp_engine_t * engine) { int rc = 0; uint8_t umad[1024]; ib_rpc_t *rpc = &smp->rpc; + int agent = 0; memset(umad, 0, umad_size() + IB_MAD_SIZE); + if (rpc->mgtclass == IB_SMI_CLASS) { + agent = engine->smi_agent; + } else if (rpc->mgtclass == IB_SMI_DIRECT_CLASS) { + agent = engine->smi_dir_agent; + } else { + IBND_ERROR("Invalid class for RPC\n"); + return (-EIO); + } + if ((rc = mad_build_pkt(umad, &smp->rpc, &smp->path, NULL, NULL)) < 0) { IBND_ERROR("mad_build_pkt failed; %d\n", rc); return rc; } - if ((rc = umad_send(mad_rpc_portid(srcport), - mad_rpc_class_agent(srcport, rpc->mgtclass), - umad, IB_MAD_SIZE, - mad_get_timeout(srcport, rpc->timeout), - mad_get_retries(srcport))) < 0) { + if ((rc = umad_send(engine->umad_fd, agent, umad, IB_MAD_SIZE, + engine->cfg->timeout_ms, engine->cfg->retries)) < 0) { IBND_ERROR("send failed; %d\n", rc); return rc; } @@ -91,12 +98,13 @@ static int process_smp_queue(smp_engine_t * engine) { int rc = 0; ibnd_smp_t *smp; - while (cl_qmap_count(&engine->smps_on_wire) < engine->max_smps_on_wire) { + while (cl_qmap_count(&engine->smps_on_wire) + < engine->cfg->max_smps) { smp = get_smp(engine); if (!smp) return 0; - if ((rc = send_smp(smp, engine->ibmad_port)) != 0) { + if ((rc = send_smp(smp, engine)) != 0) { free(smp); return rc; } @@ -122,7 +130,7 @@ int issue_smp(smp_engine_t * engine, ib_portid_t * portid, smp->rpc.method = IB_MAD_METHOD_GET; smp->rpc.attr.id = attrid; smp->rpc.attr.mod = mod; - smp->rpc.timeout = mad_get_timeout(engine->ibmad_port, 0); + smp->rpc.timeout = engine->cfg->timeout_ms; smp->rpc.datasz = IB_SMP_DATA_SIZE; smp->rpc.dataoffs = IB_SMP_DATA_OFFS; smp->rpc.trid = mad_trid(); @@ -153,7 +161,7 @@ static int process_one_recv(smp_engine_t * engine) memset(umad, 0, sizeof(umad)); /* wait for the next message */ - if ((rc = 
umad_recv(mad_rpc_portid(engine->ibmad_port), umad, &length, + if ((rc = umad_recv(engine->umad_fd, umad, &length, 0)) < 0) { if (rc == -EWOULDBLOCK) return 0; @@ -190,14 +198,58 @@ error: return rc; } -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, - void *user_data, int max_smps_on_wire) +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port, + void *user_data, ibnd_config_t *cfg) { + int nc = 2; + int mc[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; + memset(engine, 0, sizeof(*engine)); - engine->ibmad_port = ibmad_port; + + engine->ibmad_port = mad_rpc_open_port(ca_name, ca_port, mc, nc); + if (!engine->ibmad_port) { + IBND_ERROR("can't open MAD port (%s:%d)\n", ca_name, ca_port); + return -EIO; + } + mad_rpc_set_timeout(engine->ibmad_port, cfg->timeout_ms); + mad_rpc_set_retries(engine->ibmad_port, cfg->retries); + + if (umad_init() < 0) { + IBND_ERROR("umad_init failed\n"); + mad_rpc_close_port(engine->ibmad_port); + return -EIO; + } + + engine->umad_fd = umad_open_port(ca_name, ca_port); + if (engine->umad_fd < 0) { + IBND_ERROR("can't open UMAD port (%s:%d)\n", ca_name, ca_port); + mad_rpc_close_port(engine->ibmad_port); + return -EIO; + } + + if ((engine->smi_agent = umad_register(engine->umad_fd, + IB_SMI_CLASS, 1, 0, 0)) < 0) { + IBND_ERROR("Failed to register SMI agent on (%s:%d)\n", + ca_name, ca_port); + goto eio_close; + } + + if ((engine->smi_dir_agent = umad_register(engine->umad_fd, + IB_SMI_DIRECT_CLASS, 1, 0, 0)) < 0) { + IBND_ERROR("Failed to register SMI_DIRECT agent on (%s:%d)\n", + ca_name, ca_port); + goto eio_close; + } + engine->user_data = user_data; cl_qmap_init(&engine->smps_on_wire); - engine->max_smps_on_wire = max_smps_on_wire; + engine->cfg = cfg; + return (0); + +eio_close: + mad_rpc_close_port(engine->ibmad_port); + umad_close_port(engine->umad_fd); + return (-EIO); } void smp_engine_destroy(smp_engine_t * engine) @@ -221,6 +273,9 @@ void smp_engine_destroy(smp_engine_t * engine) 
cl_qmap_remove_item(&engine->smps_on_wire, item); free(item); } + + umad_close_port(engine->umad_fd); + mad_rpc_close_port(engine->ibmad_port); } int process_mads(smp_engine_t * engine) diff --git a/infiniband-diags/libibnetdisc/test/testleaks.c b/infiniband-diags/libibnetdisc/test/testleaks.c index da2fc0a..9a91f50 100644 --- a/infiniband-diags/libibnetdisc/test/testleaks.c +++ b/infiniband-diags/libibnetdisc/test/testleaks.c @@ -54,8 +54,6 @@ char *argv0 = "iblinkinfotest"; static FILE *f; -static int timeout_ms = 500; - void usage(void) { fprintf(stderr, @@ -88,9 +86,6 @@ int main(int argc, char **argv) ib_portid_t port_id; int iters = -1; - struct ibmad_port *ibmad_port; - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; - static char const str_opts[] = "S:D:n:C:P:t:shuf:i:"; static const struct option long_opts[] = { {"S", 1, 0, 'S'}, @@ -139,7 +134,7 @@ int main(int argc, char **argv) iters = (int)strtol(optarg, NULL, 0); break; case 't': - timeout_ms = strtoul(optarg, 0, 0); + config.timeout_ms = strtoul(optarg, 0, 0); break; case 'S': guid = (uint64_t) strtoull(optarg, 0, 0); @@ -152,15 +147,11 @@ int main(int argc, char **argv) argc -= optind; argv += optind; - ibmad_port = mad_rpc_open_port(ca, ca_port, mgmt_classes, 2); - - mad_rpc_set_timeout(ibmad_port, timeout_ms); - while (iters == -1 || iters-- > 0) { if (from) { /* only scan part of the fabric */ str2drpath(&(port_id.drpath), from, 0, 0); - if ((fabric = ibnd_discover_fabric(ibmad_port, + if ((fabric = ibnd_discover_fabric(ca, ca_port, &port_id, &config)) == NULL) { fprintf(stderr, "discover failed\n"); @@ -168,7 +159,7 @@ int main(int argc, char **argv) goto close_port; } guid = 0; - } else if ((fabric = ibnd_discover_fabric(ibmad_port, NULL, + } else if ((fabric = ibnd_discover_fabric(ca, ca_port, NULL, &config)) == NULL) { fprintf(stderr, "discover failed\n"); rc = 1; @@ -179,6 +170,5 @@ int main(int argc, char **argv) } close_port: - mad_rpc_close_port(ibmad_port); exit(rc); } diff 
--git a/infiniband-diags/src/iblinkinfo.c b/infiniband-diags/src/iblinkinfo.c index 029573f..d0c9b13 100644 --- a/infiniband-diags/src/iblinkinfo.c +++ b/infiniband-diags/src/iblinkinfo.c @@ -337,8 +337,10 @@ int main(int argc, char **argv) exit(1); } - if (ibd_timeout) + if (ibd_timeout) { mad_rpc_set_timeout(ibmad_port, ibd_timeout); + config.timeout_ms = ibd_timeout; + } node_name_map = open_node_name_map(node_name_map_file); @@ -371,12 +373,12 @@ int main(int argc, char **argv) } else { if (resolved >= 0 && !(fabric = - ibnd_discover_fabric(ibmad_port, &port_id, &config))) + ibnd_discover_fabric(ibd_ca, ibd_ca_port, &port_id, &config))) IBWARN("Single node discover failed;" " attempting full scan\n"); if (!fabric && - !(fabric = ibnd_discover_fabric(ibmad_port, NULL, &config))) { + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config))) { fprintf(stderr, "discover failed\n"); rc = 1; goto close_port; diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c index 57f9625..8f08f06 100644 --- a/infiniband-diags/src/ibnetdiscover.c +++ b/infiniband-diags/src/ibnetdiscover.c @@ -67,8 +67,6 @@ #define DIFF_FLAG_DEFAULT (DIFF_FLAG_SWITCH | DIFF_FLAG_CA | DIFF_FLAG_ROUTER \ | DIFF_FLAG_PORT_CONNECTION) -struct ibmad_port *srcport; - static FILE *f; static char *node_name_map_file = NULL; @@ -938,9 +936,6 @@ int main(int argc, char **argv) ibnd_fabric_t *fabric = NULL; ibnd_fabric_t *diff_fabric = NULL; - struct ibmad_port *ibmad_port; - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; - const struct ibdiag_opt opts[] = { {"show", 's', 0, NULL, "show more information"}, {"list", 'l', 0, NULL, "list of connected nodes"}, @@ -975,12 +970,8 @@ int main(int argc, char **argv) argc -= optind; argv += optind; - ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 2); - if (!ibmad_port) - IBERROR("Failed to open %s port %d", ibd_ca, ibd_ca_port); - if (ibd_timeout) - mad_rpc_set_timeout(ibmad_port, 
ibd_timeout); + config.timeout_ms = ibd_timeout; if (argc && !(f = fopen(argv[0], "w"))) IBERROR("can't open file %s for writing", argv[0]); @@ -996,7 +987,7 @@ int main(int argc, char **argv) IBERROR("loading cached fabric failed\n"); } else { if ((fabric = - ibnd_discover_fabric(ibmad_port, NULL, &config)) == NULL) + ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config)) == NULL) IBERROR("discover failed\n"); } @@ -1017,6 +1008,5 @@ int main(int argc, char **argv) if (diff_fabric) ibnd_destroy_fabric(diff_fabric); close_node_name_map(node_name_map); - mad_rpc_close_port(ibmad_port); exit(0); } diff --git a/infiniband-diags/src/ibqueryerrors.c b/infiniband-diags/src/ibqueryerrors.c index e896254..f04e47f 100644 --- a/infiniband-diags/src/ibqueryerrors.c +++ b/infiniband-diags/src/ibqueryerrors.c @@ -600,8 +600,10 @@ int main(int argc, char **argv) if (!ibmad_port) IBERROR("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port); - if (ibd_timeout) + if (ibd_timeout) { mad_rpc_set_timeout(ibmad_port, ibd_timeout); + config.timeout_ms = ibd_timeout; + } node_name_map = open_node_name_map(node_name_map_file); @@ -633,11 +635,14 @@ int main(int argc, char **argv) } } else { if (resolved >= 0 && - !(fabric = ibnd_discover_fabric(ibmad_port, &portid, 0))) + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, + &portid, &config))) IBWARN("Single node discover failed;" " attempting full scan"); - if (!fabric && !(fabric = ibnd_discover_fabric(ibmad_port, NULL, + if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca, + ibd_ca_port, + NULL, &config))) { fprintf(stderr, "discover failed\n"); rc = 1; -- 1.5.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html