On 16:48 Tue 11 May , Ira Weiny wrote: > > From: Ira Weiny <wei...@llnl.gov> > Date: Tue, 11 May 2010 15:36:08 -0700 > Subject: [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid > race condition on response MAD's > > Specify CA/Port to use which allows parallel scanning to other > operations. > > Signed-off-by: Ira Weiny <wei...@llnl.gov>
Applied. Thanks. However see a minor comment below. > --- > .../libibnetdisc/include/infiniband/ibnetdisc.h | 15 ++-- > infiniband-diags/libibnetdisc/src/ibnetdisc.c | 52 +++++++----- > infiniband-diags/libibnetdisc/src/internal.h | 11 ++- > infiniband-diags/libibnetdisc/src/query_smp.c | 83 > ++++++++++++++++---- > infiniband-diags/libibnetdisc/test/testleaks.c | 16 +--- > infiniband-diags/src/iblinkinfo.c | 8 +- > infiniband-diags/src/ibnetdiscover.c | 14 +--- > infiniband-diags/src/ibqueryerrors.c | 11 ++- > 8 files changed, 134 insertions(+), 76 deletions(-) [snip] > diff --git a/infiniband-diags/libibnetdisc/src/internal.h > b/infiniband-diags/libibnetdisc/src/internal.h > index 2cfde02..d037a60 100644 > --- a/infiniband-diags/libibnetdisc/src/internal.h > +++ b/infiniband-diags/libibnetdisc/src/internal.h > @@ -54,6 +54,8 @@ > #define MAXHOPS 63 > > #define DEFAULT_MAX_SMP_ON_WIRE 2 > +#define DEFAULT_TIMEOUT 1000 > +#define DEFAULT_RETRIES 3 > > typedef struct ibnd_scan { > ib_portid_t selfportid; > @@ -76,16 +78,19 @@ struct ibnd_smp { > > struct smp_engine { > struct ibmad_port *ibmad_port; After this patch ibmad_port is not used by smp_engine, but instead by an "upper" layer (ibnetdisc). So I would suggest to move this there (for example to be a member of scan struct). Sasha > + int umad_fd; > + int smi_agent; > + int smi_dir_agent; > ibnd_smp_t *smp_queue_head; > ibnd_smp_t *smp_queue_tail; > void *user_data; > cl_qmap_t smps_on_wire; > - int max_smps_on_wire; > + struct ibnd_config *cfg; > unsigned total_smps; > }; > > -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, > - void *user_data, int max_smps_on_wire); > +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port, > + void *user_data, ibnd_config_t *cfg); > int issue_smp(smp_engine_t * engine, ib_portid_t * portid, > unsigned attrid, unsigned mod, smp_comp_cb_t cb, void *cb_data); > int process_mads(smp_engine_t * engine); > diff --git a/infiniband-diags/libibnetdisc/src/query_smp.c > b/infiniband-diags/libibnetdisc/src/query_smp.c > index 7234844..4dbfa0d 100644 > --- a/infiniband-diags/libibnetdisc/src/query_smp.c > +++ b/infiniband-diags/libibnetdisc/src/query_smp.c > @@ -61,25 +61,32 @@ static ibnd_smp_t *get_smp(smp_engine_t * engine) > return rc; > } > > -static int send_smp(ibnd_smp_t * smp, struct ibmad_port *srcport) > +static int send_smp(ibnd_smp_t * smp, smp_engine_t * engine) > { > int rc = 0; > uint8_t umad[1024]; > ib_rpc_t *rpc = &smp->rpc; > + int agent = 0; > > memset(umad, 0, umad_size() + IB_MAD_SIZE); > > + if (rpc->mgtclass == IB_SMI_CLASS) { > + agent = engine->smi_agent; > + } else if (rpc->mgtclass == IB_SMI_DIRECT_CLASS) { > + agent = engine->smi_dir_agent; > + } else { > + IBND_ERROR("Invalid class for RPC\n"); > + return (-EIO); > + } > + > if ((rc = mad_build_pkt(umad, &smp->rpc, &smp->path, NULL, NULL)) > < 0) { > IBND_ERROR("mad_build_pkt failed; %d\n", rc); > return rc; > } > > - if ((rc = umad_send(mad_rpc_portid(srcport), > - mad_rpc_class_agent(srcport, rpc->mgtclass), > - umad, IB_MAD_SIZE, > - mad_get_timeout(srcport, rpc->timeout), > - mad_get_retries(srcport))) < 0) { > + if ((rc = umad_send(engine->umad_fd, agent, umad, IB_MAD_SIZE, > + engine->cfg->timeout_ms, engine->cfg->retries)) < > 0) { > IBND_ERROR("send failed; %d\n", rc); > return rc; > } > @@ -91,12 +98,13 @@ static int process_smp_queue(smp_engine_t * engine) > { > int rc = 0; > ibnd_smp_t *smp; > - while (cl_qmap_count(&engine->smps_on_wire) < engine->max_smps_on_wire) > { > + while (cl_qmap_count(&engine->smps_on_wire) > + < engine->cfg->max_smps) { > smp = get_smp(engine); > if (!smp) > return 0; > > - if ((rc = send_smp(smp, engine->ibmad_port)) != 0) { > + if ((rc = send_smp(smp, engine)) != 0) { > free(smp); > return rc; > } > @@ -122,7 +130,7 @@ int issue_smp(smp_engine_t * engine, ib_portid_t * portid, > smp->rpc.method = IB_MAD_METHOD_GET; > smp->rpc.attr.id = attrid; > smp->rpc.attr.mod = mod; > - smp->rpc.timeout = mad_get_timeout(engine->ibmad_port, 0); > + smp->rpc.timeout = engine->cfg->timeout_ms; > smp->rpc.datasz = IB_SMP_DATA_SIZE; > smp->rpc.dataoffs = IB_SMP_DATA_OFFS; > smp->rpc.trid = mad_trid(); > @@ -153,7 +161,7 @@ static int process_one_recv(smp_engine_t * engine) > memset(umad, 0, sizeof(umad)); > > /* wait for the next message */ > - if ((rc = umad_recv(mad_rpc_portid(engine->ibmad_port), umad, &length, > + if ((rc = umad_recv(engine->umad_fd, umad, &length, > 0)) < 0) { > if (rc == -EWOULDBLOCK) > return 0; > @@ -190,14 +198,58 @@ error: > return rc; > } > > -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, > - void *user_data, int max_smps_on_wire) > +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port, > + void *user_data, ibnd_config_t *cfg) > { > + int nc = 2; > + int mc[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; > + > memset(engine, 0, sizeof(*engine)); > - engine->ibmad_port = ibmad_port; > + > + engine->ibmad_port = mad_rpc_open_port(ca_name, ca_port, mc, nc); > + if (!engine->ibmad_port) { > + IBND_ERROR("can't open MAD port (%s:%d)\n", ca_name, ca_port); > + return -EIO; > + } > + mad_rpc_set_timeout(engine->ibmad_port, cfg->timeout_ms); > + mad_rpc_set_retries(engine->ibmad_port, cfg->retries); > + > + if (umad_init() < 0) { > + IBND_ERROR("umad_init failed\n"); > + mad_rpc_close_port(engine->ibmad_port); > + return -EIO; > + } > + > + engine->umad_fd = umad_open_port(ca_name, ca_port); > + if (engine->umad_fd < 0) { > + IBND_ERROR("can't open UMAD port (%s:%d)\n", ca_name, ca_port); > + mad_rpc_close_port(engine->ibmad_port); > + return -EIO; > + } > + > + if ((engine->smi_agent = umad_register(engine->umad_fd, > + IB_SMI_CLASS, 1, 0, 0)) < 0) { > + IBND_ERROR("Failed to register SMI agent on (%s:%d)\n", > + ca_name, ca_port); > + goto eio_close; > + } > + > + if ((engine->smi_dir_agent = umad_register(engine->umad_fd, > + IB_SMI_DIRECT_CLASS, 1, 0, 0)) < 0) { > + IBND_ERROR("Failed to register SMI_DIRECT agent on (%s:%d)\n", > + ca_name, ca_port); > + goto eio_close; > + } > + > engine->user_data = user_data; > cl_qmap_init(&engine->smps_on_wire); > - engine->max_smps_on_wire = max_smps_on_wire; > + engine->cfg = cfg; > + return (0); > + > +eio_close: > + mad_rpc_close_port(engine->ibmad_port); > + umad_close_port(engine->umad_fd); > + return (-EIO); > } > > void smp_engine_destroy(smp_engine_t * engine) > @@ -221,6 +273,9 @@ void smp_engine_destroy(smp_engine_t * engine) > cl_qmap_remove_item(&engine->smps_on_wire, item); > free(item); > } > + > + umad_close_port(engine->umad_fd); > + mad_rpc_close_port(engine->ibmad_port); > } > > int process_mads(smp_engine_t * engine) > diff --git a/infiniband-diags/libibnetdisc/test/testleaks.c > b/infiniband-diags/libibnetdisc/test/testleaks.c > index da2fc0a..9a91f50 100644 > --- a/infiniband-diags/libibnetdisc/test/testleaks.c > +++ b/infiniband-diags/libibnetdisc/test/testleaks.c > @@ -54,8 +54,6 @@ > char *argv0 = "iblinkinfotest"; > static FILE *f; > > -static int timeout_ms = 500; > - > void usage(void) > { > fprintf(stderr, > @@ -88,9 +86,6 @@ int main(int argc, char **argv) > ib_portid_t port_id; > int iters = -1; > > - struct ibmad_port *ibmad_port; > - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; > - > static char const str_opts[] = "S:D:n:C:P:t:shuf:i:"; > static const struct option long_opts[] = { > {"S", 1, 0, 'S'}, > @@ -139,7 +134,7 @@ int main(int argc, char **argv) > iters = (int)strtol(optarg, NULL, 0); > break; > case 't': > - timeout_ms = strtoul(optarg, 0, 0); > + config.timeout_ms = strtoul(optarg, 0, 0); > break; > case 'S': > guid = (uint64_t) strtoull(optarg, 0, 0); > @@ -152,15 +147,11 @@ int main(int argc, char **argv) > argc -= optind; > argv += optind; > > - ibmad_port = mad_rpc_open_port(ca, ca_port, mgmt_classes, 2); > - > - mad_rpc_set_timeout(ibmad_port, timeout_ms); > - > while (iters == -1 || iters-- > 0) { > if (from) { > /* only scan part of the fabric */ > str2drpath(&(port_id.drpath), from, 0, 0); > - if ((fabric = ibnd_discover_fabric(ibmad_port, > + if ((fabric = ibnd_discover_fabric(ca, ca_port, > &port_id, &config)) > == NULL) { > fprintf(stderr, "discover failed\n"); > @@ -168,7 +159,7 @@ int main(int argc, char **argv) > goto close_port; > } > guid = 0; > - } else if ((fabric = ibnd_discover_fabric(ibmad_port, NULL, > + } else if ((fabric = ibnd_discover_fabric(ca, ca_port, NULL, > &config)) == NULL) { > fprintf(stderr, "discover failed\n"); > rc = 1; > @@ -179,6 +170,5 @@ int main(int argc, char **argv) > } > > close_port: > - mad_rpc_close_port(ibmad_port); > exit(rc); > } > diff --git a/infiniband-diags/src/iblinkinfo.c > b/infiniband-diags/src/iblinkinfo.c > index 029573f..d0c9b13 100644 > --- a/infiniband-diags/src/iblinkinfo.c > +++ b/infiniband-diags/src/iblinkinfo.c > @@ -337,8 +337,10 @@ int main(int argc, char **argv) > exit(1); > } > > - if (ibd_timeout) > + if (ibd_timeout) { > mad_rpc_set_timeout(ibmad_port, ibd_timeout); > + config.timeout_ms = ibd_timeout; > + } > > node_name_map = open_node_name_map(node_name_map_file); > > @@ -371,12 +373,12 @@ int main(int argc, char **argv) > } else { > if (resolved >= 0 && > !(fabric = > - ibnd_discover_fabric(ibmad_port, &port_id, &config))) > + ibnd_discover_fabric(ibd_ca, ibd_ca_port, &port_id, > &config))) > IBWARN("Single node discover failed;" > " attempting full scan\n"); > > if (!fabric && > - !(fabric = ibnd_discover_fabric(ibmad_port, NULL, > &config))) { > + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, > &config))) { > fprintf(stderr, "discover failed\n"); > rc = 1; > goto close_port; > diff --git a/infiniband-diags/src/ibnetdiscover.c > b/infiniband-diags/src/ibnetdiscover.c > index 57f9625..8f08f06 100644 > --- a/infiniband-diags/src/ibnetdiscover.c > +++ b/infiniband-diags/src/ibnetdiscover.c > @@ -67,8 +67,6 @@ > #define DIFF_FLAG_DEFAULT (DIFF_FLAG_SWITCH | DIFF_FLAG_CA | > DIFF_FLAG_ROUTER \ > | DIFF_FLAG_PORT_CONNECTION) > > -struct ibmad_port *srcport; > - > static FILE *f; > > static char *node_name_map_file = NULL; > @@ -938,9 +936,6 @@ int main(int argc, char **argv) > ibnd_fabric_t *fabric = NULL; > ibnd_fabric_t *diff_fabric = NULL; > > - struct ibmad_port *ibmad_port; > - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS }; > - > const struct ibdiag_opt opts[] = { > {"show", 's', 0, NULL, "show more information"}, > {"list", 'l', 0, NULL, "list of connected nodes"}, > @@ -975,12 +970,8 @@ int main(int argc, char **argv) > argc -= optind; > argv += optind; > > - ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 2); > - if (!ibmad_port) > - IBERROR("Failed to open %s port %d", ibd_ca, ibd_ca_port); > - > if (ibd_timeout) > - mad_rpc_set_timeout(ibmad_port, ibd_timeout); > + config.timeout_ms = ibd_timeout; > > if (argc && !(f = fopen(argv[0], "w"))) > IBERROR("can't open file %s for writing", argv[0]); > @@ -996,7 +987,7 @@ int main(int argc, char **argv) > IBERROR("loading cached fabric failed\n"); > } else { > if ((fabric = > - ibnd_discover_fabric(ibmad_port, NULL, &config)) == NULL) > + ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config)) > == NULL) > IBERROR("discover failed\n"); > } > > @@ -1017,6 +1008,5 @@ int main(int argc, char **argv) > if (diff_fabric) > ibnd_destroy_fabric(diff_fabric); > close_node_name_map(node_name_map); > - mad_rpc_close_port(ibmad_port); > exit(0); > } > diff --git a/infiniband-diags/src/ibqueryerrors.c > b/infiniband-diags/src/ibqueryerrors.c > index e896254..f04e47f 100644 > --- a/infiniband-diags/src/ibqueryerrors.c > +++ b/infiniband-diags/src/ibqueryerrors.c > @@ -600,8 +600,10 @@ int main(int argc, char **argv) > if (!ibmad_port) > IBERROR("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port); > > - if (ibd_timeout) > + if (ibd_timeout) { > mad_rpc_set_timeout(ibmad_port, ibd_timeout); > + config.timeout_ms = ibd_timeout; > + } > > node_name_map = open_node_name_map(node_name_map_file); > > @@ -633,11 +635,14 @@ int main(int argc, char **argv) > } > } else { > if (resolved >= 0 && > - !(fabric = ibnd_discover_fabric(ibmad_port, &portid, 0))) > + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, > + &portid, &config))) > IBWARN("Single node discover failed;" > " attempting full scan"); > > - if (!fabric && !(fabric = ibnd_discover_fabric(ibmad_port, NULL, > + if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca, > + ibd_ca_port, > + NULL, > &config))) { > fprintf(stderr, "discover failed\n"); > rc = 1; > -- > 1.5.4.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html