On 12:49 Thu 18 Feb , Ira Weiny wrote: > > From: Ira Weiny <wei...@llnl.gov> > Date: Fri, 22 Jan 2010 17:33:30 -0800 > Subject: [PATCH] libibnetdisc: Convert to a multi-smp algorithm > > v3: change DEFAULT_MAX_SMP_ON_WIRE to 2 > > Allow for multiple SMP's to be on the wire at a single time. This > algorithm splits the processing of SMP's to a small smp engine which > may be useful to split out in the future. > > Signed-off-by: Ira Weiny <wei...@llnl.gov>
Applied. Thanks. However see some comments and questions below. > +static int recv_port_info(smp_engine_t *engine, ibnd_smp_t * smp, > + uint8_t *mad, void *cb_data) > +{ > + ibnd_fabric_t *fabric = ((ibnd_scan_t *)engine->user_data)->fabric; > + ibnd_node_t *node = (ibnd_node_t *)cb_data; > + ibnd_port_t *port; > + uint8_t *port_info = mad + IB_SMP_DATA_OFFS; > + uint8_t port_num, local_port; > + > + port_num = mad_get_field(mad, 0, IB_MAD_ATTRMOD_F); > + local_port = mad_get_field(port_info, 0, IB_PORT_LOCAL_PORT_F); > + > + /* this may have been created before */ > + port = node->ports[port_num]; > + if (!port) { > + port = node->ports[port_num] = calloc(1, sizeof(*port)); > + if (!port) { > + IBND_ERROR("Failed to allocate port\n"); > + return -1; > + } > + } > + > + memcpy(port->info, port_info, sizeof(port->info)); > + port->node = node; > + port->portnum = port_num; > + port->ext_portnum = 0; > + port->base_lid = (uint16_t) mad_get_field(port->info, 0, IB_PORT_LID_F); > + port->lmc = (uint8_t) mad_get_field(port->info, 0, IB_PORT_LMC_F); > + > + if (port_num == 0) { > + node->smalid = port->base_lid; > + node->smalmc = port->lmc; > + } else if (node->type == IB_NODE_SWITCH) { > + port->base_lid = node->smalid; > + port->lmc = node->smalmc; > + } > + > + add_to_portguid_hash(port, fabric->portstbl); > + > + debug_port(&smp->path, port); > > - mad_dump_node_type(type, 64, &node->type, sizeof(int)); > - printf("%s -> %s %s {%016" PRIx64 "} portnum %d base lid %d-%d\"%s\"\n", > - portid2str(path), prompt, type, node->guid, > - node->type == IB_NODE_SWITCH ? 0 : port->portnum, > - port->base_lid, > - port->base_lid + (1 << port->lmc) - 1, node->nodedesc); > + if (port_num && > + (mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F) > + == IB_PORT_PHYS_STATE_LINKUP) > + && > + (node->type == IB_NODE_SWITCH || node == fabric->from_node)) { What will happen when ibnetdiscover runs from host connected by two ports to different subnets? Wouldn't it run over both fabrics (following PortInfo querying loop in recv_node_info() below)? > + > + ib_portid_t path = smp->path; > + if (extend_dpath(engine, &path, port_num) != -1) > + query_node_info(engine, &path, node); > + } > + > + return 0; > +} [snip] > +static int recv_node_info(smp_engine_t *engine, ibnd_smp_t * smp, > + uint8_t *mad, void *cb_data) > +{ > + ibnd_fabric_t *fabric = ((ibnd_scan_t *)engine->user_data)->fabric; > + int i = 0; > + uint8_t *node_info = mad + IB_SMP_DATA_OFFS; > + ibnd_node_t * rem_node = (ibnd_node_t *)cb_data; > ibnd_node_t *node; > + int node_is_new = 0; > + uint64_t node_guid = mad_get_field64(node_info, 0, IB_NODE_GUID_F); > + uint64_t port_guid = mad_get_field64(node_info, 0, IB_NODE_PORT_GUID_F); > + int port_num = mad_get_field(node_info, 0, IB_NODE_LOCAL_PORT_F); > + ibnd_port_t *port = NULL; > > - for (node = fabric->nodestbl[hash]; node; node = node->htnext) > - if (node->guid == new->guid) > - return node; > + node = ibnd_find_node_guid(fabric, node_guid); > + if (!node) { > + node = create_node(engine, &smp->path, node_info); > + if (!node) > + return -1; > + node_is_new = 1; > + } > + IBND_DEBUG("Found %s node GUID %lx (%s)\n", > + (node_is_new) ? "new": "old", node->guid, > + portid2str(&smp->path)); > > - return NULL; > + port = node->ports[port_num]; > + if (!port) { > + /* If we have not see this port before create a shell for it */ > + port = node->ports[port_num] = calloc(1, sizeof(*port)); > + port->node = node; > + port->portnum = port_num; > + } > + port->guid = port_guid; > + > + if (rem_node == NULL) /* this is the start node */ > + fabric->from_node = node; > + else { > + /* link ports... */ > + int rem_port_num = get_last_port(&smp->path); > + > + if (!rem_node->ports[rem_port_num]) { > + IBND_ERROR("Internal Error; " > + "Node(%p) %lx Port %d no port > created!?!?!?\n\n", > + rem_node, rem_node->guid, rem_port_num); > + return (-1); > + } > + > + link_ports(node, port, rem_node, rem_node->ports[rem_port_num]); > + } > + > + if (!node_is_new) > + return 0; > + > + query_node_desc(engine, &smp->path, node); > + > + if (node->type == IB_NODE_SWITCH) > + query_switch_info(engine, &smp->path, node); > + > + /* process all the ports on this node */ > + for (i = (node->type == IB_NODE_SWITCH) ? 0 : 1; > + i <= node->numports; i++) { > + query_port_info(engine, &smp->path, node, i); > + } This one. > + > + return 0; > +} [snip] > diff --git a/infiniband-diags/libibnetdisc/src/internal.h > b/infiniband-diags/libibnetdisc/src/internal.h > index 348bd0f..61b644d 100644 > --- a/infiniband-diags/libibnetdisc/src/internal.h > +++ b/infiniband-diags/libibnetdisc/src/internal.h > @@ -39,6 +39,7 @@ > #define _INTERNAL_H_ > > #include <infiniband/ibnetdisc.h> > +#include <complib/cl_qmap.h> I'm not really happy with complib dependency introduction - we had an issues with it in the past. Also I think that we can implement simpler transaction tracker (likely even more efficient) by storing sent MADs in cyclic array and encoding its index as part of TRID. > > #define IBND_DEBUG(fmt, ...) \ > if (ibdebug) { \ > @@ -52,16 +53,44 @@ > > #define MAXHOPS 63 > > -typedef struct ibnd_node_scan { > - ibnd_node_t *node; > - struct ibnd_node_scan *dnext; /* nodesdist next */ > -} ibnd_node_scan_t; > +#define DEFAULT_MAX_SMP_ON_WIRE 2 > > typedef struct ibnd_scan { > - ibnd_node_scan_t *nodesdist[MAXHOPS + 1]; > ib_portid_t selfportid; > + ibnd_fabric_t *fabric; > } ibnd_scan_t; > > + > +typedef struct ibnd_smp ibnd_smp_t; > +typedef struct smp_engine smp_engine_t; > +typedef int (*smp_comp_cb_t)(smp_engine_t *engine, ibnd_smp_t * smp, > + uint8_t *mad_resp, void *cb_data); > +struct ibnd_smp { > + cl_map_item_t on_wire; > + struct ibnd_smp * qnext; > + smp_comp_cb_t cb; > + void * cb_data; > + ib_portid_t path; > + ib_rpc_t rpc; > +}; > +struct smp_engine { > + struct ibmad_port *ibmad_port; > + ibnd_smp_t *smp_queue_head; > + ibnd_smp_t *smp_queue_tail; > + void * user_data; > + cl_qmap_t smps_on_wire; > + int num_smps_outstanding; > + int max_smps_on_wire; > +}; > + > +void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, > + void * user_data, int max_smps_on_wire); > +int issue_smp(smp_engine_t *engine, ib_portid_t * portid, > + unsigned attrid, unsigned mod, > + smp_comp_cb_t cb, void * cb_data); > +int process_mads(smp_engine_t *engine); > +void smp_engine_destroy(smp_engine_t *engine); > + > void add_to_nodeguid_hash(ibnd_node_t * node, ibnd_node_t * hash[]); > > void add_to_portguid_hash(ibnd_port_t * port, ibnd_port_t * hash[]); > diff --git a/infiniband-diags/libibnetdisc/src/query_smp.c > b/infiniband-diags/libibnetdisc/src/query_smp.c > new file mode 100644 > index 0000000..5571314 > --- /dev/null > +++ b/infiniband-diags/libibnetdisc/src/query_smp.c > @@ -0,0 +1,249 @@ > +/* > + * Copyright (c) 2010 Lawrence Livermore National Laboratory > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + */ > + > +#include <errno.h> > +#include <infiniband/ibnetdisc.h> > +#include <infiniband/umad.h> > +#include "internal.h" > + > +void > +queue_smp(smp_engine_t *engine, ibnd_smp_t *smp) > +{ > + smp->qnext = NULL; > + if (!engine->smp_queue_head) { > + engine->smp_queue_head = smp; > + engine->smp_queue_tail = smp; > + } else { > + engine->smp_queue_tail->qnext = smp; > + engine->smp_queue_tail = smp; > + } > +} > + > +ibnd_smp_t * > +get_smp(smp_engine_t *engine) > +{ > + ibnd_smp_t *head = engine->smp_queue_head; > + ibnd_smp_t *tail = engine->smp_queue_tail; > + ibnd_smp_t *rc = head; > + if (head) { > + if (tail == head) > + engine->smp_queue_tail = NULL; > + engine->smp_queue_head = head->qnext; > + } > + return rc; > +} > + > +int send_smp(ibnd_smp_t * smp, struct ibmad_port *srcport) > +{ > + int rc = 0; > + uint8_t umad[1024]; > + ib_rpc_t *rpc = &smp->rpc; > + > + memset(umad, 0, umad_size() + IB_MAD_SIZE); > + > + if ((rc = mad_build_pkt(umad, &smp->rpc, &smp->path, NULL, NULL)) > + < 0) { > + IBND_ERROR("mad_build_pkt failed; %d", rc); > + return rc; > + } > + > + if ((rc = umad_send(mad_rpc_portid(srcport), > + mad_rpc_class_agent(srcport, rpc->mgtclass), > + umad, IB_MAD_SIZE, > + mad_get_timeout(srcport, rpc->timeout), > + mad_get_retries(srcport))) < 0) { > + IBND_ERROR("send failed; %d", rc); > + return rc; > + } > + > + return 0; > +} > + > +static int process_smp_queue(smp_engine_t *engine) > +{ > + int rc = 0; > + ibnd_smp_t *smp; > + while (cl_qmap_count(&engine->smps_on_wire) < engine->max_smps_on_wire) > { > + smp = get_smp(engine); > + if (!smp) > + return 0; > + > + cl_qmap_insert(&engine->smps_on_wire, (uint32_t)smp->rpc.trid, > + (cl_map_item_t *)smp); > + if ((rc = send_smp(smp, engine->ibmad_port)) != 0) > + return rc; > + } > + return 0; > +} > + > +int issue_smp(smp_engine_t *engine, ib_portid_t * portid, > + unsigned attrid, unsigned mod, > + smp_comp_cb_t cb, void * cb_data) > +{ > + ibnd_smp_t *smp = calloc(1, sizeof *smp); > + if (!smp) { > + IBND_ERROR("OOM"); > + return -ENOMEM; > + } > + > + smp->cb = cb; > + smp->cb_data = cb_data; > + smp->path = *portid; > + smp->rpc.method = IB_MAD_METHOD_GET; > + smp->rpc.attr.id = attrid; > + smp->rpc.attr.mod = mod; > + smp->rpc.timeout = mad_get_timeout(engine->ibmad_port, 0); > + smp->rpc.datasz = IB_SMP_DATA_SIZE; > + smp->rpc.dataoffs = IB_SMP_DATA_OFFS; > + smp->rpc.trid = mad_trid(); BTW, by reviewing mad_trid() code in libibmad: uint64_t mad_trid(void) { static uint64_t base; static uint64_t trid; uint64_t next; if (!base) { srandom((int)time(0) * getpid()); base = random(); trid = random(); } next = ++trid | (base << 32); return next; } For me it doesn't look as thread safe function. > + > + if ((portid->lid <= 0) || > + (portid->drpath.drslid == 0xffff) || > + (portid->drpath.drdlid == 0xffff)) > + smp->rpc.mgtclass = IB_SMI_DIRECT_CLASS; /* direct SMI */ > + else > + smp->rpc.mgtclass = IB_SMI_CLASS; /* Lid routed SMI */ > + > + portid->sl = 0; > + portid->qp = 0; > + > + engine->num_smps_outstanding++; > + queue_smp(engine, smp); > + return (process_smp_queue(engine)); > +} > + > +int process_one_recv(smp_engine_t *engine) > +{ > + int rc = 0; > + int status = 0; > + ibnd_smp_t *smp; > + uint8_t *mad; > + uint32_t trid; > + uint8_t umad[umad_size() + IB_MAD_SIZE]; > + int length = umad_size() + IB_MAD_SIZE; > + > + memset(umad, 0, sizeof(umad)); > + > + /* wait for the next message */ > + if ((rc = umad_recv(mad_rpc_portid(engine->ibmad_port), umad, &length, > + 0)) < 0) { > + if (rc == -EWOULDBLOCK) > + return 0; > + IBND_ERROR("umad_recv failed: %d\n", rc); > + return -1; > + } > + > + rc = process_smp_queue(engine); > + > + mad = umad_get_mad(umad); > + trid = (uint32_t)mad_get_field64(mad, 0, IB_MAD_TRID_F); > + > + smp = (ibnd_smp_t *)cl_qmap_remove(&engine->smps_on_wire, trid); > + if ((cl_map_item_t *)smp == cl_qmap_end(&engine->smps_on_wire)) { > + IBND_ERROR("Failed to find matching smp for trid (%x)\n", > + trid); > + return -1; > + } > + > + if (rc) > + goto error; > + > + if ((status = umad_status(umad))) { > + IBND_ERROR("umad (%s Attr 0x%x:%u) bad status %d; %s\n", > + portid2str(&smp->path), > + smp->rpc.attr.id, smp->rpc.attr.mod, > + status, strerror(status)); > + } else if ((status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F))) { > + IBND_ERROR("mad (%s Attr 0x%x:%u) bad status 0x%x\n", > + portid2str(&smp->path), > + smp->rpc.attr.id, smp->rpc.attr.mod, > + status); > + } else > + rc = smp->cb(engine, smp, mad, smp->cb_data); > + > +error: > + free(smp); > + engine->num_smps_outstanding--; > + return (rc); > +} > + > +void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port, > + void * user_data, int max_smps_on_wire) > +{ > + memset(engine, '\0', sizeof(*engine)); > + engine->ibmad_port = ibmad_port; > + engine->user_data = user_data; > + cl_qmap_init(&engine->smps_on_wire); > + engine->num_smps_outstanding = 0; > + engine->max_smps_on_wire = max_smps_on_wire; > +} > + > +void smp_engine_destroy(smp_engine_t *engine) > +{ > + cl_map_item_t *item; > + ibnd_smp_t *smp; > + > + /* remove queued smps */ > + smp = get_smp(engine); > + if (smp) > + IBND_ERROR("outstanding SMP's\n"); > + for (/* */; smp; smp = get_smp(engine)) { > + free(smp); > + } > + > + /* remove smps from the wire queue */ > + item = cl_qmap_head(&engine->smps_on_wire); > + if (item != cl_qmap_end(&engine->smps_on_wire)) > + IBND_ERROR("outstanding SMP's on wire\n"); > + for (/* */; item != cl_qmap_end(&engine->smps_on_wire); > + item = cl_qmap_head(&engine->smps_on_wire)) { > + cl_qmap_remove_item(&engine->smps_on_wire, item); > + free(item); > + } > + > + engine->num_smps_outstanding = 0; > +} > + > +int process_mads(smp_engine_t *engine) > +{ > + int rc = 0; > + while (engine->num_smps_outstanding > 0) { > + if ((rc = process_smp_queue(engine)) != 0) > + return rc; Is it really needed to run process_smp_queue() here (assuming that it is already executed in issue_smp() and process_one_recv())? Or this is just for handling process_one_recv() error case? If so wouldn't it better to run it there? Sasha > + while (!cl_is_qmap_empty(&engine->smps_on_wire)) > + if ((rc = process_one_recv(engine)) != 0) > + return rc; > + } > + return 0; > +} > + > -- > 1.5.4.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html