'subnet_discover' is a simple test utility which implements a "non-blocking"
discovery method where MADs are sent "in parallel" (unlike the
current implementation of 'ibnetdiscover', and similar to what OpenSM
does). To do this, the id of an already discovered node is encoded in the
lower 16 bits of the MAD transaction id.

Signed-off-by: Sasha Khapyorsky <[email protected]>
---

Hi Ira,

On 01:43 Sat 24 Oct     , Sasha Khapyorsky wrote:
> > 
> > Current Master:        Threaded version:
> > real    0m9.149s        0m2.223s
> > user    0m0.016s        0m0.374s
> > sys     0m0.372s        0m1.056s
> > 
> > With that in mind...
> 
> Good. So what do you think due to which factor most of this performance
> gain was achieved? Due to using multiple threads or due to SMP queries
> parallelism? I would suspect that it is a parallelism.

For some purposes in ibsim/tests I wrote a simple utility,
'subnet_discover'. It runs as a single thread, uses a "parallel"
MAD sending method, and relies on libibumad for all MAD
sending and receiving.

I think that similar implementation in libibnetdisc (I can do it if we
are in agreement :)) would improve its performance.

Would you like to look at this?

Sasha

 tests/Makefile          |    2 +-
 tests/subnet_discover.c |  495 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 496 insertions(+), 1 deletions(-)
 create mode 100644 tests/subnet_discover.c

diff --git a/tests/Makefile b/tests/Makefile
index dd4cd55..bd415d8 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,4 +1,4 @@
-progs:= mcast_storm
+progs:= subnet_discover mcast_storm
 
 -include ../defs.mk
 
diff --git a/tests/subnet_discover.c b/tests/subnet_discover.c
new file mode 100644
index 0000000..a577cc7
--- /dev/null
+++ b/tests/subnet_discover.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2009 Voltaire, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <infiniband/umad.h>
+#include <infiniband/mad.h>
+
+struct port {  /* one port of a discovered node */
+       struct node *node;      /* node this port belongs to (set in add_node) */
+       uint64_t guid;  /* port GUID copied from a NodeInfo reply in process_node */
+       struct port *remote;    /* peer port set by connect_ports; zeroed at allocation */
+       uint8_t port_info[IB_SMP_DATA_SIZE];    /* raw PortInfo payload as received */
+};
+
+struct node {  /* a discovered node followed by its port table */
+       uint64_t guid;  /* node GUID; the key find_node() compares */
+       unsigned num_ports;     /* IB_NODE_NPORTS_F value from NodeInfo */
+       unsigned is_switch;     /* non-zero when NodeInfo type is IB_NODE_SWITCH */
+       uint8_t node_info[IB_SMP_DATA_SIZE];    /* raw NodeInfo payload */
+       uint8_t node_desc[IB_SMP_DATA_SIZE];    /* raw NodeDescription payload */
+       uint8_t switch_info[IB_SMP_DATA_SIZE];  /* raw SwitchInfo payload; only queried for switches */
+       struct port ports[];    /* num_ports + 1 entries, indexed by port number */
+};
+
+static struct node *node_array[32 * 1024];     /* discovered nodes; the index is the node id */
+static unsigned node_count = 0;        /* number of used entries in node_array */
+static unsigned trid_cnt = 0;  /* per-query counter placed above the node id in the transaction id */
+static unsigned outstanding = 0;       /* queries sent and not yet accounted for by recv_smp_resp */
+static unsigned timeout = 100; /* handed to umad_send()/umad_recv(); set with -t */
+static unsigned retries = 3;   /* handed to umad_send(); set with -r */
+static unsigned verbose = 0;   /* incremented per -v; 1 enables VERBOSE, 2 enables NOISE */
+
+/*
+ * Diagnostics, all written to stderr.  ERROR always prints; VERBOSE needs
+ * verbose >= 1 and NOISE needs verbose >= 2.
+ *
+ * The conditional macros are wrapped in do { } while (0) so that each one
+ * expands to exactly one statement and cannot capture an "else" that
+ * follows the call site.
+ */
+#define ERROR(fmt, ...) fprintf(stderr, "ERR: " fmt, ##__VA_ARGS__)
+#define VERBOSE(fmt, ...) \
+       do { if (verbose) fprintf(stderr, fmt, ##__VA_ARGS__); } while (0)
+#define NOISE(fmt, ...) \
+       do { if (verbose > 1) fprintf(stderr, fmt, ##__VA_ARGS__); } while (0)
+
+/*
+ * Format a directed-route path as comma-terminated decimal values,
+ * e.g. "0,1,3,".  Entries 0..path_cnt inclusive are printed, so path must
+ * hold at least path_cnt + 1 bytes.  Text that does not fit is truncated.
+ *
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+static const char *print_path(uint8_t path[], size_t path_cnt)
+{
+       static char buf[256];
+       size_t i, used = 0;
+
+       buf[0] = '\0';
+       for (i = 0; i <= path_cnt && used < sizeof(buf) - 1; i++) {
+               size_t room = sizeof(buf) - used;
+               int n = snprintf(buf + used, room, "%u,", path[i]);
+
+               if (n < 0)
+                       break;
+               /*
+                * snprintf() reports the length it wanted to write; only
+                * room - 1 characters were stored if that did not fit.
+                */
+               used += (size_t)n < room ? (size_t)n : room - 1;
+       }
+       return buf;
+}
+
+#define DBG_DUMP_FUNC(name) static void dbg_dump_##name(void *data) \
+{ /* defines dbg_dump_<name>(): format data with mad_dump_<name>() and log it via NOISE */ \
+       char buf[2048]; \
+       mad_dump_##name(buf, sizeof(buf), data, IB_SMP_DATA_SIZE); \
+       NOISE("### "#name":\n%s\n", buf); \
+}
+
+DBG_DUMP_FUNC(nodeinfo);       /* dbg_dump_nodeinfo() */
+DBG_DUMP_FUNC(nodedesc);       /* dbg_dump_nodedesc() */
+DBG_DUMP_FUNC(portinfo);       /* dbg_dump_portinfo() */
+DBG_DUMP_FUNC(switchinfo);     /* dbg_dump_switchinfo() */
+
+/*
+ * Fill umad with a directed-route SMP request.
+ *
+ * The whole buffer (umad_size() + IB_MAD_SIZE bytes) is cleared first, so
+ * anything previously stored in it is lost.  The MAD header, the
+ * directed-route fields, the path array and the M_Key are then written
+ * from the arguments; the hop pointer always starts at 0 and both
+ * directed-route LIDs are set to 0xffff.
+ */
+static void build_umad_req(void *umad, uint8_t * path, unsigned path_cnt,
+                          uint64_t trid, uint8_t method,
+                          uint16_t attr_id, uint32_t attr_mod, uint64_t mkey)
+{
+       void *smp;
+
+       memset(umad, 0, umad_size() + IB_MAD_SIZE);
+       umad_set_addr(umad, 0xffff, 0, 0, 0);
+
+       smp = umad_get_mad(umad);
+
+       /* common MAD header */
+       mad_set_field(smp, 0, IB_MAD_BASEVER_F, 1);
+       mad_set_field(smp, 0, IB_MAD_MGMTCLASS_F, IB_SMI_DIRECT_CLASS);
+       mad_set_field(smp, 0, IB_MAD_CLASSVER_F, 1);
+       mad_set_field(smp, 0, IB_MAD_METHOD_F, method);
+       mad_set_field64(smp, 0, IB_MAD_TRID_F, trid);
+       mad_set_field(smp, 0, IB_MAD_ATTRID_F, attr_id);
+       mad_set_field(smp, 0, IB_MAD_ATTRMOD_F, attr_mod);
+       mad_set_field64(smp, 0, IB_MAD_MKEY_F, mkey);
+
+       /* directed-route part */
+       mad_set_field(smp, 0, IB_DRSMP_HOPCNT_F, path_cnt);
+       mad_set_field(smp, 0, IB_DRSMP_HOPPTR_F, 0);
+       mad_set_field(smp, 0, IB_DRSMP_DRSLID_F, 0xffff);
+       mad_set_field(smp, 0, IB_DRSMP_DRDLID_F, 0xffff);
+       mad_set_array(smp, 0, IB_DRSMP_PATH_F, path);
+}
+
+/*
+ * Build and post one SMP Get for attr_id/attr_mod along path.
+ *
+ * The transaction id is (trid_cnt << 16) | (node_id & 0xffff), so the low
+ * 16 bits of a reply's transaction id carry node_id back to the receiver.
+ * trid_cnt advances on every call; outstanding is incremented only when
+ * umad_send() succeeds.
+ *
+ * Returns the umad_send() result on success, -1 when the send fails.
+ */
+static int send_query(int fd, int agent, void *umad, unsigned node_id,
+                     uint8_t * path, size_t path_cnt, uint16_t attr_id,
+                     uint32_t attr_mod)
+{
+       uint64_t tid = (trid_cnt++ << 16) | (node_id & 0xffff);
+       int rc;
+
+       build_umad_req(umad, path, path_cnt, tid, IB_MAD_METHOD_GET, attr_id,
+                      attr_mod, 0);
+
+       rc = umad_send(fd, agent, umad, IB_MAD_SIZE, timeout, retries);
+       if (rc >= 0) {
+               outstanding++;
+               VERBOSE("send %016" PRIx64 ": attr %x, mod %x to %s\n", tid,
+                       attr_id, attr_mod, print_path(path, path_cnt));
+               return rc;
+       }
+
+       ERROR("umad_send failed: trid 0x%016" PRIx64
+             ", attr_id %x, attr_mod %x: %s\n",
+             tid, attr_id, attr_mod, strerror(errno));
+       return -1;
+}
+
+/*
+ * Wait for the next MAD delivered to our agent.
+ *
+ * MADs received for any other agent id are discarded and the receive is
+ * repeated.  length is the capacity of the MAD part of umad.
+ *
+ * Returns the agent id on success.  Returns -1 when umad_recv() fails or
+ * when the received umad carries a non-zero status; in the latter case the
+ * buffer still holds the MAD that umad handed back.
+ */
+static int recv_response(int fd, int agent, uint8_t * umad, size_t length)
+{
+       int len, ret, status;
+
+       do {
+               /* umad_recv() updates len, so restore the capacity each try */
+               len = length;
+               ret = umad_recv(fd, umad, &len, timeout);
+       } while (ret >= 0 && ret != agent);
+
+       if (ret < 0) {
+               /* nothing was stored in umad, so there is no status to read */
+               ERROR("umad_recv failed: %s\n", strerror(errno));
+               return -1;
+       }
+
+       status = umad_status(umad);
+       if (status) {
+               /* the umad status is an errno-style code, e.g. ETIMEDOUT */
+               ERROR("umad_recv failed: umad status %x: %s\n",
+                     status, strerror(status));
+               return -1;
+       }
+
+       return ret;
+}
+
+/*
+ * Post a NodeInfo Get along path on behalf of node node_id.
+ * Returns whatever send_query() returns.
+ */
+static int query_node_info(int fd, int agent, void *umad, unsigned node_id,
+                          uint8_t * path, size_t path_cnt)
+{
+       const uint16_t attr = IB_ATTR_NODE_INFO;
+
+       return send_query(fd, agent, umad, node_id, path, path_cnt, attr, 0);
+}
+
+/*
+ * Post a NodeDescription Get along path on behalf of node node_id.
+ * Returns whatever send_query() returns.
+ */
+static int query_node_desc(int fd, int agent, void *umad, unsigned node_id,
+                          uint8_t * path, size_t path_cnt)
+{
+       const uint16_t attr = IB_ATTR_NODE_DESC;
+
+       return send_query(fd, agent, umad, node_id, path, path_cnt, attr, 0);
+}
+
+/*
+ * Post a SwitchInfo Get along path on behalf of node node_id.
+ * Returns whatever send_query() returns.
+ */
+static int query_switch_info(int fd, int agent, void *umad, unsigned node_id,
+                            uint8_t * path, size_t path_cnt)
+{
+       const uint16_t attr = IB_ATTR_SWITCH_INFO;
+
+       return send_query(fd, agent, umad, node_id, path, path_cnt, attr, 0);
+}
+
+/*
+ * Post a PortInfo Get for port port_num along path on behalf of node
+ * node_id.  The port number travels in the attribute modifier.
+ * Returns whatever send_query() returns.
+ */
+static int query_port_info(int fd, int agent, void *umad, unsigned node_id,
+                          uint8_t * path, size_t path_cnt, unsigned port_num)
+{
+       const uint16_t attr = IB_ATTR_PORT_INFO;
+       const uint32_t modifier = port_num;
+
+       return send_query(fd, agent, umad, node_id, path, path_cnt, attr,
+                         modifier);
+}
+
+/*
+ * Allocate a node described by a NodeInfo payload and append it to
+ * node_array.
+ *
+ * The node is created with num_ports + 1 port slots so that ports[] can be
+ * indexed directly by port number, port 0 included.  All other fields
+ * start out zeroed.
+ *
+ * Returns the new node's index in node_array, or -1 when the table is full
+ * or memory cannot be allocated.
+ */
+static int add_node(uint8_t * node_info)
+{
+       const unsigned capacity = sizeof(node_array) / sizeof(node_array[0]);
+       unsigned i, num_ports = mad_get_field(node_info, 0, IB_NODE_NPORTS_F);
+       struct node *node;
+
+       /* refuse to write past the end of the fixed-size node table */
+       if (node_count >= capacity) {
+               ERROR("node table is full (%u entries)\n", capacity);
+               return -1;
+       }
+
+       node = calloc(1, sizeof(*node) +
+                     (num_ports + 1) * sizeof(node->ports[0]));
+       if (!node)
+               return -1;
+
+       node->num_ports = num_ports;
+       node->guid = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+       node->is_switch = ((mad_get_field(node_info, 0, IB_NODE_TYPE_F)) ==
+                          IB_NODE_SWITCH);
+       memcpy(node->node_info, node_info, sizeof(node->node_info));
+       for (i = 0; i <= num_ports; i++)
+               node->ports[i].node = node;
+
+       node_array[node_count] = node;
+
+       return node_count++;
+}
+
+/*
+ * Look up an already recorded node by the node GUID found in a NodeInfo
+ * payload.  node_array is scanned linearly.
+ *
+ * Returns the node's index, or -1 when no recorded node has that GUID.
+ */
+static int find_node(uint8_t * node_info)
+{
+       const uint64_t wanted = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
+       unsigned idx = 0;
+
+       while (idx < node_count) {
+               if (node_array[idx]->guid == wanted)
+                       return idx;
+               idx++;
+       }
+
+       return -1;
+}
+
+/*
+ * Handle a PortInfo reply for node node_id.
+ *
+ * The port number is read from the reply's attribute modifier and the
+ * payload is stored in that port's slot.  When the port is not port 0, its
+ * physical state is LinkUp and it is either a non-local port of a switch
+ * or the local port of node 0, the path is extended by this port and a
+ * NodeInfo query is sent to whatever is attached to it.  That query reuses
+ * umad, so the reply in it is overwritten.
+ *
+ * path is assumed to have room for DR_PATH_SIZE bytes, which is what
+ * discovery() provides.
+ *
+ * Returns 0 when nothing had to be sent, the query_node_info() result when
+ * a query was sent, and -1 when the reply names a port the node does not
+ * have or the path cannot be extended any further.
+ */
+static int process_port_info(void *umad, unsigned node_id, int fd, int agent,
+                            uint8_t path[], size_t path_cnt)
+{
+       enum { DR_PATH_SIZE = 64, PHYS_STATE_LINK_UP = 5 };
+       struct node *node = node_array[node_id];
+       struct port *port;
+       uint8_t *port_info = (uint8_t *)umad_get_mad(umad) + IB_SMP_DATA_OFFS;
+       unsigned port_num, local_port;
+
+       dbg_dump_portinfo(port_info);
+
+       port_num = mad_get_field(umad_get_mad(umad), 0, IB_MAD_ATTRMOD_F);
+       local_port = mad_get_field(port_info, 0, IB_PORT_LOCAL_PORT_F);
+
+       /* the modifier comes off the wire; ports[] has num_ports + 1 slots */
+       if (port_num > node->num_ports) {
+               ERROR("port info for port %u of node %u which has %u ports\n",
+                     port_num, node_id, node->num_ports);
+               return -1;
+       }
+
+       port = &node->ports[port_num];
+       memcpy(port->port_info, port_info, sizeof(port->port_info));
+
+       if (!port_num ||
+           mad_get_field(port_info, 0, IB_PORT_PHYS_STATE_F) !=
+           PHYS_STATE_LINK_UP)
+               return 0;
+
+       if (!((node->is_switch && port_num != local_port) ||
+             (node_id == 0 && port_num == local_port)))
+               return 0;
+
+       /* appending one more hop must stay inside the path array */
+       if (path_cnt + 1 >= DR_PATH_SIZE) {
+               ERROR("path via port %u of node %u is too long\n",
+                     port_num, node_id);
+               return -1;
+       }
+
+       path[++path_cnt] = port_num;
+       return query_node_info(fd, agent, umad, node_id, path, path_cnt);
+}
+
+/*
+ * Record a SwitchInfo payload on node node_id, after logging it at NOISE
+ * level.  Always returns 0.
+ */
+static int process_switch_info(unsigned node_id, uint8_t * switch_info)
+{
+       struct node *owner = node_array[node_id];
+
+       dbg_dump_switchinfo(switch_info);
+       memcpy(owner->switch_info, switch_info, sizeof(owner->switch_info));
+       return 0;
+}
+
+/*
+ * Record a NodeDescription payload on node node_id, after logging it at
+ * NOISE level.  Always returns 0.
+ */
+static int process_node_desc(unsigned node_id, uint8_t * node_desc)
+{
+       struct node *owner = node_array[node_id];
+
+       dbg_dump_nodedesc(node_desc);
+       memcpy(owner->node_desc, node_desc, sizeof(owner->node_desc));
+       return 0;
+}
+
+/*
+ * Link two ports to each other: each port's remote pointer is set to the
+ * other port.  The ids index node_array and the port numbers index the
+ * nodes' ports[]; neither is range-checked here.
+ */
+static void connect_ports(unsigned node1_id, unsigned port1_num,
+                         unsigned node2_id, unsigned port2_num)
+{
+       struct node *first = node_array[node1_id];
+       struct node *second = node_array[node2_id];
+
+       VERBOSE("connecting %u:%u <--> %u:%u\n",
+               node1_id, port1_num, node2_id, port2_num);
+
+       first->ports[port1_num].remote = &second->ports[port2_num];
+       second->ports[port2_num].remote = &first->ports[port1_num];
+}
+
+/*
+ * Handle a NodeInfo reply.
+ *
+ * remote_id is the node the query was issued from and path[path_cnt] is
+ * the port of that node the query left through.  The replying node is
+ * looked up by GUID and added if unknown, the GUID of the port the reply
+ * came from is recorded, and (except for node 0) that port is connected to
+ * the originating port.  For a newly added node the follow-up queries are
+ * sent: NodeDescription, SwitchInfo for switches, and PortInfo for every
+ * port (starting at port 0 for switches and port 1 otherwise).
+ *
+ * The follow-up queries reuse umad, so the reply in it is overwritten; the
+ * payload has already been copied by add_node() at that point.
+ *
+ * Returns 0 on success.  Returns -1 when the node cannot be added, when
+ * the reply refers to a port or node that does not exist, or when any
+ * follow-up query could not be sent.
+ */
+static int process_node(void *umad, unsigned remote_id, int fd, int agent,
+                       uint8_t path[], size_t path_cnt)
+{
+       enum { DR_PATH_SIZE = 64 };
+       struct node *node;
+       uint8_t *node_info = (uint8_t *)umad_get_mad(umad) + IB_SMP_DATA_OFFS;
+       unsigned port_num = mad_get_field(node_info, 0, IB_NODE_LOCAL_PORT_F);
+       unsigned node_is_new = 0;
+       unsigned i;
+       int id, ret = 0;
+
+       dbg_dump_nodeinfo(node_info);
+
+       id = find_node(node_info);
+       if (id < 0) {
+               id = add_node(node_info);
+               if (id < 0)
+                       return -1;
+               node_is_new = 1;
+       }
+
+       node = node_array[id];
+
+       /* the local port number comes off the wire; ports[] is num_ports + 1 */
+       if (port_num > node->num_ports) {
+               ERROR("node %d reports local port %u but has %u ports\n",
+                     id, port_num, node->num_ports);
+               return -1;
+       }
+
+       node->ports[port_num].guid =
+           mad_get_field64(node_info, 0, IB_NODE_PORT_GUID_F);
+
+       if (id) {               /* skip connect for very first node */
+               if (remote_id >= node_count || path_cnt >= DR_PATH_SIZE ||
+                   path[path_cnt] > node_array[remote_id]->num_ports) {
+                       ERROR("node %d was reached through an unknown port"
+                             " of node %u\n", id, remote_id);
+                       return -1;
+               }
+               connect_ports(id, port_num, remote_id, path[path_cnt]);
+       }
+
+       if (!node_is_new)
+               return 0;
+
+       if (query_node_desc(fd, agent, umad, id, path, path_cnt) < 0)
+               ret = -1;
+
+       if (node->is_switch &&
+           query_switch_info(fd, agent, umad, id, path, path_cnt) < 0)
+               ret = -1;
+
+       for (i = !node->is_switch; i <= node->num_ports; i++)
+               if (query_port_info(fd, agent, umad, id, path, path_cnt, i) < 0)
+                       ret = -1;
+
+       return ret;
+}
+
+/*
+ * Receive one MAD and dispatch it by attribute id.
+ *
+ * The header is decoded even when recv_response() reports an error,
+ * because the decoded fields drive both the error message and the
+ * outstanding accounting.
+ * NOTE(review): when umad_recv() itself failed, the buffer still holds
+ * whatever was in it before, so that accounting is based on old data.
+ *
+ * Anything whose method field is not IB_MAD_METHOD_GET is ignored and is
+ * not counted against outstanding.  The low 16 bits of the transaction id
+ * are the node id that send_query() stored there; it is checked against
+ * node_count before any handler uses it as an index.
+ *
+ * path receives the directed-route path of the MAD and may be extended by
+ * process_port_info().
+ *
+ * Returns 0 when the MAD was handled or ignored.  discovery() treats any
+ * non-zero value as a failure, so the agent id that recv_response()
+ * returns must not leak out of here.  Returns -1 on a receive error, a
+ * non-zero SMP status, an unknown node id or a handler failure.
+ */
+static int recv_smp_resp(int fd, int agent, uint8_t * umad, uint8_t path[])
+{
+       uint8_t *mad, *data;
+       uint64_t trid;
+       uint8_t method;
+       uint16_t status;
+       uint16_t attr_id;
+       uint32_t attr_mod;
+       size_t path_cnt;
+       unsigned node_id;
+       int ret;
+
+       ret = recv_response(fd, agent, umad, IB_MAD_SIZE);
+
+       mad = umad_get_mad(umad);
+       status = mad_get_field(mad, 0, IB_DRSMP_STATUS_F);
+       method = mad_get_field(mad, 0, IB_MAD_METHOD_F);
+       trid = mad_get_field64(mad, 0, IB_MAD_TRID_F);
+       attr_id = mad_get_field(mad, 0, IB_MAD_ATTRID_F);
+       attr_mod = mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
+       path_cnt = mad_get_field(mad, 0, IB_DRSMP_HOPCNT_F);
+       mad_get_array(mad, 0, IB_DRSMP_PATH_F, path);
+
+       if (method != IB_MAD_METHOD_GET)
+               return 0;
+
+       outstanding--;
+
+       if (ret < 0 || status) {
+               ERROR("error response 0x%016" PRIx64 ": attr_id %x"
+                     ", attr_mod %x from %s with status %x\n", trid,
+                     attr_id, attr_mod, print_path(path, path_cnt), status);
+               return -1;
+       }
+
+       node_id = trid & 0xffff;
+
+       VERBOSE("recv %016" PRIx64 ": attr %x, mod %x from %s\n", trid, attr_id,
+               attr_mod, print_path(path, path_cnt));
+
+       data = mad + IB_SMP_DATA_OFFS;
+
+       switch (attr_id) {
+       case IB_ATTR_NODE_INFO:
+               /* the very first reply arrives while the table is empty */
+               if (node_count && node_id >= node_count)
+                       goto bad_node_id;
+               ret = process_node(umad, node_id, fd, agent, path, path_cnt);
+               break;
+       case IB_ATTR_NODE_DESC:
+               if (node_id >= node_count)
+                       goto bad_node_id;
+               ret = process_node_desc(node_id, data);
+               break;
+       case IB_ATTR_SWITCH_INFO:
+               if (node_id >= node_count)
+                       goto bad_node_id;
+               ret = process_switch_info(node_id, data);
+               break;
+       case IB_ATTR_PORT_INFO:
+               if (node_id >= node_count)
+                       goto bad_node_id;
+               ret = process_port_info(umad, node_id, fd, agent, path,
+                                       path_cnt);
+               break;
+       default:
+               VERBOSE("unsolicited response 0x%016" PRIx64 ": attr_id %x"
+                       ", attr_mod %x\n", trid, attr_id, attr_mod);
+               return 0;
+       }
+
+       return ret < 0 ? -1 : 0;
+
+bad_node_id:
+       ERROR("response 0x%016" PRIx64 " refers to unknown node id %u\n",
+             trid, node_id);
+       return -1;
+}
+
+/*
+ * Run the discovery loop on an open umad port.
+ *
+ * One NodeInfo query is sent with an empty path; after that replies are
+ * consumed until no query is outstanding.  The reply handlers send the
+ * follow-up queries themselves, reusing the same umad buffer and path.
+ *
+ * Returns 0 on success, -ENOMEM when the MAD buffer cannot be allocated,
+ * a negative value when the first query cannot be sent, and 1 when any
+ * reply was reported as failed by recv_smp_resp().
+ */
+static int discovery(int fd, int agent)
+{
+       uint8_t path[64] = { 0 };
+       void *umad;
+       int ret;
+
+       umad = malloc(IB_MAD_SIZE + umad_size());
+       if (!umad)
+               return -ENOMEM;
+
+       ret = query_node_info(fd, agent, umad, 0, path, 0);
+       if (ret < 0)
+               goto out;       /* the buffer must be released on this path too */
+
+       while (outstanding)
+               if (recv_smp_resp(fd, agent, umad, path))
+                       ret = 1;
+
+out:
+       free(umad);
+
+       return ret;
+}
+
+/*
+ * Open the given umad port, register an agent for the directed-route SMI
+ * class, run discovery() and tear everything down again.
+ *
+ * card_name may be NULL; it is passed to umad_open_port() unchanged.
+ *
+ * Everything acquired here is released on every exit path: the agent is
+ * unregistered, the port is closed and umad_done() is called once
+ * umad_init() has succeeded.
+ *
+ * Returns the discovery() result, or -1 when umad cannot be initialised,
+ * the port cannot be opened or the agent cannot be registered.
+ */
+static int umad_discovery(char *card_name, unsigned int port_num)
+{
+       const char *name = card_name ? card_name : "NULL";
+       int fd, agent, ret = -1;
+
+       if (umad_init()) {
+               ERROR("cannot init umad\n");
+               return -1;
+       }
+
+       fd = umad_open_port(card_name, port_num);
+       if (fd < 0) {
+               ERROR("cannot open umad port %s:%u: %s\n",
+                     name, port_num, strerror(errno));
+               goto out_done;
+       }
+
+       agent = umad_register(fd, IB_SMI_DIRECT_CLASS, 1, 0, NULL);
+       if (agent < 0) {
+               ERROR("cannot register SMI DR class for umad port %s:%u: %s\n",
+                     name, port_num, strerror(errno));
+               goto out_close;
+       }
+
+       ret = discovery(fd, agent);
+       if (ret)
+               ERROR("Failed to discover.\n");
+
+       umad_unregister(fd, agent);
+out_close:
+       umad_close_port(fd);
+out_done:
+       umad_done();
+
+       return ret;
+}
+
+/*
+ * Print every recorded node to stdout, followed by one line per connected
+ * port (ports 1..num_ports) naming the peer node and the peer port number.
+ * A blank line ends each node.
+ *
+ * node_desc is a fixed-size byte array that need not contain a NUL, so it
+ * is printed with an explicit maximum length.
+ */
+static void print_subnet(void)
+{
+       struct node *node;
+       struct port *local, *remote;
+       unsigned i, j;
+
+       for (i = 0; i < node_count; i++) {
+               node = node_array[i];
+               printf("%s %u \"%s-%016" PRIx64 "\" \t# %.*s\n",
+                      node->is_switch ? "Switch" : "Ca", node->num_ports,
+                      node->is_switch ? "S" : "H", node->guid,
+                      (int)sizeof(node->node_desc),
+                      (const char *)node->node_desc);
+               for (j = 1; j <= node->num_ports; j++) {
+                       local = &node->ports[j];
+                       remote = local->remote;
+                       if (!remote)
+                               continue;
+                       /* the peer's port number is its index in ports[] */
+                       printf("[%u] \t\"%s-%016" PRIx64 "\"[%td] \t# %.*s\n", j,
+                              remote->node->is_switch ? "S" : "H",
+                              remote->node->guid, remote - remote->node->ports,
+                              (int)sizeof(remote->node->node_desc),
+                              (const char *)remote->node->node_desc);
+               }
+               printf("\n");
+       }
+}
+
+/*
+ * Parse the command line, run the discovery and print what was found.
+ *
+ * Options: -C/--Card name, -P/--Port number, -t/--timeout value,
+ * -r/--retries count, and -v (repeatable) for more output.  Any other
+ * option prints the usage line and exits with status 2.
+ *
+ * The subnet is printed even when discovery reported a failure, so that
+ * whatever was collected is still shown.  The exit status is the
+ * umad_discovery() result.
+ */
+int main(int argc, char **argv)
+{
+       /* the table must end with an all-zero entry; "{}" is not ISO C */
+       static const struct option long_opts[] = {
+               {"Card", required_argument, NULL, 'C'},
+               {"Port", required_argument, NULL, 'P'},
+               {"timeout", required_argument, NULL, 't'},
+               {"retries", required_argument, NULL, 'r'},
+               {NULL, 0, NULL, 0}
+       };
+       char *card_name = NULL;
+       unsigned int port_num = 0;
+       int ch, ret;
+
+       while (1) {
+               ch = getopt_long(argc, argv, "C:P:t:r:v", long_opts, NULL);
+               if (ch == -1)
+                       break;
+               switch (ch) {
+               case 'C':
+                       card_name = optarg;
+                       break;
+               case 'P':
+                       port_num = strtoul(optarg, NULL, 0);
+                       break;
+               case 't':
+                       timeout = strtoul(optarg, NULL, 0);
+                       break;
+               case 'r':
+                       retries = strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       verbose++;
+                       break;
+               default:
+                       printf("usage: %s [-C card_name] [-P port_num]"
+                              " [-t timeout] [-r retries] [-v[v]]\n", argv[0]);
+                       exit(2);
+               }
+       }
+
+       ret = umad_discovery(card_name, port_num);
+
+       print_subnet();
+
+       return ret;
+}
-- 
1.6.6.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to