Allow librdmacm to contact the IB ACM service over a socket to obtain
address mapping and path record data.  Use of the service is
controlled through a build option (--with-ib_acm).  If the library
fails to contact the service, it falls back to using the kernel
services to resolve address and routing data.

Signed-off-by: Sean Hefty <sean.he...@intel.com>
---
Once IB ACM is proven, the build option can be removed.

 Makefile.am    |    2 -
 configure.in   |   14 +++++
 src/acm.c      |  160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/addrinfo.c |    3 +
 src/cma.c      |    9 ++-
 src/cma.h      |   13 ++++-
 6 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index be53c78..8d86045 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -12,7 +12,7 @@ else
     librdmacm_version_script =
 endif
 
-src_librdmacm_la_SOURCES = src/cma.c src/addrinfo.c
+src_librdmacm_la_SOURCES = src/cma.c src/addrinfo.c src/acm.c
 src_librdmacm_la_LDFLAGS = -version-info 1 -export-dynamic \
                           $(librdmacm_version_script)
 src_librdmacm_la_DEPENDENCIES =  $(srcdir)/src/librdmacm.map
diff --git a/configure.in b/configure.in
index 1122966..3db4247 100644
--- a/configure.in
+++ b/configure.in
@@ -21,6 +21,15 @@ if test "$with_valgrind" != "" && test "$with_valgrind" != "no"; then
        fi
 fi
 
+dnl Optional IB ACM support.  Passing --with-ib_acm with any value other
+dnl than "no" defines USE_IB_ACM to 1 in the generated config header;
+dnl when the option is absent or "no", nothing is defined.
+AC_ARG_WITH([ib_acm],
+    AC_HELP_STRING([--with-ib_acm],
+                  [Use IB ACM for route resolution - default NO]))
+
+if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then
+       AC_DEFINE([USE_IB_ACM], 1,
+                 [Define to 1 to use IB ACM for endpoint resolution])
+fi
+
 AC_ARG_ENABLE(libcheck, [  --disable-libcheck      do not test for presence of ib libraries],
 [       if test "$enableval" = "no"; then
                 disable_libcheck=yes
@@ -51,6 +60,11 @@ AC_CHECK_HEADER(valgrind/memcheck.h, [],
     AC_MSG_ERROR([valgrind requested but <valgrind/memcheck.h> not found.]))
 fi
 
+dnl When IB ACM was requested, <infiniband/acm.h> must be present;
+dnl configure stops with an error if the header cannot be found.
+if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then
+AC_CHECK_HEADER(infiniband/acm.h, [],
+    AC_MSG_ERROR([IB ACM requested but <infiniband/acm.h> not found.]))
+fi
+
 fi
 
 AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
diff --git a/src/acm.c b/src/acm.c
new file mode 100644
index 0000000..34fdf3c
--- /dev/null
+++ b/src/acm.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2010 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <unistd.h>
+
+#include "cma.h"
+#include <rdma/rdma_cma.h>
+#include <infiniband/ib.h>
+#include <infiniband/sa.h>
+
+#ifdef USE_IB_ACM
+#include <infiniband/acm.h>
+
+/* Held around each send/recv exchange with the ACM service on sock. */
+static pthread_mutex_t acm_lock = PTHREAD_MUTEX_INITIALIZER;
+/* Socket to the ACM service; values <= 0 are treated as "not connected". */
+static int sock;
+/* TCP port on the loopback interface that ucma_ib_init() connects to. */
+static short server_port = 6125;
+
+/*
+ * Open a TCP connection to the ACM service on the loopback interface,
+ * port server_port, and leave the connected descriptor in sock.
+ *
+ * If socket() fails, sock keeps the negative return value.  If connect()
+ * fails, the descriptor is closed and sock is reset to 0.  In both cases
+ * sock ends up <= 0, which the other ucma_ib_* routines test for.
+ */
+void ucma_ib_init(void)
+{
+       struct sockaddr_in acm_addr;
+
+       sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+       if (sock < 0)
+               return;
+
+       memset(&acm_addr, 0, sizeof acm_addr);
+       acm_addr.sin_family = AF_INET;
+       acm_addr.sin_port = htons(server_port);
+       acm_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+       if (connect(sock, (struct sockaddr *) &acm_addr, sizeof(acm_addr)) == 0)
+               return;
+
+       /* Could not reach the service: release the descriptor. */
+       close(sock);
+       sock = 0;
+}
+
+/*
+ * Shut down and close the connection to the ACM service, if one was
+ * established (sock > 0).  Does nothing otherwise.
+ */
+void ucma_ib_cleanup(void)
+{
+       if (sock <= 0)
+               return;
+
+       shutdown(sock, SHUT_RDWR);
+       close(sock);
+}
+
+/*
+ * Copy the path data carried in a resolve response into rai.
+ *
+ * The payload (everything after the ACM message header) is treated as
+ * an array of entries.  Every entry must be of type ACM_EP_INFO_PATH;
+ * if any entry has another type, or the payload does not hold at least
+ * one whole entry, rai is left untouched.  On success rai->ai_route
+ * points to a malloc()ed copy of the payload with each record's
+ * reserved field cleared, and rai->ai_route_len is the payload length
+ * in bytes.  Allocation failure also leaves rai untouched.
+ *
+ * NOTE(review): entries are counted using sizeof(struct acm_ep_addr_data)
+ * but the copy is indexed as struct ib_path_data; this assumes both have
+ * the same size -- confirm against <infiniband/acm.h>.
+ */
+static void ucma_ib_save_resp(struct rdma_addrinfo *rai,
+                              struct acm_resolve_msg *msg)
+{
+       struct ib_path_data *path_data;
+       int len, i, cnt;
+
+       len = msg->hdr.length - ACM_MSG_HDR_LENGTH;
+       if (len <= 0)
+               return;
+
+       /* An empty or truncated payload carries no usable path record. */
+       cnt = len / sizeof(struct acm_ep_addr_data);
+       if (cnt <= 0)
+               return;
+
+       /* Validate every entry before allocating anything. */
+       for (i = 0; i < cnt; i++) {
+               if (msg->data[i].type != ACM_EP_INFO_PATH)
+                       return;
+       }
+
+       path_data = malloc(len);
+       if (!path_data)
+               return;
+
+       memcpy(path_data, msg->data, len);
+       for (i = 0; i < cnt; i++)
+               path_data[i].reserved = 0;
+
+       rai->ai_route = path_data;
+       rai->ai_route_len = len;
+}
+
+/*
+ * Ask the ACM service to resolve the path between rai->ai_src_addr and
+ * rai->ai_dst_addr and, on success, store the result in rai via
+ * ucma_ib_save_resp().
+ *
+ * This is best effort: the function returns without touching rai when
+ * there is no connection to the service (sock <= 0), when either
+ * address is missing, when the address family is neither AF_INET nor
+ * AF_INET6, when the request cannot be sent in full, or when the reply
+ * is short, inconsistent with its own header length, or carries a
+ * non-zero status.
+ */
+void ucma_ib_resolve(struct rdma_addrinfo *rai)
+{
+       struct acm_msg msg;
+       struct acm_resolve_msg *resolve_msg = (struct acm_resolve_msg *) &msg;
+       struct acm_ep_addr_data *src_data, *dst_data;
+       int ret;
+
+       if (sock <= 0)
+               return;
+
+       /*
+        * Both endpoint addresses are dereferenced below.  Skip ACM
+        * rather than crash when one of them was not supplied.
+        */
+       if (!rai->ai_src_addr || !rai->ai_dst_addr)
+               return;
+
+       /* Only IPv4 and IPv6 socket addresses can be encoded below. */
+       if (rai->ai_family != AF_INET && rai->ai_family != AF_INET6)
+               return;
+
+       memset(&msg, 0, sizeof msg);
+       msg.hdr.version = ACM_VERSION;
+       msg.hdr.opcode = ACM_OP_RESOLVE;
+       msg.hdr.length = ACM_MSG_HDR_LENGTH + (2 * ACM_MSG_EP_LENGTH);
+
+       /* The request carries two endpoint entries: source, then destination. */
+       src_data = &resolve_msg->data[0];
+       dst_data = &resolve_msg->data[1];
+
+       src_data->flags = ACM_EP_FLAG_SOURCE;
+       dst_data->flags = ACM_EP_FLAG_DEST;
+       if (rai->ai_family == AF_INET) {
+               src_data->type = dst_data->type = ACM_EP_INFO_ADDRESS_IP;
+               memcpy(src_data->info.addr,
+                      &((struct sockaddr_in *) rai->ai_src_addr)->sin_addr,
+                      sizeof(struct in_addr));
+               memcpy(dst_data->info.addr,
+                      &((struct sockaddr_in *) rai->ai_dst_addr)->sin_addr,
+                      sizeof(struct in_addr));
+       } else {
+               src_data->type = dst_data->type = ACM_EP_INFO_ADDRESS_IP6;
+               memcpy(src_data->info.addr,
+                      &((struct sockaddr_in6 *) rai->ai_src_addr)->sin6_addr,
+                      sizeof(struct in6_addr));
+               memcpy(dst_data->info.addr,
+                      &((struct sockaddr_in6 *) rai->ai_dst_addr)->sin6_addr,
+                      sizeof(struct in6_addr));
+       }
+
+       /* acm_lock keeps the request and its reply paired on the shared socket. */
+       pthread_mutex_lock(&acm_lock);
+       ret = send(sock, (char *) &msg, msg.hdr.length, 0);
+       if (ret != msg.hdr.length) {
+               pthread_mutex_unlock(&acm_lock);
+               return;
+       }
+
+       /*
+        * NOTE(review): a single recv() on a stream socket may return only
+        * part of the reply; that case is rejected by the length check
+        * below, but the unread remainder stays queued on the socket.
+        */
+       ret = recv(sock, (char *) &msg, sizeof msg, 0);
+       pthread_mutex_unlock(&acm_lock);
+       if (ret < ACM_MSG_HDR_LENGTH || ret != msg.hdr.length || msg.hdr.status)
+               return;
+
+       ucma_ib_save_resp(rai, resolve_msg);
+}
+
+#endif /* USE_IB_ACM */
diff --git a/src/addrinfo.c b/src/addrinfo.c
index dfaf9d5..dad9b82 100644
--- a/src/addrinfo.c
+++ b/src/addrinfo.c
@@ -219,6 +219,9 @@ int rdma_getaddrinfo(char *node, char *service,
                }
        }
 
+       if (!(rai->ai_flags & RAI_PASSIVE))
+               ucma_ib_resolve(rai);
+
        freeaddrinfo(ai);
        *res = rai;
        return 0;
diff --git a/src/cma.c b/src/cma.c
index 1cc4f1f..6ac949a 100644
--- a/src/cma.c
+++ b/src/cma.c
@@ -149,6 +149,8 @@ int af_ib_support;
 
 static void ucma_cleanup(void)
 {
+       ucma_ib_cleanup();
+
        if (cma_dev_cnt) {
                while (cma_dev_cnt--) {
                        ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
@@ -196,7 +198,7 @@ int ucma_init(void)
        struct ibv_device **dev_list = NULL;
        struct cma_device *cma_dev;
        struct ibv_device_attr attr;
-       int i, ret, dev_cnt;
+       int i, ret, dev_cnt, ib;
 
        /* Quick check without lock to see if we're already initialized */
        if (cma_dev_cnt)
@@ -225,7 +227,7 @@ int ucma_init(void)
                goto err2;
        }
 
-       for (i = 0; dev_list[i];) {
+       for (i = 0, ib = 0; dev_list[i];) {
                cma_dev = &cma_dev_array[i];
 
                cma_dev->guid = ibv_get_device_guid(dev_list[i]);
@@ -253,8 +255,11 @@ int ucma_init(void)
                cma_dev->port_cnt = attr.phys_port_cnt;
                cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
                cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
+               ib += (cma_dev->verbs->device->transport_type == IBV_TRANSPORT_IB);
        }
 
+       if (ib)
+               ucma_ib_init();
        cma_dev_cnt = dev_cnt;
        pthread_mutex_unlock(&mut);
        ibv_free_device_list(dev_list);
diff --git a/src/cma.h b/src/cma.h
index 06ca38c..62785de 100644
--- a/src/cma.h
+++ b/src/cma.h
@@ -44,6 +44,8 @@
 #include <byteswap.h>
 #include <string.h>
 
+#include <rdma/rdma_cma.h>
+
 #ifdef INCLUDE_VALGRIND
 #   include <valgrind/memcheck.h>
 #   ifndef VALGRIND_MAKE_MEM_DEFINED
@@ -84,5 +86,14 @@ static inline void *zalloc(size_t size)
 
 int ucma_init();
 
-#endif /* CMA_H */
+#ifdef USE_IB_ACM
+void ucma_ib_init();
+void ucma_ib_cleanup();
+void ucma_ib_resolve(struct rdma_addrinfo *rai);
+#else
+#define ucma_ib_init()
+#define ucma_ib_cleanup()
+#define ucma_ib_resolve(x)
+#endif
 
+#endif /* CMA_H */



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to