Hi,

We do indeed have a fix for XRC support on our branch at Bull; sorry I
neglected to contribute it, my bad…

I attach the patch here, on top of the current v1.6.6 (should I rather
submit it as a pull request?).

For v1.8+, a merge of the v1.6 code is not enough, as the openib connection
code changed from xoob to udcm. I made a version against a pre-git state of
the tree, so I will update it and make a pull request.

Piotr




________________________________________
De : devel [devel-boun...@open-mpi.org] de la part de Gilles Gouaillardet 
[gilles.gouaillar...@iferc.org]
Envoyé : lundi 8 décembre 2014 03:27
À : Open MPI Developers
Objet : Re: [OMPI devel] openmpi and XRC API from ofed-3.12

Hi Piotr,

this is quite an old thread now, but I have not seen any support for XRC
with ofed 3.12 yet
(neither in trunk nor in v1.8)

my understanding is that Bull already did something similar for the v1.6
series,
so let me put this the other way around :

does Bull have any plan to contribute this work ?
(for example, publish a patch for the v1.6 series, or submit pull
request(s) for master and v1.8 branch)

Cheers,

Gilles

On 2014/04/23 21:58, Piotr Lesnicki wrote:
> Hi,
>
> In OFED-3.12 the API for XRC has changed. I did not find
> corresponding changes in Open MPI: for example the function
> 'ibv_create_xrc_rcv_qp()' queried in openmpi configure script no
> longer exists in ofed-3.12-rc1.
>
> Are there any plans to support the new XRC API ?
>
>
> --
> Piotr
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
> Link to this post:
> http://www.open-mpi.org/community/lists/devel/2014/04/14583.php

_______________________________________________
devel mailing list
de...@open-mpi.org
Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
Link to this post: 
http://www.open-mpi.org/community/lists/devel/2014/12/16445.php
diff --git a/ompi/config/ompi_check_openib.m4 b/ompi/config/ompi_check_openib.m4
index 187356f..97ee8fb 100644
--- a/ompi/config/ompi_check_openib.m4
+++ b/ompi/config/ompi_check_openib.m4
@@ -15,6 +15,7 @@
 #                         reserved.
 # Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
 # Copyright (c) 2010-2012 Oracle and/or its affiliates.  All rights reserved.
+# Copyright (c) 2014      Bull SAS.  All rights reserved.
 # $COPYRIGHT$
 # 
 # Additional copyrights may follow
@@ -175,6 +176,7 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
     # (unconditionally)
     $1_have_xrc=0
     $1_have_rdmacm=0
+    $1_have_xrc_connectib=0
     $1_have_opensm_devel=0
 
     # If we have the openib stuff available, find out what we've got
@@ -188,10 +190,15 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
                             [#include <infiniband/verbs.h>])
 
            # ibv_create_xrc_rcv_qp was added in OFED 1.3
+	   # ibv_open_xrcd was added in  OFED 3.12 (new API)
            if test "$enable_connectx_xrc" = "yes"; then
-               AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
+               AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp ibv_cmd_open_xrcd], [$1_have_xrc=1])
+           fi
+           if test "$enable_connectx_xrc" = "yes"; then
+               AC_CHECK_FUNCS([ibv_cmd_open_xrcd], [$1_have_xrc_connectib=1])
            fi
 
+
            if test "no" != "$enable_openib_dynamic_sl"; then
                # We need ib_types.h file, which is installed with opensm-devel
                # package. However, ib_types.h has a bad include directive,
@@ -279,6 +286,15 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
         AC_MSG_RESULT([no])
     fi
 
+    AC_MSG_CHECKING([if ConnectIB XRC support is enabled])
+    AC_DEFINE_UNQUOTED([OMPI_HAVE_CONNECTIB_XRC], [$$1_have_xrc_connectib],
+        [Enable features required for ConnectIB XRC support])
+    if test "1" = "$$1_have_xrc_connectib"; then
+        AC_MSG_RESULT([yes])
+    else
+        AC_MSG_RESULT([no])
+    fi
+
     AC_MSG_CHECKING([if dynamic SL is enabled])
     AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_opensm_devel],
         [Enable features required for dynamic SL support])
diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c
index 8a9d942..80f833b 100644
--- a/ompi/mca/btl/openib/btl_openib.c
+++ b/ompi/mca/btl/openib/btl_openib.c
@@ -17,6 +17,7 @@
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2008-2012 Oracle and/or its affiliates.  All rights reserved.
  * Copyright (c) 2009      IBM Corporation.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -323,10 +324,26 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
             openib_btl->qps[qp].u.srq_qp.rd_posted = 0;
 #if HAVE_XRC
             if(BTL_OPENIB_QP_TYPE_XRC(qp)) {
+#if OMPI_HAVE_CONNECTIB_XRC
+		struct ibv_srq_init_attr_ex attr_ex;
+		memset(&attr_ex, 0, sizeof(struct ibv_srq_init_attr_ex));
+		attr_ex.attr.max_wr = attr.attr.max_wr;
+		attr_ex.attr.max_sge = attr.attr.max_sge;
+		attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD |
+		    IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
+		attr_ex.srq_type = IBV_SRQT_XRC;
+		attr_ex.xrcd = openib_btl->device->xrcd;
+		attr_ex.cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
+		attr_ex.pd = openib_btl->device->ib_pd;
+
+		openib_btl->qps[qp].u.srq_qp.srq =
+		    ibv_create_srq_ex(openib_btl->device->ib_dev_context, &attr_ex);
+#else
                 openib_btl->qps[qp].u.srq_qp.srq =
                     ibv_create_xrc_srq(openib_btl->device->ib_pd,
                             openib_btl->device->xrc_domain,
                             openib_btl->device->ib_cq[qp_cq_prio(qp)], &attr);
+#endif
             } else
 #endif
             {
@@ -1755,8 +1772,12 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
     to_com_frag(frag)->endpoint = ep;
 #if HAVE_XRC
     if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
+#if OMPI_HAVE_CONNECTIB_XRC
+        frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
         frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
 #endif
+#endif
 
     descriptor->order = qp;
     /* Setting opcode on a frag constructor isn't enough since prepare_src
@@ -1839,8 +1860,12 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
 
 #if HAVE_XRC
     if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
+#if OMPI_HAVE_CONNECTIB_XRC
+        frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
         frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
 #endif
+#endif
     descriptor->order = qp;
 
     qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h
index a685ef4..104b897 100644
--- a/ompi/mca/btl/openib/btl_openib.h
+++ b/ompi/mca/btl/openib/btl_openib.h
@@ -16,6 +16,7 @@
  *                         reserved.
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -150,6 +151,12 @@ typedef struct mca_btl_openib_srq_manager_t {
 } mca_btl_openib_srq_manager_t;
 #endif
 
+typedef enum { /* XRC verbs API flavour reported by mca_btl_openib_xrc_api() */
+	MCA_BTL_IB_XRC_API_NONE, /* neither XRC symbol was found */
+	MCA_BTL_IB_XRC_API_BETA, /* ibv_create_xrc_rcv_qp found ("beta, ofed 1.3+") */
+	MCA_BTL_IB_XRC_API_OFED_3_12 /* ibv_cmd_open_xrcd found ("ofed 3.12+") */
+} mca_btl_openib_xrc_api_t;
+
 struct mca_btl_openib_component_t {
     mca_btl_base_component_2_0_0_t          super;  /**< base BTL component */
 
@@ -297,6 +304,10 @@ struct mca_btl_openib_component_t {
     char* default_recv_qps;
     /** GID index to use */
     int gid_index;
+    int xrc_enable_warning;
+#if HAVE_XRC
+    mca_btl_openib_xrc_api_t xrc_api_version;
+#endif
     /** Whether we want a dynamically resizing srq, enabled by default */
     bool enable_srq_resize;
 #if BTL_OPENIB_FAILOVER_ENABLED
@@ -383,7 +394,11 @@ typedef struct mca_btl_openib_device_t {
     volatile bool got_port_event;
 #endif
 #if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+    struct ibv_xrcd *xrcd;
+#else
     struct ibv_xrc_domain *xrc_domain;
+#endif
     int xrc_fd;
 #endif
     int32_t non_eager_rdma_endpoints;
diff --git a/ompi/mca/btl/openib/btl_openib_async.c b/ompi/mca/btl/openib/btl_openib_async.c
index 1cf9d5b..0763a8f 100644
--- a/ompi/mca/btl/openib/btl_openib_async.c
+++ b/ompi/mca/btl/openib/btl_openib_async.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2007-2009 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -115,7 +116,11 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open
     int  ep_i;
     for(ep_i = 0; ep_i < opal_pointer_array_get_size(device->endpoints); ep_i++) {
         ep = opal_pointer_array_get_item(device->endpoints, ep_i);
+#if OMPI_HAVE_CONNECTIB_XRC
+        if (qp_num == ep->xrc_recv_qp->qp_num)
+#else
         if (qp_num == ep->xrc_recv_qp_num)
+#endif
             return ep;
     }
     return NULL;
@@ -316,12 +321,20 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
         event_type = event.event_type;
 #if HAVE_XRC
         /* is it XRC event ?*/
+#if OMPI_HAVE_CONNECTIB_XRC
+        if (event.element.qp &&
+            (IBV_QPT_XRC_RECV == event.element.qp->qp_type
+             || IBV_QPT_XRC_SEND == event.element.qp->qp_type)) {
+            xrc_event = true;
+        }
+#else
         if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
             xrc_event = true;
             /* Clean the bitnd handel as usual */
             event_type ^= IBV_XRC_QP_EVENT_FLAG;
         }
 #endif
+#endif
         switch(event_type) {
             case IBV_EVENT_PATH_MIG:
                 BTL_ERROR(("Alternative path migration event reported"));
@@ -331,10 +344,16 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
                         mca_btl_openib_load_apm(event.element.qp,
                                 qp2endpoint(event.element.qp, device));
 #if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+                    else
+                        mca_btl_openib_load_apm(event.element.qp,
+                                xrc_qp2endpoint(event.element.qp->qp_num, device));
+#else
                     else
                         mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
                                 xrc_qp2endpoint(event.element.xrc_qp_num, device));
 #endif
+#endif
                 }
                 break;
             case IBV_EVENT_DEVICE_FATAL:
@@ -584,7 +603,7 @@ void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep)
                    qp->qp_num, strerror(errno), errno));
 }
 
-#if HAVE_XRC
+#if HAVE_XRC && ! OMPI_HAVE_CONNECTIB_XRC
 void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep)
 {
     struct ibv_qp_init_attr qp_init_attr;
@@ -614,6 +633,7 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t
     }
 
     ibv_modify_xrc_rcv_qp(btl->device->xrc_domain, qp_num, &attr, mask);
+
     /* Maybe the qp already was modified by other process - ignoring error */
 }
 #endif
diff --git a/ompi/mca/btl/openib/btl_openib_async.h b/ompi/mca/btl/openib/btl_openib_async.h
index f35694b..8eda380 100644
--- a/ompi/mca/btl/openib/btl_openib_async.h
+++ b/ompi/mca/btl/openib/btl_openib_async.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -16,7 +17,7 @@
 void*      btl_openib_async_thread(void *one_hca);
 void       mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
 int        btl_openib_async_command_done(int exp);
-#if HAVE_XRC
+#if HAVE_XRC && ! OMPI_HAVE_CONNECTIB_XRC
 void       mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep);
 #endif
 
diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c
index 6e61b44..e8f2361 100644
--- a/ompi/mca/btl/openib/btl_openib_component.c
+++ b/ompi/mca/btl/openib/btl_openib_component.c
@@ -16,6 +16,7 @@
  *                         reserved.
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2009-2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -1133,6 +1134,32 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
         return OMPI_ERROR;
     }
 
+
+    /* Detect which XRC verbs API is visible at run time and, when XRC is
+     * enabled, fail if it is not the API this BTL was compiled
+     * against. */
+    mca_btl_openib_component.xrc_api_version = mca_btl_openib_xrc_api();
+    opal_output_verbose(5, mca_btl_base_output, "openib BTL detected XRC API: %s\n",
+                mca_btl_openib_xrc_api_str(mca_btl_openib_component.xrc_api_version));
+    if (MCA_BTL_XRC_ENABLED) {
+#if ! OMPI_HAVE_CONNECTIB_XRC
+        /* Legacy build: the message must name the "beta" API we were built with. */
+        if (mca_btl_openib_component.xrc_api_version != MCA_BTL_IB_XRC_API_BETA) {
+            BTL_ERROR(("XRC error: bad XRC API (compiled with %s api).",
+                       mca_btl_openib_xrc_api_str(MCA_BTL_IB_XRC_API_BETA)));
+            return OMPI_ERROR;
+        }
+#else
+        /* Build against the OFED 3.12+ XRC API. */
+        if (mca_btl_openib_component.xrc_api_version != MCA_BTL_IB_XRC_API_OFED_3_12) {
+            BTL_ERROR(("XRC error: bad XRC API (compiled with %s api).",
+                       mca_btl_openib_xrc_api_str(MCA_BTL_IB_XRC_API_OFED_3_12)));
+            return OMPI_ERROR;
+        }
+#endif
+    }
+
+
     if (MCA_BTL_XRC_ENABLED) {
         if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
             BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c
index d99b7b9..8f20e46 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.c
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.c
@@ -17,6 +17,7 @@
  * Copyright (c) 2006-2009 Mellanox Technologies, Inc.  All rights reserved.
  * Copyright (c) 2009      IBM Corporation.  All rights reserved.
  * Copyright (c) 2010-2011 Oracle and/or its affiliates.  All rights reserved
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  *
  * $COPYRIGHT$
  *
@@ -346,7 +347,11 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
     }
 
     endpoint->ib_addr = NULL;
+#if OMPI_HAVE_CONNECTIB_XRC
+    endpoint->xrc_recv_qp = NULL;
+#else
     endpoint->xrc_recv_qp_num = 0;
+#endif
     endpoint->endpoint_btl = 0;
     endpoint->endpoint_proc = 0;
     endpoint->endpoint_local_cpc = NULL;
@@ -457,12 +462,24 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
 
     /* unregister xrc recv qp */
 #if HAVE_XRC
+#if ! OMPI_HAVE_CONNECTIB_XRC
     if (0 != endpoint->xrc_recv_qp_num) {
         if(ibv_unreg_xrc_rcv_qp(endpoint->endpoint_btl->device->xrc_domain,
                     endpoint->xrc_recv_qp_num)) {
             BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp_num));
-        }
+        } else {
+	    BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp_num));
+	}
     }
+#else
+    if (NULL != endpoint->xrc_recv_qp) {
+        if(ibv_destroy_qp(endpoint->xrc_recv_qp)) {
+            BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
+        } else {
+	    BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
+	}
+    }
+#endif
 #endif
 
     OBJ_DESTRUCT(&endpoint->endpoint_lock);
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.h b/ompi/mca/btl/openib/btl_openib_endpoint.h
index 57f03f7..648ca1d 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.h
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.h
@@ -15,6 +15,7 @@
  * Copyright (c) 2006-2007 Voltaire All rights reserved.
  * Copyright (c) 2007-2009 Mellanox Technologies.  All rights reserved.
  * Copyright (c) 2010-2012 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -206,7 +207,11 @@ struct mca_btl_base_endpoint_t {
     opal_list_t                 pending_lazy_frags;
 
     mca_btl_openib_endpoint_qp_t *qps;
+#if OMPI_HAVE_CONNECTIB_XRC
+    struct ibv_qp *xrc_recv_qp;
+#else
     uint32_t xrc_recv_qp_num; /* in xrc we will use it as recv qp */
+#endif
     uint32_t xrc_recv_psn;
 
     /** list of pending rget ops */
@@ -590,9 +595,14 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
     }
 
 #if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+    if(BTL_OPENIB_QP_TYPE_XRC(qp))
+        sr_desc->qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
     if(BTL_OPENIB_QP_TYPE_XRC(qp))
         sr_desc->xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
 #endif
+#endif
     assert(sg->addr == (uint64_t)(uintptr_t)frag->hdr);
 
     if (sr_desc->send_flags & IBV_SEND_SIGNALED) {
diff --git a/ompi/mca/btl/openib/btl_openib_xrc.c b/ompi/mca/btl/openib/btl_openib_xrc.c
index 8236199..f1f738c 100644
--- a/ompi/mca/btl/openib/btl_openib_xrc.c
+++ b/ompi/mca/btl/openib/btl_openib_xrc.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -16,6 +17,7 @@
 #include <fcntl.h>
 #include <errno.h>
 #include <unistd.h>
+#include <dlfcn.h>
 
 #include "ompi/mca/btl/base/base.h"
 #include "btl_openib_xrc.h"
@@ -32,12 +34,50 @@ OBJ_CLASS_INSTANCE(ib_address_t,
                    ib_address_constructor,
                    ib_address_destructor);
 
+/*
+ * Run-time check for which libibverbs XRC API we really have
+ * underneath.
+ *
+ * Note: I do not know any reliable way other than library symbols to
+ * distinguish between libibverbs having "beta" and official XRC API
+ * (no different versions or capabilities).
+ */
+mca_btl_openib_xrc_api_t mca_btl_openib_xrc_api(void)
+{
+    mca_btl_openib_xrc_api_t api = MCA_BTL_IB_XRC_API_NONE;
+    void *lib = dlopen(NULL, RTLD_NOW); /* handle on the current program */
+    if (NULL == lib) {
+        return api; /* cannot inspect symbols: report no XRC API */
+    }
+    /* Probe the legacy symbol first; the newer one, if present, wins. */
+    if (NULL != dlsym(lib, "ibv_create_xrc_rcv_qp")) {
+        api = MCA_BTL_IB_XRC_API_BETA;
+    }
+    if (NULL != dlsym(lib, "ibv_cmd_open_xrcd")) {
+        api = MCA_BTL_IB_XRC_API_OFED_3_12;
+    }
+    dlclose(lib);
+    return api;
+}
+
+const char *mca_btl_openib_xrc_api_str(mca_btl_openib_xrc_api_t xrc_api)
+{
+    /* Translate an XRC API identifier into its human-readable label. */
+    if (MCA_BTL_IB_XRC_API_BETA == xrc_api) {
+        return "beta, ofed 1.3+";
+    }
+    return (MCA_BTL_IB_XRC_API_OFED_3_12 == xrc_api) ? "ofed 3.12+" : "none";
+}
+
 /* This func. opens XRC domain */
 int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
 {
     int len;
     char *xrc_file_name;
     const char *dev_name;
+#if OMPI_HAVE_CONNECTIB_XRC
+    struct ibv_xrcd_init_attr xrcd_attr;
+#endif
 
     dev_name = ibv_get_device_name(device->ib_dev);
     len = asprintf(&xrc_file_name,
@@ -56,9 +96,17 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
         free(xrc_file_name);
         return OMPI_ERROR;
     }
-
+#if OMPI_HAVE_CONNECTIB_XRC
+    memset(&xrcd_attr, 0, sizeof xrcd_attr);
+    xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
+    xrcd_attr.fd = device->xrc_fd;
+    xrcd_attr.oflags = O_CREAT;
+    device->xrcd = ibv_open_xrcd(device->ib_dev_context, &xrcd_attr);
+    if (NULL == device->xrcd) {
+#else
     device->xrc_domain = ibv_open_xrc_domain(device->ib_dev_context, device->xrc_fd, O_CREAT);
     if (NULL == device->xrc_domain) {
+#endif
         BTL_ERROR(("Failed to open XRC domain\n"));
         close(device->xrc_fd);
         free(xrc_file_name);
@@ -71,11 +119,19 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
 /* This func. closes XRC domain */
 int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device)
 {
+#if OMPI_HAVE_CONNECTIB_XRC
+    if (NULL == device->xrcd) {
+#else
     if (NULL == device->xrc_domain) {
+#endif
         /* No XRC domain, just exit */
         return OMPI_SUCCESS;
     }
+#if OMPI_HAVE_CONNECTIB_XRC
+    if (ibv_close_xrcd(device->xrcd)) {
+#else
     if (ibv_close_xrc_domain(device->xrc_domain)) {
+#endif
         BTL_ERROR(("Failed to close XRC domain, errno %d says %s\n",
                     device->xrc_fd, strerror(errno)));
         return OMPI_ERROR;
diff --git a/ompi/mca/btl/openib/btl_openib_xrc.h b/ompi/mca/btl/openib/btl_openib_xrc.h
index d8313f4..b62540f 100644
--- a/ompi/mca/btl/openib/btl_openib_xrc.h
+++ b/ompi/mca/btl/openib/btl_openib_xrc.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -41,9 +42,13 @@ struct ib_address_t {
 };
 typedef struct ib_address_t ib_address_t;
 
+
 int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device);
 int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device);
 int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
         orte_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
 
+mca_btl_openib_xrc_api_t mca_btl_openib_xrc_api(void);
+const char *mca_btl_openib_xrc_api_str(mca_btl_openib_xrc_api_t xrc_api);
+
 #endif
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
index 50c1ef5..07cd404 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
@@ -2,6 +2,7 @@
  * Copyright (c) 2007-2011 Mellanox Technologies.  All rights reserved.
  * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2009      IBM Corporation.  All rights reserved.
+ * Copyright (c) 2014      Bull SAS.  All rights reserved.
  *
  * $COPYRIGHT$
  *
@@ -265,7 +266,11 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
             qp_num = endpoint->qps[0].qp->lcl_qp->qp_num;
             psn = endpoint->qps[0].qp->lcl_psn;
         } else {
+#if OMPI_HAVE_CONNECTIB_XRC
+            qp_num = endpoint->xrc_recv_qp->qp_num;
+#else
             qp_num = endpoint->xrc_recv_qp_num;
+#endif
             psn = endpoint->xrc_recv_psn;
         }
         /* stuff all the QP info into the buffer */
@@ -341,10 +346,21 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
         }
         /* on response we add all SRQ numbers */
         for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
+#if OMPI_HAVE_CONNECTIB_XRC
+            uint32_t srq_num; /* written by ibv_get_srq_num() on success */
+            if (ibv_get_srq_num(endpoint->endpoint_btl->qps[srq].u.srq_qp.srq, &srq_num)) {
+                BTL_ERROR(("BTL openib XOOB internal error: can't get srq num"));
+                return OMPI_ERROR; /* srq_num is undefined here: never pack it */
+            }
+            BTL_VERBOSE(("Send pack srq[%d] num  = %d", srq, srq_num));
+            BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
+            rc = opal_dss.pack(buffer, &srq_num, 1, OPAL_UINT32);
+#else
             BTL_VERBOSE(("Send pack srq[%d] num  = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num));
             BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
             rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
                     1, OPAL_UINT32);
+#endif
             if (ORTE_SUCCESS != rc) {
                 ORTE_ERROR_LOG(rc);
                 return rc;
@@ -376,7 +392,11 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     uint32_t send_wr;
     struct ibv_qp **qp;
     uint32_t *psn;
+#if OMPI_HAVE_CONNECTIB_XRC
+    struct ibv_qp_init_attr_ex qp_init_attr;
+#else
     struct ibv_qp_init_attr qp_init_attr;
+#endif
     struct ibv_qp_attr attr;
     int ret;
     size_t req_inline;
@@ -392,7 +412,11 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     send_wr = endpoint->ib_addr->qp->sd_wqe +
         (mca_btl_openib_component.use_eager_rdma ?
          mca_btl_openib_component.max_eager_rdma : 0);
+#if OMPI_HAVE_CONNECTIB_XRC
+    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
+#else
     memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
+#endif
     memset(&attr, 0, sizeof(struct ibv_qp_attr));
 
     qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];
@@ -405,9 +429,16 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     qp_init_attr.cap.max_send_sge = 1;
     /* this one is ignored by driver */
     qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
+#if OMPI_HAVE_CONNECTIB_XRC
+    qp_init_attr.qp_type = IBV_QPT_XRC_SEND;
+    qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
+    qp_init_attr.pd = openib_btl->device->ib_pd;
+    *qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context, &qp_init_attr);
+#else
     qp_init_attr.qp_type = IBV_QPT_XRC;
     qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
     *qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
+#endif
     if (NULL == *qp) {
 	orte_show_help("help-mpi-btl-openib-cpc-base.txt",
 		       "ibv_create_qp failed", true,
@@ -544,7 +575,11 @@ static int xoob_send_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
 /* Recv qp create */
 static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
 {
+#if OMPI_HAVE_CONNECTIB_XRC
+    struct ibv_qp_init_attr_ex qp_init_attr;
+#else
     struct ibv_qp_init_attr qp_init_attr;
+#endif
     struct ibv_qp_attr attr;
     int ret;
 
@@ -553,6 +588,19 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
 
     BTL_VERBOSE(("Connecting Recv QP\n"));
 
+#if OMPI_HAVE_CONNECTIB_XRC
+    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
+    qp_init_attr.qp_type = IBV_QPT_XRC_RECV;
+    qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_XRCD;
+    qp_init_attr.xrcd = openib_btl->device->xrcd;
+    endpoint->xrc_recv_qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context,
+					     &qp_init_attr);
+    if (NULL == endpoint->xrc_recv_qp) {
+        BTL_ERROR(("Error creating XRC recv QP, errno says: %s [%d]",
+                    strerror(errno), errno));
+        return OMPI_ERROR;
+    }
+#else
     memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
     /* Only xrc_domain is required, all other are ignored */
     qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
@@ -562,12 +610,26 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
                     endpoint->xrc_recv_qp_num, strerror(ret), ret));
         return OMPI_ERROR;
     }
+#endif
 
     memset(&attr, 0, sizeof(struct ibv_qp_attr));
     attr.qp_state = IBV_QPS_INIT;
     attr.pkey_index = openib_btl->pkey_index;
     attr.port_num = openib_btl->port_num;
     attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
+#if OMPI_HAVE_CONNECTIB_XRC
+    ret = ibv_modify_qp(endpoint->xrc_recv_qp,
+            &attr,
+            IBV_QP_STATE|
+            IBV_QP_PKEY_INDEX|
+            IBV_QP_PORT|
+            IBV_QP_ACCESS_FLAGS);
+    if (ret) {
+        BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_INIT, errno says: %s [%d]",
+                    strerror(ret), ret));
+        return OMPI_ERROR;
+    }
+#else
     ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
             endpoint->xrc_recv_qp_num,
             &attr,
@@ -578,9 +640,10 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
     if (ret) {
         BTL_ERROR(("Error modifying XRC recv QP[%x] to IBV_QPS_INIT, errno says: %s [%d]",
                      endpoint->xrc_recv_qp_num, strerror(ret), ret));
-        while(1);
         return OMPI_ERROR;
     }
+#endif
+
 
     memset(&attr, 0, sizeof(struct ibv_qp_attr));
     attr.qp_state           = IBV_QPS_RTR;
@@ -612,6 +675,7 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
     }
 #endif
 
+#if ! OMPI_HAVE_CONNECTIB_XRC
     ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
             endpoint->xrc_recv_qp_num,
             &attr,
@@ -627,12 +691,37 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
                     endpoint->xrc_recv_qp_num, strerror(ret), ret));
         return OMPI_ERROR;
     }
+#else
+    ret = ibv_modify_qp(endpoint->xrc_recv_qp,
+            &attr,
+            IBV_QP_STATE|
+            IBV_QP_AV|
+            IBV_QP_PATH_MTU|
+            IBV_QP_DEST_QPN|
+            IBV_QP_RQ_PSN|
+            IBV_QP_MAX_DEST_RD_ATOMIC|
+            IBV_QP_MIN_RNR_TIMER);
+    if (ret) {
+        BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_RTR, errno says: %s [%d]",
+                    strerror(ret), ret));
+        return OMPI_ERROR;
+    }
+#endif
+
 #if OPAL_HAVE_THREADS
     if (APM_ENABLED) {
+#if ! OMPI_HAVE_CONNECTIB_XRC
         mca_btl_openib_load_apm_xrc_rcv(endpoint->xrc_recv_qp_num, endpoint);
+#else
+        mca_btl_openib_load_apm(endpoint->xrc_recv_qp, endpoint);
+#endif
     }
 #endif
-
+#if ! OMPI_HAVE_CONNECTIB_XRC
+    BTL_VERBOSE(("XRC Recv QP[%d] is in state RTR\n", endpoint->xrc_recv_qp_num));
+#else
+    BTL_VERBOSE(("XRC Recv QP[%d] is in state RTR\n", endpoint->xrc_recv_qp->qp_num));
+#endif
     return OMPI_SUCCESS;
 }
 
@@ -643,7 +732,7 @@ static int xoob_recv_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
 
     mca_btl_openib_module_t* openib_btl =
         (mca_btl_openib_module_t*)endpoint->endpoint_btl;
-
+#if ! OMPI_HAVE_CONNECTIB_XRC
     BTL_VERBOSE(("Connecting Recv QP\n"));
     ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, rem_info->rem_qps->rem_qp_num);
     if (ret) { /* failed to regester the qp, so it is already die and we should create new one */
@@ -656,6 +745,25 @@ static int xoob_recv_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
         endpoint->xrc_recv_qp_num = rem_info->rem_qps->rem_qp_num;
         return OMPI_SUCCESS;
     }
+#else
+    struct ibv_qp_open_attr attr;
+    memset(&attr, 0, sizeof(struct ibv_qp_open_attr));
+    attr.comp_mask = IBV_QP_OPEN_ATTR_NUM | IBV_QP_OPEN_ATTR_XRCD | IBV_QP_OPEN_ATTR_TYPE;
+    attr.qp_num = rem_info->rem_qps->rem_qp_num;
+    attr.qp_type = IBV_QPT_XRC_RECV;
+    attr.xrcd = openib_btl->device->xrcd;
+    BTL_VERBOSE(("Connecting Recv QP\n"));
+    endpoint->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr);
+    if (NULL == endpoint->xrc_recv_qp) { /* failed to regester the qp, so it is already die and we should create new one */
+       /* Return NOT READY !!!*/
+        BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d)\n. Replying with RNR",
+                    rem_info->rem_qps->rem_qp_num, strerror(errno), errno));
+        return OMPI_ERROR;
+    } else {
+	    BTL_VERBOSE(("Connected to XRC Recv qp [%d]", rem_info->rem_qps->rem_qp_num));
+	    return OMPI_SUCCESS;
+    }
+#endif
 }
 
 /*
diff --git a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
index 5c9e339..0577b0d 100644
--- a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
+++ b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
@@ -1,6 +1,7 @@
 #
 # Copyright (c) 2006-2013 Cisco Systems, Inc.  All rights reserved.
 # Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved.
+# Copyright (c) 2014      Bull SAS.  All rights reserved.
 # $COPYRIGHT$
 # 
 # Additional copyrights may follow
@@ -165,7 +166,7 @@ max_inline_data = 128
 
 [Mellanox ConnectIB]
 vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba,0x15b3,0x119f
-vendor_part_id = 4113
+vendor_part_id = 4113,7059,7060
 use_eager_rdma = 1
 mtu = 4096
 max_inline_data = 256

Reply via email to