Were all the issues with this code fixed? There were m4 issues and Solaris issues, IIRC.
Sent from my phone. No type good. On Jun 28, 2011, at 9:28 AM, "klit...@osl.iu.edu" <klit...@osl.iu.edu> wrote: > Author: kliteyn > Date: 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011) > New Revision: 24830 > URL: https://svn.open-mpi.org/trac/ompi/changeset/24830 > > Log: > Supporting dynamic SL (#2674) > > - Added enable/disable configuration parameter for dynamic SL > - All the dynamic SL code is conditionalized > - Removed libibmad dependency > - Using only one include - ib_types.h (part of opensm-devel package) > - Removed all the macro and data types definitions, using the > existing definitions from ib_types.h instead > - general cleaning here and there > > The async mode is not implemented yet - stay tuned... > > > Text files modified: > trunk/ompi/config/ompi_check_openib.m4 | 38 ++++ > > trunk/ompi/mca/btl/openib/btl_openib.h | 5 > > trunk/ompi/mca/btl/openib/btl_openib_mca.c | 10 > > trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c | 309 > +++++++++++++++++---------------------- > 4 files changed, 182 insertions(+), 180 deletions(-) > > Modified: trunk/ompi/config/ompi_check_openib.m4 > ============================================================================== > --- trunk/ompi/config/ompi_check_openib.m4 (original) > +++ trunk/ompi/config/ompi_check_openib.m4 2011-06-28 10:28:29 EDT (Tue, > 28 Jun 2011) > @@ -155,11 +155,21 @@ > [$ompi_cv_func_ibv_create_cq_args], > [Number of arguments to > ibv_create_cq])])]) > > + # > + # OpenIB dynamic SL > + # > + AC_ARG_ENABLE([openib-dynamic-sl], > + [AC_HELP_STRING([--enable-openib-dynamic-sl], > + [Enable openib BTL to query Subnet Manager for IB SL > (default: enabled)])], > + [enable_openib_dynamic_sl="$enableval"], > + [enable_openib_dynamic_sl="yes"]) > + > # Set these up so that we can do an AC_DEFINE below > # (unconditionally) > $1_have_xrc=0 > $1_have_rdmacm=0 > $1_have_ibcm=0 > + $1_have_dynamic_sl=0 > > # If we have the openib stuff available, find out what we've got > AS_IF([test 
"$ompi_check_openib_happy" = "yes"], > @@ -176,6 +186,19 @@ > AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1]) > fi > > + if test "$enable_openib_dynamic_sl" = "yes"; then > + # We need ib_types.h file, which is installed with > opensm-devel > + # package. However, ib_types.h has a bad include directive, > + # which will cause AC_CHECK_HEADER to fail. > + # So instead, we will look for another file that is also > + # installed as part of opensm-devel package and included in > + # ib_types.h, but it doesn't include any other IB-related > files. > + AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h], > + [$1_have_dynamic_sl=1], > + [AC_MSG_ERROR([opensm-devel package not found > - please install it or disable dynamic SL support with > \"--disable-openib-dynamic-sl\"])], > + []) > + fi > + > # Do we have a recent enough RDMA CM? Need to have the > # rdma_get_peer_addr (inline) function (originally appeared > # in OFED v1.3). > @@ -244,6 +267,15 @@ > else > AC_MSG_RESULT([no]) > fi > + > + AC_MSG_CHECKING([if dynamic SL is enabled]) > + AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_dynamic_sl], > + [Enable features required for dynamic SL support]) > + if test "1" = "$$1_have_dynamic_sl"; then > + AC_MSG_RESULT([yes]) > + else > + AC_MSG_RESULT([no]) > + fi > > AC_MSG_CHECKING([if OpenFabrics RDMACM support is enabled]) > AC_DEFINE_UNQUOTED([OMPI_HAVE_RDMACM], [$$1_have_rdmacm], > @@ -267,7 +299,11 @@ > AC_MSG_RESULT([no]) > fi > > - CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS" > + AS_IF([test -z "$ompi_check_openib_dir"], > + [openib_include_dir="/usr/include"], > + [openib_include_dir="$ompi_check_openib_dir/include"]) > + > + CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS > -I$openib_include_dir/infiniband" > LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS" > LIBS="$ompi_check_openib_$1_save_LIBS" > > > Modified: trunk/ompi/mca/btl/openib/btl_openib.h > ============================================================================== > --- 
trunk/ompi/mca/btl/openib/btl_openib.h (original) > +++ trunk/ompi/mca/btl/openib/btl_openib.h 2011-06-28 10:28:29 EDT (Tue, > 28 Jun 2011) > @@ -52,6 +52,7 @@ > BEGIN_C_DECLS > > #define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC) > +#define ENABLE_DYNAMIC_SL (1 == OMPI_ENABLE_DYNAMIC_SL) > > #define MCA_BTL_IB_LEAVE_PINNED 1 > #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll > @@ -215,7 +216,9 @@ > uint32_t ib_rnr_retry; > uint32_t ib_max_rdma_dst_ops; > uint32_t ib_service_level; > - uint32_t ib_path_rec_service_level; > +#if (ENABLE_DYNAMIC_SL) > + uint32_t ib_path_record_service_level; > +#endif > int32_t use_eager_rdma; > int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for > short messages, always */ > int32_t eager_rdma_num; > > Modified: trunk/ompi/mca/btl/openib/btl_openib_mca.c > ============================================================================== > --- trunk/ompi/mca/btl/openib/btl_openib_mca.c (original) > +++ trunk/ompi/mca/btl/openib/btl_openib_mca.c 2011-06-28 10:28:29 EDT > (Tue, 28 Jun 2011) > @@ -398,10 +398,14 @@ > } > mca_btl_openib_component.ib_service_level = (uint32_t) ival; > > - CHECK(reg_int("ib_path_rec_service_level", NULL, "Enable getting > InfiniBand service level from PathRecord " > - "(must be >= 0, 0 = disabled, positive = try to get the > service level from PathRecord)", > +#if (ENABLE_DYNAMIC_SL) > + CHECK(reg_int("ib_path_record_service_level", NULL, > + "Enable getting InfiniBand service level from PathRecord " > + "(must be >= 0, 0 = disabled, positive = try to get the " > + "service level from PathRecord)", > 0, &ival, REGINT_GE_ZERO)); > - mca_btl_openib_component.ib_path_rec_service_level = (uint32_t) ival; > + mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival; > +#endif > > CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages " > "(-1 = use device default, 0 = do not use eager RDMA, " > > Modified: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c > 
============================================================================== > --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (original) > +++ trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c 2011-06-28 > 10:28:29 EDT (Tue, 28 Jun 2011) > @@ -44,6 +44,10 @@ > #include "connect/connect.h" > #include "orte/util/show_help.h" > > +#if (ENABLE_DYNAMIC_SL) > +#include <infiniband/iba/ib_types.h> > +#endif > + > #ifdef HAVE_UNISTD_H > #include <unistd.h> > #endif > @@ -54,109 +58,17 @@ > ENDPOINT_CONNECT_ACK > } connect_message_type_t; > > -#ifndef __WINDOWS__ > -#define PACK_SUFFIX __attribute__((packed)) > -#else > -#define PACK_SUFFIX > -#endif > - > -#define SL_NOT_PRESENT 0x7F > +#define SL_NOT_PRESENT 0xFF > #define MAX_GET_SL_REC_RETRIES 20 > #define GET_SL_REC_RETRIES_TIMEOUT_MS 2000000 > > -#define IB_SA_QPN 1 > -#define IB_GLOBAL_QKEY 0x80010000UL > -#define IB_MGMT_BASE_VERSION 1 > -#define IB_MGMT_CLASS_SUBN_ADM 0x03 > -#define IB_MGMT_METHOD_GET 0x01 > -#define IB_SA_TID_GET_PATH_REC_0 0xCA000000UL > -#define IB_SA_TID_GET_PATH_REC_1 0xBEEF0000UL > -#define IB_PATH_REC_SL_MASK 0x000F > -#define IB_SA_ATTR_PATH_REC 0x35 > -#define IB_SA_PATH_REC_DLID (1<<4) > -#define IB_SA_PATH_REC_SLID (1<<5) > - > - > -#ifdef __WINDOWS__ > - #pragma pack(push) > - #pragma pack(1) > -#endif > - > -struct ib_mad_hdr { > - uint8_t base_version; > - uint8_t mgmt_class; > - uint8_t class_version; > - uint8_t method; > - uint16_t status; > - uint16_t class_spec; > - uint32_t tid[2]; > - uint16_t attr_id; > - uint16_t resv; > - uint32_t attr_mod; > -} PACK_SUFFIX; > - > -struct ib_rmpp_hdr { > - uint32_t raw[3]; > -} PACK_SUFFIX; > - > -struct ib_sa_hdr { > - uint32_t sm_key[2]; > - uint16_t reserved; > - uint16_t attrib_offset; > - uint32_t comp_mask[2]; > -} PACK_SUFFIX; > - > -typedef union _ib_gid { > - uint8_t raw[16]; > - struct _ib_gid_unicast { > - uint64_t prefix; > - uint64_t interface_id; > - } PACK_SUFFIX unicast; > - struct 
_ib_gid_multicast { > - uint8_t header[2]; > - uint8_t raw_group_id[14]; > - } PACK_SUFFIX multicast; > -} PACK_SUFFIX ib_gid_t; > - > -struct ib_path_record { > - uint64_t service_id; > - ib_gid_t dgit; > - ib_gid_t sgit; > - uint16_t dlid; > - uint16_t slid; > - uint32_t hop_flow_raw; > - uint8_t tclass; > - uint8_t num_path; > - uint16_t pkey; > - uint8_t reserved1; > - uint8_t qos_class_sl; > - uint8_t mtu; > - uint8_t rate; > - uint32_t preference__packet_lifetime__packet_lifetime_selector; > - uint32_t reserved2[35]; > -} PACK_SUFFIX; > - > -union ib_sa_data { > - struct ib_path_record path_record; > -} PACK_SUFFIX; > - > -struct ib_mad_sa { > - struct ib_mad_hdr mad_hdr; > - struct ib_rmpp_hdr rmpp_hdr; > - struct ib_sa_hdr sa_hdr; > - union ib_sa_data sa_data; > -} PACK_SUFFIX; > - > -#ifdef __WINDOWS__ > - #pragma pack(pop) > -#endif > - > +#if (ENABLE_DYNAMIC_SL) > static struct mca_btl_openib_sa_qp_cache { > /* There will be a MR with the one send and receive buffer together */ > /* The send buffer is first, the receive buffer is second */ > /* The receive buffer in a UD queue pair needs room for the 40 byte GRH */ > /* The buffers are first in the structure for page alignment */ > - char send_recv_buffer[sizeof(struct ib_mad_sa) * 2 + 40]; > + char send_recv_buffer[MAD_BLOCK_SIZE * 2 + 40]; > struct mca_btl_openib_sa_qp_cache *next; > struct ibv_context *context; > char *device_name; > @@ -168,8 +80,9 @@ > struct ibv_pd *pd; > struct ibv_recv_wr rwr; > struct ibv_sge rsge; > - char sl_values[65536]; > + uint8_t sl_values[65536]; /* 64K */ > } *sa_qp_cache = 0; > +#endif > > static int oob_priority = 50; > static bool rml_recv_posted = false; > @@ -198,27 +111,31 @@ > static void rml_recv_cb(int status, orte_process_name_t* process_name, > opal_buffer_t* buffer, orte_rml_tag_t tag, > void* cbdata); > + > +#if (ENABLE_DYNAMIC_SL) > static int init_ud_qp(struct ibv_context *context_arg, > struct mca_btl_openib_sa_qp_cache *cache); > static void 
init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache, > - struct ib_mad_sa *sag, > - struct ibv_send_wr *swr, > - struct ibv_sge *ssge, > - uint16_t lid, > - uint16_t rem_lid); > + ib_sa_mad_t *sa_mad, > + struct ibv_send_wr *swr, > + struct ibv_sge *ssge, > + uint16_t lid, > + uint16_t rem_lid); > static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache, > - struct ib_mad_sa *sag, > - struct ib_mad_sa *sar, > - struct ibv_send_wr *swr, > - uint16_t lid, > - uint16_t rem_lid); > -static int init_device(struct ibv_context *context_arg, > - struct mca_btl_openib_sa_qp_cache *cache, > - uint32_t port_num); > -static int get_pathrecord_sl(struct ibv_context *context_arg, > - uint32_t port_num, > + ib_sa_mad_t *sa_mad, > + ib_sa_mad_t *sar, > + struct ibv_send_wr *swr, > uint16_t lid, > uint16_t rem_lid); > +static int init_device(struct ibv_context *context_arg, > + struct mca_btl_openib_sa_qp_cache *cache, > + uint32_t port_num); > +static int get_pathrecord_sl(struct ibv_context *context_arg, > + uint32_t port_num, > + uint16_t lid, > + uint16_t rem_lid); > +static void free_sa_qp_cache(void); > +#endif > > /* > * The "component" struct -- the top-level function pointers for the > @@ -351,6 +268,33 @@ > return OMPI_SUCCESS; > } > > +#if (ENABLE_DYNAMIC_SL) > +static void free_sa_qp_cache(void) > +{ > + struct mca_btl_openib_sa_qp_cache *cache, *tmp; > + > + cache = sa_qp_cache; > + while (NULL != cache) { > + /* free cache data */ > + if (cache->device_name) > + free(cache->device_name); > + if (NULL != cache->qp) > + ibv_destroy_qp(cache->qp); > + if (NULL != cache->ah) > + ibv_destroy_ah(cache->ah); > + if (NULL != cache->cq) > + ibv_destroy_cq(cache->cq); > + if (NULL != cache->mr) > + ibv_dereg_mr(cache->mr); > + if (NULL != cache->pd) > + ibv_dealloc_pd(cache->pd); > + tmp = cache->next; > + free(cache); > + cache = tmp; > + } > +} > +#endif > + > /* > * Component finalize function. Cleanup RML non-blocking receive. 
> */ > @@ -360,7 +304,9 @@ > orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB); > rml_recv_posted = false; > } > - > + #if (ENABLE_DYNAMIC_SL) > + free_sa_qp_cache(); > +#endif > return OMPI_SUCCESS; > } > > @@ -425,7 +371,7 @@ > */ > static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint) > { > - int i, rc; > + int i; > mca_btl_openib_module_t* openib_btl = > (mca_btl_openib_module_t*)endpoint->endpoint_btl; > > @@ -446,18 +392,24 @@ > attr.ah_attr.dlid = endpoint->rem_info.rem_lid; > attr.ah_attr.src_path_bits = openib_btl->src_path_bits; > attr.ah_attr.port_num = openib_btl->port_num; > - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level; > - /* if user enable ib_path_rec_service_level - dynamically get the sl > from PathRecord */ > - if (mca_btl_openib_component.ib_path_rec_service_level > 0) { > - rc = get_pathrecord_sl(qp->context, > + > +#if (ENABLE_DYNAMIC_SL) > + /* if user enabled dynamic SL, get it from PathRecord */ > + if (0 != mca_btl_openib_component.ib_path_record_service_level) { > + int rc = get_pathrecord_sl(qp->context, > attr.ah_attr.port_num, > openib_btl->lid, > attr.ah_attr.dlid); > if (OMPI_ERROR == rc) { > + free_sa_qp_cache(); > return OMPI_ERROR; > } > attr.ah_attr.sl = rc; > } > +#else > + attr.ah_attr.sl = mca_btl_openib_component.ib_service_level; > +#endif > + > /* JMS to be filled in later dynamically */ > attr.ah_attr.static_rate = 0; > > @@ -1056,6 +1008,7 @@ > OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); > } > > +#if (ENABLE_DYNAMIC_SL) > static int init_ud_qp(struct ibv_context *context_arg, > struct mca_btl_openib_sa_qp_cache *cache) > { > @@ -1094,7 +1047,7 @@ > memset(&mattr, 0, sizeof(mattr)); > mattr.qp_state = IBV_QPS_INIT; > mattr.port_num = cache->port_num; > - mattr.qkey = IB_GLOBAL_QKEY; > + mattr.qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY); > rc = ibv_modify_qp(cache->qp, &mattr, > IBV_QP_STATE | > IBV_QP_PKEY_INDEX | > @@ -1128,61 +1081,75 @@ > return OMPI_SUCCESS; > } > static void 
init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache, > - struct ib_mad_sa *sag, > - struct ibv_send_wr *swr, > - struct ibv_sge *ssge, > - uint16_t lid, > - uint16_t rem_lid) > + ib_sa_mad_t *sa_mad, > + struct ibv_send_wr *swr, > + struct ibv_sge *ssge, > + uint16_t lid, > + uint16_t rem_lid) > { > - memset(sag, 0, sizeof(*sag)); > + ib_path_rec_t *path_record = (ib_path_rec_t*)sa_mad->data; > + > memset(swr, 0, sizeof(*swr)); > memset(ssge, 0, sizeof(*ssge)); > > - sag->mad_hdr.base_version = IB_MGMT_BASE_VERSION; > - sag->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; > - sag->mad_hdr.class_version = 2; > - sag->mad_hdr.method = IB_MGMT_METHOD_GET; > - sag->mad_hdr.attr_id = htons (IB_SA_ATTR_PATH_REC); > - sag->mad_hdr.tid[0] = IB_SA_TID_GET_PATH_REC_0 + cache->qp->qp_num; > - sag->mad_hdr.tid[1] = IB_SA_TID_GET_PATH_REC_1 + rem_lid; > - sag->sa_hdr.comp_mask[1] = > - htonl(IB_SA_PATH_REC_DLID | IB_SA_PATH_REC_SLID); > - sag->sa_data.path_record.dlid = htons(rem_lid); > - sag->sa_data.path_record.slid = htons(lid); > + /* Initialize the standard MAD header. 
*/ > + memset(sa_mad, 0, MAD_BLOCK_SIZE); > + ib_mad_init_new((ib_mad_t *)sa_mad, /* mad header pointer */ > + IB_MCLASS_SUBN_ADM, /* management class */ > + (uint8_t) 2, /* version */ > + IB_MAD_METHOD_GET, /* method */ > + hton64((uint64_t)lid << 48 | /* transaction ID */ > + (uint64_t)rem_lid << 32 | > + (uint64_t)cache->qp->qp_num << 8), > + IB_MAD_ATTR_PATH_RECORD, /* attribute ID */ > + 0); /* attribute modifier */ > + > + sa_mad->comp_mask = IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID; > + path_record->dlid = htons(rem_lid); > + path_record->slid = htons(lid); > > swr->sg_list = ssge; > swr->num_sge = 1; > swr->opcode = IBV_WR_SEND; > swr->wr.ud.ah = cache->ah; > - swr->wr.ud.remote_qpn = IB_SA_QPN; > - swr->wr.ud.remote_qkey = IB_GLOBAL_QKEY; > + swr->wr.ud.remote_qpn = ntohl(IB_QP1); > + swr->wr.ud.remote_qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY); > swr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED; > > - ssge->addr = (uint64_t)(void *)sag; > - ssge->length = sizeof(*sag); > + ssge->addr = (uint64_t)(void *)sa_mad; > + ssge->length = MAD_BLOCK_SIZE; > ssge->lkey = cache->mr->lkey; > } > > static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache, > - struct ib_mad_sa *sag, > - struct ib_mad_sa *sar, > - struct ibv_send_wr *swr, > - uint16_t lid, > - uint16_t rem_lid) > + ib_sa_mad_t *req_mad, > + ib_sa_mad_t *resp_mad, > + struct ibv_send_wr *swr, > + uint16_t lid, > + uint16_t rem_lid) > { > struct ibv_send_wr *bswr; > struct ibv_wc wc; > struct timeval get_sl_rec_last_sent, get_sl_rec_last_poll; > struct ibv_recv_wr *brwr; > int got_sl_value, get_sl_rec_retries, rc, ne, i; > + ib_path_rec_t *req_path_record = ib_sa_mad_get_payload_ptr(req_mad); > + ib_path_rec_t *resp_path_record = ib_sa_mad_get_payload_ptr(resp_mad); > > got_sl_value = 0; > get_sl_rec_retries = 0; > > + rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr); > + if (0 != rc) { > + BTL_ERROR(("error posting receive on QP [0x%x] errno says: %s [%d]", > + cache->qp->qp_num, 
strerror(errno), errno)); > + return OMPI_ERROR; > + } > + > while (0 == got_sl_value) { > rc = ibv_post_send(cache->qp, swr, &bswr); > if (0 != rc) { > - BTL_ERROR(("error posing send on QP[%x] errno says: %s [%d]", > + BTL_ERROR(("error posting send on QP [0x%x] errno says: %s [%d]", > cache->qp->qp_num, strerror(errno), errno)); > return OMPI_ERROR; > } > @@ -1190,25 +1157,23 @@ > > while (0 == got_sl_value) { > ne = ibv_poll_cq(cache->cq, 1, &wc); > - if (ne > 0 > - && wc.status == IBV_WC_SUCCESS > - && wc.opcode == IBV_WC_RECV > - && wc.byte_len >= sizeof(*sar) > - && sar->mad_hdr.tid[0] == sag->mad_hdr.tid[0] > - && sar->mad_hdr.tid[1] == sag->mad_hdr.tid[1]) { > - if (0 == sar->mad_hdr.status > - && sar->sa_data.path_record.slid == htons(lid) > - && sar->sa_data.path_record.dlid == htons(rem_lid)) { > + if (ne > 0 && > + IBV_WC_SUCCESS == wc.status && > + IBV_WC_RECV == wc.opcode && > + wc.byte_len >= MAD_BLOCK_SIZE && > + resp_mad->trans_id == req_mad->trans_id) { > + if (0 == resp_mad->status && > + req_path_record->slid == htons(lid) && > + req_path_record->dlid == htons(rem_lid)) { > /* Everything matches, so we have the desired SL */ > - cache->sl_values[rem_lid] = > - sar->sa_data.path_record.qos_class_sl & > IB_PATH_REC_SL_MASK; > + cache->sl_values[rem_lid] = > ib_path_rec_sl(resp_path_record); > got_sl_value = 1; /* still must repost recieve buf */ > } else { > /* Probably bad status, unlikely bad lid match. We will */ > /* ignore response and let it time out so that we do a */ > /* retry, but after a delay. We must make a new TID so */ > /* the SM doesn't see it as the same request. 
*/ > - sag->mad_hdr.tid[1] += 0x10000; > + req_mad->trans_id += hton64(1); > } > rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr); > if (0 != rc) { > @@ -1249,7 +1214,6 @@ > { > struct ibv_ah_attr aattr; > struct ibv_port_attr pattr; > - struct ibv_recv_wr *brwr; > int rc; > > cache->context = ibv_open_device(context_arg->device); > @@ -1315,16 +1279,10 @@ > cache->rwr.sg_list = &(cache->rsge); > memset(&(cache->rsge), 0, sizeof(cache->rsge)); > cache->rsge.addr = (uint64_t)(void *) > - (cache->send_recv_buffer + sizeof(struct ib_mad_sa)); > - cache->rsge.length = sizeof(struct ib_mad_sa) + 40; > + (cache->send_recv_buffer + MAD_BLOCK_SIZE); > + cache->rsge.length = MAD_BLOCK_SIZE + 40; > cache->rsge.lkey = cache->mr->lkey; > > - rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr); > - if (0 != rc) { > - BTL_ERROR(("error posing receive on QP[%x] errno says: %s [%d]", > - cache->qp->qp_num, strerror(errno), errno)); > - return OMPI_ERROR; > - } > return 0; > } > > @@ -1334,7 +1292,7 @@ > uint16_t rem_lid) > { > struct ibv_send_wr swr; > - struct ib_mad_sa *sag, *sar; > + ib_sa_mad_t *req_mad, *resp_mad; > struct ibv_sge ssge; > struct mca_btl_openib_sa_qp_cache *cache; > long page_size = sysconf(_SC_PAGESIZE); > @@ -1342,8 +1300,8 @@ > > /* search for a cached item */ > for (cache = sa_qp_cache; cache; cache = cache->next) { > - if (strcmp(cache->device_name, > - ibv_get_device_name(context_arg->device)) == 0 > + if (0 == strcmp(cache->device_name, > + ibv_get_device_name(context_arg->device)) > && cache->port_num == port_num) { > break; > } > @@ -1365,15 +1323,15 @@ > > /* if the destination lid SL value is not in the cache, go get it */ > if (SL_NOT_PRESENT == cache->sl_values[rem_lid]) { > - /* sag is first buffer, where we build the SA Get request to send */ > - sag = (struct ib_mad_sa *)(cache->send_recv_buffer); > + /* sa_mad is first buffer, where we build the SA Get request to send > */ > + req_mad = (ib_sa_mad_t *)(cache->send_recv_buffer); > > - 
init_sa_mad(cache, sag, &swr, &ssge, lid, rem_lid); > + init_sa_mad(cache, req_mad, &swr, &ssge, lid, rem_lid); > > - /* sar is the receive buffer (40 byte GRH) */ > - sar = (struct ib_mad_sa *)(cache->send_recv_buffer + sizeof(struct > ib_mad_sa) + 40); > + /* resp_mad is the receive buffer (40 byte offset is for GRH) */ > + resp_mad = (ib_sa_mad_t *)(cache->send_recv_buffer + MAD_BLOCK_SIZE > + 40); > > - rc = get_pathrecord_info(cache, sag, sar, &swr, lid, rem_lid); > + rc = get_pathrecord_info(cache, req_mad, resp_mad, &swr, lid, > rem_lid); > if (0 != rc) { > return rc; > } > @@ -1382,3 +1340,4 @@ > /* now all we do is send back the value laying around */ > return cache->sl_values[rem_lid]; > } > +#endif > _______________________________________________ > svn-full mailing list > svn-f...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full