Hi Jeff, On 02-Jul-11 11:52 PM, Jeff Squyres (jsquyres) wrote: > Were all the issues with this code fixed? There were m4 issues and Solaris > issues, IIRC.
I took all the fixes I could find based on the trac: "Be sure also to look at r24196; Josh committed a bunch of warning fixes for you after r24915" I also removed all the libibmad dependencies and unneeded macros, so I hope that this is OK. However, I don't have any Solaris machine to try this on to make sure that there are no issues. The only complaint w.r.t. Solaris that I could find was Terry's mail from last week, but it turned out to be a different problem. Are there any other problems that I'm not aware of? -- YK > Sent from my phone. No type good. > > On Jun 28, 2011, at 9:28 AM, "klit...@osl.iu.edu"<klit...@osl.iu.edu> wrote: > >> Author: kliteyn >> Date: 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011) >> New Revision: 24830 >> URL: https://svn.open-mpi.org/trac/ompi/changeset/24830 >> >> Log: >> Supporting dynamic SL (#2674) >> >> - Added enable/disable configuration parameter for dynamic SL >> - All the dynamic SL code is conditionalized >> - Removed libibmad dependency >> - Using only one include - ib_types.h (part of opensm-devel package) >> - Removed all the macro and data types definitions, using the >> existing definitions from ib_types.h instead >> - general cleaning here and there >> >> The async mode is not implemented yet - stay tuned... 
>> >> >> Text files modified: >> trunk/ompi/config/ompi_check_openib.m4 | 38 ++++ >> trunk/ompi/mca/btl/openib/btl_openib.h | 5 >> trunk/ompi/mca/btl/openib/btl_openib_mca.c | 10 >> trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c | 309 >> +++++++++++++++++---------------------- >> 4 files changed, 182 insertions(+), 180 deletions(-) >> >> Modified: trunk/ompi/config/ompi_check_openib.m4 >> ============================================================================== >> --- trunk/ompi/config/ompi_check_openib.m4 (original) >> +++ trunk/ompi/config/ompi_check_openib.m4 2011-06-28 10:28:29 EDT (Tue, >> 28 Jun 2011) >> @@ -155,11 +155,21 @@ >> [$ompi_cv_func_ibv_create_cq_args], >> [Number of arguments to >> ibv_create_cq])])]) >> >> + # >> + # OpenIB dynamic SL >> + # >> + AC_ARG_ENABLE([openib-dynamic-sl], >> + [AC_HELP_STRING([--enable-openib-dynamic-sl], >> + [Enable openib BTL to query Subnet Manager for IB >> SL (default: enabled)])], >> + [enable_openib_dynamic_sl="$enableval"], >> + [enable_openib_dynamic_sl="yes"]) >> + >> # Set these up so that we can do an AC_DEFINE below >> # (unconditionally) >> $1_have_xrc=0 >> $1_have_rdmacm=0 >> $1_have_ibcm=0 >> + $1_have_dynamic_sl=0 >> >> # If we have the openib stuff available, find out what we've got >> AS_IF([test "$ompi_check_openib_happy" = "yes"], >> @@ -176,6 +186,19 @@ >> AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1]) >> fi >> >> + if test "$enable_openib_dynamic_sl" = "yes"; then >> + # We need ib_types.h file, which is installed with >> opensm-devel >> + # package. However, ib_types.h has a bad include directive, >> + # which will cause AC_CHECK_HEADER to fail. >> + # So instead, we will look for another file that is also >> + # installed as part of opensm-devel package and included in >> + # ib_types.h, but it doesn't include any other IB-related >> files. 
>> + AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h], >> + [$1_have_dynamic_sl=1], >> + [AC_MSG_ERROR([opensm-devel package not >> found - please install it or disable dynamic SL support with >> \"--disable-openib-dynamic-sl\"])], >> + []) >> + fi >> + >> # Do we have a recent enough RDMA CM? Need to have the >> # rdma_get_peer_addr (inline) function (originally appeared >> # in OFED v1.3). >> @@ -244,6 +267,15 @@ >> else >> AC_MSG_RESULT([no]) >> fi >> + >> + AC_MSG_CHECKING([if dynamic SL is enabled]) >> + AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_dynamic_sl], >> + [Enable features required for dynamic SL support]) >> + if test "1" = "$$1_have_dynamic_sl"; then >> + AC_MSG_RESULT([yes]) >> + else >> + AC_MSG_RESULT([no]) >> + fi >> >> AC_MSG_CHECKING([if OpenFabrics RDMACM support is enabled]) >> AC_DEFINE_UNQUOTED([OMPI_HAVE_RDMACM], [$$1_have_rdmacm], >> @@ -267,7 +299,11 @@ >> AC_MSG_RESULT([no]) >> fi >> >> - CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS" >> + AS_IF([test -z "$ompi_check_openib_dir"], >> + [openib_include_dir="/usr/include"], >> + [openib_include_dir="$ompi_check_openib_dir/include"]) >> + >> + CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS >> -I$openib_include_dir/infiniband" >> LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS" >> LIBS="$ompi_check_openib_$1_save_LIBS" >> >> >> Modified: trunk/ompi/mca/btl/openib/btl_openib.h >> ============================================================================== >> --- trunk/ompi/mca/btl/openib/btl_openib.h (original) >> +++ trunk/ompi/mca/btl/openib/btl_openib.h 2011-06-28 10:28:29 EDT (Tue, >> 28 Jun 2011) >> @@ -52,6 +52,7 @@ >> BEGIN_C_DECLS >> >> #define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC) >> +#define ENABLE_DYNAMIC_SL (1 == OMPI_ENABLE_DYNAMIC_SL) >> >> #define MCA_BTL_IB_LEAVE_PINNED 1 >> #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll >> @@ -215,7 +216,9 @@ >> uint32_t ib_rnr_retry; >> uint32_t ib_max_rdma_dst_ops; >> uint32_t ib_service_level; >> - uint32_t 
ib_path_rec_service_level; >> +#if (ENABLE_DYNAMIC_SL) >> + uint32_t ib_path_record_service_level; >> +#endif >> int32_t use_eager_rdma; >> int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA >> for short messages, always */ >> int32_t eager_rdma_num; >> >> Modified: trunk/ompi/mca/btl/openib/btl_openib_mca.c >> ============================================================================== >> --- trunk/ompi/mca/btl/openib/btl_openib_mca.c (original) >> +++ trunk/ompi/mca/btl/openib/btl_openib_mca.c 2011-06-28 10:28:29 EDT >> (Tue, 28 Jun 2011) >> @@ -398,10 +398,14 @@ >> } >> mca_btl_openib_component.ib_service_level = (uint32_t) ival; >> >> - CHECK(reg_int("ib_path_rec_service_level", NULL, "Enable getting >> InfiniBand service level from PathRecord " >> - "(must be>= 0, 0 = disabled, positive = try to get the >> service level from PathRecord)", >> +#if (ENABLE_DYNAMIC_SL) >> + CHECK(reg_int("ib_path_record_service_level", NULL, >> + "Enable getting InfiniBand service level from PathRecord " >> + "(must be>= 0, 0 = disabled, positive = try to get the " >> + "service level from PathRecord)", >> 0,&ival, REGINT_GE_ZERO)); >> - mca_btl_openib_component.ib_path_rec_service_level = (uint32_t) ival; >> + mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival; >> +#endif >> >> CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages " >> "(-1 = use device default, 0 = do not use eager RDMA, " >> >> Modified: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c >> ============================================================================== >> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (original) >> +++ trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c 2011-06-28 >> 10:28:29 EDT (Tue, 28 Jun 2011) >> @@ -44,6 +44,10 @@ >> #include "connect/connect.h" >> #include "orte/util/show_help.h" >> >> +#if (ENABLE_DYNAMIC_SL) >> +#include<infiniband/iba/ib_types.h> >> +#endif >> + >> #ifdef 
HAVE_UNISTD_H >> #include<unistd.h> >> #endif >> @@ -54,109 +58,17 @@ >> ENDPOINT_CONNECT_ACK >> } connect_message_type_t; >> >> -#ifndef __WINDOWS__ >> -#define PACK_SUFFIX __attribute__((packed)) >> -#else >> -#define PACK_SUFFIX >> -#endif >> - >> -#define SL_NOT_PRESENT 0x7F >> +#define SL_NOT_PRESENT 0xFF >> #define MAX_GET_SL_REC_RETRIES 20 >> #define GET_SL_REC_RETRIES_TIMEOUT_MS 2000000 >> >> -#define IB_SA_QPN 1 >> -#define IB_GLOBAL_QKEY 0x80010000UL >> -#define IB_MGMT_BASE_VERSION 1 >> -#define IB_MGMT_CLASS_SUBN_ADM 0x03 >> -#define IB_MGMT_METHOD_GET 0x01 >> -#define IB_SA_TID_GET_PATH_REC_0 0xCA000000UL >> -#define IB_SA_TID_GET_PATH_REC_1 0xBEEF0000UL >> -#define IB_PATH_REC_SL_MASK 0x000F >> -#define IB_SA_ATTR_PATH_REC 0x35 >> -#define IB_SA_PATH_REC_DLID (1<<4) >> -#define IB_SA_PATH_REC_SLID (1<<5) >> - >> - >> -#ifdef __WINDOWS__ >> - #pragma pack(push) >> - #pragma pack(1) >> -#endif >> - >> -struct ib_mad_hdr { >> - uint8_t base_version; >> - uint8_t mgmt_class; >> - uint8_t class_version; >> - uint8_t method; >> - uint16_t status; >> - uint16_t class_spec; >> - uint32_t tid[2]; >> - uint16_t attr_id; >> - uint16_t resv; >> - uint32_t attr_mod; >> -} PACK_SUFFIX; >> - >> -struct ib_rmpp_hdr { >> - uint32_t raw[3]; >> -} PACK_SUFFIX; >> - >> -struct ib_sa_hdr { >> - uint32_t sm_key[2]; >> - uint16_t reserved; >> - uint16_t attrib_offset; >> - uint32_t comp_mask[2]; >> -} PACK_SUFFIX; >> - >> -typedef union _ib_gid { >> - uint8_t raw[16]; >> - struct _ib_gid_unicast { >> - uint64_t prefix; >> - uint64_t interface_id; >> - } PACK_SUFFIX unicast; >> - struct _ib_gid_multicast { >> - uint8_t header[2]; >> - uint8_t raw_group_id[14]; >> - } PACK_SUFFIX multicast; >> -} PACK_SUFFIX ib_gid_t; >> - >> -struct ib_path_record { >> - uint64_t service_id; >> - ib_gid_t dgit; >> - ib_gid_t sgit; >> - uint16_t dlid; >> - uint16_t slid; >> - uint32_t hop_flow_raw; >> - uint8_t tclass; >> - uint8_t num_path; >> - uint16_t pkey; >> - uint8_t reserved1; >> - 
uint8_t qos_class_sl; >> - uint8_t mtu; >> - uint8_t rate; >> - uint32_t preference__packet_lifetime__packet_lifetime_selector; >> - uint32_t reserved2[35]; >> -} PACK_SUFFIX; >> - >> -union ib_sa_data { >> - struct ib_path_record path_record; >> -} PACK_SUFFIX; >> - >> -struct ib_mad_sa { >> - struct ib_mad_hdr mad_hdr; >> - struct ib_rmpp_hdr rmpp_hdr; >> - struct ib_sa_hdr sa_hdr; >> - union ib_sa_data sa_data; >> -} PACK_SUFFIX; >> - >> -#ifdef __WINDOWS__ >> - #pragma pack(pop) >> -#endif >> - >> +#if (ENABLE_DYNAMIC_SL) >> static struct mca_btl_openib_sa_qp_cache { >> /* There will be a MR with the one send and receive buffer together */ >> /* The send buffer is first, the receive buffer is second */ >> /* The receive buffer in a UD queue pair needs room for the 40 byte GRH >> */ >> /* The buffers are first in the structure for page alignment */ >> - char send_recv_buffer[sizeof(struct ib_mad_sa) * 2 + 40]; >> + char send_recv_buffer[MAD_BLOCK_SIZE * 2 + 40]; >> struct mca_btl_openib_sa_qp_cache *next; >> struct ibv_context *context; >> char *device_name; >> @@ -168,8 +80,9 @@ >> struct ibv_pd *pd; >> struct ibv_recv_wr rwr; >> struct ibv_sge rsge; >> - char sl_values[65536]; >> + uint8_t sl_values[65536]; /* 64K */ >> } *sa_qp_cache = 0; >> +#endif >> >> static int oob_priority = 50; >> static bool rml_recv_posted = false; >> @@ -198,27 +111,31 @@ >> static void rml_recv_cb(int status, orte_process_name_t* process_name, >> opal_buffer_t* buffer, orte_rml_tag_t tag, >> void* cbdata); >> + >> +#if (ENABLE_DYNAMIC_SL) >> static int init_ud_qp(struct ibv_context *context_arg, >> struct mca_btl_openib_sa_qp_cache *cache); >> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache, >> - struct ib_mad_sa *sag, >> - struct ibv_send_wr *swr, >> - struct ibv_sge *ssge, >> - uint16_t lid, >> - uint16_t rem_lid); >> + ib_sa_mad_t *sa_mad, >> + struct ibv_send_wr *swr, >> + struct ibv_sge *ssge, >> + uint16_t lid, >> + uint16_t rem_lid); >> static int 
get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache, >> - struct ib_mad_sa *sag, >> - struct ib_mad_sa *sar, >> - struct ibv_send_wr *swr, >> - uint16_t lid, >> - uint16_t rem_lid); >> -static int init_device(struct ibv_context *context_arg, >> - struct mca_btl_openib_sa_qp_cache *cache, >> - uint32_t port_num); >> -static int get_pathrecord_sl(struct ibv_context *context_arg, >> - uint32_t port_num, >> + ib_sa_mad_t *sa_mad, >> + ib_sa_mad_t *sar, >> + struct ibv_send_wr *swr, >> uint16_t lid, >> uint16_t rem_lid); >> +static int init_device(struct ibv_context *context_arg, >> + struct mca_btl_openib_sa_qp_cache *cache, >> + uint32_t port_num); >> +static int get_pathrecord_sl(struct ibv_context *context_arg, >> + uint32_t port_num, >> + uint16_t lid, >> + uint16_t rem_lid); >> +static void free_sa_qp_cache(void); >> +#endif >> >> /* >> * The "component" struct -- the top-level function pointers for the >> @@ -351,6 +268,33 @@ >> return OMPI_SUCCESS; >> } >> >> +#if (ENABLE_DYNAMIC_SL) >> +static void free_sa_qp_cache(void) >> +{ >> + struct mca_btl_openib_sa_qp_cache *cache, *tmp; >> + >> + cache = sa_qp_cache; >> + while (NULL != cache) { >> + /* free cache data */ >> + if (cache->device_name) >> + free(cache->device_name); >> + if (NULL != cache->qp) >> + ibv_destroy_qp(cache->qp); >> + if (NULL != cache->ah) >> + ibv_destroy_ah(cache->ah); >> + if (NULL != cache->cq) >> + ibv_destroy_cq(cache->cq); >> + if (NULL != cache->mr) >> + ibv_dereg_mr(cache->mr); >> + if (NULL != cache->pd) >> + ibv_dealloc_pd(cache->pd); >> + tmp = cache->next; >> + free(cache); >> + cache = tmp; >> + } >> +} >> +#endif >> + >> /* >> * Component finalize function. Cleanup RML non-blocking receive. 
>> */ >> @@ -360,7 +304,9 @@ >> orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB); >> rml_recv_posted = false; >> } >> - >> + #if (ENABLE_DYNAMIC_SL) >> + free_sa_qp_cache(); >> +#endif >> return OMPI_SUCCESS; >> } >> >> @@ -425,7 +371,7 @@ >> */ >> static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint) >> { >> - int i, rc; >> + int i; >> mca_btl_openib_module_t* openib_btl = >> (mca_btl_openib_module_t*)endpoint->endpoint_btl; >> >> @@ -446,18 +392,24 @@ >> attr.ah_attr.dlid = endpoint->rem_info.rem_lid; >> attr.ah_attr.src_path_bits = openib_btl->src_path_bits; >> attr.ah_attr.port_num = openib_btl->port_num; >> - attr.ah_attr.sl = mca_btl_openib_component.ib_service_level; >> - /* if user enable ib_path_rec_service_level - dynamically get the >> sl from PathRecord */ >> - if (mca_btl_openib_component.ib_path_rec_service_level> 0) { >> - rc = get_pathrecord_sl(qp->context, >> + >> +#if (ENABLE_DYNAMIC_SL) >> + /* if user enabled dynamic SL, get it from PathRecord */ >> + if (0 != mca_btl_openib_component.ib_path_record_service_level) { >> + int rc = get_pathrecord_sl(qp->context, >> attr.ah_attr.port_num, >> openib_btl->lid, >> attr.ah_attr.dlid); >> if (OMPI_ERROR == rc) { >> + free_sa_qp_cache(); >> return OMPI_ERROR; >> } >> attr.ah_attr.sl = rc; >> } >> +#else >> + attr.ah_attr.sl = mca_btl_openib_component.ib_service_level; >> +#endif >> + >> /* JMS to be filled in later dynamically */ >> attr.ah_attr.static_rate = 0; >> >> @@ -1056,6 +1008,7 @@ >> OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); >> } >> >> +#if (ENABLE_DYNAMIC_SL) >> static int init_ud_qp(struct ibv_context *context_arg, >> struct mca_btl_openib_sa_qp_cache *cache) >> { >> @@ -1094,7 +1047,7 @@ >> memset(&mattr, 0, sizeof(mattr)); >> mattr.qp_state = IBV_QPS_INIT; >> mattr.port_num = cache->port_num; >> - mattr.qkey = IB_GLOBAL_QKEY; >> + mattr.qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY); >> rc = ibv_modify_qp(cache->qp,&mattr, >> IBV_QP_STATE | >> IBV_QP_PKEY_INDEX | >> 
@@ -1128,61 +1081,75 @@ >> return OMPI_SUCCESS; >> } >> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache, >> - struct ib_mad_sa *sag, >> - struct ibv_send_wr *swr, >> - struct ibv_sge *ssge, >> - uint16_t lid, >> - uint16_t rem_lid) >> + ib_sa_mad_t *sa_mad, >> + struct ibv_send_wr *swr, >> + struct ibv_sge *ssge, >> + uint16_t lid, >> + uint16_t rem_lid) >> { >> - memset(sag, 0, sizeof(*sag)); >> + ib_path_rec_t *path_record = (ib_path_rec_t*)sa_mad->data; >> + >> memset(swr, 0, sizeof(*swr)); >> memset(ssge, 0, sizeof(*ssge)); >> >> - sag->mad_hdr.base_version = IB_MGMT_BASE_VERSION; >> - sag->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; >> - sag->mad_hdr.class_version = 2; >> - sag->mad_hdr.method = IB_MGMT_METHOD_GET; >> - sag->mad_hdr.attr_id = htons (IB_SA_ATTR_PATH_REC); >> - sag->mad_hdr.tid[0] = IB_SA_TID_GET_PATH_REC_0 + cache->qp->qp_num; >> - sag->mad_hdr.tid[1] = IB_SA_TID_GET_PATH_REC_1 + rem_lid; >> - sag->sa_hdr.comp_mask[1] = >> - htonl(IB_SA_PATH_REC_DLID | IB_SA_PATH_REC_SLID); >> - sag->sa_data.path_record.dlid = htons(rem_lid); >> - sag->sa_data.path_record.slid = htons(lid); >> + /* Initialize the standard MAD header. 
*/ >> + memset(sa_mad, 0, MAD_BLOCK_SIZE); >> + ib_mad_init_new((ib_mad_t *)sa_mad, /* mad header pointer */ >> + IB_MCLASS_SUBN_ADM, /* management class */ >> + (uint8_t) 2, /* version */ >> + IB_MAD_METHOD_GET, /* method */ >> + hton64((uint64_t)lid<< 48 | /* transaction ID */ >> + (uint64_t)rem_lid<< 32 | >> + (uint64_t)cache->qp->qp_num<< 8), >> + IB_MAD_ATTR_PATH_RECORD, /* attribute ID */ >> + 0); /* attribute modifier */ >> + >> + sa_mad->comp_mask = IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID; >> + path_record->dlid = htons(rem_lid); >> + path_record->slid = htons(lid); >> >> swr->sg_list = ssge; >> swr->num_sge = 1; >> swr->opcode = IBV_WR_SEND; >> swr->wr.ud.ah = cache->ah; >> - swr->wr.ud.remote_qpn = IB_SA_QPN; >> - swr->wr.ud.remote_qkey = IB_GLOBAL_QKEY; >> + swr->wr.ud.remote_qpn = ntohl(IB_QP1); >> + swr->wr.ud.remote_qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY); >> swr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED; >> >> - ssge->addr = (uint64_t)(void *)sag; >> - ssge->length = sizeof(*sag); >> + ssge->addr = (uint64_t)(void *)sa_mad; >> + ssge->length = MAD_BLOCK_SIZE; >> ssge->lkey = cache->mr->lkey; >> } >> >> static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache, >> - struct ib_mad_sa *sag, >> - struct ib_mad_sa *sar, >> - struct ibv_send_wr *swr, >> - uint16_t lid, >> - uint16_t rem_lid) >> + ib_sa_mad_t *req_mad, >> + ib_sa_mad_t *resp_mad, >> + struct ibv_send_wr *swr, >> + uint16_t lid, >> + uint16_t rem_lid) >> { >> struct ibv_send_wr *bswr; >> struct ibv_wc wc; >> struct timeval get_sl_rec_last_sent, get_sl_rec_last_poll; >> struct ibv_recv_wr *brwr; >> int got_sl_value, get_sl_rec_retries, rc, ne, i; >> + ib_path_rec_t *req_path_record = ib_sa_mad_get_payload_ptr(req_mad); >> + ib_path_rec_t *resp_path_record = ib_sa_mad_get_payload_ptr(resp_mad); >> >> got_sl_value = 0; >> get_sl_rec_retries = 0; >> >> + rc = ibv_post_recv(cache->qp,&(cache->rwr),&brwr); >> + if (0 != rc) { >> + BTL_ERROR(("error posting receive on QP 
[0x%x] errno says: %s [%d]", >> + cache->qp->qp_num, strerror(errno), errno)); >> + return OMPI_ERROR; >> + } >> + >> while (0 == got_sl_value) { >> rc = ibv_post_send(cache->qp, swr,&bswr); >> if (0 != rc) { >> - BTL_ERROR(("error posing send on QP[%x] errno says: %s [%d]", >> + BTL_ERROR(("error posting send on QP [0x%x] errno says: %s >> [%d]", >> cache->qp->qp_num, strerror(errno), errno)); >> return OMPI_ERROR; >> } >> @@ -1190,25 +1157,23 @@ >> >> while (0 == got_sl_value) { >> ne = ibv_poll_cq(cache->cq, 1,&wc); >> - if (ne> 0 >> -&& wc.status == IBV_WC_SUCCESS >> -&& wc.opcode == IBV_WC_RECV >> -&& wc.byte_len>= sizeof(*sar) >> -&& sar->mad_hdr.tid[0] == sag->mad_hdr.tid[0] >> -&& sar->mad_hdr.tid[1] == sag->mad_hdr.tid[1]) { >> - if (0 == sar->mad_hdr.status >> -&& sar->sa_data.path_record.slid == htons(lid) >> -&& sar->sa_data.path_record.dlid == htons(rem_lid)) { >> + if (ne> 0&& >> + IBV_WC_SUCCESS == wc.status&& >> + IBV_WC_RECV == wc.opcode&& >> + wc.byte_len>= MAD_BLOCK_SIZE&& >> + resp_mad->trans_id == req_mad->trans_id) { >> + if (0 == resp_mad->status&& >> + req_path_record->slid == htons(lid)&& >> + req_path_record->dlid == htons(rem_lid)) { >> /* Everything matches, so we have the desired SL */ >> - cache->sl_values[rem_lid] = >> - sar->sa_data.path_record.qos_class_sl& >> IB_PATH_REC_SL_MASK; >> + cache->sl_values[rem_lid] = >> ib_path_rec_sl(resp_path_record); >> got_sl_value = 1; /* still must repost recieve buf */ >> } else { >> /* Probably bad status, unlikely bad lid match. We will >> */ >> /* ignore response and let it time out so that we do a >> */ >> /* retry, but after a delay. We must make a new TID so >> */ >> /* the SM doesn't see it as the same request. 
>> */ >> - sag->mad_hdr.tid[1] += 0x10000; >> + req_mad->trans_id += hton64(1); >> } >> rc = ibv_post_recv(cache->qp,&(cache->rwr),&brwr); >> if (0 != rc) { >> @@ -1249,7 +1214,6 @@ >> { >> struct ibv_ah_attr aattr; >> struct ibv_port_attr pattr; >> - struct ibv_recv_wr *brwr; >> int rc; >> >> cache->context = ibv_open_device(context_arg->device); >> @@ -1315,16 +1279,10 @@ >> cache->rwr.sg_list =&(cache->rsge); >> memset(&(cache->rsge), 0, sizeof(cache->rsge)); >> cache->rsge.addr = (uint64_t)(void *) >> - (cache->send_recv_buffer + sizeof(struct ib_mad_sa)); >> - cache->rsge.length = sizeof(struct ib_mad_sa) + 40; >> + (cache->send_recv_buffer + MAD_BLOCK_SIZE); >> + cache->rsge.length = MAD_BLOCK_SIZE + 40; >> cache->rsge.lkey = cache->mr->lkey; >> >> - rc = ibv_post_recv(cache->qp,&(cache->rwr),&brwr); >> - if (0 != rc) { >> - BTL_ERROR(("error posing receive on QP[%x] errno says: %s [%d]", >> - cache->qp->qp_num, strerror(errno), errno)); >> - return OMPI_ERROR; >> - } >> return 0; >> } >> >> @@ -1334,7 +1292,7 @@ >> uint16_t rem_lid) >> { >> struct ibv_send_wr swr; >> - struct ib_mad_sa *sag, *sar; >> + ib_sa_mad_t *req_mad, *resp_mad; >> struct ibv_sge ssge; >> struct mca_btl_openib_sa_qp_cache *cache; >> long page_size = sysconf(_SC_PAGESIZE); >> @@ -1342,8 +1300,8 @@ >> >> /* search for a cached item */ >> for (cache = sa_qp_cache; cache; cache = cache->next) { >> - if (strcmp(cache->device_name, >> - ibv_get_device_name(context_arg->device)) == 0 >> + if (0 == strcmp(cache->device_name, >> + ibv_get_device_name(context_arg->device)) >> && cache->port_num == port_num) { >> break; >> } >> @@ -1365,15 +1323,15 @@ >> >> /* if the destination lid SL value is not in the cache, go get it */ >> if (SL_NOT_PRESENT == cache->sl_values[rem_lid]) { >> - /* sag is first buffer, where we build the SA Get request to send */ >> - sag = (struct ib_mad_sa *)(cache->send_recv_buffer); >> + /* sa_mad is first buffer, where we build the SA Get request to >> send */ >> + 
req_mad = (ib_sa_mad_t *)(cache->send_recv_buffer); >> >> - init_sa_mad(cache, sag,&swr,&ssge, lid, rem_lid); >> + init_sa_mad(cache, req_mad,&swr,&ssge, lid, rem_lid); >> >> - /* sar is the receive buffer (40 byte GRH) */ >> - sar = (struct ib_mad_sa *)(cache->send_recv_buffer + sizeof(struct >> ib_mad_sa) + 40); >> + /* resp_mad is the receive buffer (40 byte offset is for GRH) */ >> + resp_mad = (ib_sa_mad_t *)(cache->send_recv_buffer + MAD_BLOCK_SIZE >> + 40); >> >> - rc = get_pathrecord_info(cache, sag, sar,&swr, lid, rem_lid); >> + rc = get_pathrecord_info(cache, req_mad, resp_mad,&swr, lid, >> rem_lid); >> if (0 != rc) { >> return rc; >> } >> @@ -1382,3 +1340,4 @@ >> /* now all we do is send back the value laying around */ >> return cache->sl_values[rem_lid]; >> } >> +#endif >> _______________________________________________ >> svn-full mailing list >> svn-f...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel >