Were all the issues with this code fixed?  There were m4 issues and Solaris
issues, IIRC.

Sent from my phone. No type good. 

On Jun 28, 2011, at 9:28 AM, "klit...@osl.iu.edu" <klit...@osl.iu.edu> wrote:

> Author: kliteyn
> Date: 2011-06-28 10:28:29 EDT (Tue, 28 Jun 2011)
> New Revision: 24830
> URL: https://svn.open-mpi.org/trac/ompi/changeset/24830
> 
> Log:
> Supporting dynamic SL (#2674)
> 
> - Added enable/disable configuration parameter for dynamic SL
> - All the dynamic SL code is conditionalized
> - Removed libibmad dependency
> - Using only one include - ib_types.h (part of opensm-devel package)
> - Removed all the macro and data types definitions, using the
>   existing definitions from ib_types.h instead
> - general cleaning here and there
> 
> The async mode is not implemented yet - stay tuned...
> 
> 
> Text files modified: 
>   trunk/ompi/config/ompi_check_openib.m4                     |    38 ++++     
>                                
>   trunk/ompi/mca/btl/openib/btl_openib.h                     |     5          
>                                
>   trunk/ompi/mca/btl/openib/btl_openib_mca.c                 |    10          
>                                
>   trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c |   309 
> +++++++++++++++++---------------------- 
>   4 files changed, 182 insertions(+), 180 deletions(-)
> 
> Modified: trunk/ompi/config/ompi_check_openib.m4
> ==============================================================================
> --- trunk/ompi/config/ompi_check_openib.m4    (original)
> +++ trunk/ompi/config/ompi_check_openib.m4    2011-06-28 10:28:29 EDT (Tue, 
> 28 Jun 2011)
> @@ -155,11 +155,21 @@
>                                      [$ompi_cv_func_ibv_create_cq_args],
>                                      [Number of arguments to 
> ibv_create_cq])])])
> 
> +    #
> +    # OpenIB dynamic SL
> +    #
> +    AC_ARG_ENABLE([openib-dynamic-sl],
> +        [AC_HELP_STRING([--enable-openib-dynamic-sl],
> +                        [Enable openib BTL to query Subnet Manager for IB SL 
> (default: enabled)])],
> +        [enable_openib_dynamic_sl="$enableval"],
> +        [enable_openib_dynamic_sl="yes"])
> +
>     # Set these up so that we can do an AC_DEFINE below
>     # (unconditionally)
>     $1_have_xrc=0
>     $1_have_rdmacm=0
>     $1_have_ibcm=0
> +    $1_have_dynamic_sl=0
> 
>     # If we have the openib stuff available, find out what we've got
>     AS_IF([test "$ompi_check_openib_happy" = "yes"],
> @@ -176,6 +186,19 @@
>                AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
>            fi
> 
> +           if test "$enable_openib_dynamic_sl" = "yes"; then
> +               # We need ib_types.h file, which is installed with 
> opensm-devel
> +               # package. However, ib_types.h has a bad include directive,
> +               # which will cause AC_CHECK_HEADER to fail.
> +               # So instead, we will look for another file that is also
> +               # installed as part of opensm-devel package and included in
> +               # ib_types.h, but it doesn't include any other IB-related 
> files.
> +               AC_CHECK_HEADER([infiniband/complib/cl_types_osd.h],
> +                               [$1_have_dynamic_sl=1],
> +                               [AC_MSG_ERROR([opensm-devel package not found 
> - please install it or disable dynamic SL support with 
> \"--disable-openib-dynamic-sl\"])],
> +                               [])
> +           fi
> +
>            # Do we have a recent enough RDMA CM?  Need to have the
>            # rdma_get_peer_addr (inline) function (originally appeared
>            # in OFED v1.3).
> @@ -244,6 +267,15 @@
>     else
>         AC_MSG_RESULT([no])
>     fi
> +
> +    AC_MSG_CHECKING([if dynamic SL is enabled])
> +    AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_dynamic_sl],
> +        [Enable features required for dynamic SL support])
> +    if test "1" = "$$1_have_dynamic_sl"; then
> +        AC_MSG_RESULT([yes])
> +    else
> +        AC_MSG_RESULT([no])
> +    fi
> 
>     AC_MSG_CHECKING([if OpenFabrics RDMACM support is enabled])
>     AC_DEFINE_UNQUOTED([OMPI_HAVE_RDMACM], [$$1_have_rdmacm],
> @@ -267,7 +299,11 @@
>         AC_MSG_RESULT([no])
>     fi
> 
> -    CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS"
> +    AS_IF([test -z "$ompi_check_openib_dir"],
> +          [openib_include_dir="/usr/include"],
> +          [openib_include_dir="$ompi_check_openib_dir/include"])
> +
> +    CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS 
> -I$openib_include_dir/infiniband"
>     LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS"
>     LIBS="$ompi_check_openib_$1_save_LIBS"
> 
> 
> Modified: trunk/ompi/mca/btl/openib/btl_openib.h
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib.h    (original)
> +++ trunk/ompi/mca/btl/openib/btl_openib.h    2011-06-28 10:28:29 EDT (Tue, 
> 28 Jun 2011)
> @@ -52,6 +52,7 @@
> BEGIN_C_DECLS
> 
> #define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC)
> +#define ENABLE_DYNAMIC_SL (1 == OMPI_ENABLE_DYNAMIC_SL)
> 
> #define MCA_BTL_IB_LEAVE_PINNED 1
> #define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
> @@ -215,7 +216,9 @@
>     uint32_t ib_rnr_retry;
>     uint32_t ib_max_rdma_dst_ops;
>     uint32_t ib_service_level;
> -    uint32_t ib_path_rec_service_level;
> +#if (ENABLE_DYNAMIC_SL)
> +    uint32_t ib_path_record_service_level;
> +#endif
>     int32_t use_eager_rdma;
>     int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for 
> short messages, always */
>     int32_t eager_rdma_num;
> 
> Modified: trunk/ompi/mca/btl/openib/btl_openib_mca.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib_mca.c    (original)
> +++ trunk/ompi/mca/btl/openib/btl_openib_mca.c    2011-06-28 10:28:29 EDT 
> (Tue, 28 Jun 2011)
> @@ -398,10 +398,14 @@
>     }
>     mca_btl_openib_component.ib_service_level = (uint32_t) ival;
> 
> -    CHECK(reg_int("ib_path_rec_service_level", NULL, "Enable getting 
> InfiniBand service level from PathRecord "
> -                  "(must be >= 0, 0 = disabled, positive = try to get the 
> service level from PathRecord)",
> +#if (ENABLE_DYNAMIC_SL)
> +    CHECK(reg_int("ib_path_record_service_level", NULL,
> +                  "Enable getting InfiniBand service level from PathRecord "
> +                  "(must be >= 0, 0 = disabled, positive = try to get the "
> +                  "service level from PathRecord)", 
>                   0, &ival, REGINT_GE_ZERO));
> -    mca_btl_openib_component.ib_path_rec_service_level = (uint32_t) ival;
> +    mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival;
> +#endif
> 
>     CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages "
>                   "(-1 = use device default, 0 = do not use eager RDMA, "
> 
> Modified: trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c    (original)
> +++ trunk/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c    2011-06-28 
> 10:28:29 EDT (Tue, 28 Jun 2011)
> @@ -44,6 +44,10 @@
> #include "connect/connect.h"
> #include "orte/util/show_help.h"
> 
> +#if (ENABLE_DYNAMIC_SL)
> +#include <infiniband/iba/ib_types.h>
> +#endif
> +
> #ifdef HAVE_UNISTD_H
> #include <unistd.h>
> #endif
> @@ -54,109 +58,17 @@
>     ENDPOINT_CONNECT_ACK
> } connect_message_type_t;
> 
> -#ifndef __WINDOWS__
> -#define PACK_SUFFIX __attribute__((packed))
> -#else
> -#define PACK_SUFFIX
> -#endif
> -
> -#define SL_NOT_PRESENT                0x7F
> +#define SL_NOT_PRESENT                0xFF
> #define MAX_GET_SL_REC_RETRIES        20
> #define GET_SL_REC_RETRIES_TIMEOUT_MS 2000000
> 
> -#define IB_SA_QPN                     1
> -#define IB_GLOBAL_QKEY                0x80010000UL
> -#define IB_MGMT_BASE_VERSION          1
> -#define IB_MGMT_CLASS_SUBN_ADM        0x03
> -#define IB_MGMT_METHOD_GET            0x01
> -#define IB_SA_TID_GET_PATH_REC_0      0xCA000000UL
> -#define IB_SA_TID_GET_PATH_REC_1      0xBEEF0000UL
> -#define IB_PATH_REC_SL_MASK           0x000F
> -#define IB_SA_ATTR_PATH_REC           0x35
> -#define IB_SA_PATH_REC_DLID           (1<<4)
> -#define IB_SA_PATH_REC_SLID           (1<<5)
> -
> -
> -#ifdef __WINDOWS__
> -  #pragma pack(push)
> -  #pragma pack(1)
> -#endif
> -
> -struct ib_mad_hdr {
> -    uint8_t   base_version;
> -    uint8_t   mgmt_class;
> -    uint8_t   class_version;
> -    uint8_t   method;
> -    uint16_t  status;
> -    uint16_t  class_spec;
> -    uint32_t  tid[2];
> -    uint16_t  attr_id;
> -    uint16_t  resv;
> -    uint32_t  attr_mod;
> -} PACK_SUFFIX;
> -
> -struct ib_rmpp_hdr {
> -    uint32_t  raw[3];
> -} PACK_SUFFIX;
> -
> -struct ib_sa_hdr {
> -    uint32_t sm_key[2];
> -    uint16_t reserved;
> -    uint16_t attrib_offset;
> -    uint32_t comp_mask[2];
> -} PACK_SUFFIX;
> -
> -typedef union _ib_gid {
> -    uint8_t raw[16];
> -    struct _ib_gid_unicast {
> -        uint64_t prefix;
> -        uint64_t interface_id;
> -    } PACK_SUFFIX unicast;
> -    struct _ib_gid_multicast {
> -        uint8_t header[2];
> -        uint8_t raw_group_id[14];
> -    } PACK_SUFFIX multicast;
> -} PACK_SUFFIX ib_gid_t;
> -
> -struct ib_path_record {
> -    uint64_t service_id;
> -    ib_gid_t dgit;
> -    ib_gid_t sgit;
> -    uint16_t dlid;
> -    uint16_t slid;
> -    uint32_t hop_flow_raw;
> -    uint8_t  tclass;
> -    uint8_t  num_path;
> -    uint16_t pkey;
> -    uint8_t  reserved1;
> -    uint8_t  qos_class_sl;
> -    uint8_t  mtu;
> -    uint8_t  rate;
> -    uint32_t preference__packet_lifetime__packet_lifetime_selector;
> -    uint32_t reserved2[35];
> -} PACK_SUFFIX;
> -
> -union ib_sa_data {
> -    struct ib_path_record path_record;
> -} PACK_SUFFIX;
> -
> -struct ib_mad_sa {
> -    struct ib_mad_hdr mad_hdr;
> -    struct ib_rmpp_hdr rmpp_hdr;
> -    struct ib_sa_hdr sa_hdr;
> -    union  ib_sa_data sa_data;
> -} PACK_SUFFIX;
> -
> -#ifdef __WINDOWS__
> -  #pragma pack(pop)
> -#endif
> -
> +#if (ENABLE_DYNAMIC_SL)
> static struct mca_btl_openib_sa_qp_cache {
>     /* There will be a MR with the one send and receive buffer together */
>     /* The send buffer is first, the receive buffer is second */
>     /* The receive buffer in a UD queue pair needs room for the 40 byte GRH */
>     /* The buffers are first in the structure for page alignment */
> -    char     send_recv_buffer[sizeof(struct ib_mad_sa) * 2 + 40];
> +    char     send_recv_buffer[MAD_BLOCK_SIZE * 2 + 40];
>     struct   mca_btl_openib_sa_qp_cache *next;
>     struct   ibv_context *context;
>     char     *device_name;
> @@ -168,8 +80,9 @@
>     struct   ibv_pd *pd;
>     struct   ibv_recv_wr rwr;
>     struct   ibv_sge rsge;
> -    char     sl_values[65536];
> +    uint8_t  sl_values[65536]; /* 64K */
> } *sa_qp_cache = 0;
> +#endif
> 
> static int oob_priority = 50;
> static bool rml_recv_posted = false;
> @@ -198,27 +111,31 @@
> static void rml_recv_cb(int status, orte_process_name_t* process_name, 
>                         opal_buffer_t* buffer, orte_rml_tag_t tag, 
>                         void* cbdata);
> +
> +#if (ENABLE_DYNAMIC_SL)
> static int init_ud_qp(struct ibv_context *context_arg,
>                       struct mca_btl_openib_sa_qp_cache *cache);
> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache,
> -                        struct ib_mad_sa *sag,
> -                        struct ibv_send_wr *swr,
> -                        struct ibv_sge *ssge,
> -                        uint16_t lid,
> -                        uint16_t rem_lid);
> +                       ib_sa_mad_t *sa_mad,
> +                       struct ibv_send_wr *swr,
> +                       struct ibv_sge *ssge,
> +                       uint16_t lid,
> +                       uint16_t rem_lid);
> static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache,
> -                               struct ib_mad_sa *sag,
> -                               struct ib_mad_sa *sar,
> -                               struct ibv_send_wr *swr,
> -                               uint16_t lid,
> -                               uint16_t rem_lid);
> -static int init_device(struct ibv_context *context_arg,
> -                       struct mca_btl_openib_sa_qp_cache *cache,
> -                       uint32_t port_num);
> -static int get_pathrecord_sl(struct ibv_context *context_arg,
> -                             uint32_t port_num,
> +                             ib_sa_mad_t *sa_mad,
> +                             ib_sa_mad_t *sar,
> +                             struct ibv_send_wr *swr,
>                              uint16_t lid,
>                              uint16_t rem_lid);
> +static int init_device(struct ibv_context *context_arg,
> +                      struct mca_btl_openib_sa_qp_cache *cache,
> +                      uint32_t port_num);
> +static int get_pathrecord_sl(struct ibv_context *context_arg,
> +                           uint32_t port_num,
> +                           uint16_t lid,
> +                           uint16_t rem_lid);
> +static void free_sa_qp_cache(void);
> +#endif
> 
> /*
>  * The "component" struct -- the top-level function pointers for the
> @@ -351,6 +268,33 @@
>     return OMPI_SUCCESS;
> }
> 
> +#if (ENABLE_DYNAMIC_SL)
> +static void free_sa_qp_cache(void)
> +{
> +    struct mca_btl_openib_sa_qp_cache *cache, *tmp;
> +
> +    cache = sa_qp_cache;
> +    while (NULL != cache) {
> +        /* free cache data */
> +        if (cache->device_name)
> +            free(cache->device_name);
> +        if (NULL != cache->qp)
> +            ibv_destroy_qp(cache->qp);
> +        if (NULL != cache->ah)
> +            ibv_destroy_ah(cache->ah);
> +        if (NULL != cache->cq)
> +            ibv_destroy_cq(cache->cq);
> +        if (NULL != cache->mr)
> +            ibv_dereg_mr(cache->mr);
> +        if (NULL != cache->pd)
> +            ibv_dealloc_pd(cache->pd);
> +        tmp = cache->next;
> +        free(cache);
> +        cache = tmp;
> +    }
> +}
> +#endif
> +
> /*
>  * Component finalize function.  Cleanup RML non-blocking receive.
>  */
> @@ -360,7 +304,9 @@
>         orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_OPENIB);
>         rml_recv_posted = false;
>     }
> -
> + #if (ENABLE_DYNAMIC_SL)
> +   free_sa_qp_cache();
> +#endif
>     return OMPI_SUCCESS;
> }
> 
> @@ -425,7 +371,7 @@
>  */
> static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
> {
> -    int i, rc;
> +    int i;
>     mca_btl_openib_module_t* openib_btl =
>         (mca_btl_openib_module_t*)endpoint->endpoint_btl;
> 
> @@ -446,18 +392,24 @@
>         attr.ah_attr.dlid          = endpoint->rem_info.rem_lid;
>         attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
>         attr.ah_attr.port_num      = openib_btl->port_num;
> -        attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> -        /* if user enable ib_path_rec_service_level - dynamically get the sl 
> from PathRecord */
> -        if (mca_btl_openib_component.ib_path_rec_service_level > 0) {
> -            rc = get_pathrecord_sl(qp->context,
> +
> +#if (ENABLE_DYNAMIC_SL)
> +        /* if user enabled dynamic SL, get it from PathRecord */
> +        if (0 != mca_btl_openib_component.ib_path_record_service_level) {
> +            int rc = get_pathrecord_sl(qp->context,
>                                    attr.ah_attr.port_num,
>                                    openib_btl->lid,
>                                    attr.ah_attr.dlid);
>             if (OMPI_ERROR == rc) {
> +                free_sa_qp_cache();
>                 return OMPI_ERROR;
>             }
>             attr.ah_attr.sl = rc;
>         }
> +#else
> +        attr.ah_attr.sl = mca_btl_openib_component.ib_service_level;
> +#endif
> +
>         /* JMS to be filled in later dynamically */
>         attr.ah_attr.static_rate   = 0;
> 
> @@ -1056,6 +1008,7 @@
>     OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
> }
> 
> +#if (ENABLE_DYNAMIC_SL)
> static int init_ud_qp(struct ibv_context *context_arg,
>                       struct mca_btl_openib_sa_qp_cache *cache)
> {
> @@ -1094,7 +1047,7 @@
>     memset(&mattr, 0, sizeof(mattr));
>     mattr.qp_state = IBV_QPS_INIT;
>     mattr.port_num = cache->port_num;
> -    mattr.qkey = IB_GLOBAL_QKEY;
> +    mattr.qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY);
>     rc = ibv_modify_qp(cache->qp, &mattr,
>             IBV_QP_STATE              |
>             IBV_QP_PKEY_INDEX         |
> @@ -1128,61 +1081,75 @@
>     return OMPI_SUCCESS;
> }
> static void init_sa_mad(struct mca_btl_openib_sa_qp_cache *cache,
> -                        struct ib_mad_sa *sag,
> -                        struct ibv_send_wr *swr,
> -                        struct ibv_sge *ssge,
> -                        uint16_t lid,
> -                        uint16_t rem_lid)
> +                       ib_sa_mad_t *sa_mad,
> +                       struct ibv_send_wr *swr,
> +                       struct ibv_sge *ssge,
> +                       uint16_t lid,
> +                       uint16_t rem_lid)
> {
> -    memset(sag, 0, sizeof(*sag));
> +    ib_path_rec_t *path_record = (ib_path_rec_t*)sa_mad->data;
> +
>     memset(swr, 0, sizeof(*swr));
>     memset(ssge, 0, sizeof(*ssge));
> 
> -    sag->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
> -    sag->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
> -    sag->mad_hdr.class_version = 2;
> -    sag->mad_hdr.method = IB_MGMT_METHOD_GET;
> -    sag->mad_hdr.attr_id = htons (IB_SA_ATTR_PATH_REC);
> -    sag->mad_hdr.tid[0] = IB_SA_TID_GET_PATH_REC_0 + cache->qp->qp_num;
> -    sag->mad_hdr.tid[1] = IB_SA_TID_GET_PATH_REC_1 + rem_lid;
> -    sag->sa_hdr.comp_mask[1] =
> -        htonl(IB_SA_PATH_REC_DLID | IB_SA_PATH_REC_SLID);
> -    sag->sa_data.path_record.dlid = htons(rem_lid);
> -    sag->sa_data.path_record.slid = htons(lid);
> +    /* Initialize the standard MAD header. */
> +    memset(sa_mad, 0, MAD_BLOCK_SIZE);
> +    ib_mad_init_new((ib_mad_t *)sa_mad,          /* mad header pointer */
> +                    IB_MCLASS_SUBN_ADM,          /* management class */
> +                    (uint8_t) 2,                 /* version */
> +                    IB_MAD_METHOD_GET,           /* method */
> +                    hton64((uint64_t)lid << 48 | /* transaction ID */
> +                           (uint64_t)rem_lid << 32 |
> +                           (uint64_t)cache->qp->qp_num << 8),
> +                    IB_MAD_ATTR_PATH_RECORD,     /* attribute ID */
> +                    0);                          /* attribute modifier */
> +
> +    sa_mad->comp_mask = IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID;
> +    path_record->dlid = htons(rem_lid);
> +    path_record->slid = htons(lid);
> 
>     swr->sg_list = ssge;
>     swr->num_sge = 1;
>     swr->opcode = IBV_WR_SEND;
>     swr->wr.ud.ah = cache->ah;
> -    swr->wr.ud.remote_qpn = IB_SA_QPN;
> -    swr->wr.ud.remote_qkey = IB_GLOBAL_QKEY;
> +    swr->wr.ud.remote_qpn = ntohl(IB_QP1);
> +    swr->wr.ud.remote_qkey = ntohl(IB_QP1_WELL_KNOWN_Q_KEY);
>     swr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_SOLICITED;
> 
> -    ssge->addr = (uint64_t)(void *)sag;
> -    ssge->length = sizeof(*sag);
> +    ssge->addr = (uint64_t)(void *)sa_mad;
> +    ssge->length = MAD_BLOCK_SIZE;
>     ssge->lkey = cache->mr->lkey;
> }
> 
> static int get_pathrecord_info(struct mca_btl_openib_sa_qp_cache *cache,
> -                               struct ib_mad_sa *sag,
> -                               struct ib_mad_sa *sar,
> -                               struct ibv_send_wr *swr,
> -                               uint16_t lid,
> -                               uint16_t rem_lid)
> +                             ib_sa_mad_t *req_mad,
> +                             ib_sa_mad_t *resp_mad,
> +                             struct ibv_send_wr *swr,
> +                             uint16_t lid,
> +                             uint16_t rem_lid)
> {
>     struct ibv_send_wr *bswr;
>     struct ibv_wc wc;
>     struct timeval get_sl_rec_last_sent, get_sl_rec_last_poll;
>     struct ibv_recv_wr *brwr;
>     int got_sl_value, get_sl_rec_retries, rc, ne, i;
> +    ib_path_rec_t *req_path_record = ib_sa_mad_get_payload_ptr(req_mad);
> +    ib_path_rec_t *resp_path_record = ib_sa_mad_get_payload_ptr(resp_mad);
> 
>     got_sl_value = 0;
>     get_sl_rec_retries = 0;
> 
> +    rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
> +    if (0 != rc) {
> +        BTL_ERROR(("error posting receive on QP [0x%x] errno says: %s [%d]",
> +                   cache->qp->qp_num, strerror(errno), errno));
> +        return OMPI_ERROR;
> +    }
> +
>     while (0 == got_sl_value) {
>         rc = ibv_post_send(cache->qp, swr, &bswr);
>         if (0 != rc) {
> -            BTL_ERROR(("error posing send on QP[%x] errno says: %s [%d]",
> +            BTL_ERROR(("error posting send on QP [0x%x] errno says: %s [%d]",
>                        cache->qp->qp_num, strerror(errno), errno));
>             return OMPI_ERROR;
>         }
> @@ -1190,25 +1157,23 @@
> 
>         while (0 == got_sl_value) {
>             ne = ibv_poll_cq(cache->cq, 1, &wc);
> -            if (ne > 0
> -                    && wc.status == IBV_WC_SUCCESS
> -                    && wc.opcode == IBV_WC_RECV
> -                    && wc.byte_len >= sizeof(*sar)
> -                    && sar->mad_hdr.tid[0] == sag->mad_hdr.tid[0]
> -                    && sar->mad_hdr.tid[1] == sag->mad_hdr.tid[1]) {
> -                if (0 == sar->mad_hdr.status
> -                        && sar->sa_data.path_record.slid == htons(lid)
> -                        && sar->sa_data.path_record.dlid == htons(rem_lid)) {
> +            if (ne > 0 &&
> +                IBV_WC_SUCCESS == wc.status &&
> +                IBV_WC_RECV == wc.opcode &&
> +                wc.byte_len >= MAD_BLOCK_SIZE &&
> +                resp_mad->trans_id == req_mad->trans_id) {
> +                if (0 == resp_mad->status &&
> +                    req_path_record->slid == htons(lid) &&
> +                    req_path_record->dlid == htons(rem_lid)) {
>                     /* Everything matches, so we have the desired SL */
> -                    cache->sl_values[rem_lid] =
> -                        sar->sa_data.path_record.qos_class_sl & 
> IB_PATH_REC_SL_MASK;
> +                    cache->sl_values[rem_lid] = 
> ib_path_rec_sl(resp_path_record);
>                     got_sl_value = 1; /* still must repost recieve buf */
>                 } else {
>                     /* Probably bad status, unlikely bad lid match. We will */
>                     /* ignore response and let it time out so that we do a  */
>                     /* retry, but after a delay. We must make a new TID so  */
>                     /* the SM doesn't see it as the same request.           */
> -                    sag->mad_hdr.tid[1] += 0x10000;
> +                    req_mad->trans_id += hton64(1);
>                 }
>                 rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
>                 if (0 != rc) {
> @@ -1249,7 +1214,6 @@
> {
>     struct ibv_ah_attr aattr;
>     struct ibv_port_attr pattr;
> -    struct ibv_recv_wr *brwr;
>     int rc;
> 
>     cache->context = ibv_open_device(context_arg->device);
> @@ -1315,16 +1279,10 @@
>     cache->rwr.sg_list = &(cache->rsge);
>     memset(&(cache->rsge), 0, sizeof(cache->rsge));
>     cache->rsge.addr = (uint64_t)(void *)
> -        (cache->send_recv_buffer + sizeof(struct ib_mad_sa));
> -    cache->rsge.length = sizeof(struct ib_mad_sa) + 40;
> +        (cache->send_recv_buffer + MAD_BLOCK_SIZE);
> +    cache->rsge.length = MAD_BLOCK_SIZE + 40;
>     cache->rsge.lkey = cache->mr->lkey;
> 
> -    rc = ibv_post_recv(cache->qp, &(cache->rwr), &brwr);
> -    if (0 != rc) {
> -        BTL_ERROR(("error posing receive on QP[%x] errno says: %s [%d]",
> -                   cache->qp->qp_num, strerror(errno), errno));
> -        return OMPI_ERROR;
> -    }
>     return 0;
> }
> 
> @@ -1334,7 +1292,7 @@
>                              uint16_t rem_lid)
> {
>     struct ibv_send_wr swr;
> -    struct ib_mad_sa *sag, *sar;
> +    ib_sa_mad_t *req_mad, *resp_mad;
>     struct ibv_sge ssge;
>     struct mca_btl_openib_sa_qp_cache *cache;
>     long page_size = sysconf(_SC_PAGESIZE);
> @@ -1342,8 +1300,8 @@
> 
>     /* search for a cached item */
>     for (cache = sa_qp_cache; cache; cache = cache->next) {
> -        if (strcmp(cache->device_name,
> -                    ibv_get_device_name(context_arg->device)) == 0
> +        if (0 == strcmp(cache->device_name,
> +                    ibv_get_device_name(context_arg->device))
>                 && cache->port_num == port_num) {
>             break;
>         }
> @@ -1365,15 +1323,15 @@
> 
>     /* if the destination lid SL value is not in the cache, go get it */
>     if (SL_NOT_PRESENT == cache->sl_values[rem_lid]) {
> -        /* sag is first buffer, where we build the SA Get request to send */
> -        sag = (struct ib_mad_sa *)(cache->send_recv_buffer);
> +        /* sa_mad is first buffer, where we build the SA Get request to send 
> */
> +        req_mad = (ib_sa_mad_t *)(cache->send_recv_buffer);
> 
> -        init_sa_mad(cache, sag, &swr, &ssge, lid, rem_lid);
> +        init_sa_mad(cache, req_mad, &swr, &ssge, lid, rem_lid);
> 
> -        /* sar is the receive buffer (40 byte GRH) */
> -        sar = (struct ib_mad_sa *)(cache->send_recv_buffer + sizeof(struct 
> ib_mad_sa) + 40);
> +        /* resp_mad is the receive buffer (40 byte offset is for GRH) */
> +        resp_mad = (ib_sa_mad_t *)(cache->send_recv_buffer + MAD_BLOCK_SIZE 
> + 40);
> 
> -        rc = get_pathrecord_info(cache, sag, sar, &swr, lid, rem_lid);
> +        rc = get_pathrecord_info(cache, req_mad, resp_mad, &swr, lid, 
> rem_lid);
>         if (0 != rc) {
>             return rc;
>         }
> @@ -1382,3 +1340,4 @@
>     /* now all we do is send back the value laying around */
>     return cache->sl_values[rem_lid];
> }
> +#endif
> _______________________________________________
> svn-full mailing list
> svn-f...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full

Reply via email to