[PATCH 1/5] uDAPL v2: Patch series for ucm, scm: fixes for issues discovered during scale-up and scale-out testing
Linux testing completed with Intel MPI/HPCC benchmarks on 128 nodes, 1024 cores. ucm, scm: address handles need to be destroyed when freeing Endpoints with UD QPs. Signed-off-by: Arlin Davis --- dapl/openib_scm/cm.c |4 dapl/openib_ucm/cm.c |6 ++ dapl/openib_ucm/dapl_ib_util.h |1 + dapl/openib_ucm/device.c | 16 ++-- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index 453e32e..0d2d058 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -355,6 +355,10 @@ multi_cleanup: dapl_os_lock(&cr->lock); hca_ptr = cr->hca; cr->ep = NULL; + if (cr->ah) { + ibv_destroy_ah(cr->ah); + cr->ah = NULL; + } cr->state = DCM_DESTROY; dapl_os_unlock(&cr->lock); } diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index cc480c4..96ee382 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -679,6 +679,10 @@ static void ucm_ud_free(DAPL_EP *ep) dapl_os_lock(&cm->lock); hca = cm->hca; cm->ep = NULL; + if (cm->ah) { + ibv_destroy_ah(cm->ah); + cm->ah = NULL; + } cm->state = DCM_DESTROY; dapl_os_unlock(&cm->lock); } @@ -1041,6 +1045,7 @@ ud_bail: event = IB_CME_LOCAL_FAILURE; goto bail; } + cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ dapl_os_memcpy(&xevent.remote_ah.ia_addr, &cm->msg.daddr, @@ -1218,6 +1223,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) if (xevent.remote_ah.ah == NULL) goto bail; + cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ dapl_os_memcpy(&xevent.remote_ah.ia_addr, &cm->msg.daddr, sizeof(union dcm_addr)); diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index 27ff8dd..6273459 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -43,6 +43,7 @@ struct ib_cm_handle struct dapl_hca *hca; struct dapl_sp *sp; struct dapl_ep *ep; + struct ibv_ah *ah; uint16_tp_size; /* accept p_data, for retries */ uint8_t p_data[DCM_MAX_PDATA_SIZE]; ib_cm_msg_t msg; diff --git a/dapl/openib_ucm/device.c 
b/dapl/openib_ucm/device.c index 077446b..e890eef 100644 --- a/dapl/openib_ucm/device.c +++ b/dapl/openib_ucm/device.c @@ -434,14 +434,18 @@ static void ucm_service_destroy(IN DAPL_HCA *hca) if (tp->rch) ibv_destroy_comp_channel(tp->rch); -dapl_log(DAPL_DBG_TYPE_UTIL, -" destroy_service: pd %p ctx %p handle 0x%x\n", - tp->pd, tp->pd->context, tp->pd->handle); - if (tp->pd) - ibv_dealloc_pd(tp->pd); + if (tp->ah) { + int i; - if (tp->ah) + for (i = 0;i < 0x; i++) { + if (tp->ah[i]) + ibv_destroy_ah(tp->ah[i]); + } dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0x)); + } + + if (tp->pd) + ibv_dealloc_pd(tp->pd); if (tp->sid) dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0x)); -- 1.5.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/11] uDAPL v2 - ucm: latest patch set for new ucm provider, complete timer/retry logic and windows abstractions
Add timer/retry CM logic to the ucm provider. Add reply, rtu and retry count options via environment variables. Times in msecs. DAPL_UCM_RETRY 10 DAPL_UCM_REP_TIME 400 DAPL_UCM_RTU_TIME 200 Add RTU_PENDING and DISC_RECV states Add check timer code to the cm_thread and the option to the select abstraction to take timeout values in msecs. DREQ, REQ, and REPLY will all be timed and retried. Split out reply code and disconnect_final code to better facilitate retry timers. Add checking for duplicate messages. Added new UD extension events for errors. DAT_IB_UD_CONNECTION_REJECT_EVENT DAT_IB_UD_CONNECTION_ERROR_EVENT Signed-off-by: Arlin Davis --- dapl/common/dapl_debug.c |2 +- dapl/openib_common/dapl_ib_common.h | 36 ++- dapl/openib_ucm/cm.c | 622 -- dapl/openib_ucm/dapl_ib_util.h |8 +- dapl/openib_ucm/device.c | 10 +- dat/include/dat2/dat_ib_extensions.h | 10 +- 6 files changed, 485 insertions(+), 203 deletions(-) diff --git a/dapl/common/dapl_debug.c b/dapl/common/dapl_debug.c index 960bc00..904d075 100644 --- a/dapl/common/dapl_debug.c +++ b/dapl/common/dapl_debug.c @@ -50,7 +50,7 @@ void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) 
if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { va_start(args, fmt); fprintf(stdout, "%s:%x: ", _ptr_host_, - dapl_os_gettid()); + dapl_os_getpid()); dapl_os_vprintf(fmt, args); va_end(args); } diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h index 065cfca..982621c 100644 --- a/dapl/openib_common/dapl_ib_common.h +++ b/dapl/openib_common/dapl_ib_common.h @@ -165,9 +165,12 @@ typedef uint16_t ib_hca_port_t; #define DCM_HOP_LIMIT 0xff #define DCM_TCLASS 0 -/* DAPL uCM timers */ -#define DCM_RETRY_CNT 7 -#define DCM_RETRY_TIME_MS 1000 +/* DAPL uCM timers, default queue sizes */ +#define DCM_RETRY_CNT 10 +#define DCM_REP_TIME400/* reply timeout in m_secs */ +#define DCM_RTU_TIME200/* rtu timeout in m_secs */ +#define DCM_QP_SIZE 500 /* uCM tx, rx qp size */ +#define DCM_CQ_SIZE 500 /* uCM cq size */ /* DTO OPs, ordered for DAPL ENUM definitions */ #define OP_RDMA_WRITE IBV_WR_RDMA_WRITE @@ -254,7 +257,7 @@ typedef enum typedef enum dapl_cm_op { - DCM_REQ, + DCM_REQ = 1, DCM_REP, DCM_REJ_USER, /* user reject */ DCM_REJ_CM, /* cm reject, no SID */ @@ -279,7 +282,9 @@ typedef enum dapl_cm_state DCM_RELEASED, DCM_DISC_PENDING, DCM_DISCONNECTED, - DCM_DESTROY + DCM_DESTROY, + DCM_RTU_PENDING, + DCM_DISC_RECV } DAPL_CM_STATE; @@ -370,9 +375,26 @@ STATIC _INLINE_ char * dapl_cm_state_str(IN int st) "CM_RELEASED", "CM_DISC_PENDING", "CM_DISCONNECTED", - "CM_DESTROY" + "CM_DESTROY", + "CM_RTU_PENDING", + "CM_DISC_RECV" }; -return ((st < 0 || st > 13) ? "Invalid CM state?" : state[st]); +return ((st < 0 || st > 15) ? "Invalid CM state?" : state[st]); +} + +STATIC _INLINE_ char * dapl_cm_op_str(IN int op) +{ + static char *ops[] = { + "INVALID", + "REQ", + "REP", + "REJ_USER", + "REJ_CM", + "RTU", + "DREQ", + "DREP", + }; + return ((op < 1 || op > 7) ? "Invalid OP?" 
: ops[op]); } #endif /* _DAPL_IB_COMMON_H_ */ diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 4dc67c9..099cadf 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -95,12 +95,19 @@ static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) return DAPL_FD_ERROR; } -static int dapl_select(struct dapl_fd_set *set) +static int dapl_select(struct dapl_fd_set *set, int time_ms) { int ret; + struct timeval tv, *p_tv = NULL; + + if (time_ms != -1) { + p_tv = &tv; + tv.tv_sec = time_ms/1000; + tv.tv_usec = (time_ms%1000)*1000; + } dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n"); - ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL); + ret = select(0, &set->set[0], &set->set[1], &set->set[2], p_tv); dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n"); if (ret == SOCKET_ERROR) @@ -166,24 +173,27
[ANNOUNCE] dapl-2.0.23 release
New release for uDAPL v2 (2.0.23) available at: http://www.openfabrics.org/downloads/dapl Latest Packages (see ChangeLog for recent changes): md5sum: f58d6dd903cee271d71b0eb6fa33984e compat-dapl-1.2.14.tar.gz md5sum: dc1f31a4bc28aedcb725264d65db7e51 dapl-2.0.23.tar.gz Vlad, please pull into OFED 1.5 using package spec files and installing in the following order (v2 first): dapl-2.0.23-1 dapl-utils-2.0.23-1 dapl-devel-2.0.23-1 dapl-debuginfo-2.0.23-1 compat-dapl-1.2.14-1 compat-dapl-devel-1.2.14-1 Summary of changes since last release: v2 - cma: cannot reuse the cm_id and qp for new connection, must reallocate a new one. v2 - scm, cma: update DAPL cm protocol revision with latest address/port changes v2 - ucm: modify IB address format to align better with sockaddr_in6 v2 - Add definition for getpid similar to that used by the other dtest apps. v2 - WinOF provides a common implementation of gettimeofday that should v2 - the completion manager was updated to provide an abstraction that v2 - dtestcm: remove IB verb definitions v2 - dtest, dtestx: remove IB verb definitions v2 - scm: tighten up socket options to ensure similar behavior on Windows and Linux. v2 - cma: improve serialization of destroy and event processing v2 - scm: improve serialization of destroy and state changes v2 - common: no cleanup/release code for timer thread v2 - scm, cma: dapli_thread doesn't always get terminated on library close. v2 - ucm: tighten up locking with CM processing, state changes v2 - ucm: For UD type QP's, return CR p_data with CONN_EST event on passive side. v2 - ucm: cleanup extra cr/lf v2 - ucm: fix issues with UD QP's. v2 - winof: Convert windows version of dapl and dat libraries to use private heaps. v2 - dtest, dtestx: modifications for UD QP testing with ucm provider. v2 - scm, ucm: UD QP support was broken when porting to common openib code base. 
v2 - cma: cleanup warning with unused local variable, ret, in disconnect v2 - cma: remove debug message after rdma_disconnect failure v2 - scm: socket errno check needs O/S dependent wrapper v2 - dapltest: update script files for WinOF v2 - cma: conditional check for new rdma_cm definition. For more details see the following: http://www.openfabrics.org/downloads/dapl documentation/uDAPL_v20.pdf for v2 API specification documentation/transition_to_dat20_120406.pdf for details on porting to 2.0 documentation/DAT_IB_Extensions.pdf for IB extension details. documentation/DAT_IW_Extensions.pdf for iWARP extension details. documentation/uDAPL_ofed_testing_bkm.pdf for latest uDAPL OFED BKM, updated 8-22-2008 uDAT/uDAPL source git tree is located at git://git.openfabrics.org/~ardavis/scm/dapl.git Thanks, -arlin ___ -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] uDAPL v2: ucm: modify IB address format to align better with sockaddr_in6
Restructure the dcm_addr union to map the IB side closer to sockaddr_in6 and initialize family to AF_INET6 to ensure callee allocates enough memory for ucm dat_ia_address type. Put qpn in flowinfo, lid in port, and gid in addr. Change the test suites to print address information based on AF_INET or AF_INET6 instead of using specific IB address union from the provider. Signed-off-by: Arlin Davis --- dapl/openib_common/dapl_ib_common.h | 22 - dapl/openib_scm/cm.c| 34 +--- dapl/openib_ucm/cm.c| 51 +++--- dapl/openib_ucm/device.c| 15 + test/dtest/dtest.c | 59 +-- test/dtest/dtestcm.c| 59 ++- test/dtest/dtestx.c | 57 + 7 files changed, 143 insertions(+), 154 deletions(-) diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h index 671073b..0436da7 100644 --- a/dapl/openib_common/dapl_ib_common.h +++ b/dapl/openib_common/dapl_ib_common.h @@ -61,24 +61,28 @@ typedef ib_hca_handle_t dapl_ibal_ca_t; #define DCM_VER 5 /* CM private data areas, same for all operations */ -#defineDCM_MAX_PDATA_SIZE 128 +#defineDCM_MAX_PDATA_SIZE 118 /* - * DAPL IB/QP address (type, port, lid, qp_num, gid) mapping to - * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (24 bytes) + * UCM DAPL IB/QP address (lid, qp_num, gid) mapping to + * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (28 bytes) * For applications, like MPI, that exchange IA_ADDRESS * across the fabric before connecting, it eliminates the * overhead of name and address resolution to the destination's - * CM services. UCM provider uses this for DAT_IA_ADDRESS. + * CM services. UCM provider uses the following for + * DAT_IA_ADDRESS. Note: family == AF_INET6 to insure proper + * callee storage for address. 
*/ union dcm_addr { DAT_SOCK_ADDR6 so; struct { - uint8_t qp_type; - uint8_t port_num; - uint16_tlid; - uint32_tqpn; - union ibv_gid gid; + uint16_tfamily; /* sin6_family */ + uint16_tlid; /* sin6_port */ + uint32_tqpn; /* sin6_flowinfo */ + uint8_t gid[16]; /* sin6_addr */ + uint16_tport;/* sin6_scope_id */ + uint8_t sl; + uint8_t qp_type; } ib; }; diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index dae1781..9a7bbd6 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -488,18 +488,17 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) } dapl_dbg_log(DAPL_DBG_TYPE_CM, -" CONN_PENDING: sending SRC port=%d lid=0x%x," +" CONN_PENDING: sending SRC lid=0x%x," " qpn=0x%x, psize=%d\n", -cm_ptr->msg.saddr.ib.port_num, ntohs(cm_ptr->msg.saddr.ib.lid), ntohl(cm_ptr->msg.saddr.ib.qpn), ntohs(cm_ptr->msg.p_size)); dapl_dbg_log(DAPL_DBG_TYPE_CM, " CONN_PENDING: SRC GID subnet %016llx id %016llx\n", (unsigned long long) -htonll(cm_ptr->msg.saddr.ib.gid.global.subnet_prefix), +htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[0]), (unsigned long long) -htonll(cm_ptr->msg.saddr.ib.gid.global.interface_id)); +htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[8])); return; bail: @@ -561,10 +560,10 @@ dapli_socket_connect(DAPL_EP * ep_ptr, cm_ptr->msg.op = ntohs(DCM_REQ); cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num); cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type; - cm_ptr->msg.saddr.ib.port_num = ia_ptr->hca_ptr->port_num; cm_ptr->msg.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid; - cm_ptr->msg.saddr.ib.gid = ia_ptr->hca_ptr->ib_trans.gid; - + dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0], + &ia_ptr->hca_ptr->ib_trans.gid, 16); + /* save references */ cm_ptr->hca = ia_ptr->hca_ptr; cm_ptr->ep = ep_ptr; @@ -638,13 +637,12 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) sizeof(union dcm_addr)); dapl_dbg_log(DAPL_DBG_TYPE_EP, -" CONN_RTU: DST %s %d port=0x%x lid=0x%x," +" CONN_RTU: DST %s %d lid=0x%x," " 
qpn=0x%x, qp_typ
[PATCH] DAPL v2: dtest, dtestx: remove IB verb definitions
Remove gid and qp_type references from test app. Print address information in sockaddr and ucm provider format with qpn and lid. Signed-off-by: Arlin Davis --- test/dtest/dtest.c | 19 --- test/dtest/dtestx.c | 27 +++ 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c index 75cbe4c..5ac16c4 100755 --- a/test/dtest/dtest.c +++ b/test/dtest/dtest.c @@ -100,7 +100,6 @@ #define MAX_PROCS 1000 /* Header files needed for DAT/uDAPL */ -#include "infiniband/verbs.h" #include "dat2/udat.h" /* definitions */ @@ -219,7 +218,7 @@ union dcm_addr { uint8_t port_num; uint16_tlid; uint32_tqpn; - union ibv_gid gid; + uint8_t gid[16]; } ib; }; @@ -447,15 +446,13 @@ int main(int argc, char **argv) (void*)ia_attr.ia_address_ptr, sizeof(DAT_SOCK_ADDR6)); - if (local.ib.qp_type == IBV_QPT_UD) { - ucm = 1; - printf("%d Local uCM Address = QPN=0x%x, LID=0x%x\n", - getpid(), ntohl(local.ib.qpn), - ntohs(local.ib.lid)); - printf("%d Remote uCM Address = QPN=0x%x, LID=0x%x\n", - getpid(), ntohl(remote.ib.qpn), - ntohs(remote.ib.lid)); - } + printf("%d Local Address %s port %d\n", getpid(), + inet_ntoa(((struct sockaddr_in *)&local)->sin_addr), + SERVER_CONN_QUAL); + + printf("%d Local Address QPN=0x%x, LID=0x%x <<< ucm provider\n", + getpid(), ntohl(local.ib.qpn), + ntohs(local.ib.lid)); /* Create Protection Zone */ start = get_time(); diff --git a/test/dtest/dtestx.c b/test/dtest/dtestx.c index 6e17b6d..2132f31 100755 --- a/test/dtest/dtestx.c +++ b/test/dtest/dtestx.c @@ -65,7 +65,6 @@ #endif -#include "infiniband/verbs.h" #include "dat2/udat.h" #include "dat2/dat_ib_extensions.h" @@ -189,7 +188,7 @@ union dcm_addr { uint8_t port_num; uint16_tlid; uint32_tqpn; - union ibv_gid gid; + uint8_t gid[16]; } ib; }; @@ -444,24 +443,20 @@ int connect_ep(char *hostname) (void*)ia_attr.ia_address_ptr, sizeof(DAT_SOCK_ADDR6)); - if (local.ib.qp_type == IBV_QPT_UD) { - ucm = 1; +printf("%d Local Address %s port %d\n", getpid(), + inet_ntoa(((struct 
sockaddr_in *)&local)->sin_addr), + SERVER_ID); - if (ud_test) { - printf("%d UD test over UCM provider not supported\n", - getpid()); - exit(1); - } +printf("%d Local Address QPN=0x%x, LID=0x%x <<< ucm provider\n", +getpid(), ntohl(local.ib.qpn), +ntohs(local.ib.lid)); - printf("%d Local uCM Address = QPN=0x%x, LID=0x%x\n", - getpid(), ntohl(local.ib.qpn), - ntohs(local.ib.lid)); - printf("%d Remote uCM Address = QPN=0x%x, LID=0x%x\n", - getpid(), ntohl(remote.ib.qpn), - ntohs(remote.ib.lid)); + if (ucm && ud_test) { + printf("%d UD test over UCM provider not supported\n", + getpid()); + exit(1); } - /* Print provider specific attributes */ for (i = 0; i < prov_attrs.num_provider_specific_attr; i++) { LOGPRINTF(" Provider Specific Attribute[%d] %s=%s\n", -- 1.5.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html