Include the current retry count in the new connect request call, and set it accordingly after creating the new cm object.
Signed-off-by: Arlin Davis <arlin.r.da...@intel.com> --- dapl/openib_scm/cm.c | 23 ++++++++++++----------- 1 files changed, 12 insertions(+), 11 deletions(-) diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index 305f85b..968d9b9 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -64,7 +64,7 @@ static DAT_RETURN dapli_socket_connect(DAPL_EP * ep_ptr, DAT_IA_ADDRESS_PTR r_addr, - DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data); + DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries); #ifdef DAPL_DBG /* Check for EP linking to IA and proper connect state */ @@ -505,8 +505,8 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) struct dapl_ep *ep_ptr = cm_ptr->ep; if (err) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_PENDING: %s ERR %s -> %s %d - %s\n", + dapl_log(DAPL_DBG_TYPE_WARN, + " CONN_REQUEST: %s ERR %s -> %s %d - %s %d\n", err == -1 ? "POLL" : "SOCKOPT", err == -1 ? strerror(dapl_socket_errno()) : strerror(err), inet_ntoa(((struct sockaddr_in *) @@ -514,7 +514,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) ntohs(((struct sockaddr_in *) &cm_ptr->addr)->sin_port), (err == ETIMEDOUT || err == ECONNREFUSED) ? 
- "RETRYING...":"ABORTING"); + "RETRYING...":"ABORTING", cm_ptr->retry); /* retry a timeout */ if ((err == ETIMEDOUT) || (err == ECONNREFUSED && --cm_ptr->retry)) { @@ -522,12 +522,11 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) cm_ptr->socket = DAPL_INVALID_SOCKET; dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, - ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); + ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry); dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); dapli_cm_free(cm_ptr); return; } - goto bail; } @@ -579,7 +578,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) bail: /* mark CM object for cleanup */ dapli_cm_free(cm_ptr); - dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr); + dapl_evd_connection_callback(NULL, IB_CME_TIMEOUT, NULL, 0, ep_ptr); } /* @@ -589,7 +588,7 @@ bail: static DAT_RETURN dapli_socket_connect(DAPL_EP * ep_ptr, DAT_IA_ADDRESS_PTR r_addr, - DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data) + DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries) { dp_ib_cm_handle_t cm_ptr; int ret; @@ -604,6 +603,8 @@ dapli_socket_connect(DAPL_EP * ep_ptr, if (cm_ptr == NULL) return dat_ret; + cm_ptr->retry = retries; + /* create, connect, sockopt, and exchange QP information */ if ((cm_ptr->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { @@ -724,12 +725,12 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) ntohs(*(uint16_t*)&cm_ptr->msg.resv[2])); /* Retry; corner case where server tcp stack resets under load */ - if (err == ECONNRESET) { + if (err == ECONNRESET && --cm_ptr->retry) { closesocket(cm_ptr->socket); cm_ptr->socket = DAPL_INVALID_SOCKET; dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, - ntohs(cm_ptr->msg.p_size), 
&cm_ptr->msg.p_data); + ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry); dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); dapli_cm_free(cm_ptr); return; @@ -1455,7 +1456,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, return (dapli_socket_connect(ep_ptr, remote_ia_address, remote_conn_qual, - private_data_size, private_data)); + private_data_size, private_data, SCM_CR_RETRY)); } /* -- 1.7.3