Fix some timeout and long disconnect delay issues discovered during scale-out
testing.

Add support for retrying rdma_cm address and route resolution, with
configuration options for the timeout and retry count of each.

Call rdma_disconnect() when a disconnect request is received, to guarantee a
disconnect reply and event on the remote side. Previously rdma_disconnect()
was not being called from dat_ep_disconnect(), because the state had already
changed to DISCONNECTED in the event callback.
 
Here are the new options (environment variables) and their default settings:
 
DAPL_CM_ARP_TIMEOUT_MS     4000
DAPL_CM_ARP_RETRY_COUNT    15
DAPL_CM_ROUTE_TIMEOUT_MS   4000
DAPL_CM_ROUTE_RETRY_COUNT  15
 
 
Signed-off-by: Arlin Davis [EMAIL PROTECTED]


Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c        (revision 10032)
+++ dapl/openib_cma/dapl_ib_cm.c        (working copy)
@@ -58,6 +58,9 @@
 #include "dapl_ib_util.h"
 #include <sys/poll.h>
 #include <signal.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 #include <rdma/rdma_cma_ib.h>
 
 extern struct rdma_event_channel *g_cm_events;
@@ -99,8 +102,8 @@ static void dapli_addr_resolve(struct da
                        &ipaddr->src_addr)->sin_addr.s_addr),
                ntohl(((struct sockaddr_in *)
                        &ipaddr->dst_addr)->sin_addr.s_addr));
-
-       ret =  rdma_resolve_route(conn->cm_id, 2000);
+       
+       ret =  rdma_resolve_route(conn->cm_id, conn->route_timeout);
        if (ret) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
                             " rdma_connect failed: %s\n",strerror(errno));
@@ -120,6 +123,7 @@ static void dapli_route_resolve(struct d
        struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
        struct ib_addr   *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
 #endif
+
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                " route_resolve: cm_id %p SRC %x DST %x PORT %d\n", 
                conn->cm_id, 
@@ -331,21 +335,17 @@ static void dapli_cm_active_cb(struct da
        case RDMA_CM_EVENT_UNREACHABLE:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        {
-               ib_cm_events_t cm_event;
-                dapl_dbg_log(
+               dapl_dbg_log(
                         DAPL_DBG_TYPE_WARN,
                         " dapli_cm_active_handler: CONN_ERR "
                         " event=0x%x status=%d %s\n",
                         event->event, event->status,
                         (event->status == -ETIMEDOUT)?"TIMEOUT":"" );
 
-               /* no device type specified so assume IB for now */
-               if (event->status == -ETIMEDOUT) /* IB timeout */
-                       cm_event = IB_CME_TIMEOUT;
-               else 
-                       cm_event = IB_CME_DESTINATION_UNREACHABLE;
-
-               dapl_evd_connection_callback(conn, cm_event, NULL, conn->ep);
+               /* per DAT SPEC provider always returns UNREACHABLE */
+               dapl_evd_connection_callback(conn, 
+                                            IB_CME_DESTINATION_UNREACHABLE, 
+                                            NULL, conn->ep);
                break;
        }
        case RDMA_CM_EVENT_REJECTED:
@@ -381,6 +381,7 @@ static void dapli_cm_active_cb(struct da
                break;
 
        case RDMA_CM_EVENT_DISCONNECTED:
+               rdma_disconnect(conn->cm_id); /* force the DREP */
                /* validate EP handle */
                if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP)) 
                        dapl_evd_connection_callback(conn, 
@@ -494,6 +495,7 @@ static void dapli_cm_passive_cb(struct d
                
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
+               rdma_disconnect(conn->cm_id); /* force the DREP */
                /* validate SP handle context */
                if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) || 
                    !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
@@ -543,7 +545,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
                            IN void *p_data)
 {
        struct dapl_ep *ep_ptr = ep_handle;
-               
+       struct dapl_cm_id *conn;
+                       
        /* Sanity check */
        if (NULL == ep_ptr) 
                return DAT_SUCCESS;
@@ -552,36 +555,38 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
                     r_qual,p_data,p_size);
                        
        /* rdma conn and cm_id pre-bound; reference via qp_handle */
-       ep_ptr->cm_handle = ep_ptr->qp_handle;
+       conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
 
        /* Setup QP/CM parameters and private data in cm_id */
-       (void)dapl_os_memzero(&ep_ptr->cm_handle->params,
-                             sizeof(ep_ptr->cm_handle->params));
-       ep_ptr->cm_handle->params.responder_resources = IB_TARGET_MAX;
-       ep_ptr->cm_handle->params.initiator_depth = IB_INITIATOR_DEPTH;
-       ep_ptr->cm_handle->params.flow_control = 1;
-       ep_ptr->cm_handle->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
-       ep_ptr->cm_handle->params.retry_count = IB_RC_RETRY_COUNT;
+       (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+       conn->params.responder_resources = IB_TARGET_MAX;
+       conn->params.initiator_depth = IB_INITIATOR_DEPTH;
+       conn->params.flow_control = 1;
+       conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+       conn->params.retry_count = IB_RC_RETRY_COUNT;
        if (p_size) {
-               dapl_os_memcpy(ep_ptr->cm_handle->p_data, p_data, p_size);
-               ep_ptr->cm_handle->params.private_data = 
-                                       ep_ptr->cm_handle->p_data;
-               ep_ptr->cm_handle->params.private_data_len = p_size;
+               dapl_os_memcpy(conn->p_data, p_data, p_size);
+               conn->params.private_data = conn->p_data;
+               conn->params.private_data_len = p_size;
        }
 
+       /* copy in remote address, need a copy for retry attempts */
+       dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
        /* Resolve remote address, src already bound during QP create */
-       ((struct sockaddr_in*)r_addr)->sin_port = htons(MAKE_PORT(r_qual));
-       if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id, 
-                             NULL, (struct sockaddr *)r_addr, 2000))
+       ((struct sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));
+       ((struct sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;
+
+       if (rdma_resolve_addr(conn->cm_id, NULL, 
+                             (struct sockaddr *)&conn->r_addr, 
+                             conn->arp_timeout))
                return dapl_convert_errno(errno,"ib_connect");
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-               " connect: resolve_addr: cm_id %p SRC %x DST %x port %d\n", 
-               ep_ptr->cm_handle->cm_id, 
-               ntohl(((struct sockaddr_in *)
-                 &ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),
-               ntohl(((struct sockaddr_in *)r_addr)->sin_addr.s_addr),
-               MAKE_PORT(r_qual) );
+               " connect: resolve_addr: cm_id %p -> %s port %d\n", 
+               conn->cm_id, 
+               inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+               ((struct sockaddr_in*)&conn->r_addr)->sin_port );
 
        return DAT_SUCCESS;
 }
@@ -1163,15 +1168,60 @@ void dapli_cma_event_cb(void)
                case RDMA_CM_EVENT_ADDR_RESOLVED:
                        dapli_addr_resolve(conn);
                        break;
+
                case RDMA_CM_EVENT_ROUTE_RESOLVED:
                        dapli_route_resolve(conn);
                        break;
+
                case RDMA_CM_EVENT_ADDR_ERROR:
+                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                                    " CM ADDR ERROR: -> %s retry (%d)..\n", 
+                                    inet_ntoa(((struct sockaddr_in *)
+                                       &conn->r_addr)->sin_addr),
+                                       conn->arp_retries);
+                       
+                       /* retry address resolution */
+                       if ((--conn->arp_retries) && 
+                               (event->status == -ETIMEDOUT)) {
+                               int ret;
+                               ret = rdma_resolve_addr(
+                                       conn->cm_id, NULL, 
+                                       (struct sockaddr *)&conn->r_addr, 
+                                       conn->arp_timeout);
+                               if (!ret) 
+                                       break;
+                               else { 
+                                       dapl_dbg_log(
+                                               DAPL_DBG_TYPE_WARN,
+                                               " ERROR: rdma_resolve_addr = "
+                                               "%d %s\n", 
+                                               ret,strerror(errno));
+                               }
+                       } 
+                       /* retries exhausted or resolve_addr failed */
+                       dapl_evd_connection_callback(
+                               conn, IB_CME_DESTINATION_UNREACHABLE, 
+                               NULL, conn->ep);
+                       break;
+
+
                case RDMA_CM_EVENT_ROUTE_ERROR:
-                       dapl_evd_connection_callback(conn, 
-                                                    IB_CME_DESTINATION_UNREACHABLE, 
-                                                    NULL, conn->ep);
+                       dapl_dbg_log(DAPL_DBG_TYPE_WARN, 
+                                    " CM ROUTE ERROR: -> %s retry (%d)..\n", 
+                                    inet_ntoa(((struct sockaddr_in *)
+                                       &conn->r_addr)->sin_addr),
+                                    conn->route_retries );
+
+                       /* retry route resolution */
+                       if ((--conn->route_retries) && 
+                               (event->status == -ETIMEDOUT))
+                               dapli_addr_resolve(conn);
+                       else 
+                               dapl_evd_connection_callback( conn, 
+                                       IB_CME_DESTINATION_UNREACHABLE, 
+                                       NULL, conn->ep);
                        break;
+               
                case RDMA_CM_EVENT_DEVICE_REMOVAL:
                        dapl_evd_connection_callback(conn, 
                                                     IB_CME_LOCAL_FAILURE, 
Index: dapl/openib_cma/dapl_ib_qp.c
===================================================================
--- dapl/openib_cma/dapl_ib_qp.c        (revision 10032)
+++ dapl/openib_cma/dapl_ib_qp.c        (working copy)
@@ -160,6 +168,17 @@ DAT_RETURN dapls_ib_qp_alloc(IN DAPL_IA 
        conn->cm_id = cm_id;
        conn->ep = ep_ptr;
        conn->hca = ia_ptr->hca_ptr;
+
+       /* setup timers for address and route resolution */
+       conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", 
+                                               IB_ARP_TIMEOUT);
+       conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", 
+                                               IB_ARP_RETRY_COUNT);
+       conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", 
+                                                   IB_ROUTE_TIMEOUT);
+       conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT", 
+                                                   IB_ROUTE_RETRY_COUNT);
+
        ep_ptr->qp_handle = conn;
        ep_ptr->qp_state = IB_QP_STATE_INIT;
        
Index: dapl/openib_cma/dapl_ib_util.h
===================================================================
--- dapl/openib_cma/dapl_ib_util.h      (revision 10032)
+++ dapl/openib_cma/dapl_ib_util.h      (working copy)
@@ -67,8 +67,12 @@ typedef ib_hca_handle_t              dapl_ibal_ca_t;
 
 #define IB_RC_RETRY_COUNT      7
 #define IB_RNR_RETRY_COUNT     7
-#define IB_CM_RESPONSE_TIMEOUT  20     /* 4 sec */
-#define IB_CM_RETRIES           15
+#define IB_CM_RESPONSE_TIMEOUT  23     /* 16 sec */
+#define IB_CM_RETRIES           15     /* 240 sec total default */
+#define IB_ARP_TIMEOUT         4000    /* 4 sec */
+#define IB_ARP_RETRY_COUNT     15      /* 60 sec total */
+#define IB_ROUTE_TIMEOUT       4000    /* 4 sec */
+#define IB_ROUTE_RETRY_COUNT   15      /* 60 sec total */
 #define IB_REQ_MRA_TIMEOUT     27      /* a little over 9 minutes */
 #define IB_MAX_AT_RETRY                3
 #define IB_TARGET_MAX          4       /* max_qp_ous_rd_atom */
@@ -177,12 +181,17 @@ struct ib_llist_entry
 struct dapl_cm_id {
        DAPL_OS_LOCK                    lock;
        int                             destroy;
+       int                             arp_retries;
+       int                             arp_timeout;
+       int                             route_retries;
+       int                             route_timeout;
        int                             in_callback;
        struct rdma_cm_id               *cm_id;
        struct dapl_hca                 *hca;
        struct dapl_sp                  *sp;
        struct dapl_ep                  *ep;
        struct rdma_conn_param          params;
+       DAT_SOCK_ADDR6                  r_addr;
        int                             p_len;
        unsigned char                   p_data[IB_MAX_DREP_PDATA_SIZE];
 };


_______________________________________________
openib-general mailing list
openib-general@openib.org
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to