This patch provides initial timewait handling in the CM. A connection will remain in the timewait state for a period of time derived from its local_ack_timeout (the IBA-encoded value, 4.096 us x 2^local_ack_timeout, approximately converted to milliseconds). After a connection exits the timewait state, the user is notified with an IB_CM_TIMEWAIT_EXIT event, provided that the user has not already destroyed their cm_id.
If a user destroys their cm_id before a connection has exited the timewait state, the user will not be notified, but the connection will still remain in the timewait state until it expires. Note that checks for stale connections are not yet implemented. Signed-off-by: Sean Hefty <[EMAIL PROTECTED]> Index: infiniband/include/ib_cm.h =================================================================== --- infiniband/include/ib_cm.h (revision 1796) +++ infiniband/include/ib_cm.h (working copy) @@ -76,6 +76,7 @@ enum ib_cm_event_type { IB_CM_DREQ_ERROR, IB_CM_DREQ_RECEIVED, IB_CM_DREP_RECEIVED, + IB_CM_TIMEWAIT_EXIT, IB_CM_MRA_RECEIVED, IB_CM_REJ_RECEIVED, IB_CM_LAP_ERROR, Index: infiniband/core/cm.c =================================================================== --- infiniband/core/cm.c (revision 1810) +++ infiniband/core/cm.c (working copy) @@ -100,24 +100,32 @@ struct cm_work { struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ - u32 local_id; /* Established */ + u32 local_id; /* Established / timewait */ u32 remote_id; struct ib_cm_event cm_event; struct ib_sa_path_rec path[0]; }; +struct cm_timewait_info { + struct cm_work work; /* Must be first. 
*/ + /* struct rb_node remote_qp_node; */ + struct rb_node remote_id_node; + u64 remote_ca_guid; + u32 remote_qpn; + u8 inserted_remote_id; +}; + struct cm_id_private { struct ib_cm_id id; struct rb_node service_node; - struct rb_node remote_qp_node; - struct rb_node remote_id_node; struct rb_node sidr_id_node; spinlock_t lock; wait_queue_head_t wait; atomic_t refcount; struct cm_msg *msg; + struct cm_timewait_info *timewait_info; /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; @@ -126,11 +134,9 @@ struct cm_id_private { u32 remote_qpn; u32 sq_psn; u32 rq_psn; - u64 remote_ca_guid; int timeout_ms; enum ib_mtu path_mtu; u8 max_cm_retries; - u8 passive; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; @@ -142,6 +148,8 @@ struct cm_id_private { atomic_t work_count; }; +static void cm_work_handler(void *data); + static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) @@ -361,32 +369,33 @@ static struct cm_id_private * cm_find_li return NULL; } -static struct cm_id_private * cm_insert_remote_id(struct cm_id_private - *cm_id_priv) +static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info + *timewait_info) { struct rb_node **link = &cm.remote_id_table.rb_node; struct rb_node *parent = NULL; - struct cm_id_private *cur_cm_id_priv; - u64 remote_ca_guid = cm_id_priv->remote_ca_guid; - u32 remote_id = cm_id_priv->id.remote_id; + struct cm_timewait_info *cur_timewait_info; + u64 remote_ca_guid = timewait_info->remote_ca_guid; + u32 remote_id = timewait_info->work.remote_id; while (*link) { parent = *link; - cur_cm_id_priv = rb_entry(parent, struct cm_id_private, - remote_id_node); - if (remote_id < cur_cm_id_priv->id.remote_id) + cur_timewait_info = rb_entry(parent, struct cm_timewait_info, + remote_id_node); + if (remote_id < cur_timewait_info->work.remote_id) link = &(*link)->rb_left; - else if (remote_id > cur_cm_id_priv->id.remote_id) + else if 
(remote_id > cur_timewait_info->work.remote_id) link = &(*link)->rb_right; - else if (remote_ca_guid < cur_cm_id_priv->remote_ca_guid) + else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) link = &(*link)->rb_left; - else if (remote_ca_guid > cur_cm_id_priv->remote_ca_guid) + else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) link = &(*link)->rb_right; else - return cur_cm_id_priv; + return cur_timewait_info; } - rb_link_node(&cm_id_priv->remote_id_node, parent, link); - rb_insert_color(&cm_id_priv->remote_id_node, &cm.remote_id_table); + timewait_info->inserted_remote_id = 1; + rb_link_node(&timewait_info->remote_id_node, parent, link); + rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); return NULL; } @@ -415,13 +424,13 @@ static struct cm_id_private * cm_find_id } */ -static void cm_remove_remote_id(struct cm_id_private *cm_id_priv) +static void cm_remove_remote_id(struct cm_timewait_info *timewait_info) { unsigned long flags; - cm_id_priv->passive = 0; + timewait_info->inserted_remote_id = 0; spin_lock_irqsave(&cm.lock, flags); - rb_erase(&cm_id_priv->remote_id_node, &cm.remote_id_table); + rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); spin_unlock_irqrestore(&cm.lock, flags); } @@ -584,6 +593,72 @@ static void cm_free_work(struct cm_work kfree(work); } +static inline int cm_convert_to_ms(int iba_time) +{ + /* approximate conversion to ms from 4.096us x 2^iba_time */ + return 1 << max(iba_time - 8, 0); +} + +static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) +{ + if (timewait_info->inserted_remote_id) + cm_remove_remote_id(timewait_info); + + /* todo: stale connection handling + if (timewait_info->inserted_remote_qpn) + cm_remove_remote_qpn(timewait_info); + */ +} + +static struct cm_timewait_info * cm_create_timewait_info(u32 local_id, + u32 remote_id, + u64 remote_ca_guid, + u32 remote_qpn) +{ + struct cm_timewait_info *timewait_info; + + timewait_info = kmalloc(sizeof *timewait_info, 
GFP_KERNEL); + if (!timewait_info) + return ERR_PTR(-ENOMEM); + memset(timewait_info, 0, sizeof *timewait_info); + + timewait_info->work.local_id = local_id; + timewait_info->work.remote_id = remote_id; + timewait_info->remote_ca_guid = remote_ca_guid; + timewait_info->remote_qpn = remote_qpn; + + INIT_WORK(&timewait_info->work.work, cm_work_handler, + &timewait_info->work); + timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; + return timewait_info; +} + +static void cm_enter_timewait(struct cm_id_private *cm_id_priv) +{ + int wait_time; + + /* + * The cm_id could be destroyed by the user before we exit timewait. + * To protect against this, we search for the cm_id after exiting + * timewait before notifying the user that we've exited timewait. + */ + cm_id_priv->id.state = IB_CM_TIMEWAIT; + wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout); + queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, + msecs_to_jiffies(wait_time)); + cm_id_priv->timewait_info = NULL; +} + +static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) +{ + cm_id_priv->id.state = IB_CM_IDLE; + if (cm_id_priv->timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } +} + int ib_destroy_cm_id(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; @@ -630,10 +705,8 @@ retest: ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - cm_id->state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (cm_id_priv->passive) - cm_remove_remote_id(cm_id_priv); break; case IB_CM_DREQ_RCVD: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -681,12 +754,6 @@ int ib_cm_listen(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_listen); -static inline int cm_convert_to_ms(int iba_time) -{ - /* approximate conversion to ms from 4.096us x 2^iba_time */ - return 1 << max(iba_time - 8, 0); -} - static void 
cm_format_mad_hdr(struct ib_mad_hdr *hdr, struct cm_id_private *cm_id_priv, enum cm_msg_attr_id attr_id, @@ -989,7 +1056,7 @@ static void cm_process_work(struct cm_id static int cm_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; - struct cm_id_private *cm_id_priv, *listen_cm_id_priv, *cur_cm_id_priv; + struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -1001,20 +1068,34 @@ static int cm_req_handler(struct cm_work cm_id = ib_create_cm_id(NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); - + cm_id_priv = container_of(cm_id, struct cm_id_private, id); - cm_id_priv->remote_ca_guid = req_msg->local_ca_guid; cm_id_priv->id.remote_id = req_msg->local_comm_id; - cm_id_priv->passive = 1; + cm_id_priv->timewait_info = cm_create_timewait_info( + cm_id_priv->id.local_id, + cm_id_priv->id.remote_id, + req_msg->local_ca_guid, + cm_req_get_local_qpn(req_msg)); + if (IS_ERR(cm_id_priv->timewait_info)) { + ret = PTR_ERR(cm_id_priv->timewait_info); + goto error1; + } spin_lock_irqsave(&cm.lock, flags); /* Check for duplicate REQ. */ - cur_cm_id_priv = cm_insert_remote_id(cm_id_priv); - if (cur_cm_id_priv) { + if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm.lock, flags); ret = -EINVAL; - goto error1; + goto error2; } + /* todo: Check for a stale connection. + if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { + spin_unlock_irqrestore(&cm.lock, flags); + todo: reject as stale + ret = -EINVAL; + goto error2; + } + */ /* Find matching listen request. 
*/ listen_cm_id_priv = cm_find_listen(req_msg->service_id); if (!listen_cm_id_priv) { @@ -1065,7 +1146,8 @@ static int cm_req_handler(struct cm_work error3: cm_deref_id(listen_cm_id_priv); error2: - cm_remove_remote_id(cm_id_priv); + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); error1: ib_destroy_cm_id(&cm_id_priv->id); return ret; @@ -1258,6 +1340,7 @@ static void cm_format_rep_event(struct c static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; + struct cm_timewait_info *timewait_info; struct cm_rep_msg *rep_msg; unsigned long flags; u64 wr_id; @@ -1268,6 +1351,14 @@ static int cm_rep_handler(struct cm_work if (!cm_id_priv) return -EINVAL; + timewait_info = cm_create_timewait_info(cm_id_priv->id.local_id, + rep_msg->local_comm_id, + rep_msg->local_ca_guid, + cm_rep_get_local_qpn(rep_msg)); + if (IS_ERR(timewait_info)) { + ret = PTR_ERR(timewait_info); + goto error1; + } cm_format_rep_event(work); spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -1278,19 +1369,21 @@ static int cm_rep_handler(struct cm_work case IB_CM_ESTABLISHED: spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_resend_rtu(cm_id_priv); - goto out; + ret = -EINVAL; + goto error2; default: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto out; + ret = -EINVAL; + goto error2; } cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + cm_id_priv->timewait_info = timewait_info; /* todo: handle peer_to_peer */ @@ -1306,9 +1399,11 @@ static int cm_rep_handler(struct cm_work else cm_deref_id(cm_id_priv); return 0; -out: +error2: + kfree(timewait_info); 
+error1: cm_deref_id(cm_id_priv); - return -EINVAL; + return ret; } static int cm_establish_handler(struct cm_work *work) @@ -1438,11 +1533,8 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_ cm_id->state = IB_CM_DREQ_SENT; cm_id_priv->msg = msg; } else - cm_id->state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - if (ret && cm_id_priv->passive) - cm_remove_remote_id(cm_id_priv); out: if (!msg_ret && ret) cm_free_msg(cm_id_priv->msg); @@ -1510,12 +1602,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_ ret = msg_ret ? msg_ret : ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, &bad_send_wr); - - cm_id->state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - if (cm_id_priv->passive) - cm_remove_remote_id(cm_id_priv); out: if (!msg_ret && ret) cm_free_msg(cm_id_priv->msg); @@ -1595,7 +1683,7 @@ static int cm_drep_handler(struct cm_wor spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto out; } - cm_id_priv->id.state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); wr_id = (unsigned long) cm_id_priv->msg; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -1604,8 +1692,6 @@ static int cm_drep_handler(struct cm_wor spin_unlock_irqrestore(&cm_id_priv->lock, flags); ib_cancel_mad(cm_id_priv->av.port->mad_agent, wr_id); - if (cm_id_priv->passive) - cm_remove_remote_id(cm_id_priv); if (ret) cm_process_work(cm_id_priv, work); @@ -1685,11 +1771,11 @@ int ib_send_cm_rej(struct ib_cm_id *cm_i case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - cm_id->state = IB_CM_IDLE; + cm_reset_to_idle(cm_id_priv); break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - cm_id->state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); break; default: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1698,9 +1784,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_i } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (cm_id_priv->passive) - 
cm_remove_remote_id(cm_id_priv); - ret = msg_ret ? msg_ret : ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, &bad_send_wr); @@ -2083,6 +2166,43 @@ out: return -EINVAL; } +static int cm_timewait_handler(struct cm_work *work) +{ + struct cm_timewait_info *timewait_info; + struct cm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + timewait_info = (struct cm_timewait_info *)work; + cm_cleanup_timewait(timewait_info); + + cm_id_priv = cm_acquire_id(timewait_info->work.local_id, + timewait_info->work.remote_id); + if (!cm_id_priv) + return -EINVAL; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_TIMEWAIT || + cm_id_priv->remote_qpn != timewait_info->remote_qpn) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + goto out; + } + cm_id_priv->id.state = IB_CM_IDLE; + ret = atomic_inc_and_test(&cm_id_priv->work_count); + if (!ret) + list_add_tail(&work->list, &cm_id_priv->work_list); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (ret) + cm_process_work(cm_id_priv, work); + else + cm_deref_id(cm_id_priv); + return 0; +out: + cm_deref_id(cm_id_priv); + return -EINVAL; +} + static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_req_param *param) @@ -2357,15 +2477,15 @@ static void cm_process_send_error(struct switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: - cm_id_priv->id.state = IB_CM_IDLE; + cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REQ_ERROR; break; case IB_CM_REP_SENT: - cm_id_priv->id.state = IB_CM_IDLE; + cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REP_ERROR; break; case IB_CM_DREQ_SENT: - cm_id_priv->id.state = IB_CM_TIMEWAIT; + cm_enter_timewait(cm_id_priv); cm_event.event = IB_CM_DREQ_ERROR; break; case IB_CM_SIDR_REQ_SENT: @@ -2376,13 +2496,10 @@ static void cm_process_send_error(struct goto discard; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (cm_id_priv->passive) - 
cm_remove_remote_id(cm_id_priv); cm_event.param.send_status = wc_status; cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); cm_free_msg(msg); return; - discard: spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2478,6 +2595,9 @@ static void cm_work_handler(void *data) case IB_CM_APR_RECEIVED: ret = cm_apr_handler(work); break; + case IB_CM_TIMEWAIT_EXIT: + ret = cm_timewait_handler(work); + break; default: ret = -EINVAL; break; _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general