Hi Steve, Thanks for the explanation. I reviewed the patch and had two comments :
1. When the set_bit(CALLBACK_DESTROY) was done, the refcount could be such so that the free_cm_id is not called, resulting in cm_id having work entries. But there are two places where a BUG_ON(!list_empty(work_list)) was added (before doing a free_cm_id()), both under check for CALLBACK_DESTROY. Isn't it possible for these BUG_ON's to get hit ? This is an error case and may not hit in normal testing. 2. > @@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct i > /* Call the client CM handler */ > ret = cm_id->cm_handler(cm_id, iw_event); > if (ret) { > + iw_cm_reject(cm_id, NULL, 0); > set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); > destroy_cm_id(cm_id); > if (atomic_read(&cm_id_priv->refcount)==0) > - kfree(cm_id); > + free_cm_id(cm_id_priv); > } Though this is not a bug, the code just above this calls iw_destroy_cm_id() if alloc_work_entries() failed. Is it possible for the provider to get a reference to this cm_id during the failed call to the client CM handler ? I didn't think so, which is why in my original patch I had simply called iw_destroy_cm_id here. I had read your explanation about the provider possibly acquiring a reference, but in this place aren't we calling iw_conn_req_handler() which in turn cannot go to the device and cache a reference count ? The rest of the patch looks great. I am going to test this today and will post the results. Thanks, - KK > On Sat, 2007-02-10 at 14:36 -0600, Steve Wise wrote: > > ugh. > > > > There is at least one bug in this patch. I cannot call iw_cm_reject() > > inside destroy_cm_id() because both functions grab the iw_cm lock... > > > > > > This patch puts the iw_cm_reject() calls back in > cm_conn_req_handler()... > > > --- > > iw_cm_id destruction race condition fixes. > > From: Steve Wise <[EMAIL PROTECTED]> > > Several changes: > > - iwcm_deref_id() always wakes up if there's another reference. > > - clean up race condition in cm_work_handler(). > > - create static void free_cm_id() which deallocs the work entries and then > kfrees the cm_id memory. This reduces code replication. > > - rem_ref() if this is the last reference -and- the IWCM owns freeing the > cm_id, then free it. > > Signed-off-by: Steve Wise <[EMAIL PROTECTED]> > Signed-off-by: Tom Tucker <[EMAIL PROTECTED]> > --- > > drivers/infiniband/core/iwcm.c | 47 > +++++++++++++++++++++------------------- > 1 files changed, 25 insertions(+), 22 deletions(-) > > diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c > index 1039ad5..891d1fa 100644 > --- a/drivers/infiniband/core/iwcm.c > +++ b/drivers/infiniband/core/iwcm.c > @@ -146,6 +146,12 @@ static int copy_private_data(struct iw_c > return 0; > } > > +static void free_cm_id(struct iwcm_id_private *cm_id_priv) > +{ > + dealloc_work_entries(cm_id_priv); > + kfree(cm_id_priv); > +} > + > /* > * Release a reference on cm_id. If the last reference is being > * released, enable the waiting thread (in iw_destroy_cm_id) to > @@ -153,21 +159,14 @@ static int copy_private_data(struct iw_c > */ > static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) > { > - int ret = 0; > - > BUG_ON(atomic_read(&cm_id_priv->refcount)==0); > if (atomic_dec_and_test(&cm_id_priv->refcount)) { > BUG_ON(!list_empty(&cm_id_priv->work_list)); > - if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) { > - BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING); > - BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, > - &cm_id_priv->flags)); > - ret = 1; > - } > complete(&cm_id_priv->destroy_comp); > + return 1; > } > > - return ret; > + return 0; > } > > static void add_ref(struct iw_cm_id *cm_id) > @@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_ > { > struct iwcm_id_private *cm_id_priv; > cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); > - iwcm_deref_id(cm_id_priv); > + if (iwcm_deref_id(cm_id_priv) && > + test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { > + BUG_ON(!list_empty(&cm_id_priv->work_list)); > + free_cm_id(cm_id_priv); > + } > } > > static int cm_event_handler(struct iw_cm_id *cm_id, struct > iw_cm_event *event); > @@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_i > case IW_CM_STATE_CONN_RECV: > /* > * App called destroy before/without calling accept after > - * receiving connection request event notification. > + * receiving connection request event notification or > + * returned non zero from the event callback function. > + * In either case, must tell the provider to reject. > */ > cm_id_priv->state = IW_CM_STATE_DESTROYING; > break; > @@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *c > > wait_for_completion(&cm_id_priv->destroy_comp); > > - dealloc_work_entries(cm_id_priv); > - > - kfree(cm_id_priv); > + free_cm_id(cm_id_priv); > } > EXPORT_SYMBOL(iw_destroy_cm_id); > > @@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct i > /* Call the client CM handler */ > ret = cm_id->cm_handler(cm_id, iw_event); > if (ret) { > + iw_cm_reject(cm_id, NULL, 0); > set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); > destroy_cm_id(cm_id); > if (atomic_read(&cm_id_priv->refcount)==0) > - kfree(cm_id); > + free_cm_id(cm_id_priv); > } > > out: > @@ -854,13 +858,12 @@ static void cm_work_handler(struct work_ > destroy_cm_id(&cm_id_priv->id); > } > BUG_ON(atomic_read(&cm_id_priv->refcount)==0); > - if (iwcm_deref_id(cm_id_priv)) > - return; > - > - if (atomic_read(&cm_id_priv->refcount)==0 && > - test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { > - dealloc_work_entries(cm_id_priv); > - kfree(cm_id_priv); > + if (iwcm_deref_id(cm_id_priv)) { > + if (test_bit(IWCM_F_CALLBACK_DESTROY, > + &cm_id_priv->flags)) { > + BUG_ON(!list_empty(&cm_id_priv->work_list)); > + free_cm_id(cm_id_priv); > + } > return; > } > spin_lock_irqsave(&cm_id_priv->lock, flags); > > > > _______________________________________________ > openib-general mailing list > openib-general@openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general