Steve Wise wrote:
+/* + * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+       int ret = 0;
+
+       BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+       if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+               BUG_ON(!list_empty(&cm_id_priv->work_list));
+               if (waitqueue_active(&cm_id_priv->destroy_wait)) {
+                       BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+                       BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+                                       &cm_id_priv->flags));
+                       ret = 1;
+                       wake_up(&cm_id_priv->destroy_wait);

We recently changed the RDMA CM, IB CM, and a couple of other modules from using wait objects to completions. This avoids a race condition between decrementing the reference count, which allows destruction to proceed, and calling wake_up on a freed cm_id. My guess is that you may need to do the same.

Can you also explain the use of the return value here? It's ignored below in rem_ref() and destroy_cm_id().

+static void add_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       iwcm_deref_id(cm_id_priv);
+}
+

+/* + * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /* Wait if we're currently in a connect or accept downcall */
+ wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

Am I understanding this check correctly? You're checking to see if the user has called iw_cm_disconnect() at the same time that they called iw_cm_connect() or iw_cm_accept(). Are connect / accept blocking, or are you just waiting for an event?

+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp)  { /* QP could be <nul> for user-mode 
client */
+                       if (abrupt)
+                               ret = iwcm_modify_qp_err(cm_id_priv->qp);
+                       else
+                               ret = iwcm_modify_qp_sqd(cm_id_priv->qp);
+ /* + * If both sides are disconnecting the QP could
+                        * already be in ERR or SQD states
+                        */
+                       ret = 0;
+               }
+               else
+                       ret = -EINVAL;
+               break;
+       case IW_CM_STATE_LISTEN:
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = -EINVAL;
+               break;
+       case IW_CM_STATE_CLOSING:
+               /* remote peer closed first */
+       case IW_CM_STATE_IDLE:  
+               /* accept or connect returned !0 */
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_CONN_RECV:
+ /* + * App called disconnect before/without calling accept after
+                * connect_request event delivered.
+                */
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_CONN_SENT:
+               /* Can only get here if wait above fails */
+       default:                
+               BUG_ON(1);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /* Wait if we're currently in a connect or accept downcall. A
+        * listening endpoint should never block here. */
+ wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

Same question/comment as above for the wait_event() in iw_cm_disconnect().

+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_LISTEN:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* destroy the listening endpoint */
+               ret = cm_id->device->iwcm->destroy_listen(cm_id);
+               break;
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* Abrupt close of the connection */
+               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               break;
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CLOSING:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_CONN_RECV:
+ /* + * App called destroy before/without calling accept after
+                * receiving connection request event notification.
+ */ + cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_DESTROYING:
+       default:
+               BUG_ON(1);
+               break;
+       }
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);

As an alternative, you could hold the lock from above, and let the LISTEN / ESTABLISHED state checks release and reacquire it.

+       if (cm_id_priv->qp) {
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       (void)iwcm_deref_id(cm_id_priv);
+}
+
+/* + * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object. + */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+        BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+       destroy_cm_id(cm_id);
+
+ wait_event(cm_id_priv->destroy_wait, + !atomic_read(&cm_id_priv->refcount));
+
+       kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/* + * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request. + */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+               cm_id_priv->state = IW_CM_STATE_LISTEN;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+               if (ret)
+                       cm_id_priv->state = IW_CM_STATE_IDLE;
+               break;
+       default:
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/* + * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+                const void *private_data,
+                u8 private_data_len)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       cm_id_priv->state = IW_CM_STATE_IDLE;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len);
+
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/* + * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider. + */

This makes it sound like we're just waiting for an event.

+int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       struct ib_qp *qp;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+       if (ret) {
+               /* An error on accept precludes provider events */
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               printk("Accept failed, ret=%d\n", ret);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }                       
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       int ret = 0;
+       unsigned long flags;
+       struct ib_qp *qp;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+               
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+       if (ret) {
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               printk("Connect failed, ret=%d\n", ret);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the device is cloned. The event
+ * contains the new four tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       struct iw_cm_id *cm_id;
+       struct iwcm_id_private *cm_id_priv;
+       int ret;
+
+       /* The provider should never generate a connection request
+ * event with a bad status. + */
+       BUG_ON(iw_event->status);
+
+       /* We could be destroying the listening id. If so, ignore this
+        * upcall. */
+       spin_lock_irqsave(&listen_id_priv->lock, flags);
+       if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+               spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+       cm_id = iw_create_cm_id(listen_id_priv->id.device,   
+ listen_id_priv->id.cm_handler, + listen_id_priv->id.context);
+       /* If the cm_id could not be created, ignore the request */
+ if (IS_ERR(cm_id)) + return;
+
+       cm_id->provider_data = iw_event->provider_data;
+       cm_id->local_addr = iw_event->local_addr;
+       cm_id->remote_addr = iw_event->remote_addr;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+       
+       /* Call the client CM handler */
+       ret = cm_id->cm_handler(cm_id, iw_event);
+       if (ret) {
+               printk("destroying child id %p, ret=%d\n",
+                      cm_id, ret);

We probably don't always want to print a message here.

+               set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+               destroy_cm_id(cm_id);
+               if (atomic_read(&cm_id_priv->refcount)==0)
+                       kfree(cm_id);
+       }
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ * + * The provider generated an ESTABLISHED event which means that + * the MPA negotiation has completed successfully and we are now in MPA + * FPDU mode. + *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       /* We clear the CONNECT_WAIT bit here to allow the callback
+        * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+        * from a callback handler is not allowed */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_CONN_RECV:
+               cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+               break;
+       default:
+               BUG_ON(1);

You can just BUG_ON() the state and avoid the switch. The same comment applies below.

+       }
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event to
+ * post its requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       /* Clear the connect wait bit so a callback function calling
+        * iw_cm_disconnect will not wait and deadlock this thread */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_CONN_SENT:
+               if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+                       cm_id_priv->id.local_addr = iw_event->local_addr;
+                       cm_id_priv->id.remote_addr = iw_event->remote_addr;
+                       cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+               } else {
+                       /* REJECTED or RESET */
+                       cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+                       cm_id_priv->qp = NULL;
+                       cm_id_priv->state = IW_CM_STATE_IDLE;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+               break;
+       default:
+               BUG_ON(1);
+       }
+       /* Wake up waiters on connect complete */
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING + *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP,  move to IDLE, and remove the 'connected' reference.
+ * + * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+ /* TT */printk("%s:%d cm_id_priv=%p, state=%d\n", + __FUNCTION__, __LINE__,
+                      cm_id_priv,cm_id_priv->state);

You will want to remove this debugging printk.

- Sean
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to