This patch adds reference counting for MAD agents to protect against deregistration 
while a callback is being invoked.  As part of the structure changes to support 
reference counting, deregistration code has been simplified, and a bug has been fixed 
where multiple port structures were being stored in the same pointer.

Note that when sending MADs, the code currently holds a reference count from the time 
that the send is posted, until it completes and is returned to the user.

- Sean

-- 
Index: access/ib_mad_priv.h
===================================================================
--- access/ib_mad_priv.h        (revision 885)
+++ access/ib_mad_priv.h        (working copy)
@@ -97,6 +97,9 @@
        struct list_head agent_list;
        struct ib_mad_agent agent;
        struct ib_mad_reg_req *reg_req;
+       struct ib_mad_port_private *port_priv;
+       atomic_t refcount;
+       wait_queue_head_t wait;
        u8 rmpp_version;
 };
 
Index: access/ib_mad.c
===================================================================
--- access/ib_mad.c     (revision 885)
+++ access/ib_mad.c     (working copy)
@@ -221,9 +221,12 @@
 
+       atomic_set(&mad_agent_priv->refcount, 1);
+       init_waitqueue_head(&mad_agent_priv->wait);
+       mad_agent_priv->port_priv = port_priv;
+
        /* Add mad agent into agent list */
        list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
-
        spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 
        return &mad_agent_priv->agent;
 
 error3:
@@ -241,37 +244,28 @@
  */
 int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 {
-       struct ib_mad_port_private *entry;
-       struct ib_mad_agent_private *entry2, *temp;
-       unsigned long flags, flags2;
+       struct ib_mad_agent_private *mad_agent_priv;
+       unsigned long flags;
 
-       /*
-        * Rather than walk all the mad agent lists on all the mad ports,
-        * might use device in mad_agent and port number from mad agent QP
-        * but this approach has some downsides
-        */
-       spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-       list_for_each_entry(entry, &ib_mad_port_list, port_list) {
-               spin_lock_irqsave(&entry->reg_lock, flags2);
-               list_for_each_entry_safe(entry2, temp, 
-                                        &entry->agent_list, agent_list) {
-                       if (&entry2->agent == mad_agent) {
-                               remove_mad_reg_req(entry2);
-                               list_del(&entry2->agent_list);
-
-                               spin_unlock_irqrestore(&entry->reg_lock, flags2);      
 
-                               spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-                               /* Release allocated structures */
-                               if (entry2->reg_req)
-                                       kfree(entry2->reg_req);
-                               kfree(entry2);
-                               return 0;
-                       }
-               }
-               spin_unlock_irqrestore(&entry->reg_lock, flags2);       
-       }
-       spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-       return 1;
+       mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+                                     agent);
+
+       /* Cleanup outstanding sends/pending receives for this agent... */
+
+       spin_lock_irqsave(&mad_agent_priv->port_priv->reg_lock, flags);
+       remove_mad_reg_req(mad_agent_priv);
+       list_del(&mad_agent_priv->agent_list);
+       spin_unlock_irqrestore(&mad_agent_priv->port_priv->reg_lock, flags);    
+
+       atomic_dec(&mad_agent_priv->refcount);
+       wait_event(mad_agent_priv->wait,
+                  !atomic_read(&mad_agent_priv->refcount));
+
+       if (mad_agent_priv->reg_req)
+               kfree(mad_agent_priv->reg_req);
+       kfree(mad_agent_priv);
+
+       return 0;
 }
 EXPORT_SYMBOL(ib_unregister_mad_agent);
 
@@ -287,7 +281,9 @@
        struct ib_send_wr       *cur_send_wr, *next_send_wr;
        struct ib_send_wr       wr;
        struct ib_send_wr       *bad_wr;
-       struct ib_mad_send_wr_private *mad_send_wr;
+       struct ib_mad_send_wr_private   *mad_send_wr;
+       struct ib_mad_agent_private     *mad_agent_priv;
+       struct ib_mad_port_private      *port_priv;
        unsigned long flags;
 
        cur_send_wr = send_wr;
@@ -297,6 +293,10 @@
                return -EINVAL;
        }
 
+       mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+                                     agent);
+       port_priv = mad_agent_priv->port_priv;
+
        /* Walk list of send WRs and post each on send list */
        cur_send_wr = send_wr;
        while (cur_send_wr) {
@@ -330,20 +330,25 @@
                wr.send_flags = IB_SEND_SIGNALED; /* cur_send_wr->send_flags ? */
 
                /* Link send WR into posted send MAD list */
-               spin_lock_irqsave(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+               spin_lock_irqsave(&port_priv->send_list_lock, flags);
                list_add_tail(&mad_send_wr->send_list,
-                             &((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_list);
-               ((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count++;
-               spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                             &port_priv->send_posted_mad_list);
+               port_priv->send_posted_mad_count++;
+               spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+
+               /* Reference MAD agent until send completes. */
+               atomic_inc(&mad_agent_priv->refcount);
 
                ret = ib_post_send(mad_agent->qp, &wr, &bad_wr);
                if (ret) {
                        /* Unlink from posted send MAD list */
-                       spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                       spin_lock_irqsave(&port_priv->send_list_lock, flags);
                        list_del(&mad_send_wr->send_list);
-                       ((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count--;
-                       spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+                       port_priv->send_posted_mad_count--;
+                       spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
                        *bad_send_wr = cur_send_wr;
+                       if (atomic_dec_and_test(&mad_agent_priv->refcount))
+                               wake_up(&mad_agent_priv->wait);
                        printk(KERN_NOTICE "ib_post_mad_send failed\n");
                        return ret;             
                }
@@ -467,7 +472,7 @@
        /* Make sure MAD registration request supplied */
        if (!mad_reg_req)
                return 0;
-       private = priv->agent.device->mad;
+       private = priv->port_priv;
        class = &private->version[mad_reg_req->mgmt_class_version];
        mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
        if (!*class) {
@@ -541,7 +546,7 @@
                return;
        }
 
-       port_priv = agent_priv->agent.device->mad;
+       port_priv = agent_priv->port_priv;
        class = port_priv->version[agent_priv->reg_req->mgmt_class_version];
        if (!class) {
                printk(KERN_ERR "No class table yet MAD registration request supplied\n");
@@ -742,8 +747,11 @@
                                   recv->header.recv_buf.mad,
                                   solicited);
        if (!mad_agent) {
+               spin_unlock_irqrestore(&port_priv->reg_lock, flags);
                printk(KERN_ERR "No matching mad agent found for receive MAD\n");      
 
        } else {
+               atomic_inc(&mad_agent->refcount);
+               spin_unlock_irqrestore(&port_priv->reg_lock, flags);
                if (solicited) {
                        /* Walk the send posted list to find the match !!! */
                        printk(KERN_DEBUG "Currently unsupported solicited MAD received\n");
@@ -752,8 +760,10 @@
                /* Invoke receive callback */   
                mad_agent->agent.recv_handler(&mad_agent->agent,
                                              &recv->header.recv_wc);
+
+               if (atomic_dec_and_test(&mad_agent->refcount))
+                       wake_up(&mad_agent->wait);
        }
-       spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 
        /* Post another receive request for this QP */
        ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
@@ -765,7 +775,8 @@
 static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
                                     struct ib_wc *wc)
 {
-       struct ib_mad_send_wr_private *send_wr;
+       struct ib_mad_send_wr_private   *send_wr;
+       struct ib_mad_agent_private     *mad_agent_priv;
        unsigned long flags;
 
        /* Completion corresponds to first entry on posted MAD send list */
@@ -781,6 +792,8 @@
                        goto error;
                }
 
+               mad_agent_priv = container_of(send_wr->agent,
+                                             struct ib_mad_agent_private, agent);
                /* Check whether timeout was requested !!! */
 
                /* Remove from posted send MAD list */
@@ -795,10 +808,15 @@
 
        /* Restore client wr_id in WC */
        wc->wr_id = send_wr->wr_id;
+       
        /* Invoke client send callback */
        send_wr->agent->send_handler(send_wr->agent,
-                                    (struct ib_mad_send_wc *)wc);      
-       /* Release send MAD WR tracking structure */
+                                    (struct ib_mad_send_wc *)wc);
+
+       /* Release reference taken when sending. */
+       if (atomic_dec_and_test(&mad_agent_priv->refcount))
+               wake_up(&mad_agent_priv->wait);
+
        kfree(send_wr);
        return;
 
@@ -1302,7 +1320,6 @@
        }
 
        memset(port_priv, 0, sizeof *port_priv);
-       device->mad = port_priv;
        port_priv->device = device;
        port_priv->port_num = port_num;
        spin_lock_init(&port_priv->reg_lock);
@@ -1444,7 +1461,6 @@
        /* Handle deallocation of MAD registration tables!!! */
 
        kfree(port_priv);
-       device->mad = NULL;
 
        return 0;
 }
_______________________________________________
openib-general mailing list
[EMAIL PROTECTED]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to