This patch corrects the fact that IPoIB leaks all of its address
handles by creating a list of dead AHs and freeing an AH once all the
sends using it complete.

Index: ulp/ipoib/ipoib_verbs.c
===================================================================
--- ulp/ipoib/ipoib_verbs.c     (revision 1201)
+++ ulp/ipoib/ipoib_verbs.c     (working copy)
@@ -171,16 +171,6 @@
        return -EINVAL;
 }
 
-void ipoib_qp_destroy(struct net_device *dev)
-{
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-       if (ib_destroy_qp(priv->qp))
-               ipoib_warn(priv, "ib_qp_destroy failed\n");
-
-       priv->qp = NULL;
-}
-
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
Index: ulp/ipoib/ipoib_main.c
===================================================================
--- ulp/ipoib/ipoib_main.c      (revision 1201)
+++ ulp/ipoib/ipoib_main.c      (working copy)
@@ -177,7 +177,7 @@
        struct ipoib_path *path = path_ptr;
        struct ipoib_dev_priv *priv = netdev_priv(path->dev);
        struct sk_buff *skb;
-       struct ib_ah *ah;
+       struct ipoib_ah *ah;
 
        ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
                  status, be16_to_cpu(pathrec->dlid), 
IPOIB_GID_ARG(pathrec->dgid));
@@ -195,10 +195,10 @@
                        .port_num      = priv->port
                };
 
-               ah = ib_create_ah(priv->pd, &av);
+               ah = ipoib_create_ah(path->dev, priv->pd, &av);
        }
 
-       if (IS_ERR(ah))
+       if (!ah)
                goto err;
 
        path->ah = ah;
@@ -299,7 +299,7 @@
 {
        struct sk_buff *skb = skb_ptr;
        struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
-       struct ib_ah *ah;
+       struct ipoib_ah *ah;
 
        ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
                  status, be16_to_cpu(pathrec->dlid), 
IPOIB_GID_ARG(pathrec->dgid));
@@ -307,6 +307,10 @@
        if (status)
                goto err;
 
+       ah = kmalloc(sizeof *ah, GFP_KERNEL);
+       if (!ah)
+               goto err;
+
        {
                struct ib_ah_attr av = {
                        .dlid          = be16_to_cpu(pathrec->dlid),
@@ -317,13 +321,15 @@
                        .port_num      = priv->port
                };
 
-               ah = ib_create_ah(priv->pd, &av);
+               ah->ah = ib_create_ah(priv->pd, &av);
        }
 
-       if (IS_ERR(ah))
+       if (IS_ERR(ah->ah)) {
+               kfree(ah);
                goto err;
+       }
 
-       *(struct ib_ah **) skb->cb = ah;
+       *(struct ipoib_ah **) skb->cb = ah;
 
        if (dev_queue_xmit(skb))
                ipoib_warn(priv, "dev_queue_xmit failed "
@@ -337,10 +343,15 @@
 
 static void unicast_arp_finish(struct sk_buff *skb)
 {
-       struct ib_ah *ah = *(struct ib_ah **) skb->cb;
+       struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+       struct ipoib_ah *ah = *(struct ipoib_ah **) skb->cb;
+       unsigned long flags;
 
-       if (ah)
-               ib_destroy_ah(ah);
+       if (ah) {
+               spin_lock_irqsave(&priv->lock, flags);
+               list_add_tail(&ah->list, &priv->dead_ahs);
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
 }
 
 /*
@@ -443,7 +454,7 @@
                         * now we can just send the packet.
                         */
                        if (skb->destructor == unicast_arp_finish) {
-                               ipoib_send(dev, skb, *(struct ib_ah **) skb->cb,
+                               ipoib_send(dev, skb, *(struct ipoib_ah **) 
skb->cb,
                                           be32_to_cpup((u32 *) phdr->hwaddr));
                                return 0;
                        }
@@ -454,14 +465,7 @@
                                           skb->dst ? "neigh" : "dst",
                                           be16_to_cpup((u16 *) skb->data),
                                           be32_to_cpup((u32 *) phdr->hwaddr),
-                                          phdr->hwaddr[ 4], phdr->hwaddr[ 5],
-                                          phdr->hwaddr[ 6], phdr->hwaddr[ 7],
-                                          phdr->hwaddr[ 8], phdr->hwaddr[ 9],
-                                          phdr->hwaddr[10], phdr->hwaddr[11],
-                                          phdr->hwaddr[12], phdr->hwaddr[13],
-                                          phdr->hwaddr[14], phdr->hwaddr[15],
-                                          phdr->hwaddr[16], phdr->hwaddr[17],
-                                          phdr->hwaddr[18], phdr->hwaddr[19]);
+                                          IPOIB_GID_ARG(*(union ib_gid *) 
(phdr->hwaddr + 4)));
 
                        /* put the pseudoheader back on */                      
  
                        skb_push(skb, sizeof *phdr);
@@ -529,10 +533,17 @@
 
 static void ipoib_neigh_destructor(struct neighbour *neigh)
 {
-       ipoib_dbg(netdev_priv(neigh->dev),
-                 "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
+       struct ipoib_dev_priv *priv = netdev_priv(neigh->dev);
+       struct ipoib_path     *path = IPOIB_PATH(neigh);
+
+       ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
                  be32_to_cpup((__be32 *) neigh->ha),
                  IPOIB_GID_ARG(*((union ib_gid *) (neigh->ha + 4))));
+
+       if (path && path->ah) {
+               ipoib_put_ah(path->ah);
+               kfree(path);
+       }
 }
 
 static int ipoib_neigh_setup(struct neighbour *neigh)
@@ -683,12 +694,14 @@
        sema_init(&priv->mcast_mutex, 1);
 
        INIT_LIST_HEAD(&priv->child_intfs);
+       INIT_LIST_HEAD(&priv->dead_ahs);
        INIT_LIST_HEAD(&priv->multicast_list);
 
        INIT_WORK(&priv->pkey_task,    ipoib_pkey_poll,          priv->dev);
        INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,    priv->dev);
        INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,       priv->dev);
        INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
+       INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,            priv->dev);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
Index: ulp/ipoib/ipoib_multicast.c
===================================================================
--- ulp/ipoib/ipoib_multicast.c (revision 1201)
+++ ulp/ipoib/ipoib_multicast.c (working copy)
@@ -36,7 +36,7 @@
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
        struct ib_sa_mcmember_rec mcmember;
-       struct ib_ah             *address_handle;
+       struct ipoib_ah          *ah;
 
        struct rb_node    rb_node;
        struct list_head  list;
@@ -69,11 +69,8 @@
        ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n",
                        IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-       if (mcast->address_handle != NULL) {
-               int ret = ib_destroy_ah(mcast->address_handle);
-               if (ret < 0)
-                       ipoib_warn(priv, "ib_destroy_ah failed (ret = %d)\n", 
ret);
-       }
+       if (mcast->ah)
+               ipoib_put_ah(mcast->ah);
 
        while (!skb_queue_empty(&mcast->pkt_queue)) {
                struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
@@ -108,7 +105,7 @@
        INIT_LIST_HEAD(&mcast->list);
        skb_queue_head_init(&mcast->pkt_queue);
 
-       mcast->address_handle = NULL;
+       mcast->ah    = NULL;
        mcast->query = NULL;
 
        return mcast;
@@ -224,14 +221,14 @@
 
                av.grh.dgid = mcast->mcmember.mgid;
 
-               mcast->address_handle = ib_create_ah(priv->pd, &av);
-               if (IS_ERR(mcast->address_handle)) {
+               mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
+               if (!mcast->ah) {
                        ipoib_warn(priv, "ib_address_create failed\n");
                } else {
                        ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
                                        " AV %p, LID 0x%04x, SL %d\n",
                                        IPOIB_GID_ARG(mcast->mcmember.mgid),
-                                       mcast->address_handle,
+                                       mcast->ah->ah,
                                        be16_to_cpu(mcast->mcmember.mlid),
                                        mcast->mcmember.sl);
                }
@@ -661,7 +658,7 @@
                list_add_tail(&mcast->list, &priv->multicast_list);
        }
 
-       if (!mcast->address_handle) {
+       if (!mcast->ah) {
                if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                        skb_queue_tail(&mcast->pkt_queue, skb);
                else
@@ -682,14 +679,15 @@
 
 out:
        spin_unlock_irqrestore(&priv->lock, flags);
-       if (mcast && mcast->address_handle) {
+       if (mcast && mcast->ah) {
                if (skb->dst            &&
                    skb->dst->neighbour &&
                    !IPOIB_PATH(skb->dst->neighbour)) {
                        struct ipoib_path *path = kmalloc(sizeof *path, 
GFP_ATOMIC);
 
                        if (path) {
-                               path->ah        = mcast->address_handle;
+                               kref_get(&mcast->ah->ref);
+                               path->ah        = mcast->ah;
                                path->qpn       = IB_MULTICAST_QPN;
                                path->dev       = dev;
                                path->neighbour = skb->dst->neighbour;
@@ -697,7 +695,7 @@
                        }
                }
 
-               ipoib_send(dev, skb, mcast->address_handle, IB_MULTICAST_QPN);
+               ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
        }
 }
 
@@ -951,9 +949,9 @@
 
        mcast = rb_entry(iter->rb_node, struct ipoib_mcast, rb_node);
 
-       *mgid = mcast->mcmember.mgid;
-       *created = mcast->created;
-       *queuelen = skb_queue_len(&mcast->pkt_queue);
-       *complete = mcast->address_handle != NULL;
+       *mgid      = mcast->mcmember.mgid;
+       *created   = mcast->created;
+       *queuelen  = skb_queue_len(&mcast->pkt_queue);
+       *complete  = !!mcast->ah;
        *send_only = (mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)) ? 1 : 0;
 }
Index: ulp/ipoib/ipoib.h
===================================================================
--- ulp/ipoib/ipoib.h   (revision 1201)
+++ ulp/ipoib/ipoib.h   (working copy)
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/pci.h>
 #include <linux/config.h>
+#include <linux/kref.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -65,6 +66,7 @@
        IPOIB_PKEY_STOP           = 4,
        IPOIB_FLAG_SUBINTERFACE   = 5,
        IPOIB_MCAST_RUN           = 6,
+       IPOIB_STOP_REAPER         = 7,
 
        IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -109,6 +111,7 @@
        struct work_struct mcast_task;
        struct work_struct flush_task;
        struct work_struct restart_task;
+       struct work_struct ah_reap_task;
 
        struct ib_device *ca;
        u8                port;
@@ -134,18 +137,28 @@
 
        struct ib_wc ibwc[IPOIB_NUM_WC];
 
+       struct list_head dead_ahs;
+
        struct proc_dir_entry *mcast_proc_entry;
 
        struct ib_event_handler event_handler;
 
        struct net_device_stats stats;
 
+       struct list_head child_intfs;
        struct list_head list;
-       struct list_head child_intfs;
 };
 
+struct ipoib_ah {
+       struct net_device *dev;
+       struct ib_ah      *ah;
+       struct list_head   list;
+       struct kref        ref;
+       unsigned           last_send;
+};
+
 struct ipoib_path {
-       struct ib_ah       *ah;
+       struct ipoib_ah    *ah;
        u32                 qpn;
        struct sk_buff_head queue;
 
@@ -166,8 +179,17 @@
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+                                struct ib_pd *pd, struct ib_ah_attr *attr);
+void ipoib_free_ah(struct kref *kref);
+static inline void ipoib_put_ah(struct ipoib_ah *ah)
+{
+       kref_put(&ah->ref, ipoib_free_ah);
+}
+
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-               struct ib_ah *address, u32 qpn);
+               struct ipoib_ah *address, u32 qpn);
+void ipoib_reap_ah(void *dev_ptr);
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
@@ -213,7 +235,6 @@
                       union ib_gid *mgid);
 
 int ipoib_qp_create(struct net_device *dev);
-void ipoib_qp_destroy(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
 void ipoib_transport_dev_cleanup(struct net_device *dev);
 
Index: ulp/ipoib/ipoib_ib.c
===================================================================
--- ulp/ipoib/ipoib_ib.c        (revision 1201)
+++ ulp/ipoib/ipoib_ib.c        (working copy)
@@ -29,10 +29,44 @@
 
 static DECLARE_MUTEX(pkey_sem);
 
-static int _ipoib_ib_receive(struct ipoib_dev_priv *priv,
-                            u64 work_request_id,
-                            dma_addr_t addr)
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+                                struct ib_pd *pd, struct ib_ah_attr *attr)
 {
+       struct ipoib_ah *ah;
+
+       ah = kmalloc(sizeof *ah, GFP_KERNEL);
+       if (!ah)
+               return NULL;
+
+       ah->dev       = dev;
+       ah->last_send = 0;
+       kref_init(&ah->ref);
+
+       ah->ah = ib_create_ah(pd, attr);
+       if (IS_ERR(ah->ah)) {
+               kfree(ah);
+               ah = NULL;
+       }
+
+       return ah;
+}
+
+void ipoib_free_ah(struct kref *kref)
+{
+       struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
+       struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       list_add_tail(&ah->list, &priv->dead_ahs);
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ipoib_ib_receive(struct ipoib_dev_priv *priv,
+                           u64 work_request_id,
+                           dma_addr_t addr)
+{
        struct ib_sge list = {
                .addr    = addr,
                .length  = IPOIB_BUF_SIZE,
@@ -50,8 +84,8 @@
 }
 
 /* =============================================================== */
-/*.._ipoib_ib_post_receive -- post a receive buffer                */
-static int _ipoib_ib_post_receive(struct net_device *dev, int id)
+/*..ipoib_ib_post_receive -- post a receive buffer                */
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
@@ -72,24 +106,24 @@
                              PCI_DMA_FROMDEVICE);
        pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
 
-       ret = _ipoib_ib_receive(priv, id, addr);
+       ret = ipoib_ib_receive(priv, id, addr);
        if (ret)
-               ipoib_warn(priv, "_ipoib_ib_receive failed for buf %d (%d)\n",
+               ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
                           id, ret);
 
        return ret;
 }
 
 /* =============================================================== */
-/*.._ipoib_ib_post_receives -- post all receive buffers            */
-static int _ipoib_ib_post_receives(struct net_device *dev)
+/*..ipoib_ib_post_receives -- post all receive buffers            */
+static int ipoib_ib_post_receives(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int i;
 
        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
-               if (_ipoib_ib_post_receive(dev, i)) {
-                       ipoib_warn(priv, "_ipoib_ib_post_receive failed for buf 
%d\n", i);
+               if (ipoib_ib_post_receive(dev, i)) {
+                       ipoib_warn(priv, "ipoib_ib_post_receive failed for buf 
%d\n", i);
                        return -EIO;
                }
        }
@@ -108,7 +142,7 @@
 
        if (entry->status != IB_WC_SUCCESS) {
                ipoib_warn(priv, "got failed completion event "
-                          "(status=%d, wrid=%d, op=%d)",
+                          "(status=%d, wrid=%d, op=%d)\n",
                           entry->status, wr_id, entry->opcode);
 
                if (entry->opcode == IB_WC_SEND) {
@@ -163,8 +197,8 @@
                        }
 
                        /* repost receive */
-                       if (_ipoib_ib_post_receive(dev, wr_id))
-                               ipoib_warn(priv, "_ipoib_ib_post_receive failed 
"
+                       if (ipoib_ib_post_receive(dev, wr_id))
+                               ipoib_warn(priv, "ipoib_ib_post_receive failed "
                                           "for buf %d\n", wr_id);
                } else
                        ipoib_warn(priv, "completion event with wrid %d\n",
@@ -262,7 +296,7 @@
 /* =============================================================== */
 /*..ipoib_send -- schedule an IB send work request                 */
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-               struct ib_ah *address, u32 qpn)
+               struct ipoib_ah *address, u32 qpn)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_buf *tx_req;
@@ -302,7 +336,7 @@
        pci_unmap_addr_set(tx_req, mapping, addr);
 
        if (post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
-                     address, qpn, addr, skb->len)) {
+                     address->ah, qpn, addr, skb->len)) {
                ipoib_warn(priv, "post_send failed\n");
                ++priv->stats.tx_errors;
                tx_req->skb = NULL;
@@ -312,6 +346,7 @@
 
                dev->trans_start = jiffies;
 
+               address->last_send = priv->tx_head;
                ++priv->tx_head;
 
                spin_lock_irqsave(&priv->lock, flags);
@@ -323,6 +358,38 @@
        }
 }
 
+void __ipoib_reap_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_ah *ah, *tah;
+       LIST_HEAD(remove_list);
+
+       spin_lock_irq(&priv->lock);
+       list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
+               if (ah->last_send <= priv->tx_tail) {
+                       list_del(&ah->list);
+                       list_add_tail(&ah->list, &remove_list);
+               }
+       spin_unlock_irq(&priv->lock);
+
+       list_for_each_entry_safe(ah, tah, &remove_list, list) {
+               ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
+               ib_destroy_ah(ah->ah);
+               kfree(ah);
+       }
+}
+
+void ipoib_reap_ah(void *dev_ptr)
+{
+       struct net_device *dev = dev_ptr;
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       __ipoib_reap_ah(dev);
+
+       if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
+               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+}
+
 int ipoib_ib_dev_open(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -334,12 +401,15 @@
                return -1;
        }
 
-       ret = _ipoib_ib_post_receives(dev);
+       ret = ipoib_ib_post_receives(dev);
        if (ret) {
-               ipoib_warn(priv, "_ipoib_ib_post_receives returned %d\n", ret);
+               ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
                return -1;
        }
 
+       clear_bit(IPOIB_STOP_REAPER, &priv->flags);
+       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+
        return 0;
 }
 
@@ -395,8 +465,10 @@
        int i;
 
        /* Kill the existing QP and allocate a new one */
-       if (priv->qp != NULL)
-               ipoib_qp_destroy(dev);
+       if (priv->qp != NULL) {
+               ib_destroy_qp(priv->qp);
+               priv->qp = NULL;
+       }
 
        for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
                if (priv->rx_ring[i].skb) {
@@ -463,14 +535,24 @@
 /*..ipoib_ib_dev_cleanup -- clean up IB resources for iface        */
 void ipoib_ib_dev_cleanup(struct net_device *dev)
 {
-       ipoib_dbg(netdev_priv(dev), "cleaning up ib_dev\n");
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+       ipoib_dbg(priv, "cleaning up ib_dev\n");
+
        ipoib_mcast_stop_thread(dev);
 
        /* Delete the broadcast address and the local address */
        ipoib_mcast_dev_down(dev);
 
        ipoib_transport_dev_cleanup(dev);
+
+       set_bit(IPOIB_STOP_REAPER, &priv->flags);
+       cancel_delayed_work(&priv->ah_reap_task);
+       flush_workqueue(ipoib_workqueue);
+       while (!list_empty(&priv->dead_ahs)) {
+               __ipoib_reap_ah(dev);
+               yield();
+       }
 }
 
 /*
_______________________________________________
openib-general mailing list
[EMAIL PROTECTED]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to