This patch corrects the fact that IPoIB leaks all of its address handles by creating a list of dead AHs and freeing an AH once all the sends using it complete.
Index: ulp/ipoib/ipoib_verbs.c =================================================================== --- ulp/ipoib/ipoib_verbs.c (revision 1201) +++ ulp/ipoib/ipoib_verbs.c (working copy) @@ -171,16 +171,6 @@ return -EINVAL; } -void ipoib_qp_destroy(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - - if (ib_destroy_qp(priv->qp)) - ipoib_warn(priv, "ib_qp_destroy failed\n"); - - priv->qp = NULL; -} - int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) { struct ipoib_dev_priv *priv = netdev_priv(dev); Index: ulp/ipoib/ipoib_main.c =================================================================== --- ulp/ipoib/ipoib_main.c (revision 1201) +++ ulp/ipoib/ipoib_main.c (working copy) @@ -177,7 +177,7 @@ struct ipoib_path *path = path_ptr; struct ipoib_dev_priv *priv = netdev_priv(path->dev); struct sk_buff *skb; - struct ib_ah *ah; + struct ipoib_ah *ah; ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n", status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid)); @@ -195,10 +195,10 @@ .port_num = priv->port }; - ah = ib_create_ah(priv->pd, &av); + ah = ipoib_create_ah(path->dev, priv->pd, &av); } - if (IS_ERR(ah)) + if (!ah) goto err; path->ah = ah; @@ -299,7 +299,7 @@ { struct sk_buff *skb = skb_ptr; struct ipoib_dev_priv *priv = netdev_priv(skb->dev); - struct ib_ah *ah; + struct ipoib_ah *ah; ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n", status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid)); @@ -307,6 +307,10 @@ if (status) goto err; + ah = kmalloc(sizeof *ah, GFP_KERNEL); + if (!ah) + goto err; + { struct ib_ah_attr av = { .dlid = be16_to_cpu(pathrec->dlid), @@ -317,13 +321,15 @@ .port_num = priv->port }; - ah = ib_create_ah(priv->pd, &av); + ah->ah = ib_create_ah(priv->pd, &av); } - if (IS_ERR(ah)) + if (IS_ERR(ah->ah)) { + kfree(ah); goto err; + } - *(struct ib_ah **) skb->cb = ah; + *(struct ipoib_ah **) skb->cb = ah; if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed " @@ -337,10 +343,15 @@ static void unicast_arp_finish(struct sk_buff *skb) { - struct ib_ah *ah = *(struct ib_ah **) skb->cb; + struct ipoib_dev_priv *priv = netdev_priv(skb->dev); + struct ipoib_ah *ah = *(struct ipoib_ah **) skb->cb; + unsigned long flags; - if (ah) - ib_destroy_ah(ah); + if (ah) { + spin_lock_irqsave(&priv->lock, flags); + list_add_tail(&ah->list, &priv->dead_ahs); + spin_unlock_irqrestore(&priv->lock, flags); + } } /* @@ -443,7 +454,7 @@ * now we can just send the packet. */ if (skb->destructor == unicast_arp_finish) { - ipoib_send(dev, skb, *(struct ib_ah **) skb->cb, + ipoib_send(dev, skb, *(struct ipoib_ah **) skb->cb, be32_to_cpup((u32 *) phdr->hwaddr)); return 0; } @@ -454,14 +465,7 @@ skb->dst ? "neigh" : "dst", be16_to_cpup((u16 *) skb->data), be32_to_cpup((u32 *) phdr->hwaddr), - phdr->hwaddr[ 4], phdr->hwaddr[ 5], - phdr->hwaddr[ 6], phdr->hwaddr[ 7], - phdr->hwaddr[ 8], phdr->hwaddr[ 9], - phdr->hwaddr[10], phdr->hwaddr[11], - phdr->hwaddr[12], phdr->hwaddr[13], - phdr->hwaddr[14], phdr->hwaddr[15], - phdr->hwaddr[16], phdr->hwaddr[17], - phdr->hwaddr[18], phdr->hwaddr[19]); + IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); /* put the pseudoheader back on */ skb_push(skb, sizeof *phdr); @@ -529,10 +533,17 @@ static void ipoib_neigh_destructor(struct neighbour *neigh) { - ipoib_dbg(netdev_priv(neigh->dev), - "neigh_destructor for %06x " IPOIB_GID_FMT "\n", + struct ipoib_dev_priv *priv = netdev_priv(neigh->dev); + struct ipoib_path *path = IPOIB_PATH(neigh); + + ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n", be32_to_cpup((__be32 *) neigh->ha), IPOIB_GID_ARG(*((union ib_gid *) (neigh->ha + 4)))); + + if (path && path->ah) { + ipoib_put_ah(path->ah); + kfree(path); + } } static int ipoib_neigh_setup(struct neighbour *neigh) @@ -683,12 +694,14 @@ sema_init(&priv->mcast_mutex, 1); INIT_LIST_HEAD(&priv->child_intfs); + INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev); INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev); INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev); + INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev); } struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) Index: ulp/ipoib/ipoib_multicast.c =================================================================== --- ulp/ipoib/ipoib_multicast.c (revision 1201) +++ ulp/ipoib/ipoib_multicast.c (working copy) @@ -36,7 +36,7 @@ /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ struct ipoib_mcast { struct ib_sa_mcmember_rec mcmember; - struct ib_ah *address_handle; + struct ipoib_ah *ah; struct rb_node rb_node; struct list_head list; @@ -69,11 +69,8 @@ ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); - if (mcast->address_handle != NULL) { - int ret = ib_destroy_ah(mcast->address_handle); - if (ret < 0) - ipoib_warn(priv, "ib_destroy_ah failed (ret = %d)\n", ret); - } + if (mcast->ah) + ipoib_put_ah(mcast->ah); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); @@ -108,7 +105,7 @@ INIT_LIST_HEAD(&mcast->list); skb_queue_head_init(&mcast->pkt_queue); - mcast->address_handle = NULL; + mcast->ah = NULL; mcast->query = NULL; return mcast; @@ -224,14 +221,14 @@ av.grh.dgid = mcast->mcmember.mgid; - mcast->address_handle = ib_create_ah(priv->pd, &av); - if (IS_ERR(mcast->address_handle)) { + mcast->ah = ipoib_create_ah(dev, priv->pd, &av); + if (!mcast->ah) { ipoib_warn(priv, "ib_address_create failed\n"); } else { ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT " AV %p, LID 0x%04x, SL %d\n", IPOIB_GID_ARG(mcast->mcmember.mgid), - mcast->address_handle, + mcast->ah->ah, be16_to_cpu(mcast->mcmember.mlid), mcast->mcmember.sl); } @@ -661,7 +658,7 @@ list_add_tail(&mcast->list, &priv->multicast_list); } - if (!mcast->address_handle) { + if (!mcast->ah) { if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) skb_queue_tail(&mcast->pkt_queue, skb); else @@ -682,14 +679,15 @@ out: spin_unlock_irqrestore(&priv->lock, flags); - if (mcast && mcast->address_handle) { + if (mcast && mcast->ah) { if (skb->dst && skb->dst->neighbour && !IPOIB_PATH(skb->dst->neighbour)) { struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC); if (path) { - path->ah = mcast->address_handle; + kref_get(&mcast->ah->ref); + path->ah = mcast->ah; path->qpn = IB_MULTICAST_QPN; path->dev = dev; path->neighbour = skb->dst->neighbour; @@ -697,7 +695,7 @@ } } - ipoib_send(dev, skb, mcast->address_handle, IB_MULTICAST_QPN); + ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); } } @@ -951,9 +949,9 @@ mcast = rb_entry(iter->rb_node, struct ipoib_mcast, rb_node); - *mgid = mcast->mcmember.mgid; - *created = mcast->created; - *queuelen = skb_queue_len(&mcast->pkt_queue); - *complete = mcast->address_handle != NULL; + *mgid = mcast->mcmember.mgid; + *created = mcast->created; + *queuelen = skb_queue_len(&mcast->pkt_queue); + *complete = !!mcast->ah; *send_only = (mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)) ? 1 : 0; } Index: ulp/ipoib/ipoib.h =================================================================== --- ulp/ipoib/ipoib.h (revision 1201) +++ ulp/ipoib/ipoib.h (working copy) @@ -31,6 +31,7 @@ #include <linux/workqueue.h> #include <linux/pci.h> #include <linux/config.h> +#include <linux/kref.h> #include <asm/atomic.h> #include <asm/semaphore.h> @@ -65,6 +66,7 @@ IPOIB_PKEY_STOP = 4, IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, + IPOIB_STOP_REAPER = 7, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -109,6 +111,7 @@ struct work_struct mcast_task; struct work_struct flush_task; struct work_struct restart_task; + struct work_struct ah_reap_task; struct ib_device *ca; u8 port; @@ -134,18 +137,28 @@ struct ib_wc ibwc[IPOIB_NUM_WC]; + struct list_head dead_ahs; + struct proc_dir_entry *mcast_proc_entry; struct ib_event_handler event_handler; struct net_device_stats stats; + struct list_head child_intfs; struct list_head list; - struct list_head child_intfs; }; +struct ipoib_ah { + struct net_device *dev; + struct ib_ah *ah; + struct list_head list; + struct kref ref; + unsigned last_send; +}; + struct ipoib_path { - struct ib_ah *ah; + struct ipoib_ah *ah; u32 qpn; struct sk_buff_head queue; @@ -166,8 +179,17 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); +struct ipoib_ah *ipoib_create_ah(struct net_device *dev, + struct ib_pd *pd, struct ib_ah_attr *attr); +void ipoib_free_ah(struct kref *kref); +static inline void ipoib_put_ah(struct ipoib_ah *ah) +{ + kref_put(&ah->ref, ipoib_free_ah); +} + void ipoib_send(struct net_device *dev, struct sk_buff *skb, - struct ib_ah *address, u32 qpn); + struct ipoib_ah *address, u32 qpn); +void ipoib_reap_ah(void *dev_ptr); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); @@ -213,7 +235,6 @@ union ib_gid *mgid); int ipoib_qp_create(struct net_device *dev); -void ipoib_qp_destroy(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); Index: ulp/ipoib/ipoib_ib.c =================================================================== --- ulp/ipoib/ipoib_ib.c (revision 1201) +++ ulp/ipoib/ipoib_ib.c (working copy) @@ -29,10 +29,44 @@ static DECLARE_MUTEX(pkey_sem); -static int _ipoib_ib_receive(struct ipoib_dev_priv *priv, - u64 work_request_id, - dma_addr_t addr) +struct ipoib_ah *ipoib_create_ah(struct net_device *dev, + struct ib_pd *pd, struct ib_ah_attr *attr) { + struct ipoib_ah *ah; + + ah = kmalloc(sizeof *ah, GFP_KERNEL); + if (!ah) + return NULL; + + ah->dev = dev; + ah->last_send = 0; + kref_init(&ah->ref); + + ah->ah = ib_create_ah(pd, attr); + if (IS_ERR(ah->ah)) { + kfree(ah); + ah = NULL; + } + + return ah; +} + +void ipoib_free_ah(struct kref *kref) +{ + struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref); + struct ipoib_dev_priv *priv = netdev_priv(ah->dev); + + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + list_add_tail(&ah->list, &priv->dead_ahs); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int ipoib_ib_receive(struct ipoib_dev_priv *priv, + u64 work_request_id, + dma_addr_t addr) +{ struct ib_sge list = { .addr = addr, .length = IPOIB_BUF_SIZE, @@ -50,8 +84,8 @@ } /* =============================================================== */ -/*.._ipoib_ib_post_receive -- post a receive buffer */ -static int _ipoib_ib_post_receive(struct net_device *dev, int id) +/*..ipoib_ib_post_receive -- post a receive buffer */ +static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; @@ -72,24 +106,24 @@ PCI_DMA_FROMDEVICE); pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); - ret = _ipoib_ib_receive(priv, id, addr); + ret = ipoib_ib_receive(priv, id, addr); if (ret) - ipoib_warn(priv, "_ipoib_ib_receive failed for buf %d (%d)\n", + ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", id, ret); return ret; } /* =============================================================== */ -/*.._ipoib_ib_post_receives -- post all receive buffers */ -static int _ipoib_ib_post_receives(struct net_device *dev) +/*..ipoib_ib_post_receives -- post all receive buffers */ +static int ipoib_ib_post_receives(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { - if (_ipoib_ib_post_receive(dev, i)) { - ipoib_warn(priv, "_ipoib_ib_post_receive failed for buf %d\n", i); + if (ipoib_ib_post_receive(dev, i)) { + ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; } } @@ -108,7 +142,7 @@ if (entry->status != IB_WC_SUCCESS) { ipoib_warn(priv, "got failed completion event " - "(status=%d, wrid=%d, op=%d)", + "(status=%d, wrid=%d, op=%d)\n", entry->status, wr_id, entry->opcode); if (entry->opcode == IB_WC_SEND) { @@ -163,8 +197,8 @@ } /* repost receive */ - if (_ipoib_ib_post_receive(dev, wr_id)) - ipoib_warn(priv, "_ipoib_ib_post_receive failed " + if (ipoib_ib_post_receive(dev, wr_id)) + ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else ipoib_warn(priv, "completion event with wrid %d\n", @@ -262,7 +296,7 @@ /* =============================================================== */ /*..ipoib_send -- schedule an IB send work request */ void ipoib_send(struct net_device *dev, struct sk_buff *skb, - struct ib_ah *address, u32 qpn) + struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_buf *tx_req; @@ -302,7 +336,7 @@ pci_unmap_addr_set(tx_req, mapping, addr); if (post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1), - address, qpn, addr, skb->len)) { + address->ah, qpn, addr, skb->len)) { ipoib_warn(priv, "post_send failed\n"); ++priv->stats.tx_errors; tx_req->skb = NULL; @@ -312,6 +346,7 @@ dev->trans_start = jiffies; + address->last_send = priv->tx_head; ++priv->tx_head; spin_lock_irqsave(&priv->lock, flags); @@ -323,6 +358,38 @@ } } +void __ipoib_reap_ah(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ipoib_ah *ah, *tah; + LIST_HEAD(remove_list); + + spin_lock_irq(&priv->lock); + list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) + if (ah->last_send <= priv->tx_tail) { + list_del(&ah->list); + list_add_tail(&ah->list, &remove_list); + } + spin_unlock_irq(&priv->lock); + + list_for_each_entry_safe(ah, tah, &remove_list, list) { + ipoib_dbg(priv, "Reaping ah %p\n", ah->ah); + ib_destroy_ah(ah->ah); + kfree(ah); + } +} + +void ipoib_reap_ah(void *dev_ptr) +{ + struct net_device *dev = dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + __ipoib_reap_ah(dev); + + if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); +} + int ipoib_ib_dev_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -334,12 +401,15 @@ return -1; } - ret = _ipoib_ib_post_receives(dev); + ret = ipoib_ib_post_receives(dev); if (ret) { - ipoib_warn(priv, "_ipoib_ib_post_receives returned %d\n", ret); + ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); return -1; } + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); + return 0; } @@ -395,8 +465,10 @@ int i; /* Kill the existing QP and allocate a new one */ - if (priv->qp != NULL) - ipoib_qp_destroy(dev); + if (priv->qp != NULL) { + ib_destroy_qp(priv->qp); + priv->qp = NULL; + } for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { if (priv->rx_ring[i].skb) { @@ -463,14 +535,24 @@ /*..ipoib_ib_dev_cleanup -- clean up IB resources for iface */ void ipoib_ib_dev_cleanup(struct net_device *dev) { - ipoib_dbg(netdev_priv(dev), "cleaning up ib_dev\n"); + struct ipoib_dev_priv *priv = netdev_priv(dev); + ipoib_dbg(priv, "cleaning up ib_dev\n"); + ipoib_mcast_stop_thread(dev); /* Delete the broadcast address and the local address */ ipoib_mcast_dev_down(dev); ipoib_transport_dev_cleanup(dev); + + set_bit(IPOIB_STOP_REAPER, &priv->flags); + cancel_delayed_work(&priv->ah_reap_task); + flush_workqueue(ipoib_workqueue); + while (!list_empty(&priv->dead_ahs)) { + __ipoib_reap_ah(dev); + yield(); + } } /* _______________________________________________ openib-general mailing list [EMAIL PROTECTED] http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general