Re: [PATCH 8/9] IB/ipoib: deserialize multicast joins

2015-03-03 Thread Erez Shitrit

On 3/2/2015 5:29 PM, Doug Ledford wrote:

On Sun, 2015-03-01 at 15:58 +0200, Erez Shitrit wrote:

On 2/22/2015 2:27 AM, Doug Ledford wrote:

Allow the ipoib layer to attempt to join all outstanding multicast
groups at once.  The ib_sa layer will serialize multiple attempts to
join the same group, but will process attempts to join different groups
in parallel.  Take advantage of that.

In order to make this happen, change the mcast_join_thread to loop
through all needed joins, sending a join request for each one that we
still need to join.  There are a few special cases we handle though:

1) Don't attempt to join anything but the broadcast group until the join
of the broadcast group has succeeded.
2) No longer restart the join task at the end of completion handling.
If we completed successfully, we are done.  The join task now needs kicked
either by mcast_send or mcast_restart_task or mcast_start_thread, but
should not need started anytime else except when scheduling a backoff
attempt to rejoin.
3) No longer use separate join/completion routines for regular and
sendonly joins, pass them all through the same routine and just do the
right thing based on the SENDONLY join flag.
4) Only try to join a SENDONLY join twice, then drop the packets and
quit trying.  We leave the mcast group in the list so that if we get a
new packet, all that we have to do is queue up the packet and restart
the join task and it will automatically try to join twice and then
either send or flush the queue again.

Signed-off-by: Doug Ledford <dledf...@redhat.com>
---
   drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 250 
-
   1 file changed, 82 insertions(+), 168 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 277e7ac7c4d..c670d9c2cda 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast 
*mcast,
return 0;
   }
   
-static int

-ipoib_mcast_sendonly_join_complete(int status,
-  struct ib_sa_multicast *multicast)
-{
-   struct ipoib_mcast *mcast = multicast->context;
-   struct net_device *dev = mcast->dev;
-   struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-   /*
-* We have to take the mutex to force mcast_sendonly_join to
-* return from ib_sa_multicast_join and set mcast->mc to a
-* valid value.  Otherwise we were racing with ourselves in
-* that we might fail here, but get a valid return from
-* ib_sa_multicast_join after we had cleared mcast->mc here,
-* resulting in mis-matched joins and leaves and a deadlock
-*/
-   mutex_lock(&mcast_mutex);
-
-   /* We trap for port events ourselves. */
-   if (status == -ENETRESET) {
-   status = 0;
-   goto out;
-   }
-
-   if (!status)
-   status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-   if (status) {
-   if (mcast->logcount++ < 20)
-   ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
-   "join failed for %pI6, status %d\n",
-   mcast->mcmember.mgid.raw, status);
-
-   /* Flush out any queued packets */
-   netif_tx_lock_bh(dev);
-   while (!skb_queue_empty(&mcast->pkt_queue)) {
-   ++dev->stats.tx_dropped;
-   dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-   }
-   netif_tx_unlock_bh(dev);
-   __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
-   } else {
-   mcast->backoff = 1;
-   mcast->delay_until = jiffies;
-   __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
-   }
-out:
-   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-   if (status)
-   mcast->mc = NULL;
-   complete(&mcast->done);
-   mutex_unlock(&mcast_mutex);
-   return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-   struct net_device *dev = mcast->dev;
-   struct ipoib_dev_priv *priv = netdev_priv(dev);
-   struct ib_sa_mcmember_rec rec = {
-#if 0  /* Some SMs don't support send-only yet */
-   .join_state = 4
-#else
-   .join_state = 1
-#endif
-   };
-   int ret = 0;
-
-   if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-   ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
-   "multicast joins\n");
-   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-   complete(&mcast->done);
-   return -ENODEV;
-   }
-
-   rec.mgid = mcast->mcmember.mgid;
-   rec.port_gid = priv->local_gid;
-   rec.pkey = cpu_to_be16(priv->pkey);
-
- 

Re: [PATCH 8/9] IB/ipoib: deserialize multicast joins

2015-03-02 Thread Doug Ledford
On Sun, 2015-03-01 at 15:58 +0200, Erez Shitrit wrote:
 On 2/22/2015 2:27 AM, Doug Ledford wrote:
  Allow the ipoib layer to attempt to join all outstanding multicast
  groups at once.  The ib_sa layer will serialize multiple attempts to
  join the same group, but will process attempts to join different groups
  in parallel.  Take advantage of that.
 
  In order to make this happen, change the mcast_join_thread to loop
  through all needed joins, sending a join request for each one that we
  still need to join.  There are a few special cases we handle though:
 
  1) Don't attempt to join anything but the broadcast group until the join
  of the broadcast group has succeeded.
  2) No longer restart the join task at the end of completion handling.
  If we completed successfully, we are done.  The join task now needs kicked
  either by mcast_send or mcast_restart_task or mcast_start_thread, but
  should not need started anytime else except when scheduling a backoff
  attempt to rejoin.
  3) No longer use separate join/completion routines for regular and
  sendonly joins, pass them all through the same routine and just do the
  right thing based on the SENDONLY join flag.
  4) Only try to join a SENDONLY join twice, then drop the packets and
  quit trying.  We leave the mcast group in the list so that if we get a
  new packet, all that we have to do is queue up the packet and restart
  the join task and it will automatically try to join twice and then
  either send or flush the queue again.
 
  Signed-off-by: Doug Ledford <dledf...@redhat.com>
  ---
drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 250 
  -
1 file changed, 82 insertions(+), 168 deletions(-)
 
  diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 
  b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  index 277e7ac7c4d..c670d9c2cda 100644
  --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  @@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast 
  *mcast,
  return 0;
}

  -static int
  -ipoib_mcast_sendonly_join_complete(int status,
  -  struct ib_sa_multicast *multicast)
  -{
  -   struct ipoib_mcast *mcast = multicast->context;
  -   struct net_device *dev = mcast->dev;
  -   struct ipoib_dev_priv *priv = netdev_priv(dev);
  -
  -   /*
  -* We have to take the mutex to force mcast_sendonly_join to
  -* return from ib_sa_multicast_join and set mcast->mc to a
  -* valid value.  Otherwise we were racing with ourselves in
  -* that we might fail here, but get a valid return from
  -* ib_sa_multicast_join after we had cleared mcast->mc here,
  -* resulting in mis-matched joins and leaves and a deadlock
  -*/
  -   mutex_lock(&mcast_mutex);
  -
  -   /* We trap for port events ourselves. */
  -   if (status == -ENETRESET) {
  -   status = 0;
  -   goto out;
  -   }
  -
  -   if (!status)
  -   status = ipoib_mcast_join_finish(mcast, &multicast->rec);
  -
  -   if (status) {
  -   if (mcast->logcount++ < 20)
  -   ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
  -   "join failed for %pI6, status %d\n",
  -   mcast->mcmember.mgid.raw, status);
  -
  -   /* Flush out any queued packets */
  -   netif_tx_lock_bh(dev);
  -   while (!skb_queue_empty(&mcast->pkt_queue)) {
  -   ++dev->stats.tx_dropped;
  -   dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
  -   }
  -   netif_tx_unlock_bh(dev);
  -   __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
  -   } else {
  -   mcast->backoff = 1;
  -   mcast->delay_until = jiffies;
  -   __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
  -   }
  -out:
  -   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
  -   if (status)
  -   mcast->mc = NULL;
  -   complete(&mcast->done);
  -   mutex_unlock(&mcast_mutex);
  -   return status;
  -}
  -
  -static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
  -{
  -   struct net_device *dev = mcast->dev;
  -   struct ipoib_dev_priv *priv = netdev_priv(dev);
  -   struct ib_sa_mcmember_rec rec = {
  -#if 0  /* Some SMs don't support send-only yet 
  */
  -   .join_state = 4
  -#else
  -   .join_state = 1
  -#endif
  -   };
  -   int ret = 0;
  -
  -   if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
  -   ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
  -   "multicast joins\n");
  -   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
  -   complete(&mcast->done);
  -   return -ENODEV;
  -   }
  -
  -   rec.mgid = mcast->mcmember.mgid;
  -   rec.port_gid = priv->local_gid;
  -   rec.pkey = cpu_to_be16(priv->pkey);
  -
  -   mutex_lock(&mcast_mutex);
  -   mcast->mc = 

Re: [PATCH 8/9] IB/ipoib: deserialize multicast joins

2015-03-01 Thread Erez Shitrit

On 2/22/2015 2:27 AM, Doug Ledford wrote:

Allow the ipoib layer to attempt to join all outstanding multicast
groups at once.  The ib_sa layer will serialize multiple attempts to
join the same group, but will process attempts to join different groups
in parallel.  Take advantage of that.

In order to make this happen, change the mcast_join_thread to loop
through all needed joins, sending a join request for each one that we
still need to join.  There are a few special cases we handle though:

1) Don't attempt to join anything but the broadcast group until the join
of the broadcast group has succeeded.
2) No longer restart the join task at the end of completion handling.
If we completed successfully, we are done.  The join task now needs kicked
either by mcast_send or mcast_restart_task or mcast_start_thread, but
should not need started anytime else except when scheduling a backoff
attempt to rejoin.
3) No longer use separate join/completion routines for regular and
sendonly joins, pass them all through the same routine and just do the
right thing based on the SENDONLY join flag.
4) Only try to join a SENDONLY join twice, then drop the packets and
quit trying.  We leave the mcast group in the list so that if we get a
new packet, all that we have to do is queue up the packet and restart
the join task and it will automatically try to join twice and then
either send or flush the queue again.

Signed-off-by: Doug Ledford <dledf...@redhat.com>
---
  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 250 -
  1 file changed, 82 insertions(+), 168 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 277e7ac7c4d..c670d9c2cda 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast 
*mcast,
return 0;
  }
  
-static int

-ipoib_mcast_sendonly_join_complete(int status,
-  struct ib_sa_multicast *multicast)
-{
-   struct ipoib_mcast *mcast = multicast->context;
-   struct net_device *dev = mcast->dev;
-   struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-   /*
-* We have to take the mutex to force mcast_sendonly_join to
-* return from ib_sa_multicast_join and set mcast->mc to a
-* valid value.  Otherwise we were racing with ourselves in
-* that we might fail here, but get a valid return from
-* ib_sa_multicast_join after we had cleared mcast->mc here,
-* resulting in mis-matched joins and leaves and a deadlock
-*/
-   mutex_lock(&mcast_mutex);
-
-   /* We trap for port events ourselves. */
-   if (status == -ENETRESET) {
-   status = 0;
-   goto out;
-   }
-
-   if (!status)
-   status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-   if (status) {
-   if (mcast->logcount++ < 20)
-   ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
-   "join failed for %pI6, status %d\n",
-   mcast->mcmember.mgid.raw, status);
-
-   /* Flush out any queued packets */
-   netif_tx_lock_bh(dev);
-   while (!skb_queue_empty(&mcast->pkt_queue)) {
-   ++dev->stats.tx_dropped;
-   dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-   }
-   netif_tx_unlock_bh(dev);
-   __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
-   } else {
-   mcast->backoff = 1;
-   mcast->delay_until = jiffies;
-   __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
-   }
-out:
-   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-   if (status)
-   mcast->mc = NULL;
-   complete(&mcast->done);
-   mutex_unlock(&mcast_mutex);
-   return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-   struct net_device *dev = mcast->dev;
-   struct ipoib_dev_priv *priv = netdev_priv(dev);
-   struct ib_sa_mcmember_rec rec = {
-#if 0  /* Some SMs don't support send-only yet */
-   .join_state = 4
-#else
-   .join_state = 1
-#endif
-   };
-   int ret = 0;
-
-   if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-   ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
-   "multicast joins\n");
-   clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-   complete(&mcast->done);
-   return -ENODEV;
-   }
-
-   rec.mgid = mcast->mcmember.mgid;
-   rec.port_gid = priv->local_gid;
-   rec.pkey = cpu_to_be16(priv->pkey);
-
-   mutex_lock(&mcast_mutex);
-   mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
-