On 2/19/07, Moni Levy <[EMAIL PROTECTED]> wrote: > This issue was found during partitioning & SM failover testing. The fix was > tested for 24 > hours with pkey reshuffling every few seconds. The patch applies to Roland's > master > branch.
I found an issue with that patch, I'll post an updated one soon. -- Moni > > SM reconfiguration or failover possibly causes a shuffling of the values in > the port pkey > table. The current implementation only queries for the index of the pkey > once, when it > creates the device QP and after that moves it into working state, and hence > does not > address this scenario. Fix this by using the PKEY_CHANGE event as a trigger to > reconfigure the device QP. > > Signed-off-by: Moni Levy <[EMAIL PROTECTED]> > --- > ipoib.h | 2 ++ > ipoib_ib.c | 22 ++++++++++++++++++++-- > ipoib_main.c | 1 + > ipoib_verbs.c | 4 +++- > 4 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h > b/drivers/infiniband/ulp/ipoib/ipoib.h > index 07deee8..ed854e8 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > @@ -139,6 +139,7 @@ struct ipoib_dev_priv { > struct delayed_work pkey_task; > struct delayed_work mcast_task; > struct work_struct flush_task; > + struct work_struct flush_restart_qp_task; > struct work_struct restart_task; > struct delayed_work ah_reap_task; > > @@ -261,6 +262,7 @@ struct ipoib_dev_priv *ipoib_intf_alloc( > > int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int > port); > void ipoib_ib_dev_flush(struct work_struct *work); > +void ipoib_ib_dev_flush_restart_qp(struct work_struct *work); > void ipoib_ib_dev_cleanup(struct net_device *dev); > > int ipoib_ib_dev_open(struct net_device *dev); > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > index 59d9594..5e2ada9 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > @@ -611,7 +611,7 @@ int ipoib_ib_dev_init(struct net_device > return 0; > } > > -void ipoib_ib_dev_flush(struct work_struct *work) > +static void __ipoib_ib_dev_flush(struct work_struct *work, int restart_qp) > { > struct ipoib_dev_priv *cpriv, 
*priv = > container_of(work, struct ipoib_dev_priv, flush_task); > @@ -630,6 +630,12 @@ void ipoib_ib_dev_flush(struct work_stru > ipoib_dbg(priv, "flushing\n"); > > ipoib_ib_dev_down(dev, 0); > + > + if (restart_qp) { > + ipoib_dbg(priv, "restarting the device QP\n"); > + ipoib_ib_dev_stop(dev); > + ipoib_ib_dev_open(dev); > + } > > /* > * The device could have been brought down between the start and when > @@ -644,11 +650,23 @@ void ipoib_ib_dev_flush(struct work_stru > > /* Flush any child interfaces too */ > list_for_each_entry(cpriv, &priv->child_intfs, list) > - ipoib_ib_dev_flush(&cpriv->flush_task); > + __ipoib_ib_dev_flush(&cpriv->flush_task, restart_qp); > > mutex_unlock(&priv->vlan_mutex); > } > > +void ipoib_ib_dev_flush(struct work_struct *work) > +{ > + /* We only restart the QP in case of PKEY change event */ > + __ipoib_ib_dev_flush(work, 0); > +} > + > +void ipoib_ib_dev_flush_restart_qp(struct work_struct *work) > +{ > + /* We only restart the QP in case of PKEY change event */ > + __ipoib_ib_dev_flush(work, 1); > +} > + > void ipoib_ib_dev_cleanup(struct net_device *dev) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c > b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index 705eb1d..da46b79 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -942,6 +942,7 @@ static void ipoib_setup(struct net_devic > INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); > INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); > INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); > + INIT_WORK(&priv->flush_restart_qp_task, > ipoib_ib_dev_flush_restart_qp); > INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); > INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); > } > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c > b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c > index 7b717c6..c249915 100644 > --- 
a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c > @@ -252,12 +252,14 @@ void ipoib_event(struct ib_event_handler > container_of(handler, struct ipoib_dev_priv, event_handler); > > if (record->event == IB_EVENT_PORT_ERR || > - record->event == IB_EVENT_PKEY_CHANGE || > record->event == IB_EVENT_PORT_ACTIVE || > record->event == IB_EVENT_LID_CHANGE || > record->event == IB_EVENT_SM_CHANGE || > record->event == IB_EVENT_CLIENT_REREGISTER) { > ipoib_dbg(priv, "Port state change event\n"); > queue_work(ipoib_workqueue, &priv->flush_task); > + } else if (record->event == IB_EVENT_PKEY_CHANGE) { > + ipoib_dbg(priv, "PKEY change event\n"); > + queue_work(ipoib_workqueue, &priv->flush_restart_qp_task); > } > } > > _______________________________________________ > openib-general mailing list > openib-general@openib.org > http://openib.org/mailman/listinfo/openib-general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general