> Subject: RE: [patch net-next] net: mana: Handle hardware reset events when
> probing the device
> 
> 
> 
> > -----Original Message-----
> > From: [email protected] <[email protected]>
> > Sent: Friday, November 14, 2025 9:29 PM
> > To: KY Srinivasan <[email protected]>; Haiyang Zhang
> > <[email protected]>; Wei Liu <[email protected]>; Dexuan Cui
> > <[email protected]>; David S. Miller <[email protected]>; Eric
> > Dumazet <[email protected]>; Jakub Kicinski <[email protected]>;
> Paolo
> > Abeni <[email protected]>; Shradha Gupta
> > <[email protected]>;
> > Simon Horman <[email protected]>; Konstantin Taranov
> > <[email protected]>; Souradeep Chakrabarti
> > <[email protected]>; Erick Archer
> > <[email protected]>; [email protected];
> > [email protected]; [email protected]; linux-
> > [email protected]
> > Cc: Long Li <[email protected]>
> > Subject: [patch net-next] net: mana: Handle hardware reset events when
> > probing the device
> >
> > From: Long Li <[email protected]>
> >
> > When MANA is being probed, it's possible that hardware is in recovery
> > mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC
> in the
> > middle of the probe. Detect such condition and go through the recovery
> > service procedure.
> >
> > Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
> > Signed-off-by: Long Li <[email protected]>
> > ---
> >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 +++++++++++++++-
> --
> >  include/net/mana/gdma.h                       |   9 +-
> >  2 files changed, 122 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index effe0a2f207a..1d9c2beb22b2 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > @@ -15,6 +15,12 @@
> >
> >  struct dentry *mana_debugfs_root;
> >
> > +static struct mana_serv_delayed_work {
> > +   struct delayed_work work;
> > +   struct pci_dev *pdev;
> > +   enum gdma_eqe_type type;
> > +} mns_delayed_wk;
> > +
> >  static u32 mana_gd_r32(struct gdma_context *g, u64 offset)  {
> >     return readl(g->bar0_va + offset);
> > @@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq,
> "NET_MANA");
> >
> >  #define MANA_SERVICE_PERIOD 10
> >
> > +static void mana_serv_rescan(struct pci_dev *pdev) {
> > +   struct pci_bus *parent;
> > +
> > +   pci_lock_rescan_remove();
> > +
> > +   parent = pdev->bus;
> > +   if (!parent) {
> > +           dev_err(&pdev->dev, "MANA service: no parent bus\n");
> > +           goto out;
> > +   }
> > +
> > +   pci_stop_and_remove_bus_device(pdev);
> > +   pci_rescan_bus(parent);
> > +
> > +out:
> > +   pci_unlock_rescan_remove();
> > +}
> > +
> >  static void mana_serv_fpga(struct pci_dev *pdev)  {
> >     struct pci_bus *bus, *parent;
> > @@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
> > {
> >     struct gdma_context *gc = pci_get_drvdata(pdev);
> >     struct hw_channel_context *hwc;
> > +   int ret;
> >
> >     if (!gc) {
> > -           dev_err(&pdev->dev, "MANA service: no GC\n");
> > +           /* Perform PCI rescan on device if GC is not set up */
> > +           dev_err(&pdev->dev, "MANA service: GC not setup,
> > rescanning\n");
> > +           mana_serv_rescan(pdev);
> >             return;
> >     }
> >
> > @@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
> >
> >     msleep(MANA_SERVICE_PERIOD * 1000);
> >
> > -   mana_gd_resume(pdev);
> > +   ret = mana_gd_resume(pdev);
> > +   if (ret == -ETIMEDOUT || ret == -EPROTO) {
> > +           /* Perform PCI rescan on device if we failed on HWC */
> > +           dev_err(&pdev->dev, "MANA service: resume failed,
> > rescanning\n");
> > +           mana_serv_rescan(pdev);
> > +           goto out;
> > +   }
> >
> > -   dev_info(&pdev->dev, "MANA reset cycle completed\n");
> > +   if (ret)
> > +           dev_info(&pdev->dev, "MANA reset cycle failed err %d\n",
> ret);
> > +   else
> > +           dev_info(&pdev->dev, "MANA reset cycle completed\n");
> >
> >  out:
> >     gc->in_service = false;
> > @@ -454,18 +491,9 @@ struct mana_serv_work {
> >     enum gdma_eqe_type type;
> >  };
> >
> > -static void mana_serv_func(struct work_struct *w)
> > +static void mana_do_service(enum gdma_eqe_type type, struct pci_dev
> > *pdev)
> >  {
> > -   struct mana_serv_work *mns_wk;
> > -   struct pci_dev *pdev;
> > -
> > -   mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > -   pdev = mns_wk->pdev;
> > -
> > -   if (!pdev)
> > -           goto out;
> > -
> > -   switch (mns_wk->type) {
> > +   switch (type) {
> >     case GDMA_EQE_HWC_FPGA_RECONFIG:
> >             mana_serv_fpga(pdev);
> >             break;
> > @@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct
> *w)
> >             break;
> >
> >     default:
> > -           dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> > -                   mns_wk->type);
> > +           dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> type);
> >             break;
> >     }
> > +}
> > +
> > +static void mana_serv_delayed_func(struct work_struct *w) {
> > +   struct mana_serv_delayed_work *dwork;
> > +   struct pci_dev *pdev;
> > +
> > +   dwork = container_of(w, struct mana_serv_delayed_work,
> work.work);
> > +   pdev = dwork->pdev;
> > +
> > +   if (pdev)
> > +           mana_do_service(dwork->type, pdev);
> > +
> > +   pci_dev_put(pdev);
> > +}
> > +
> > +static void mana_serv_func(struct work_struct *w) {
> > +   struct mana_serv_work *mns_wk;
> > +   struct pci_dev *pdev;
> > +
> > +   mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > +   pdev = mns_wk->pdev;
> > +
> > +   if (pdev)
> > +           mana_do_service(mns_wk->type, pdev);
> >
> > -out:
> >     pci_dev_put(pdev);
> >     kfree(mns_wk);
> >     module_put(THIS_MODULE);
> > @@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct
> gdma_queue
> > *eq)
> >     case GDMA_EQE_HWC_RESET_REQUEST:
> >             dev_info(gc->dev, "Recv MANA service type:%d\n", type);
> >
> > +           if (atomic_inc_return(&gc->in_probe) == 1) {
> 
> Since we don't care about how many times it entered probe/service,
> test_and_set_bit() should be sufficient here.
> 
> > +                   /*
> > +                    * Device is in probe and we received a hardware
> reset
> > +                    * event, probe() will detect that "in_probe" has
> > +                    * changed and perform service procedure.
> > +                    */
> > +                   dev_info(gc->dev,
> > +                            "Service is to be processed in probe\n");
> > +                   break;
> > +           }
> > +
> >             if (gc->in_service) {
> >                     dev_info(gc->dev, "Already in service\n");
> >                     break;
> > @@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >             gc->mana_pci_debugfs =
> debugfs_create_dir(pci_slot_name(pdev-
> > >slot),
> >
> mana_debugfs_root);
> >
> > +   atomic_set(&gc->in_probe, 0);
> > +
> >     err = mana_gd_setup(pdev);
> >     if (err)
> >             goto unmap_bar;
> > @@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >     if (err)
> >             goto cleanup_mana;
> >
> > +   /*
> > +    * If a hardware reset event has occurred over HWC during probe,
> > +    * rollback and perform hardware reset procedure.
> > +    */
> > +   if (atomic_inc_return(&gc->in_probe) > 1) {
> > +           err = -EPROTO;
> > +           goto cleanup_mana_rdma;
> > +   }
> > +
> >     return 0;
> >
> > +cleanup_mana_rdma:
> > +   mana_rdma_remove(&gc->mana_ib);
> >  cleanup_mana:
> >     mana_remove(&gc->mana, false);
> >  cleanup_gd:
> > @@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> >  disable_dev:
> >     pci_disable_device(pdev);
> >     dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
> > +
> > +   /*
> > +    * Hardware could be in recovery mode and the HWC returns
> TIMEDOUT
> > or
> > +    * EPROTO from mana_gd_setup(), mana_probe() or
> mana_rdma_probe(),
> > or
> > +    * we received a hardware reset event over HWC interrupt. In this
> > case,
> > +    * perform the device recovery procedure after
> MANA_SERVICE_PERIOD
> > +    * seconds.
> > +    */
> > +   if (err == -ETIMEDOUT || err == -EPROTO) {
> > +           dev_info(&pdev->dev, "Start MANA recovery mode\n");
> > +
> > +           mns_delayed_wk.pdev = pci_dev_get(pdev);
> > +           mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
> > +
> > +           INIT_DELAYED_WORK(&mns_delayed_wk.work,
> > mana_serv_delayed_func);
> 
> To avoid calling INIT_DELAYED_WORK() potentially multiple times, this should
> be done in mana_driver_init().
> 
> > +           schedule_delayed_work(&mns_delayed_wk.work,
> > +                                 secs_to_jiffies(MANA_SERVICE_PERIOD));
> > +   }
> > +
> >     return err;
> >  }
> >
> > @@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
> >
> >  static void __exit mana_driver_exit(void)  {
> > +   cancel_delayed_work_sync(&mns_delayed_wk.work);
> 
> I think we should call disable_delayed_work_sync() to prevent the work
> from being scheduled again after this line.

Thank you. I will send a v2 to address all the comments and support multiple PCI
devices in BM mode.

Long

Reply via email to