> -----Original Message-----
> From: [email protected] <[email protected]>
> Sent: Friday, November 14, 2025 9:29 PM
> To: KY Srinivasan <[email protected]>; Haiyang Zhang
> <[email protected]>; Wei Liu <[email protected]>; Dexuan Cui
> <[email protected]>; David S. Miller <[email protected]>; Eric Dumazet
> <[email protected]>; Jakub Kicinski <[email protected]>; Paolo Abeni
> <[email protected]>; Shradha Gupta <[email protected]>;
> Simon Horman <[email protected]>; Konstantin Taranov
> <[email protected]>; Souradeep Chakrabarti
> <[email protected]>; Erick Archer
> <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]
> Cc: Long Li <[email protected]>
> Subject: [patch net-next] net: mana: Handle hardware reset events when
> probing the device
> 
> From: Long Li <[email protected]>
> 
> When MANA is being probed, it's possible that hardware is in recovery
> mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC in the
> middle of the probe. Detect such condition and go through the recovery
> service procedure.
> 
> Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
> Signed-off-by: Long Li <[email protected]>
> ---
>  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 +++++++++++++++---
>  include/net/mana/gdma.h                       |   9 +-
>  2 files changed, 122 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index effe0a2f207a..1d9c2beb22b2 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -15,6 +15,12 @@
> 
>  struct dentry *mana_debugfs_root;
> 
> +static struct mana_serv_delayed_work {
> +     struct delayed_work work;
> +     struct pci_dev *pdev;
> +     enum gdma_eqe_type type;
> +} mns_delayed_wk;
> +
>  static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
>  {
>       return readl(g->bar0_va + offset);
> @@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
> 
>  #define MANA_SERVICE_PERIOD 10
> 
> +static void mana_serv_rescan(struct pci_dev *pdev)
> +{
> +     struct pci_bus *parent;
> +
> +     pci_lock_rescan_remove();
> +
> +     parent = pdev->bus;
> +     if (!parent) {
> +             dev_err(&pdev->dev, "MANA service: no parent bus\n");
> +             goto out;
> +     }
> +
> +     pci_stop_and_remove_bus_device(pdev);
> +     pci_rescan_bus(parent);
> +
> +out:
> +     pci_unlock_rescan_remove();
> +}
> +
>  static void mana_serv_fpga(struct pci_dev *pdev)
>  {
>       struct pci_bus *bus, *parent;
> @@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
>  {
>       struct gdma_context *gc = pci_get_drvdata(pdev);
>       struct hw_channel_context *hwc;
> +     int ret;
> 
>       if (!gc) {
> -             dev_err(&pdev->dev, "MANA service: no GC\n");
> +             /* Perform PCI rescan on device if GC is not set up */
> +             dev_err(&pdev->dev, "MANA service: GC not setup,
> rescanning\n");
> +             mana_serv_rescan(pdev);
>               return;
>       }
> 
> @@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
> 
>       msleep(MANA_SERVICE_PERIOD * 1000);
> 
> -     mana_gd_resume(pdev);
> +     ret = mana_gd_resume(pdev);
> +     if (ret == -ETIMEDOUT || ret == -EPROTO) {
> +             /* Perform PCI rescan on device if we failed on HWC */
> +             dev_err(&pdev->dev, "MANA service: resume failed,
> rescanning\n");
> +             mana_serv_rescan(pdev);
> +             goto out;
> +     }
> 
> -     dev_info(&pdev->dev, "MANA reset cycle completed\n");
> +     if (ret)
> +             dev_info(&pdev->dev, "MANA reset cycle failed err %d\n", ret);
> +     else
> +             dev_info(&pdev->dev, "MANA reset cycle completed\n");
> 
>  out:
>       gc->in_service = false;
> @@ -454,18 +491,9 @@ struct mana_serv_work {
>       enum gdma_eqe_type type;
>  };
> 
> -static void mana_serv_func(struct work_struct *w)
> +static void mana_do_service(enum gdma_eqe_type type, struct pci_dev
> *pdev)
>  {
> -     struct mana_serv_work *mns_wk;
> -     struct pci_dev *pdev;
> -
> -     mns_wk = container_of(w, struct mana_serv_work, serv_work);
> -     pdev = mns_wk->pdev;
> -
> -     if (!pdev)
> -             goto out;
> -
> -     switch (mns_wk->type) {
> +     switch (type) {
>       case GDMA_EQE_HWC_FPGA_RECONFIG:
>               mana_serv_fpga(pdev);
>               break;
> @@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct *w)
>               break;
> 
>       default:
> -             dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> -                     mns_wk->type);
> +             dev_err(&pdev->dev, "MANA service: unknown type %d\n", type);
>               break;
>       }
> +}
> +
> +static void mana_serv_delayed_func(struct work_struct *w)
> +{
> +     struct mana_serv_delayed_work *dwork;
> +     struct pci_dev *pdev;
> +
> +     dwork = container_of(w, struct mana_serv_delayed_work, work.work);
> +     pdev = dwork->pdev;
> +
> +     if (pdev)
> +             mana_do_service(dwork->type, pdev);
> +
> +     pci_dev_put(pdev);
> +}
> +
> +static void mana_serv_func(struct work_struct *w)
> +{
> +     struct mana_serv_work *mns_wk;
> +     struct pci_dev *pdev;
> +
> +     mns_wk = container_of(w, struct mana_serv_work, serv_work);
> +     pdev = mns_wk->pdev;
> +
> +     if (pdev)
> +             mana_do_service(mns_wk->type, pdev);
> 
> -out:
>       pci_dev_put(pdev);
>       kfree(mns_wk);
>       module_put(THIS_MODULE);
> @@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct gdma_queue
> *eq)
>       case GDMA_EQE_HWC_RESET_REQUEST:
>               dev_info(gc->dev, "Recv MANA service type:%d\n", type);
> 
> +             if (atomic_inc_return(&gc->in_probe) == 1) {

Since we don't care about how many times it entered probe/service,
test_and_set_bit() should be sufficient here.

> +                     /*
> +                      * Device is in probe and we received an hardware reset
> +                      * event, probe() will detect that "in_probe" has
> +                      * changed and perform service procedure.
> +                      */
> +                     dev_info(gc->dev,
> +                              "Service is to be processed in probe\n");
> +                     break;
> +             }
> +
>               if (gc->in_service) {
>                       dev_info(gc->dev, "Already in service\n");
>                       break;
> @@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const
> struct pci_device_id *ent)
>               gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev-
> >slot),
>                                                         mana_debugfs_root);
> 
> +     atomic_set(&gc->in_probe, 0);
> +
>       err = mana_gd_setup(pdev);
>       if (err)
>               goto unmap_bar;
> @@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>       if (err)
>               goto cleanup_mana;
> 
> +     /*
> +      * If a hardware reset event has occurred over HWC during probe,
> +      * rollback and perform hardware reset procedure.
> +      */
> +     if (atomic_inc_return(&gc->in_probe) > 1) {
> +             err = -EPROTO;
> +             goto cleanup_mana_rdma;
> +     }
> +
>       return 0;
> 
> +cleanup_mana_rdma:
> +     mana_rdma_remove(&gc->mana_ib);
>  cleanup_mana:
>       mana_remove(&gc->mana, false);
>  cleanup_gd:
> @@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>  disable_dev:
>       pci_disable_device(pdev);
>       dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
> +
> +     /*
> +      * Hardware could be in recovery mode and the HWC returns TIMEDOUT
> or
> +      * EPROTO from mana_gd_setup(), mana_probe() or mana_rdma_probe(),
> or
> +      * we received a hardware reset event over HWC interrupt. In this
> case,
> +      * perform the device recovery procedure after MANA_SERVICE_PERIOD
> +      * seconds.
> +      */
> +     if (err == -ETIMEDOUT || err == -EPROTO) {
> +             dev_info(&pdev->dev, "Start MANA recovery mode\n");
> +
> +             mns_delayed_wk.pdev = pci_dev_get(pdev);
> +             mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
> +
> +             INIT_DELAYED_WORK(&mns_delayed_wk.work,
> mana_serv_delayed_func);

To avoid calling INIT_DELAYED_WORK() potentially multiple times, this
should be done once in mana_driver_init().

> +             schedule_delayed_work(&mns_delayed_wk.work,
> +                                   secs_to_jiffies(MANA_SERVICE_PERIOD));
> +     }
> +
>       return err;
>  }
> 
> @@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
> 
>  static void __exit mana_driver_exit(void)
>  {
> +     cancel_delayed_work_sync(&mns_delayed_wk.work);

I think we should call disable_delayed_work_sync() here to prevent the work
from being scheduled again after this line.

> +
>       pci_unregister_driver(&mana_driver);
> 
>       debugfs_remove(mana_debugfs_root);
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 637f42485dba..1bb4c6ada2b6 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -430,6 +430,9 @@ struct gdma_context {
>       u64 pf_cap_flags1;
> 
>       struct workqueue_struct *service_wq;
> +
> +     /* Count how many times we have finished probe or HWC events */
> +     atomic_t                in_probe;
>  };
> 
>  static inline bool mana_gd_is_mana(struct gdma_dev *gd)
> @@ -592,6 +595,9 @@ enum {
>  #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
>  #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6)
> 
> +/* Driver can handle hardware reset events during probe */
> +#define GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE BIT(22)
> +
>  #define GDMA_DRV_CAP_FLAGS1 \
>       (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
>        GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
> @@ -601,7 +607,8 @@ enum {
>        GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
>        GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
>        GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
> -      GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE)
> +      GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \
> +      GDMA_DRV_CAP_FLAG_1_RECOVER_PROBE)
> 
>  #define GDMA_DRV_CAP_FLAGS2 0
> 
> --
> 2.43.0


Reply via email to