Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-25 Thread Michał Winiarski
On Thu, Apr 25, 2024 at 11:47:46AM +0530, Aravind Iddamsetty wrote:
> 
> On 25/04/24 04:59, Michał Winiarski wrote:
> > On Wed, Apr 24, 2024 at 10:42:59AM +0530, Aravind Iddamsetty wrote:
> >> On 24/04/24 05:19, Michał Winiarski wrote:
> >>> On Mon, Apr 22, 2024 at 12:27:56PM +0530, Aravind Iddamsetty wrote:
>  PCI subsystem provides callbacks to inform the driver about a request to
>  do function level reset by user, initiated by writing to sysfs entry
>  /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
>  without the need to do unbind and rebind as the driver needs to
>  reinitialize the device afresh post FLR.
> 
>  v2:
>  1. separate out gt idle and pci save/restore to a separate patch (Lucas)
>  2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini
> 
>  v3: declare xe_pci_err_handlers as static(Michal)
> 
>  Cc: Rodrigo Vivi 
>  Cc: Lucas De Marchi 
>  Cc: Michal Wajdeczko 
> 
>  Reviewed-by: Rodrigo Vivi 
>  Signed-off-by: Aravind Iddamsetty 
>  ---
>   drivers/gpu/drm/xe/Makefile  |  1 +
>   drivers/gpu/drm/xe/xe_device_types.h |  3 +
>   drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
>   drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
>   drivers/gpu/drm/xe/xe_pci.h  |  2 +
>   drivers/gpu/drm/xe/xe_pci_err.c  | 88 
>   drivers/gpu/drm/xe/xe_pci_err.h  | 13 
>   7 files changed, 119 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
>   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
> 
>  diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>  index 8bc62bfbc679..693971a1fac0 100644
>  --- a/drivers/gpu/drm/xe/Makefile
>  +++ b/drivers/gpu/drm/xe/Makefile
>  @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
>   xe_module.o \
>   xe_pat.o \
>   xe_pci.o \
>  +xe_pci_err.o \
>   xe_pcode.o \
>   xe_pm.o \
>   xe_preempt_fence.o \
>  diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
>  b/drivers/gpu/drm/xe/xe_device_types.h
>  index 0a66555229e9..8c749b378a92 100644
>  --- a/drivers/gpu/drm/xe/xe_device_types.h
>  +++ b/drivers/gpu/drm/xe/xe_device_types.h
>  @@ -465,6 +465,9 @@ struct xe_device {
>   /** @pci_state: PCI state of device */
>   struct pci_saved_state *pci_state;
>   
>  +/** @pci_device_is_reset: device went through PCIe FLR */
>  +bool pci_device_is_reset;
>  +
>   /* private: */
>   
>   #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>  diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c 
>  b/drivers/gpu/drm/xe/xe_guc_pc.c
>  index 509649d0e65e..efba0fbe2f5c 100644
>  --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>  +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>  @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, 
>  void *arg)
>   return;
>   }
>   
>  +/* We already have done this before going through a reset, so 
>  skip here */
>  +if (xe->pci_device_is_reset)
>  +return;
>  +
>   XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), 
>  XE_FORCEWAKE_ALL));
>   XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>   XE_WARN_ON(xe_guc_pc_stop(pc));
>  diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>  index a62300990e19..b5a582afc9e7 100644
>  --- a/drivers/gpu/drm/xe/xe_pci.c
>  +++ b/drivers/gpu/drm/xe/xe_pci.c
>  @@ -23,6 +23,7 @@
>   #include "xe_macros.h"
>   #include "xe_mmio.h"
>   #include "xe_module.h"
>  +#include "xe_pci_err.h"
>   #include "xe_pci_types.h"
>   #include "xe_pm.h"
>   #include "xe_sriov.h"
>  @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>   pci_set_drvdata(pdev, NULL);
>   }
>   
>  -static int xe_pci_probe(struct pci_dev *pdev, const struct 
>  pci_device_id *ent)
>  +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   {
>   const struct xe_device_desc *desc = (const void 
>  *)ent->driver_data;
>   const struct xe_subplatform_desc *subplatform_desc;
>  @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
>   };
>   #endif
>   
>  +static const struct pci_error_handlers xe_pci_err_handlers = {
>  +.reset_prepare = xe_pci_reset_prepare,
>  +.reset_done = xe_pci_reset_done,
>  +};
>  +
>   static struct pci_driver xe_pci_driver = {
>   .name = DRIVER_NAME,
>   .id_table = pciidlist,
>  @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
>   #ifdef CONFIG_PM_SLEEP
>   .

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-24 Thread Aravind Iddamsetty


On 25/04/24 04:59, Michał Winiarski wrote:
> On Wed, Apr 24, 2024 at 10:42:59AM +0530, Aravind Iddamsetty wrote:
>> On 24/04/24 05:19, Michał Winiarski wrote:
>>> On Mon, Apr 22, 2024 at 12:27:56PM +0530, Aravind Iddamsetty wrote:
 PCI subsystem provides callbacks to inform the driver about a request to
 do function level reset by user, initiated by writing to sysfs entry
 /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
 without the need to do unbind and rebind as the driver needs to
 reinitialize the device afresh post FLR.

 v2:
 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini

 v3: declare xe_pci_err_handlers as static(Michal)

 Cc: Rodrigo Vivi 
 Cc: Lucas De Marchi 
 Cc: Michal Wajdeczko 

 Reviewed-by: Rodrigo Vivi 
 Signed-off-by: Aravind Iddamsetty 
 ---
  drivers/gpu/drm/xe/Makefile  |  1 +
  drivers/gpu/drm/xe/xe_device_types.h |  3 +
  drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
  drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
  drivers/gpu/drm/xe/xe_pci.h  |  2 +
  drivers/gpu/drm/xe/xe_pci_err.c  | 88 
  drivers/gpu/drm/xe/xe_pci_err.h  | 13 
  7 files changed, 119 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h

 diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
 index 8bc62bfbc679..693971a1fac0 100644
 --- a/drivers/gpu/drm/xe/Makefile
 +++ b/drivers/gpu/drm/xe/Makefile
 @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
xe_module.o \
xe_pat.o \
xe_pci.o \
 +  xe_pci_err.o \
xe_pcode.o \
xe_pm.o \
xe_preempt_fence.o \
 diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
 b/drivers/gpu/drm/xe/xe_device_types.h
 index 0a66555229e9..8c749b378a92 100644
 --- a/drivers/gpu/drm/xe/xe_device_types.h
 +++ b/drivers/gpu/drm/xe/xe_device_types.h
 @@ -465,6 +465,9 @@ struct xe_device {
/** @pci_state: PCI state of device */
struct pci_saved_state *pci_state;
  
 +  /** @pci_device_is_reset: device went through PCIe FLR */
 +  bool pci_device_is_reset;
 +
/* private: */
  
  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c 
 b/drivers/gpu/drm/xe/xe_guc_pc.c
 index 509649d0e65e..efba0fbe2f5c 100644
 --- a/drivers/gpu/drm/xe/xe_guc_pc.c
 +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
 @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, 
 void *arg)
return;
}
  
 +  /* We already have done this before going through a reset, so skip here 
 */
 +  if (xe->pci_device_is_reset)
 +  return;
 +
XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
XE_WARN_ON(xe_guc_pc_stop(pc));
 diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
 index a62300990e19..b5a582afc9e7 100644
 --- a/drivers/gpu/drm/xe/xe_pci.c
 +++ b/drivers/gpu/drm/xe/xe_pci.c
 @@ -23,6 +23,7 @@
  #include "xe_macros.h"
  #include "xe_mmio.h"
  #include "xe_module.h"
 +#include "xe_pci_err.h"
  #include "xe_pci_types.h"
  #include "xe_pm.h"
  #include "xe_sriov.h"
 @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
  }
  
 -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
 *ent)
 +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
  {
const struct xe_device_desc *desc = (const void *)ent->driver_data;
const struct xe_subplatform_desc *subplatform_desc;
 @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
  };
  #endif
  
 +static const struct pci_error_handlers xe_pci_err_handlers = {
 +  .reset_prepare = xe_pci_reset_prepare,
 +  .reset_done = xe_pci_reset_done,
 +};
 +
  static struct pci_driver xe_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
 @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
  #ifdef CONFIG_PM_SLEEP
.driver.pm = &xe_pm_ops,
  #endif
 +  .err_handler = &xe_pci_err_handlers,
  };
  
  int xe_register_pci_driver(void)
 diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
 index 73b90a430d1f..9faf5380a09e 100644
 --- a/drivers/gpu/drm/xe/xe_pci.h
 +++ b/drivers/gpu/drm/xe/xe_pci.h
 @@ -7,8 +7,10 @@
  #define _XE_PCI_H_
  
  struct pci_dev;
 +struct pci_device_id;
  
  int xe_register_pci_driver(void)

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-24 Thread Aravind Iddamsetty


On 24/04/24 16:42, Nilawar, Badal wrote:
>
>
> On 24-04-2024 08:42, Aravind Iddamsetty wrote:
>>
>> On 23/04/24 20:34, Nilawar, Badal wrote:
>>>
>>>
>>> On 22-04-2024 12:27, Aravind Iddamsetty wrote:
 PCI subsystem provides callbacks to inform the driver about a request to
 do function level reset by user, initiated by writing to sysfs entry
 /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
 without the need to do unbind and rebind as the driver needs to
 reinitialize the device afresh post FLR.

 v2:
 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini

 v3: declare xe_pci_err_handlers as static(Michal)

 Cc: Rodrigo Vivi 
 Cc: Lucas De Marchi 
 Cc: Michal Wajdeczko 

 Reviewed-by: Rodrigo Vivi 
 Signed-off-by: Aravind Iddamsetty 
 ---
    drivers/gpu/drm/xe/Makefile  |  1 +
    drivers/gpu/drm/xe/xe_device_types.h |  3 +
    drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
    drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
    drivers/gpu/drm/xe/xe_pci.h  |  2 +
    drivers/gpu/drm/xe/xe_pci_err.c  | 88 
    drivers/gpu/drm/xe/xe_pci_err.h  | 13 
    7 files changed, 119 insertions(+), 1 deletion(-)
    create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
    create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h

 diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
 index 8bc62bfbc679..693971a1fac0 100644
 --- a/drivers/gpu/drm/xe/Makefile
 +++ b/drivers/gpu/drm/xe/Makefile
 @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
    xe_module.o \
    xe_pat.o \
    xe_pci.o \
 +    xe_pci_err.o \
    xe_pcode.o \
    xe_pm.o \
    xe_preempt_fence.o \
 diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
 b/drivers/gpu/drm/xe/xe_device_types.h
 index 0a66555229e9..8c749b378a92 100644
 --- a/drivers/gpu/drm/xe/xe_device_types.h
 +++ b/drivers/gpu/drm/xe/xe_device_types.h
 @@ -465,6 +465,9 @@ struct xe_device {
    /** @pci_state: PCI state of device */
    struct pci_saved_state *pci_state;
    +    /** @pci_device_is_reset: device went through PCIe FLR */
 +    bool pci_device_is_reset;
 +
    /* private: */
      #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
 diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c 
 b/drivers/gpu/drm/xe/xe_guc_pc.c
 index 509649d0e65e..efba0fbe2f5c 100644
 --- a/drivers/gpu/drm/xe/xe_guc_pc.c
 +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
 @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, 
 void *arg)
    return;
    }
    +    /* We already have done this before going through a reset, so skip 
 here */
 +    if (xe->pci_device_is_reset)
 +    return;
 +
    XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), 
 XE_FORCEWAKE_ALL));
    XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
    XE_WARN_ON(xe_guc_pc_stop(pc));
 diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
 index a62300990e19..b5a582afc9e7 100644
 --- a/drivers/gpu/drm/xe/xe_pci.c
 +++ b/drivers/gpu/drm/xe/xe_pci.c
 @@ -23,6 +23,7 @@
    #include "xe_macros.h"
    #include "xe_mmio.h"
    #include "xe_module.h"
 +#include "xe_pci_err.h"
    #include "xe_pci_types.h"
    #include "xe_pm.h"
    #include "xe_sriov.h"
 @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
    pci_set_drvdata(pdev, NULL);
    }
    -static int xe_pci_probe(struct pci_dev *pdev, const struct 
 pci_device_id *ent)
 +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    {
    const struct xe_device_desc *desc = (const void *)ent->driver_data;
    const struct xe_subplatform_desc *subplatform_desc;
 @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
    };
    #endif
    +static const struct pci_error_handlers xe_pci_err_handlers = {
 +    .reset_prepare = xe_pci_reset_prepare,
 +    .reset_done = xe_pci_reset_done,
 +};
 +
    static struct pci_driver xe_pci_driver = {
    .name = DRIVER_NAME,
    .id_table = pciidlist,
 @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
    #ifdef CONFIG_PM_SLEEP
    .driver.pm = &xe_pm_ops,
    #endif
 +    .err_handler = &xe_pci_err_handlers,
    };
      int xe_register_pci_driver(void)
 diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
 index 73b90a430d1f..9faf5380a09e 100644
 --- a/drivers/gpu/drm/xe/xe_pci.h
 +++ b/drivers/gpu/drm/xe/xe_pci.h
 @@ -7,8 +7,10 @@
    #define _XE_PCI_H_
     

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-24 Thread Michał Winiarski
On Wed, Apr 24, 2024 at 10:42:59AM +0530, Aravind Iddamsetty wrote:
> 
> On 24/04/24 05:19, Michał Winiarski wrote:
> > On Mon, Apr 22, 2024 at 12:27:56PM +0530, Aravind Iddamsetty wrote:
> >> PCI subsystem provides callbacks to inform the driver about a request to
> >> do function level reset by user, initiated by writing to sysfs entry
> >> /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
> >> without the need to do unbind and rebind as the driver needs to
> >> reinitialize the device afresh post FLR.
> >>
> >> v2:
> >> 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
> >> 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini
> >>
> >> v3: declare xe_pci_err_handlers as static(Michal)
> >>
> >> Cc: Rodrigo Vivi 
> >> Cc: Lucas De Marchi 
> >> Cc: Michal Wajdeczko 
> >>
> >> Reviewed-by: Rodrigo Vivi 
> >> Signed-off-by: Aravind Iddamsetty 
> >> ---
> >>  drivers/gpu/drm/xe/Makefile  |  1 +
> >>  drivers/gpu/drm/xe/xe_device_types.h |  3 +
> >>  drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
> >>  drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
> >>  drivers/gpu/drm/xe/xe_pci.h  |  2 +
> >>  drivers/gpu/drm/xe/xe_pci_err.c  | 88 
> >>  drivers/gpu/drm/xe/xe_pci_err.h  | 13 
> >>  7 files changed, 119 insertions(+), 1 deletion(-)
> >>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
> >>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
> >>
> >> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> >> index 8bc62bfbc679..693971a1fac0 100644
> >> --- a/drivers/gpu/drm/xe/Makefile
> >> +++ b/drivers/gpu/drm/xe/Makefile
> >> @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
> >>xe_module.o \
> >>xe_pat.o \
> >>xe_pci.o \
> >> +  xe_pci_err.o \
> >>xe_pcode.o \
> >>xe_pm.o \
> >>xe_preempt_fence.o \
> >> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
> >> b/drivers/gpu/drm/xe/xe_device_types.h
> >> index 0a66555229e9..8c749b378a92 100644
> >> --- a/drivers/gpu/drm/xe/xe_device_types.h
> >> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> >> @@ -465,6 +465,9 @@ struct xe_device {
> >>/** @pci_state: PCI state of device */
> >>struct pci_saved_state *pci_state;
> >>  
> >> +  /** @pci_device_is_reset: device went through PCIe FLR */
> >> +  bool pci_device_is_reset;
> >> +
> >>/* private: */
> >>  
> >>  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> >> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c 
> >> b/drivers/gpu/drm/xe/xe_guc_pc.c
> >> index 509649d0e65e..efba0fbe2f5c 100644
> >> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> >> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> >> @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, 
> >> void *arg)
> >>return;
> >>}
> >>  
> >> +  /* We already have done this before going through a reset, so skip here 
> >> */
> >> +  if (xe->pci_device_is_reset)
> >> +  return;
> >> +
> >>XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
> >>XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
> >>XE_WARN_ON(xe_guc_pc_stop(pc));
> >> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> >> index a62300990e19..b5a582afc9e7 100644
> >> --- a/drivers/gpu/drm/xe/xe_pci.c
> >> +++ b/drivers/gpu/drm/xe/xe_pci.c
> >> @@ -23,6 +23,7 @@
> >>  #include "xe_macros.h"
> >>  #include "xe_mmio.h"
> >>  #include "xe_module.h"
> >> +#include "xe_pci_err.h"
> >>  #include "xe_pci_types.h"
> >>  #include "xe_pm.h"
> >>  #include "xe_sriov.h"
> >> @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
> >>pci_set_drvdata(pdev, NULL);
> >>  }
> >>  
> >> -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
> >> *ent)
> >> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> >>  {
> >>const struct xe_device_desc *desc = (const void *)ent->driver_data;
> >>const struct xe_subplatform_desc *subplatform_desc;
> >> @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
> >>  };
> >>  #endif
> >>  
> >> +static const struct pci_error_handlers xe_pci_err_handlers = {
> >> +  .reset_prepare = xe_pci_reset_prepare,
> >> +  .reset_done = xe_pci_reset_done,
> >> +};
> >> +
> >>  static struct pci_driver xe_pci_driver = {
> >>.name = DRIVER_NAME,
> >>.id_table = pciidlist,
> >> @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
> >>  #ifdef CONFIG_PM_SLEEP
> >>.driver.pm = &xe_pm_ops,
> >>  #endif
> >> +  .err_handler = &xe_pci_err_handlers,
> >>  };
> >>  
> >>  int xe_register_pci_driver(void)
> >> diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
> >> index 73b90a430d1f..9faf5380a09e 100644
> >> --- a/drivers/gpu/drm/xe/xe_pci.h
> >> +++ b/drivers/gpu/drm/xe/xe_pci.h
> >> @@ -7,8 +7,10 @@
> >>  #define _XE_PCI_H_
> >>  
> >>  struct pci_dev;
> >> +struct pci_device_id;
> >>  
> >>  int xe_register_pci_driver(void);
> >>  void xe_unregister_pci_driver(void);

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-24 Thread Nilawar, Badal




On 24-04-2024 08:42, Aravind Iddamsetty wrote:


On 23/04/24 20:34, Nilawar, Badal wrote:



On 22-04-2024 12:27, Aravind Iddamsetty wrote:

PCI subsystem provides callbacks to inform the driver about a request to
do function level reset by user, initiated by writing to sysfs entry
/sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
without the need to do unbind and rebind as the driver needs to
reinitialize the device afresh post FLR.

v2:
1. separate out gt idle and pci save/restore to a separate patch (Lucas)
2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini

v3: declare xe_pci_err_handlers as static(Michal)

Cc: Rodrigo Vivi 
Cc: Lucas De Marchi 
Cc: Michal Wajdeczko 

Reviewed-by: Rodrigo Vivi 
Signed-off-by: Aravind Iddamsetty 
---
   drivers/gpu/drm/xe/Makefile  |  1 +
   drivers/gpu/drm/xe/xe_device_types.h |  3 +
   drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
   drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
   drivers/gpu/drm/xe/xe_pci.h  |  2 +
   drivers/gpu/drm/xe/xe_pci_err.c  | 88 
   drivers/gpu/drm/xe/xe_pci_err.h  | 13 
   7 files changed, 119 insertions(+), 1 deletion(-)
   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 8bc62bfbc679..693971a1fac0 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -117,6 +117,7 @@ xe-y += xe_bb.o \
   xe_module.o \
   xe_pat.o \
   xe_pci.o \
+    xe_pci_err.o \
   xe_pcode.o \
   xe_pm.o \
   xe_preempt_fence.o \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index 0a66555229e9..8c749b378a92 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -465,6 +465,9 @@ struct xe_device {
   /** @pci_state: PCI state of device */
   struct pci_saved_state *pci_state;
   +    /** @pci_device_is_reset: device went through PCIe FLR */
+    bool pci_device_is_reset;
+
   /* private: */
     #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 509649d0e65e..efba0fbe2f5c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void 
*arg)
   return;
   }
   +    /* We already have done this before going through a reset, so skip here 
*/
+    if (xe->pci_device_is_reset)
+    return;
+
   XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
   XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
   XE_WARN_ON(xe_guc_pc_stop(pc));
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index a62300990e19..b5a582afc9e7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -23,6 +23,7 @@
   #include "xe_macros.h"
   #include "xe_mmio.h"
   #include "xe_module.h"
+#include "xe_pci_err.h"
   #include "xe_pci_types.h"
   #include "xe_pm.h"
   #include "xe_sriov.h"
@@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
   pci_set_drvdata(pdev, NULL);
   }
   -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
*ent)
+int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
   {
   const struct xe_device_desc *desc = (const void *)ent->driver_data;
   const struct xe_subplatform_desc *subplatform_desc;
@@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
   };
   #endif
   +static const struct pci_error_handlers xe_pci_err_handlers = {
+    .reset_prepare = xe_pci_reset_prepare,
+    .reset_done = xe_pci_reset_done,
+};
+
   static struct pci_driver xe_pci_driver = {
   .name = DRIVER_NAME,
   .id_table = pciidlist,
@@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
   #ifdef CONFIG_PM_SLEEP
   .driver.pm = &xe_pm_ops,
   #endif
+    .err_handler = &xe_pci_err_handlers,
   };
     int xe_register_pci_driver(void)
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
index 73b90a430d1f..9faf5380a09e 100644
--- a/drivers/gpu/drm/xe/xe_pci.h
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -7,8 +7,10 @@
   #define _XE_PCI_H_
     struct pci_dev;
+struct pci_device_id;
     int xe_register_pci_driver(void);
   void xe_unregister_pci_driver(void);
   void xe_load_pci_state(struct pci_dev *pdev);
+int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
   #endif
diff --git a/drivers/gpu/drm/xe/xe_pci_err.c b/drivers/gpu/drm/xe/xe_pci_err.c
new file mode 100644
index 000000000000..5306925ea2fa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_err.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_pci

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-23 Thread Aravind Iddamsetty


On 24/04/24 05:19, Michał Winiarski wrote:
> On Mon, Apr 22, 2024 at 12:27:56PM +0530, Aravind Iddamsetty wrote:
>> PCI subsystem provides callbacks to inform the driver about a request to
>> do function level reset by user, initiated by writing to sysfs entry
>> /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
>> without the need to do unbind and rebind as the driver needs to
>> reinitialize the device afresh post FLR.
>>
>> v2:
>> 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
>> 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini
>>
>> v3: declare xe_pci_err_handlers as static(Michal)
>>
>> Cc: Rodrigo Vivi 
>> Cc: Lucas De Marchi 
>> Cc: Michal Wajdeczko 
>>
>> Reviewed-by: Rodrigo Vivi 
>> Signed-off-by: Aravind Iddamsetty 
>> ---
>>  drivers/gpu/drm/xe/Makefile  |  1 +
>>  drivers/gpu/drm/xe/xe_device_types.h |  3 +
>>  drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
>>  drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
>>  drivers/gpu/drm/xe/xe_pci.h  |  2 +
>>  drivers/gpu/drm/xe/xe_pci_err.c  | 88 
>>  drivers/gpu/drm/xe/xe_pci_err.h  | 13 
>>  7 files changed, 119 insertions(+), 1 deletion(-)
>>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
>>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 8bc62bfbc679..693971a1fac0 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
>>  xe_module.o \
>>  xe_pat.o \
>>  xe_pci.o \
>> +xe_pci_err.o \
>>  xe_pcode.o \
>>  xe_pm.o \
>>  xe_preempt_fence.o \
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
>> b/drivers/gpu/drm/xe/xe_device_types.h
>> index 0a66555229e9..8c749b378a92 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -465,6 +465,9 @@ struct xe_device {
>>  /** @pci_state: PCI state of device */
>>  struct pci_saved_state *pci_state;
>>  
>> +/** @pci_device_is_reset: device went through PCIe FLR */
>> +bool pci_device_is_reset;
>> +
>>  /* private: */
>>  
>>  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index 509649d0e65e..efba0fbe2f5c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void 
>> *arg)
>>  return;
>>  }
>>  
>> +/* We already have done this before going through a reset, so skip here 
>> */
>> +if (xe->pci_device_is_reset)
>> +return;
>> +
>>  XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>>  XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>>  XE_WARN_ON(xe_guc_pc_stop(pc));
>> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>> index a62300990e19..b5a582afc9e7 100644
>> --- a/drivers/gpu/drm/xe/xe_pci.c
>> +++ b/drivers/gpu/drm/xe/xe_pci.c
>> @@ -23,6 +23,7 @@
>>  #include "xe_macros.h"
>>  #include "xe_mmio.h"
>>  #include "xe_module.h"
>> +#include "xe_pci_err.h"
>>  #include "xe_pci_types.h"
>>  #include "xe_pm.h"
>>  #include "xe_sriov.h"
>> @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>>  pci_set_drvdata(pdev, NULL);
>>  }
>>  
>> -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
>> *ent)
>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>  {
>>  const struct xe_device_desc *desc = (const void *)ent->driver_data;
>>  const struct xe_subplatform_desc *subplatform_desc;
>> @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
>>  };
>>  #endif
>>  
>> +static const struct pci_error_handlers xe_pci_err_handlers = {
>> +.reset_prepare = xe_pci_reset_prepare,
>> +.reset_done = xe_pci_reset_done,
>> +};
>> +
>>  static struct pci_driver xe_pci_driver = {
>>  .name = DRIVER_NAME,
>>  .id_table = pciidlist,
>> @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
>>  #ifdef CONFIG_PM_SLEEP
>>  .driver.pm = &xe_pm_ops,
>>  #endif
>> +.err_handler = &xe_pci_err_handlers,
>>  };
>>  
>>  int xe_register_pci_driver(void)
>> diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
>> index 73b90a430d1f..9faf5380a09e 100644
>> --- a/drivers/gpu/drm/xe/xe_pci.h
>> +++ b/drivers/gpu/drm/xe/xe_pci.h
>> @@ -7,8 +7,10 @@
>>  #define _XE_PCI_H_
>>  
>>  struct pci_dev;
>> +struct pci_device_id;
>>  
>>  int xe_register_pci_driver(void);
>>  void xe_unregister_pci_driver(void);
>>  void xe_load_pci_state(struct pci_dev *pdev);
>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
>>  #endif
>> diff --git a/drivers/gpu/drm/xe/xe_pci_err.c 
>> b/drivers/gpu/drm/xe/xe_pci_err.c
>> new file mode 100644
>> index ..530692

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-23 Thread Aravind Iddamsetty


On 23/04/24 20:34, Nilawar, Badal wrote:
>
>
> On 22-04-2024 12:27, Aravind Iddamsetty wrote:
>> PCI subsystem provides callbacks to inform the driver about a request to
>> do function level reset by user, initiated by writing to sysfs entry
>> /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
>> without the need to do unbind and rebind as the driver needs to
>> reinitialize the device afresh post FLR.
>>
>> v2:
>> 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
>> 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini
>>
>> v3: declare xe_pci_err_handlers as static(Michal)
>>
>> Cc: Rodrigo Vivi 
>> Cc: Lucas De Marchi 
>> Cc: Michal Wajdeczko 
>>
>> Reviewed-by: Rodrigo Vivi 
>> Signed-off-by: Aravind Iddamsetty 
>> ---
>>   drivers/gpu/drm/xe/Makefile  |  1 +
>>   drivers/gpu/drm/xe/xe_device_types.h |  3 +
>>   drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
>>   drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
>>   drivers/gpu/drm/xe/xe_pci.h  |  2 +
>>   drivers/gpu/drm/xe/xe_pci_err.c  | 88 
>>   drivers/gpu/drm/xe/xe_pci_err.h  | 13 
>>   7 files changed, 119 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
>>   create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 8bc62bfbc679..693971a1fac0 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
>>   xe_module.o \
>>   xe_pat.o \
>>   xe_pci.o \
>> +    xe_pci_err.o \
>>   xe_pcode.o \
>>   xe_pm.o \
>>   xe_preempt_fence.o \
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
>> b/drivers/gpu/drm/xe/xe_device_types.h
>> index 0a66555229e9..8c749b378a92 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -465,6 +465,9 @@ struct xe_device {
>>   /** @pci_state: PCI state of device */
>>   struct pci_saved_state *pci_state;
>>   +    /** @pci_device_is_reset: device went through PCIe FLR */
>> +    bool pci_device_is_reset;
>> +
>>   /* private: */
>>     #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>> index 509649d0e65e..efba0fbe2f5c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>> @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void 
>> *arg)
>>   return;
>>   }
>>   +    /* We already have done this before going through a reset, so skip 
>> here */
>> +    if (xe->pci_device_is_reset)
>> +    return;
>> +
>>   XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), 
>> XE_FORCEWAKE_ALL));
>>   XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>>   XE_WARN_ON(xe_guc_pc_stop(pc));
>> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>> index a62300990e19..b5a582afc9e7 100644
>> --- a/drivers/gpu/drm/xe/xe_pci.c
>> +++ b/drivers/gpu/drm/xe/xe_pci.c
>> @@ -23,6 +23,7 @@
>>   #include "xe_macros.h"
>>   #include "xe_mmio.h"
>>   #include "xe_module.h"
>> +#include "xe_pci_err.h"
>>   #include "xe_pci_types.h"
>>   #include "xe_pm.h"
>>   #include "xe_sriov.h"
>> @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>>   pci_set_drvdata(pdev, NULL);
>>   }
>>   -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
>> *ent)
>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>   {
>>   const struct xe_device_desc *desc = (const void *)ent->driver_data;
>>   const struct xe_subplatform_desc *subplatform_desc;
>> @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
>>   };
>>   #endif
>>   +static const struct pci_error_handlers xe_pci_err_handlers = {
>> +    .reset_prepare = xe_pci_reset_prepare,
>> +    .reset_done = xe_pci_reset_done,
>> +};
>> +
>>   static struct pci_driver xe_pci_driver = {
>>   .name = DRIVER_NAME,
>>   .id_table = pciidlist,
>> @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
>>   #ifdef CONFIG_PM_SLEEP
>>   .driver.pm = &xe_pm_ops,
>>   #endif
>> +    .err_handler = &xe_pci_err_handlers,
>>   };
>>     int xe_register_pci_driver(void)
>> diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
>> index 73b90a430d1f..9faf5380a09e 100644
>> --- a/drivers/gpu/drm/xe/xe_pci.h
>> +++ b/drivers/gpu/drm/xe/xe_pci.h
>> @@ -7,8 +7,10 @@
>>   #define _XE_PCI_H_
>>     struct pci_dev;
>> +struct pci_device_id;
>>     int xe_register_pci_driver(void);
>>   void xe_unregister_pci_driver(void);
>>   void xe_load_pci_state(struct pci_dev *pdev);
>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_pci_err.c 
>> b/drivers/gpu/drm/xe/xe_pci_err.c
>> new file mode 100644
>> index ..5

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-23 Thread Michał Winiarski
On Mon, Apr 22, 2024 at 12:27:56PM +0530, Aravind Iddamsetty wrote:
> PCI subsystem provides callbacks to inform the driver about a request to
> do function level reset by user, initiated by writing to sysfs entry
> /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
> without the need to do unbind and rebind as the driver needs to
> reinitialize the device afresh post FLR.
> 
> v2:
> 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
> 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini
> 
> v3: declare xe_pci_err_handlers as static(Michal)
> 
> Cc: Rodrigo Vivi 
> Cc: Lucas De Marchi 
> Cc: Michal Wajdeczko 
> 
> Reviewed-by: Rodrigo Vivi 
> Signed-off-by: Aravind Iddamsetty 
> ---
>  drivers/gpu/drm/xe/Makefile  |  1 +
>  drivers/gpu/drm/xe/xe_device_types.h |  3 +
>  drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
>  drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
>  drivers/gpu/drm/xe/xe_pci.h  |  2 +
>  drivers/gpu/drm/xe/xe_pci_err.c  | 88 
>  drivers/gpu/drm/xe/xe_pci_err.h  | 13 
>  7 files changed, 119 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
>  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
> 
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index 8bc62bfbc679..693971a1fac0 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
>   xe_module.o \
>   xe_pat.o \
>   xe_pci.o \
> + xe_pci_err.o \
>   xe_pcode.o \
>   xe_pm.o \
>   xe_preempt_fence.o \
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
> b/drivers/gpu/drm/xe/xe_device_types.h
> index 0a66555229e9..8c749b378a92 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -465,6 +465,9 @@ struct xe_device {
>   /** @pci_state: PCI state of device */
>   struct pci_saved_state *pci_state;
>  
> + /** @pci_device_is_reset: device went through PCIe FLR */
> + bool pci_device_is_reset;
> +
>   /* private: */
>  
>  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> index 509649d0e65e..efba0fbe2f5c 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void 
> *arg)
>   return;
>   }
>  
> + /* We already have done this before going through a reset, so skip here 
> */
> + if (xe->pci_device_is_reset)
> + return;
> +
>   XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>   XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>   XE_WARN_ON(xe_guc_pc_stop(pc));
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index a62300990e19..b5a582afc9e7 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -23,6 +23,7 @@
>  #include "xe_macros.h"
>  #include "xe_mmio.h"
>  #include "xe_module.h"
> +#include "xe_pci_err.h"
>  #include "xe_pci_types.h"
>  #include "xe_pm.h"
>  #include "xe_sriov.h"
> @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>   pci_set_drvdata(pdev, NULL);
>  }
>  
> -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id 
> *ent)
> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>   const struct xe_device_desc *desc = (const void *)ent->driver_data;
>   const struct xe_subplatform_desc *subplatform_desc;
> @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
>  };
>  #endif
>  
> +static const struct pci_error_handlers xe_pci_err_handlers = {
> + .reset_prepare = xe_pci_reset_prepare,
> + .reset_done = xe_pci_reset_done,
> +};
> +
>  static struct pci_driver xe_pci_driver = {
>   .name = DRIVER_NAME,
>   .id_table = pciidlist,
> @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
>  #ifdef CONFIG_PM_SLEEP
>   .driver.pm = &xe_pm_ops,
>  #endif
> + .err_handler = &xe_pci_err_handlers,
>  };
>  
>  int xe_register_pci_driver(void)
> diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
> index 73b90a430d1f..9faf5380a09e 100644
> --- a/drivers/gpu/drm/xe/xe_pci.h
> +++ b/drivers/gpu/drm/xe/xe_pci.h
> @@ -7,8 +7,10 @@
>  #define _XE_PCI_H_
>  
>  struct pci_dev;
> +struct pci_device_id;
>  
>  int xe_register_pci_driver(void);
>  void xe_unregister_pci_driver(void);
>  void xe_load_pci_state(struct pci_dev *pdev);
> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_pci_err.c b/drivers/gpu/drm/xe/xe_pci_err.c
> new file mode 100644
> index ..5306925ea2fa
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pci_err.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2024 Intel

Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR

2024-04-23 Thread Nilawar, Badal




On 22-04-2024 12:27, Aravind Iddamsetty wrote:

PCI subsystem provides callbacks to inform the driver about a request to
do function level reset by user, initiated by writing to sysfs entry
/sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
without the need to do unbind and rebind as the driver needs to
reinitialize the device afresh post FLR.

v2:
1. separate out gt idle and pci save/restore to a separate patch (Lucas)
2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_pc_fini

v3: declare xe_pci_err_handlers as static(Michal)

Cc: Rodrigo Vivi 
Cc: Lucas De Marchi 
Cc: Michal Wajdeczko 

Reviewed-by: Rodrigo Vivi 
Signed-off-by: Aravind Iddamsetty 
---
  drivers/gpu/drm/xe/Makefile  |  1 +
  drivers/gpu/drm/xe/xe_device_types.h |  3 +
  drivers/gpu/drm/xe/xe_guc_pc.c   |  4 ++
  drivers/gpu/drm/xe/xe_pci.c  |  9 ++-
  drivers/gpu/drm/xe/xe_pci.h  |  2 +
  drivers/gpu/drm/xe/xe_pci_err.c  | 88 
  drivers/gpu/drm/xe/xe_pci_err.h  | 13 
  7 files changed, 119 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
  create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 8bc62bfbc679..693971a1fac0 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -117,6 +117,7 @@ xe-y += xe_bb.o \
xe_module.o \
xe_pat.o \
xe_pci.o \
+   xe_pci_err.o \
xe_pcode.o \
xe_pm.o \
xe_preempt_fence.o \
diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
b/drivers/gpu/drm/xe/xe_device_types.h
index 0a66555229e9..8c749b378a92 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -465,6 +465,9 @@ struct xe_device {
/** @pci_state: PCI state of device */
struct pci_saved_state *pci_state;
  
+	/** @pci_device_is_reset: device went through PCIe FLR */

+   bool pci_device_is_reset;
+
/* private: */
  
  #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)

diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 509649d0e65e..efba0fbe2f5c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void 
*arg)
return;
}
  
+	/* We already have done this before going through a reset, so skip here */

+   if (xe->pci_device_is_reset)
+   return;
+
XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
XE_WARN_ON(xe_guc_pc_stop(pc));
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index a62300990e19..b5a582afc9e7 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -23,6 +23,7 @@
  #include "xe_macros.h"
  #include "xe_mmio.h"
  #include "xe_module.h"
+#include "xe_pci_err.h"
  #include "xe_pci_types.h"
  #include "xe_pm.h"
  #include "xe_sriov.h"
@@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
  }
  
-static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)

+int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
  {
const struct xe_device_desc *desc = (const void *)ent->driver_data;
const struct xe_subplatform_desc *subplatform_desc;
@@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
  };
  #endif
  
+static const struct pci_error_handlers xe_pci_err_handlers = {

+   .reset_prepare = xe_pci_reset_prepare,
+   .reset_done = xe_pci_reset_done,
+};
+
  static struct pci_driver xe_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
@@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
  #ifdef CONFIG_PM_SLEEP
.driver.pm = &xe_pm_ops,
  #endif
+   .err_handler = &xe_pci_err_handlers,
  };
  
  int xe_register_pci_driver(void)

diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
index 73b90a430d1f..9faf5380a09e 100644
--- a/drivers/gpu/drm/xe/xe_pci.h
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -7,8 +7,10 @@
  #define _XE_PCI_H_
  
  struct pci_dev;

+struct pci_device_id;
  
  int xe_register_pci_driver(void);

  void xe_unregister_pci_driver(void);
  void xe_load_pci_state(struct pci_dev *pdev);
+int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
  #endif
diff --git a/drivers/gpu/drm/xe/xe_pci_err.c b/drivers/gpu/drm/xe/xe_pci_err.c
new file mode 100644
index ..5306925ea2fa
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_err.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include 
+#include 
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_pci.h"
+#include "xe_pci_err.h"
+#include "xe_pm.h"
+#include "xe_uc.h"
+
+