On Mon, Dec 05, 2022 at 05:19:06PM -0800, Daniele Ceraolo Spurio wrote:
> If the GSC was loaded, the only way to stop it during the driver unload
> flow is to do a driver-FLR.
> The driver-initiated FLR is not the same as PCI config space FLR in
> that it doesn't reset the SGUnit and doesn't modify the PCI config
> space. Thus, it doesn't require a re-enumeration of the PCI BARs.
> However, the driver-FLR does cause a memory wipe of graphics memory
> on all discrete GPU platforms or a wipe limited to stolen memory
> on the integrated GPU platforms.
> 
> We perform the FLR as the last action before releasing the MMIO bar, so
> that we don't have to care about the consequences of the reset on the
> unload flow.
> 
> v2: rename FLR function, add comment to explain FLR impact (Rodrigo),
>     better explain why GSC needs FLR (Alan)
> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
> Signed-off-by: Alan Previn <alan.previn.teres.ale...@intel.com>
> Cc: Rodrigo Vivi <rodrigo.v...@intel.com>

Reviewed-by: Rodrigo Vivi <rodrigo.v...@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 23 +++++++++
>  drivers/gpu/drm/i915/i915_reg.h           |  3 ++
>  drivers/gpu/drm/i915/intel_uncore.c       | 58 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_uncore.h       | 13 +++++
>  4 files changed, 97 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> index f88069ab71ab..e73d4440c5e8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
> @@ -166,6 +166,29 @@ int intel_gsc_uc_fw_upload(struct intel_gsc_uc *gsc)
>       if (err)
>               goto fail;
>  
> +     /*
> +      * GSC is only killed by an FLR, so we need to trigger one on unload to
> +      * make sure we stop it. This is because we assign a chunk of memory to
> +      * the GSC as part of the FW load, so we need to make sure it stops
> +      * using it when we release it to the system on driver unload. Note that
> +      * this is not a problem of the unload per-se, because the GSC will not
> +      * touch that memory unless there are requests for it coming from the
> +      * driver; therefore, no accesses will happen while i915 is not loaded,
> +      * but if we re-load the driver then the GSC might wake up and try to
> +      * access that old memory location again.
> +      * Given that an FLR is a very disruptive action (see the FLR function
> +      * for details), we want to do it as the last action before releasing
> +      * the access to the MMIO bar, which means we need to do it as part of
> +      * the primary uncore cleanup.
> +      * An alternative approach to the FLR would be to use a memory location
> +      * that survives driver unload, like e.g. stolen memory, and keep the
> +      * GSC loaded across reloads. However, this requires us to make sure we
> +      * preserve that memory location on unload and then determine and
> +      * reserve its offset on each subsequent load, which is not trivial, so
> +      * it is easier to just kill everything and start fresh.
> +      */
> +     intel_uncore_set_flr_on_fini(&gt->i915->uncore);
> +
>       err = gsc_fw_load(gsc);
>       if (err)
>               goto fail;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 0b90fe6a28f7..b95d533652a4 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -118,6 +118,9 @@
>  
>  #define GU_CNTL                              _MMIO(0x101010)
>  #define   LMEM_INIT                  REG_BIT(7)
> +#define   DRIVERFLR                  REG_BIT(31)
> +#define GU_DEBUG                     _MMIO(0x101018)
> +#define   DRIVERFLR_STATUS           REG_BIT(31)
>  
>  #define GEN6_STOLEN_RESERVED         _MMIO(0x1082C0)
>  #define GEN6_STOLEN_RESERVED_ADDR_MASK       (0xFFF << 20)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8006a6c61466..3bfb4af0df78 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -2703,6 +2703,61 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
>       }
>  }
>  
> +/*
> + * The driver-initiated FLR is the highest level of reset that we can trigger
> + * from within the driver. It is different from the PCI FLR in that it doesn't
> + * fully reset the SGUnit and doesn't modify the PCI config space and therefore
> + * it doesn't require a re-enumeration of the PCI BARs. However, the
> + * driver-initiated FLR does still cause a reset of both GT and display and a
> + * memory wipe of local and stolen memory, so recovery would require a full HW
> + * re-init and saving/restoring (or re-populating) the wiped memory. Since we
> + * perform the FLR as the very last action before releasing access to the HW
> + * during the driver release flow, we don't attempt recovery at all, because
> + * if/when a new instance of i915 is bound to the device it will do a full
> + * re-init anyway.
> + */
> +static void driver_initiated_flr(struct intel_uncore *uncore)
> +{
> +     struct drm_i915_private *i915 = uncore->i915;
> +     const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */
> +     int ret;
> +
> +     drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
> +
> +     /*
> +      * Make sure any pending FLR requests have cleared by waiting for the
> +      * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
> +      * to make sure it's not still set from a prior attempt (it's a write to
> +      * clear bit).
> +      * Note that we should never be in a situation where a previous attempt
> +      * is still pending (unless the HW is totally dead), but better to be
> +      * safe in case something unexpected happens.
> +      */
> +     ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, flr_timeout_ms);
> +     if (ret) {
> +             drm_err(&i915->drm,
> +                     "Failed to wait for Driver-FLR bit to clear! %d\n",
> +                     ret);
> +             return;
> +     }
> +     intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +     /* Trigger the actual Driver-FLR */
> +     intel_uncore_rmw_fw(uncore, GU_CNTL, 0, DRIVERFLR);
> +
> +     ret = intel_wait_for_register_fw(uncore, GU_DEBUG,
> +                                      DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> +                                      flr_timeout_ms);
> +     if (ret) {
> +             drm_err(&i915->drm, "wait for Driver-FLR completion failed! %d\n", ret);
> +             return;
> +     }
> +
> +     intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> +
> +     return;
> +}
> +
>  /* Called via drm-managed action */
>  void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>  {
> @@ -2716,6 +2771,9 @@ void intel_uncore_fini_mmio(struct drm_device *dev, void *data)
>               intel_uncore_fw_domains_fini(uncore);
>               iosf_mbi_punit_release();
>       }
> +
> +     if (intel_uncore_needs_flr_on_fini(uncore))
> +             driver_initiated_flr(uncore);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> index e9e38490815d..9ea1f4864a3a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.h
> +++ b/drivers/gpu/drm/i915/intel_uncore.h
> @@ -153,6 +153,7 @@ struct intel_uncore {
>  #define UNCORE_HAS_FPGA_DBG_UNCLAIMED        BIT(1)
>  #define UNCORE_HAS_DBG_UNCLAIMED     BIT(2)
>  #define UNCORE_HAS_FIFO                      BIT(3)
> +#define UNCORE_NEEDS_FLR_ON_FINI     BIT(4)
>  
>       const struct intel_forcewake_range *fw_domains_table;
>       unsigned int fw_domains_table_entries;
> @@ -223,6 +224,18 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
>       return uncore->flags & UNCORE_HAS_FIFO;
>  }
>  
> +static inline bool
> +intel_uncore_needs_flr_on_fini(const struct intel_uncore *uncore)
> +{
> +     return uncore->flags & UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
> +static inline bool
> +intel_uncore_set_flr_on_fini(struct intel_uncore *uncore)
> +{
> +     return uncore->flags |= UNCORE_NEEDS_FLR_ON_FINI;
> +}
> +
>  void intel_uncore_mmio_debug_init_early(struct drm_i915_private *i915);
>  void intel_uncore_init_early(struct intel_uncore *uncore,
>                            struct intel_gt *gt);
> -- 
> 2.37.3
> 

Reply via email to