On 14.10.2022 16:02, Matt Roper wrote:
> Rather than relying on the implicit behavior of intel_uncore_*()
> functions, let's always use the intel_gt_mcr_*() functions to operate on
> multicast/replicated registers.
> 
> v2:
>  - Add TLB invalidation registers
> 
> v3:
>  - Switch more uncore operations in mmio_invalidate_full() to MCR
>    operations for Xe_HP.  (Bala)
> 
> Cc: Balasubramani Vivekanandan <balasubramani.vivekanan...@intel.com>
> Signed-off-by: Matt Roper <matthew.d.ro...@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c        | 58 ++++++++++++++++-------
>  drivers/gpu/drm/i915/gt/intel_mocs.c      | 13 ++---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 12 +++--
>  drivers/gpu/drm/i915/intel_pm.c           | 19 ++++----
>  4 files changed, 65 insertions(+), 37 deletions(-)

Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanan...@intel.com>

Regards,
Bala
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index e14f159ad9fc..3df0d0336dbc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1017,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
>       return rb;
>  }
>  
> +/*
> + * HW architecture suggests typical invalidation time at 40us,
> + * with pessimistic cases up to 100us and a recommendation to
> + * cap at 1ms. We go a bit higher just in case.
> + */
> +#define TLB_INVAL_TIMEOUT_US 100
> +#define TLB_INVAL_TIMEOUT_MS 4
> +
> +/*
> + * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
> + * but are now considered MCR registers.  Since they exist within a GAM range,
> + * the primary instance of the register rolls up the status from each unit.
> + */
> +static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
> +{
> +     if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> +             return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
> +                                                 TLB_INVAL_TIMEOUT_US,
> +                                                 TLB_INVAL_TIMEOUT_MS);
> +     else
> +             return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
> +                                                 TLB_INVAL_TIMEOUT_US,
> +                                                 TLB_INVAL_TIMEOUT_MS,
> +                                                 NULL);
> +}
> +
>  static void mmio_invalidate_full(struct intel_gt *gt)
>  {
>       static const i915_reg_t gen8_regs[] = {
> @@ -1048,7 +1074,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>       unsigned int num = 0;
>  
>       if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> -             regs = xehp_regs;
> +             regs = NULL;
>               num = ARRAY_SIZE(xehp_regs);
>       } else if (GRAPHICS_VER(i915) == 12) {
>               regs = gen12_regs;
> @@ -1075,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>               if (!intel_engine_pm_is_awake(engine))
>                       continue;
>  
> -             rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -             if (!i915_mmio_reg_offset(rb.reg))
> -                     continue;
> +             if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> +                     intel_gt_mcr_multicast_write_fw(gt,
> +                                                     xehp_regs[engine->class],
> +                                                     BIT(engine->instance));
> +             } else {
> +                     rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> +                     if (!i915_mmio_reg_offset(rb.reg))
> +                             continue;
>  
> -             intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> +                     intel_uncore_write_fw(uncore, rb.reg, rb.bit);
> +             }
>               awake |= engine->mask;
>       }
>  
> @@ -1099,22 +1131,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>       for_each_engine_masked(engine, gt, awake, tmp) {
>               struct reg_and_bit rb;
>  
> -             /*
> -              * HW architecture suggest typical invalidation time at 40us,
> -              * with pessimistic cases up to 100us and a recommendation to
> -              * cap at 1ms. We go a bit higher just in case.
> -              */
> -             const unsigned int timeout_us = 100;
> -             const unsigned int timeout_ms = 4;
> -
>               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
> -             if (__intel_wait_for_register_fw(uncore,
> -                                              rb.reg, rb.bit, 0,
> -                                              timeout_us, timeout_ms,
> -                                              NULL))
> +
> +             if (wait_for_invalidate(gt, rb))
>                       drm_err_ratelimited(&gt->i915->drm,
>                                           "%s TLB invalidation did not complete in %ums!\n",
> -                                         engine->name, timeout_ms);
> +                                         engine->name, TLB_INVAL_TIMEOUT_MS);
>       }
>  
>       /*
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index ecfa5baa5e3f..49fdd509527a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -7,6 +7,7 @@
>  
>  #include "intel_engine.h"
>  #include "intel_gt.h"
> +#include "intel_gt_mcr.h"
>  #include "intel_gt_regs.h"
>  #include "intel_mocs.h"
>  #include "intel_ring.h"
> @@ -609,17 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
>            0; \
>            i++)
>  
> -static void init_l3cc_table(struct intel_uncore *uncore,
> +static void init_l3cc_table(struct intel_gt *gt,
>                           const struct drm_i915_mocs_table *table)
>  {
>       unsigned int i;
>       u32 l3cc;
>  
>       for_each_l3cc(l3cc, table, i)
> -             if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50))
> -                     intel_uncore_write_fw(uncore, XEHP_LNCFCMOCS(i), l3cc);
> +             if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
> +                     intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
>               else
> -                     intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
> +                     intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
>  }
>  
>  void intel_mocs_init_engine(struct intel_engine_cs *engine)
> @@ -639,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>               init_mocs_table(engine, &table);
>  
>       if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
> -             init_l3cc_table(engine->uncore, &table);
> +             init_l3cc_table(engine->gt, &table);
>  }
>  
>  static u32 global_mocs_offset(void)
> @@ -675,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
>        * memory transactions including guc transactions
>        */
>       if (flags & HAS_RENDER_L3CC)
> -             init_l3cc_table(gt->uncore, &table);
> +             init_l3cc_table(gt, &table);
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 9229243992c2..5b86b2e286e0 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -10,12 +10,15 @@
>   */
>  
>  #include "gt/intel_gt.h"
> +#include "gt/intel_gt_mcr.h"
>  #include "gt/intel_gt_regs.h"
>  #include "intel_guc_fw.h"
>  #include "i915_drv.h"
>  
> -static void guc_prepare_xfer(struct intel_uncore *uncore)
> +static void guc_prepare_xfer(struct intel_gt *gt)
>  {
> +     struct intel_uncore *uncore = gt->uncore;
> +
>       u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
>                        GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
>                        GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
> @@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
>  
>       if (GRAPHICS_VER(uncore->i915) == 9) {
>               /* DOP Clock Gating Enable for GuC clocks */
> -             intel_uncore_rmw(uncore, GEN8_MISCCPCTL,
> -                              0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
> +             intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
> +                                          GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
> +                                          intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
>  
>               /* allows for 5us (in 10ns units) before GT can go to RC6 */
>               intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
> @@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
>       struct intel_uncore *uncore = gt->uncore;
>       int ret;
>  
> -     guc_prepare_xfer(uncore);
> +     guc_prepare_xfer(gt);
>  
>       /*
>        * Note that GuC needs the CSS header plus uKernel code to be copied
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 390802245514..cb18e45f6adf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -30,6 +30,8 @@
>  #include "display/skl_watermark.h"
>  
>  #include "gt/intel_engine_regs.h"
> +#include "gt/intel_gt.h"
> +#include "gt/intel_gt_mcr.h"
>  #include "gt/intel_gt_regs.h"
>  
>  #include "i915_drv.h"
> @@ -4321,22 +4323,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
>       u32 val;
>  
>       /* WaTempDisableDOPClkGating:bdw */
> -     misccpctl = intel_uncore_rmw(&dev_priv->uncore, GEN8_MISCCPCTL, ~GEN8_DOP_CLOCK_GATE_ENABLE,
> -                                  0);
> +     misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
> +                                            ~GEN8_DOP_CLOCK_GATE_ENABLE, 0);
>  
> -     val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1);
> +     val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
>       val &= ~L3_PRIO_CREDITS_MASK;
>       val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
>       val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
> -     intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val);
> +     intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val);
>  
>       /*
>        * Wait at least 100 clocks before re-enabling clock gating.
>        * See the definition of L3SQCREG1 in BSpec.
>        */
> -     intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1);
> +     intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
>       udelay(1);
> -     intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl);
> +     intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl);
>  }
>  
>  static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
> @@ -4496,9 +4498,8 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
>       gen9_init_clock_gating(dev_priv);
>  
>       /* WaDisableDopClockGating:skl */
> -     intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL,
> -                        intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL) &
> -                        ~GEN8_DOP_CLOCK_GATE_ENABLE);
> +     intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
> +                                GEN8_DOP_CLOCK_GATE_ENABLE, 0);
>  
>       /* WAC6entrylatency:skl */
>       intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
> -- 
> 2.37.3
> 

Reply via email to