On 30.09.2022 17:45, Matt Roper wrote:
> MTL once again changes the multicast register types and steering
> details.  Key changes from past platforms:
>  * The number of instances of some MCR types (NODE, OAAL2, and GAM) vary
>    according to the MTL subplatform and cannot be read from fuse
>    registers.
>  * The MCR steering register (and its bitfields) has changed.
> 
> Unlike past platforms, we will be explicitly steering all types of MCR
> accesses, including those for "SLICE" and "DSS" ranges; we no longer
> rely on implicit steering.  On previous platforms, various
> hardware/firmware agents that needed to access registers typically had
> their own steering control registers, allowing them to perform multicast
> steering without clobbering the CPU/kernel steering.  Starting with MTL,
> more of these agents now share a single steering register (0xFD4) and it
> is no longer safe for us to assume that the value will remain unchanged
> from how we initialized it during startup.  There is also a slight
> chance of race conditions between the driver and a hardware/firmware
> agent, so the hardware provides a semaphore register that can be used to
> coordinate access to the steering register.  Support for the semaphore
> register will be introduced in a future patch.
> 
> Bspec: 67788, 67112
> Cc: Radhakrishna Sripada <radhakrishna.srip...@intel.com>
> Signed-off-by: Matt Roper <matthew.d.ro...@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c      | 85 ++++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  5 ++
>  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  8 +-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 18 ++++-
>  drivers/gpu/drm/i915/i915_pci.c             |  1 +
>  5 files changed, 102 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> index 9e2caba64f19..a1fa71b47831 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> @@ -41,6 +41,7 @@ static const char * const intel_steering_types[] = {
>       "MSLICE",
>       "LNCF",
>       "GAM",
> +     "DSS",
>       "INSTANCE 0",
>  };
>  
> @@ -99,9 +100,40 @@ static const struct intel_mmio_range 
> pvc_instance0_steering_table[] = {
>       {},
>  };
>  
> +static const struct intel_mmio_range mtl3d_instance0_steering_table[] = {
> +     { 0x000B00, 0x000BFF },         /* SQIDI */
> +     { 0x001000, 0x001FFF },         /* SQIDI */
> +     { 0x004000, 0x0048FF },         /* GAM */
> +     { 0x008700, 0x0087FF },         /* SQIDI */
> +     { 0x00B000, 0x00B0FF },         /* NODE */
> +     { 0x00C800, 0x00CFFF },         /* GAM */
> +     { 0x00D880, 0x00D8FF },         /* NODE */
> +     { 0x00DD00, 0x00DDFF },         /* OAAL2 */
> +     {},
> +};
> +
> +static const struct intel_mmio_range mtl3d_l3bank_steering_table[] = {
> +     { 0x00B100, 0x00B3FF },
> +     {},
> +};
> +
> +/* DSS steering is used for SLICE ranges as well */
> +static const struct intel_mmio_range mtl3d_dss_steering_table[] = {
> +     { 0x005200, 0x0052FF },         /* SLICE */
> +     { 0x005500, 0x007FFF },         /* SLICE */
> +     { 0x008140, 0x00815F },         /* SLICE (0x8140-0x814F), DSS 
> (0x8150-0x815F) */
> +     { 0x0094D0, 0x00955F },         /* SLICE (0x94D0-0x951F), DSS 
> (0x9520-0x955F) */
> +     { 0x009680, 0x0096FF },         /* DSS */
> +     { 0x00D800, 0x00D87F },         /* SLICE */
> +     { 0x00DC00, 0x00DCFF },         /* SLICE */
> +     { 0x00DE80, 0x00E8FF },         /* DSS (0xE000-0xE0FF reserved) */
> +};
> +
>  void intel_gt_mcr_init(struct intel_gt *gt)
>  {
>       struct drm_i915_private *i915 = gt->i915;
> +     unsigned long fuse;
> +     int i;
>  
>       /*
>        * An mslice is unavailable only if both the meml3 for the slice is
> @@ -119,7 +151,22 @@ void intel_gt_mcr_init(struct intel_gt *gt)
>                       drm_warn(&i915->drm, "mslice mask all zero!\n");
>       }
>  
> -     if (IS_PONTEVECCHIO(i915)) {
> +     if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) &&
> +         gt->type == GT_PRIMARY) {
> +             fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
> +                                  intel_uncore_read(gt->uncore, XEHP_FUSE4));
> +
> +             /*
> +              * Despite the register field being named "exclude mask" the
> +              * bits actually represent enabled banks (two banks per bit).
> +              */
> +             for_each_set_bit(i, &fuse, 3)
> +                     gt->info.l3bank_mask |= (0x3 << 2*i);
> +
> +             gt->steering_table[INSTANCE0] = mtl3d_instance0_steering_table;
> +             gt->steering_table[L3BANK] = mtl3d_l3bank_steering_table;
> +             gt->steering_table[DSS] = mtl3d_dss_steering_table;
> +     } else if (IS_PONTEVECCHIO(i915)) {
>               gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
>       } else if (IS_DG2(i915)) {
>               gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
> @@ -184,7 +231,12 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore 
> *uncore,
>  
>       lockdep_assert_held(&uncore->lock);
>  
> -     if (GRAPHICS_VER(uncore->i915) >= 11) {
> +     if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
> +             /* No need to save old steering reg value */
> +             intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
> +                                   REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
> +                                   REG_FIELD_PREP(MTL_MCR_INSTANCEID, 
> instance));

I think we need to clear the MULTICAST bit in the MTL_MCR_SELECTOR
register here since we are doing unicast r/w.

And in the functions intel_gt_mcr_multicast_write() and
intel_gt_mcr_multicast_write_fw(), we need to set the MULTICAST bit
in the MTL_MCR_SELECTOR register for MTL since we no more rely on
default values for the MTL_MCR_SELECTOR.

Can I also suggest to optimize the groupid and instanceid calculation
during non-terminating unicast MCR read/write by caching the groupid and
instanceid in some variables instead of calculating it everytime?
Because from MTL we have replaced all MCR register reads with implicit
steering to intel_gt_mcr_read_any/_fw call, and each call of that
function does groupip/instanceid calculation.

Regards,
Bala

> +     } else if (GRAPHICS_VER(uncore->i915) >= 11) {
>               mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
>               mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
>  
> @@ -202,26 +254,30 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore 
> *uncore,
>                */
>               if (rw_flag == FW_REG_WRITE)
>                       mcr_mask |= GEN11_MCR_MULTICAST;
> +
> +             old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
> +
> +             mcr &= ~mcr_mask;
> +             mcr |= mcr_ss;
> +             intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
>       } else {
>               mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
>               mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
> -     }
>  
> -     old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
> +             old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
>  
> -     mcr &= ~mcr_mask;
> -     mcr |= mcr_ss;
> -     intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
> +             mcr &= ~mcr_mask;
> +             mcr |= mcr_ss;
> +             intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
> +     }
>  
>       if (rw_flag == FW_REG_READ)
>               val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
>       else
>               intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
>  
> -     mcr &= ~mcr_mask;
> -     mcr |= old_mcr & mcr_mask;
> -
> -     intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
> +     if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70))
> +             intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
>  
>       return val;
>  }
> @@ -385,6 +441,8 @@ static void get_nonterminated_steering(struct intel_gt 
> *gt,
>                                      enum intel_steering_type type,
>                                      u8 *group, u8 *instance)
>  {
> +     u32 dss;
> +
>       switch (type) {
>       case L3BANK:
>               *group = 0;             /* unused */
> @@ -408,6 +466,11 @@ static void get_nonterminated_steering(struct intel_gt 
> *gt,
>               *group = IS_DG2(gt->i915) ? 1 : 0;
>               *instance = 0;
>               break;
> +     case DSS:
> +             dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
> +             *group = dss / GEN_DSS_PER_GSLICE;
> +             *instance = dss % GEN_DSS_PER_GSLICE;
> +             break;
>       case INSTANCE0:
>               /*
>                * There are a lot of MCR types for which instance (0, 0)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index b7341f01ec9f..c5b9671097e3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -59,6 +59,7 @@
>  #define GMD_ID_MEDIA                         _MMIO(MTL_MEDIA_GSI_BASE + 
> 0xd8c)
>  
>  #define MCFG_MCR_SELECTOR                    _MMIO(0xfd0)
> +#define MTL_MCR_SELECTOR                     _MMIO(0xfd4)
>  #define SF_MCR_SELECTOR                              _MMIO(0xfd8)
>  #define GEN8_MCR_SELECTOR                    _MMIO(0xfdc)
>  #define GAM_MCR_SELECTOR                     _MMIO(0xfe0)
> @@ -71,6 +72,8 @@
>  #define   GEN11_MCR_SLICE_MASK                       GEN11_MCR_SLICE(0xf)
>  #define   GEN11_MCR_SUBSLICE(subslice)               (((subslice) & 0x7) << 
> 24)
>  #define   GEN11_MCR_SUBSLICE_MASK            GEN11_MCR_SUBSLICE(0x7)
> +#define   MTL_MCR_GROUPID                    REG_GENMASK(11, 8)
> +#define   MTL_MCR_INSTANCEID                 REG_GENMASK(3, 0)
>  
>  #define IPEIR_I965                           _MMIO(0x2064)
>  #define IPEHR_I965                           _MMIO(0x2068)
> @@ -531,6 +534,8 @@
>  #define   GEN6_MBCTL_BOOT_FETCH_MECH         (1 << 0)
>  
>  /* Fuse readout registers for GT */
> +#define XEHP_FUSE4                           _MMIO(0x9114)
> +#define   GT_L3_EXC_MASK                     REG_GENMASK(6, 4)
>  #define      GEN10_MIRROR_FUSE3                      _MMIO(0x9118)
>  #define   GEN10_L3BANK_PAIR_COUNT            4
>  #define   GEN10_L3BANK_MASK                  0x0F
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 30003d68fd51..77ecbd6be331 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -60,6 +60,7 @@ enum intel_steering_type {
>       MSLICE,
>       LNCF,
>       GAM,
> +     DSS,
>  
>       /*
>        * On some platforms there are multiple types of MCR registers that
> @@ -255,8 +256,6 @@ struct intel_gt {
>  
>               intel_engine_mask_t engine_mask;
>  
> -             u32 l3bank_mask;
> -
>               u8 num_engines;
>  
>               /* General presence of SFC units */
> @@ -268,7 +267,10 @@ struct intel_gt {
>               /* Slice/subslice/EU info */
>               struct sseu_dev_info sseu;
>  
> -             unsigned long mslice_mask;
> +             union {
> +                     unsigned long mslice_mask;
> +                     unsigned long l3bank_mask;
> +             };
>  
>               /** @hwconfig: hardware configuration data */
>               struct intel_hwconfig hwconfig;
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 174c74dcda99..9be048da7fb3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1581,12 +1581,28 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>       wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
> GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  }
>  
> +static void
> +mtl_3d_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
> +{
> +     /*
> +      * Unlike older platforms, we no longer setup implicit steering here;
> +      * all MCR accesses are explicitly steered.
> +      */
> +     if (drm_debug_enabled(DRM_UT_DRIVER)) {
> +             struct drm_printer p = drm_debug_printer("MCR Steering:");
> +
> +             intel_gt_mcr_report_steering(&p, gt, false);
> +     }
> +}
> +
>  static void
>  gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
>  {
>       struct drm_i915_private *i915 = gt->i915;
>  
> -     if (IS_PONTEVECCHIO(i915))
> +     if (IS_METEORLAKE(i915) && gt->type == GT_PRIMARY)
> +             mtl_3d_gt_workarounds_init(gt, wal);
> +     else if (IS_PONTEVECCHIO(i915))
>               pvc_gt_workarounds_init(gt, wal);
>       else if (IS_DG2(i915))
>               dg2_gt_workarounds_init(gt, wal);
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 38460a0bd7cb..bd1d8e0205a6 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1145,6 +1145,7 @@ static const struct intel_device_info mtl_info = {
>       .extra_gt_list = xelpmp_extra_gt,
>       .has_flat_ccs = 0,
>       .has_gmd_id = 1,
> +     .has_mslice_steering = 0,
>       .has_snoop = 1,
>       .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
>       .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0),
> -- 
> 2.37.3
> 

Reply via email to