i915: Add second slice l3 remapping

Bell, Bryan J Tue, 17 Sep 2013 12:19:01 -0700

> -----Original Message-----
> From: Ville Syrjälä [mailto:ville.syrj...@linux.intel.com] 
> Sent: Tuesday, September 17, 2013 12:02 PM
> To: Bell, Bryan J
> Cc: Ben Widawsky; Widawsky, Benjamin; intel-gfx@lists.freedesktop.org; 
> Venkatesh, Vishnu
> Subject: Re: [Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 remapping
> 
> On Tue, Sep 17, 2013 at 06:51:31PM +0000, Bell, Bryan J wrote:
> > >> > +      spin_lock_irqsave(&dev_priv->irq_lock, flags);
> > >> > +      ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR);
> > >> 
> > >> Is it actually safe to enable the second slice irq when there's no 
> > >> second slice? This docs say it's just "reserved", but no mention 
> > >> whether it RO or could there be side effects.
> > 
> > >Tests on my machine appear to work. But I don't know for certain. Bryan, 
> > >could you answer this?
> > 
> > On the Windows driver we enable the IRQ on all HSW skus and haven't seen 
> > any issues. 
> 
> This code would enable it for IVB too. Any data on that?


No data on IVB, I recommend against enabling it on IVB. 

FYI: My understanding is that L3 DPF code and or interrupts should be disabled 
on VLV. 
VLV does not support dynamic L3 row replacement. 

> > 
> > -----Original Message-----
> > From: Ben Widawsky [mailto:b...@bwidawsk.net]
> > Sent: Tuesday, September 17, 2013 11:46 AM
> > To: Ville Syrjälä; Bell, Bryan J
> > Cc: Widawsky, Benjamin; intel-gfx@lists.freedesktop.org; Venkatesh, 
> > Vishnu
> > Subject: Re: [Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 
> > remapping
> > 
> > On Fri, Sep 13, 2013 at 12:38:01PM +0300, Ville Syrjälä wrote:
> > > On Thu, Sep 12, 2013 at 10:28:31PM -0700, Ben Widawsky wrote:
> > > > Certain HSW SKUs have a second bank of L3. This L3 remapping has a 
> > > > separate register set, and interrupt from the first "slice". A 
> > > > slice is simply a term to define some subset of the GPU's l3 
> > > > cache. This patch implements both the interrupt handler, and 
> > > > ability to communicate with userspace about this second slice.
> > > > 
> > > > Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_drv.h         |  9 ++--
> > > >  drivers/gpu/drm/i915/i915_gem.c         | 26 ++++++----
> > > >  drivers/gpu/drm/i915/i915_irq.c         | 84 
> > > > +++++++++++++++++++++------------
> > > >  drivers/gpu/drm/i915/i915_reg.h         |  6 +++
> > > >  drivers/gpu/drm/i915/i915_sysfs.c       | 34 ++++++++++---
> > > >  drivers/gpu/drm/i915/intel_ringbuffer.c |  6 +--
> > > >  include/uapi/drm/i915_drm.h             |  8 ++--
> > > >  7 files changed, 115 insertions(+), 58 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> > > > b/drivers/gpu/drm/i915/i915_drv.h index 81ba5bb..eb90461 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -918,9 +918,11 @@ struct i915_ums_state {
> > > >         int mm_suspended;
> > > >  };
> > > >  
> > > > +#define MAX_L3_SLICES 2
> > > >  struct intel_l3_parity {
> > > > -       u32 *remap_info;
> > > > +       u32 *remap_info[MAX_L3_SLICES];
> > > >         struct work_struct error_work;
> > > > +       int which_slice;
> > > >  };
> > > >  
> > > >  struct i915_gem_mm {
> > > > @@ -1686,7 +1688,8 @@ struct drm_i915_file_private {
> > > >  
> > > >  #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
> > > >  
> > > > -#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) ||
> > > > IS_HASWELL(dev))
> > > > +#define HAS_L3_GPU_CACHE(dev) (INTEL_INFO(dev)->gen >= 7) #define
> > > > +NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_GPU_CACHE(dev))
> > > >  
> > > >  #define GT_FREQUENCY_MULTIPLIER 50
> > > >  
> > > > @@ -1947,7 +1950,7 @@ bool i915_gem_clflush_object(struct 
> > > > drm_i915_gem_object *obj, bool force);  int __must_check 
> > > > i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);  int 
> > > > __must_check i915_gem_init(struct drm_device *dev);  int 
> > > > __must_check i915_gem_init_hw(struct drm_device *dev); -void 
> > > > i915_gem_l3_remap(struct drm_device *dev);
> > > > +void i915_gem_l3_remap(struct drm_device *dev, int slice);
> > > >  void i915_gem_init_swizzling(struct drm_device *dev);  void 
> > > > i915_gem_cleanup_ringbuffer(struct drm_device *dev);  int 
> > > > __must_check i915_gpu_idle(struct drm_device *dev); diff --git 
> > > > a/drivers/gpu/drm/i915/i915_gem.c 
> > > > b/drivers/gpu/drm/i915/i915_gem.c index 5b510a3..b11f7d6c 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > > @@ -4256,16 +4256,21 @@ i915_gem_idle(struct drm_device *dev)
> > > >         return 0;
> > > >  }
> > > >  
> > > > -void i915_gem_l3_remap(struct drm_device *dev)
> > > > +void i915_gem_l3_remap(struct drm_device *dev, int slice)
> > > >  {
> > > >         drm_i915_private_t *dev_priv = dev->dev_private;
> > > > +       u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
> > > > +       u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
> > > >         u32 misccpctl;
> > > >         int i;
> > > >  
> > > >         if (!HAS_L3_GPU_CACHE(dev))
> > > >                 return;
> > > >  
> > > > -       if (!dev_priv->l3_parity.remap_info)
> > > > +       if (NUM_L3_SLICES(dev) < 2 && slice)
> > > > +               return;
> > > 
> > > This check is redundant as we should never populate 
> > > l3_parity.remap_info[1] when there's no second slice.
> > > 
> > 
> > Got it. Smashed the early exit check together while at it.
> > 
> > > > +
> > > > +       if (!remap_info)
> > > >                 return;
> > > >  
> > > >         misccpctl = I915_READ(GEN7_MISCCPCTL); @@ -4273,17 +4278,17 @@ 
> > > > void i915_gem_l3_remap(struct drm_device *dev)
> > > >         POSTING_READ(GEN7_MISCCPCTL);
> > > >  
> > > >         for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
> > > > -               u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
> > > > -               if (remap && remap != 
> > > > dev_priv->l3_parity.remap_info[i/4])
> > > > +               u32 remap = I915_READ(reg_base + i);
> > > > +               if (remap && remap != remap_info[i/4])
> > > >                         DRM_DEBUG("0x%x was already programmed to %x\n",
> > > > -                                 GEN7_L3LOG_BASE + i, remap);
> > > > -               if (remap && !dev_priv->l3_parity.remap_info[i/4])
> > > > +                                 reg_base + i, remap);
> > > > +               if (remap && !remap_info[i/4])
> > > >                         DRM_DEBUG_DRIVER("Clearing remapped 
> > > > register\n");
> > > > -               I915_WRITE(GEN7_L3LOG_BASE + i, 
> > > > dev_priv->l3_parity.remap_info[i/4]);
> > > > +               I915_WRITE(reg_base + i, remap_info[i/4]);
> > > >         }
> > > >  
> > > >         /* Make sure all the writes land before disabling dop clock 
> > > > gating */
> > > > -       POSTING_READ(GEN7_L3LOG_BASE);
> > > > +       POSTING_READ(reg_base);
> > > >  
> > > >         I915_WRITE(GEN7_MISCCPCTL, misccpctl);  } @@ -4377,7 +4382,7 @@ 
> > > > int  i915_gem_init_hw(struct drm_device *dev)  {
> > > >         drm_i915_private_t *dev_priv = dev->dev_private;
> > > > -       int ret;
> > > > +       int ret, i;
> > > >  
> > > >         if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
> > > >                 return -EIO;
> > > > @@ -4396,7 +4401,8 @@ i915_gem_init_hw(struct drm_device *dev)
> > > >                 I915_WRITE(GEN7_MSG_CTL, temp);
> > > >         }
> > > >  
> > > > -       i915_gem_l3_remap(dev);
> > > > +       for (i = 0; i < NUM_L3_SLICES(dev); i++)
> > > > +               i915_gem_l3_remap(dev, i);
> > > >  
> > > >         i915_gem_init_swizzling(dev);
> > > >  
> > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c 
> > > > b/drivers/gpu/drm/i915/i915_irq.c index 13d26cf..62cdf05 100644
> > > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > > @@ -882,9 +882,10 @@ static void ivybridge_parity_work(struct 
> > > > work_struct *work)
> > > >         drm_i915_private_t *dev_priv = container_of(work, 
> > > > drm_i915_private_t,
> > > >                                                     
> > > > l3_parity.error_work);
> > > >         u32 error_status, row, bank, subbank;
> > > > -       char *parity_event[5];
> > > > +       char *parity_event[6];
> > > >         uint32_t misccpctl;
> > > >         unsigned long flags;
> > > > +       uint8_t slice = 0;
> > > >  
> > > >         /* We must turn off DOP level clock gating to access the L3 
> > > > registers.
> > > >          * In order to prevent a get/put style interface, acquire 
> > > > struct 
> > > > mutex @@ -892,45 +893,63 @@ static void ivybridge_parity_work(struct 
> > > > work_struct *work)
> > > >          */
> > > >         mutex_lock(&dev_priv->dev->struct_mutex);
> > > >  
> > > > +       /* If we've screwed up tracking, just let the interrupt fire 
> > > > again */
> > > > +       if (WARN_ON(!dev_priv->l3_parity.which_slice))
> > > > +               goto out;
> > > > +
> > > >         misccpctl = I915_READ(GEN7_MISCCPCTL);
> > > >         I915_WRITE(GEN7_MISCCPCTL, misccpctl & 
> > > > ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > > >         POSTING_READ(GEN7_MISCCPCTL);
> > > >  
> > > > -       error_status = I915_READ(GEN7_L3CDERRST1);
> > > > -       row = GEN7_PARITY_ERROR_ROW(error_status);
> > > > -       bank = GEN7_PARITY_ERROR_BANK(error_status);
> > > > -       subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
> > > > +       while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
> > > > +               u32 reg;
> > > >  
> > > > -       I915_WRITE(GEN7_L3CDERRST1, GEN7_PARITY_ERROR_VALID |
> > > > -                                   GEN7_L3CDERRST1_ENABLE);
> > > > -       POSTING_READ(GEN7_L3CDERRST1);
> > > > +               if (WARN_ON(slice >= MAX_L3_SLICES))
> > > > +                       break;
> > > 
> > > Could be >= NUM_L3_SLICES(dev) for a bit of extra paranoia. Also we 
> > > would fail to clear invalid bits from which_slice in this case, and 
> > > thus we'd get the WARN every time the work runs. But I guess this 
> > > should never happen in any case so probably not worth worrying about 
> > > this too much.
> > 
> > Not worth worrying, but I didn't mean to be so noisy. I've fixed this with 
> > WARN_ON_ONCE.
> > 
> > > 
> > > >  
> > > > -       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > > > +               dev_priv->l3_parity.which_slice &= ~(1<<slice);
> > > >  
> > > > -       spin_lock_irqsave(&dev_priv->irq_lock, flags);
> > > > -       ilk_enable_gt_irq(dev_priv, 
> > > > GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
> > > > -       spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
> > > > +               reg = GEN7_L3CDERRST1 + (slice * 0x200);
> > > >  
> > > > -       mutex_unlock(&dev_priv->dev->struct_mutex);
> > > > +               error_status = I915_READ(reg);
> > > > +               row = GEN7_PARITY_ERROR_ROW(error_status);
> > > > +               bank = GEN7_PARITY_ERROR_BANK(error_status);
> > > > +               subbank = GEN7_PARITY_ERROR_SUBBANK(error_status);
> > > > +
> > > > +               I915_WRITE(reg, GEN7_PARITY_ERROR_VALID | 
> > > > GEN7_L3CDERRST1_ENABLE);
> > > > +               POSTING_READ(reg);
> > > > +
> > > > +               parity_event[0] = I915_L3_PARITY_UEVENT "=1";
> > > > +               parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
> > > > +               parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", 
> > > > bank);
> > > > +               parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", 
> > > > subbank);
> > > > +               parity_event[4] = kasprintf(GFP_KERNEL, "SLICE=%d", 
> > > > slice);
> > > > +               parity_event[5] = NULL;
> > > >  
> > > > -       parity_event[0] = I915_L3_PARITY_UEVENT "=1";
> > > > -       parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
> > > > -       parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
> > > > -       parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
> > > > -       parity_event[4] = NULL;
> > > > +               kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
> > > > +                                  KOBJ_CHANGE, parity_event);
> > > >  
> > > > -       kobject_uevent_env(&dev_priv->dev->primary->kdev.kobj,
> > > > -                          KOBJ_CHANGE, parity_event);
> > > > +               DRM_DEBUG("Parity error: Slice = %d, Row = %d, Bank = 
> > > > %d, Sub bank = %d.\n",
> > > > +                         slice, row, bank, subbank);
> > > >  
> > > > -       DRM_DEBUG("Parity error: Row = %d, Bank = %d, Sub bank = %d.\n",
> > > > -                 row, bank, subbank);
> > > > +               kfree(parity_event[4]);
> > > > +               kfree(parity_event[3]);
> > > > +               kfree(parity_event[2]);
> > > > +               kfree(parity_event[1]);
> > > > +       }
> > > > +
> > > > +       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > > > +
> > > > +out:
> > > > +       WARN_ON(dev_priv->l3_parity.which_slice);
> > > 
> > > First I figured the irq could rearm this behind our back, but we 
> > > disable the irq until the work is done. So yeah, this is fine.
> > > 
> > > > +       spin_lock_irqsave(&dev_priv->irq_lock, flags);
> > > > +       ilk_enable_gt_irq(dev_priv, GT_PARITY_ERROR);
> > > 
> > > Is it actually safe to enable the second slice irq when there's no 
> > > second slice? This docs say it's just "reserved", but no mention 
> > > whether it RO or could there be side effects.
> > 
> > Tests on my machine appear to work. But I don't know for certain. Bryan, 
> > could you answer this?
> > 
> > > 
> > > > +       spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
> > > >  
> > > > -       kfree(parity_event[3]);
> > > > -       kfree(parity_event[2]);
> > > > -       kfree(parity_event[1]);
> > > > +       mutex_unlock(&dev_priv->dev->struct_mutex);
> > > >  }
> > > >  
> > > > -static void ivybridge_parity_error_irq_handler(struct drm_device
> > > > *dev)
> > > > +static void ivybridge_parity_error_irq_handler(struct drm_device 
> > > > +*dev, u32 iir)
> > > >  {
> > > >         drm_i915_private_t *dev_priv = (drm_i915_private_t *)
> > > > dev->dev_private;
> > > >  
> > > > @@ -938,9 +957,12 @@ static void 
> > > > ivybridge_parity_error_irq_handler(struct drm_device *dev)
> > > >                 return;
> > > >  
> > > >         spin_lock(&dev_priv->irq_lock);
> > > > -       ilk_disable_gt_irq(dev_priv, 
> > > > GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
> > > > +       ilk_disable_gt_irq(dev_priv, GT_PARITY_ERROR);
> > > >         spin_unlock(&dev_priv->irq_lock);
> > > >  
> > > > +       iir &= GT_PARITY_ERROR;
> > > > +       dev_priv->l3_parity.which_slice =
> > > > +               1 << (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 ? 1 
> > > > : 0);
> > > 
> > > What if both slices report an error at the same time?
> > 
> > I was thinking that such an event can not occur, but on rethinking it 
> > you are right that it's possible. I really hope this never happens, 
> > but it's fixed. Anyway, it should have been |=, not =
> > 
> > 
> > [snip]
> > 
> > I'll resend the patch after Bryan answers the question about both 
> > interrupts.
> > 
> > --
> > Ben Widawsky, Intel Open Source Technology Center
> 
> --
> Ville Syrjälä
> Intel OTC

--Regards
Bryan
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 5/8] drm/i915: Add second slice l3 remapping

Reply via email to