On Thu, Jul 03, 2014 at 05:33:01PM -0400, Rodrigo Vivi wrote:
> From: Deepak S <deepa...@linux.intel.com>
> 
> With RC6 enabled, BYT has a HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
> on increasing/decreasing the freq. This logic monitors the C0
> counters of the render/media power wells over the EI period and takes
> the necessary action based on these values.
> 
> v2: Refactor duplicate code. (Ville)
> 
> v3: Reformat the comments. (Ville)
> 
> v4: Enable required counters and remove unwanted code (Ville)
> 
> v5: Added frequency change acceleration support and remove kernel-doc
> style comments. (Ville)
> 
> v6: Updated comment section and Fix w/a comment. (Ville)
> 
> Signed-off-by: Deepak S <deepa...@linux.intel.com>
> Reviewed-by: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.v...@intel.com>

Queued for -next, thanks for the patch.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  15 +++++
>  drivers/gpu/drm/i915/i915_irq.c | 133 +++++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h |  11 ++++
>  drivers/gpu/drm/i915/intel_pm.c |  12 +++-
>  4 files changed, 167 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1bf277e..db33a34 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -880,6 +880,12 @@ struct vlv_s0ix_state {
>       u32 clock_gate_dis2;
>  };
>  
> +struct intel_rps_ei_calc {
> +     u32 cz_ts_ei;
> +     u32 render_ei_c0;
> +     u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>       /* work and pm_iir are protected by dev_priv->irq_lock */
>       struct work_struct work;
> @@ -904,6 +910,8 @@ struct intel_gen6_power_mgmt {
>       u8 rp1_freq;            /* "less than" RP0 power/freqency */
>       u8 rp0_freq;            /* Non-overclocked max frequency. */
>  
> +     u32 ei_interrupt_count;
> +
>       int last_adj;
>       enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1504,6 +1512,13 @@ struct drm_i915_private {
>       /* gen6+ rps state */
>       struct intel_gen6_power_mgmt rps;
>  
> +     /* rps wa up ei calculation */
> +     struct intel_rps_ei_calc rps_up_ei;
> +
> +     /* rps wa down ei calculation */
> +     struct intel_rps_ei_calc rps_down_ei;
> +
> +
>       /* ilk-only ips/rps state. Everything in here is protected by the global
>        * mchdev_lock in intel_pm.c */
>       struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0217a41..7ae17af 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1225,6 +1225,131 @@ static void notify_ring(struct drm_device *dev,
>       i915_queue_hangcheck(dev);
>  }
>  
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +                             struct  intel_rps_ei_calc *rps_ei)
> +{
> +     u32 cz_ts, cz_freq_khz;
> +     u32 render_count, media_count;
> +     u32 elapsed_render, elapsed_media, elapsed_time;
> +     u32 residency = 0;
> +
> +     cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +     cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +     render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +     media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +     if (rps_ei->cz_ts_ei == 0) {
> +             rps_ei->cz_ts_ei = cz_ts;
> +             rps_ei->render_ei_c0 = render_count;
> +             rps_ei->media_ei_c0 = media_count;
> +
> +             return dev_priv->rps.cur_freq;
> +     }
> +
> +     elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +     rps_ei->cz_ts_ei = cz_ts;
> +
> +     elapsed_render = render_count - rps_ei->render_ei_c0;
> +     rps_ei->render_ei_c0 = render_count;
> +
> +     elapsed_media = media_count - rps_ei->media_ei_c0;
> +     rps_ei->media_ei_c0 = media_count;
> +
> +     /* Convert all the counters into common unit of milli sec */
> +     elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +     elapsed_render /=  cz_freq_khz;
> +     elapsed_media /= cz_freq_khz;
> +
> +     /*
> +      * Calculate overall C0 residency percentage
> +      * only if elapsed time is non zero
> +      */
> +     if (elapsed_time) {
> +             residency =
> +                     ((max(elapsed_render, elapsed_media) * 100)
> +                             / elapsed_time);
> +     }
> +
> +     return residency;
> +}
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +     u32 residency_C0_up = 0, residency_C0_down = 0;
> +     u8 new_delay, adj;
> +
> +     dev_priv->rps.ei_interrupt_count++;
> +
> +     WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +     if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +             vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +             vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +             return dev_priv->rps.cur_freq;
> +     }
> +
> +
> +     /*
> +      * To down throttle, C0 residency should be less than down threshold
> +      * for continuous EI intervals. So calculate down EI counters
> +      * once in VLV_INT_COUNT_FOR_DOWN_EI
> +      */
> +     if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +             dev_priv->rps.ei_interrupt_count = 0;
> +
> +             residency_C0_down = vlv_c0_residency(dev_priv,
> +                                             &dev_priv->rps_down_ei);
> +     } else {
> +             residency_C0_up = vlv_c0_residency(dev_priv,
> +                                             &dev_priv->rps_up_ei);
> +     }
> +
> +     new_delay = dev_priv->rps.cur_freq;
> +
> +     adj = dev_priv->rps.last_adj;
> +     /* C0 residency is greater than UP threshold. Increase Frequency */
> +     if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +             if (adj > 0)
> +                     adj *= 2;
> +             else
> +                     adj = 1;
> +
> +             if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> +                     new_delay = dev_priv->rps.cur_freq + adj;
> +
> +             /*
> +              * For better performance, jump directly
> +              * to RPe if we're below it.
> +              */
> +             if (new_delay < dev_priv->rps.efficient_freq)
> +                     new_delay = dev_priv->rps.efficient_freq;
> +
> +     } else if (!dev_priv->rps.ei_interrupt_count &&
> +                     (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +             if (adj < 0)
> +                     adj *= 2;
> +             else
> +                     adj = -1;
> +             /*
> +              * This means, C0 residency is less than down threshold over
> +              * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +              */
> +             if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
> +                     new_delay = dev_priv->rps.cur_freq + adj;
> +     }
> +
> +     return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>       struct drm_i915_private *dev_priv =
> @@ -1273,6 +1398,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>               else
>                       new_delay = dev_priv->rps.min_freq_softlimit;
>               adj = 0;
> +     } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +             new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>       } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>               if (adj < 0)
>                       adj *= 2;
> @@ -4363,7 +4490,11 @@ void intel_irq_init(struct drm_device *dev)
>       INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>  
>       /* Let's track the enabled rps events */
> -     dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +     if (IS_VALLEYVIEW(dev))
> +             /* WaGsvRC0ResidenncyMethod:VLV */
> +             dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +     else
> +             dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
>       setup_timer(&dev_priv->gpu_error.hangcheck_timer,
>                   i915_hangcheck_elapsed,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3488567..99413ae 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -525,6 +525,7 @@ enum punit_power_well {
>  #define PUNIT_REG_GPU_FREQ_STS                       0xd8
>  #define   GENFREQSTATUS                              (1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ               0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP                       0xce
>  
>  #define PUNIT_FUSE_BUS2                              0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1                              0xf5 /* bits 55:48 */
> @@ -550,6 +551,11 @@ enum punit_power_well {
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT         27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK          0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC            100000
> +#define VLV_RP_UP_EI_THRESHOLD                       90
> +#define VLV_RP_DOWN_EI_THRESHOLD             70
> +#define VLV_INT_COUNT_FOR_DOWN_EI            5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG                         0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK             0x3
> @@ -5383,6 +5389,7 @@ enum punit_power_well {
>  #define   VLV_GTLC_ALLOWWAKEERR                      (1 << 1)
>  #define   VLV_GTLC_PW_MEDIA_STATUS_MASK              (1 << 5)
>  #define   VLV_GTLC_PW_RENDER_STATUS_MASK     (1 << 7)
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT                                0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL                   0x1
>  #define   FORCEWAKE_USER                     0x2
> @@ -5530,6 +5537,8 @@ enum punit_power_well {
>  #define GEN6_GT_GFX_RC6_LOCKED                       0x138104
>  #define VLV_COUNTER_CONTROL                  0x138104
>  #define   VLV_COUNT_RANGE_HIGH                       (1<<15)
> +#define   VLV_MEDIA_RC0_COUNT_EN             (1<<5)
> +#define   VLV_RENDER_RC0_COUNT_EN            (1<<4)
>  #define   VLV_MEDIA_RC6_COUNT_EN             (1<<1)
>  #define   VLV_RENDER_RC6_COUNT_EN            (1<<0)
>  #define GEN6_GT_GFX_RC6                              0x138108
> @@ -5538,6 +5547,8 @@ enum punit_power_well {
>  
>  #define GEN6_GT_GFX_RC6p                     0x13810C
>  #define GEN6_GT_GFX_RC6pp                    0x138110
> +#define VLV_RENDER_C0_COUNT_REG              0x138118
> +#define VLV_MEDIA_C0_COUNT_REG                       0x13811C
>  
>  #define GEN6_PCODE_MAILBOX                   0x138124
>  #define   GEN6_PCODE_READY                   (1<<31)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 1e4611a..8ea96ff 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3235,8 +3235,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  
>       vlv_force_gfx_clock(dev_priv, false);
>  
> -     I915_WRITE(GEN6_PMINTRMSK,
> -                gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> +     if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +             I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> +     else 
> +             I915_WRITE(GEN6_PMINTRMSK,
> +                        gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -4076,6 +4079,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +     I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>       I915_WRITE(GEN6_RP_CONTROL,
>                  GEN6_RP_MEDIA_TURBO |
> @@ -4096,9 +4100,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  
>       /* allows RC6 residency counter to work */
>       I915_WRITE(VLV_COUNTER_CONTROL,
> -                _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +                _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
> +                                   VLV_RENDER_RC0_COUNT_EN |
>                                     VLV_MEDIA_RC6_COUNT_EN |
>                                     VLV_RENDER_RC6_COUNT_EN));
> +
>       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>               rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to