On Wed, Nov 29, 2017 at 02:30:30PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> 
> It seems that the DMC likes to transition between the DC states a lot when
> there are no connected displays (no active power domains) during command
> submission.
> 
> This activity on DC states has a negative impact on the performance of the
> chip with huge latencies observed in the interrupt handlers and elsewhere.
> Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
> eight.
> 
> Work around it by introducing a new power domain named,
> POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
> held for the duration of command submission activity.
> 
> v2:
>  * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
>  * Protect macro body with braces. (Jani Nikula)
> 
> v3:
>  * Add dedicated power domain for clarity. (Chris, Imre)
>  * Commit message and comment text updates.
>  * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
>    firmware release.
> 
> v4:
>  * Power domain should be inner to device runtime pm. (Chris)
>  * Simplify NEEDS_CSR_GT_PERF_WA macro. (Chris)
>  * Handle async DMC loading by moving the GT_IRQ power domain logic into
>    intel_runtime_pm. (Daniel, Chris)
>  * Include small core GEN9 as well. (Imre)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
> Testcase: igt/gem_exec_nop/headless
> Cc: Imre Deak <imre.d...@intel.com>
> Acked-by: Chris Wilson <ch...@chris-wilson.co.uk> (v2)
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozh...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  5 ++++
>  drivers/gpu/drm/i915/i915_gem.c         |  2 ++
>  drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++++++++
>  drivers/gpu/drm/i915/intel_csr.c        | 29 +++++++++++++---------
>  drivers/gpu/drm/i915/intel_runtime_pm.c | 44 
> +++++++++++++++++++++++++++------
>  5 files changed, 76 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bddd65839f60..37b3da4fd0d4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ enum intel_display_power_domain {
>       POWER_DOMAIN_AUX_D,
>       POWER_DOMAIN_GMBUS,
>       POWER_DOMAIN_MODESET,
> +     POWER_DOMAIN_GT_IRQ,
>       POWER_DOMAIN_INIT,
>  
>       POWER_DOMAIN_NUM,
> @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
> +     ((dev_priv)->csr.dmc_payload && \
> +      IS_GEN9(dev_priv) && !IS_SKYLAKE(dev_priv))
> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_vtd_active(void)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 354b0546a191..a6f522e2d1a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3381,6 +3381,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  
>       if (INTEL_GEN(dev_priv) >= 6)
>               gen6_rps_idle(dev_priv);
> +
> +     intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
>       intel_runtime_pm_put(dev_priv);
>  out_unlock:
>       mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
> b/drivers/gpu/drm/i915/i915_gem_request.c
> index a90bdd26571f..c28a4ceb016d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915)
>       GEM_BUG_ON(!i915->gt.active_requests);
>  
>       intel_runtime_pm_get_noresume(i915);
> +
> +     /*
> +      * It seems that the DMC likes to transition between the DC states a lot
> +      * when there are no connected displays (no active power domains) during
> +      * command submission.
> +      *
> +      * This activity has negative impact on the performance of the chip with
> +      * huge latencies observed in the interrupt handler and elsewhere.
> +      *
> +      * Work around it by grabbing a GT IRQ power domain whilst there is any
> +      * GT activity, preventing any DC state transitions.
> +      */
> +     intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> +
>       i915->gt.awake = true;
>  
>       intel_enable_gt_powersave(i915);
> diff --git a/drivers/gpu/drm/i915/intel_csr.c 
> b/drivers/gpu/drm/i915/intel_csr.c
> index 07e4f7bc4412..8b188e9f283b 100644
> --- a/drivers/gpu/drm/i915/intel_csr.c
> +++ b/drivers/gpu/drm/i915/intel_csr.c
> @@ -403,24 +403,33 @@ static uint32_t *parse_csr_fw(struct drm_i915_private 
> *dev_priv,
>  
>  static void csr_load_work_fn(struct work_struct *work)
>  {
> -     struct drm_i915_private *dev_priv;
> -     struct intel_csr *csr;
> +     struct drm_i915_private *dev_priv =
> +             container_of(work, typeof(*dev_priv), csr.work);
> +     struct intel_csr *csr = &dev_priv->csr;
>       const struct firmware *fw = NULL;
> +     u32 *dmc_payload;
>  
> -     dev_priv = container_of(work, typeof(*dev_priv), csr.work);
> -     csr = &dev_priv->csr;
> +     request_firmware(&fw, csr->fw_path, &dev_priv->drm.pdev->dev);
>  
> -     request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev);
> -     if (fw)
> -             dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
> +     if (fw) {
> +             dmc_payload = parse_csr_fw(dev_priv, fw);
> +             release_firmware(fw);
> +     } else {
> +             dmc_payload = NULL;
> +     }
>  
> -     if (dev_priv->csr.dmc_payload) {
> +     /* Lock to document memory barrier towards NEEDS_CSR_GT_PERF_WA. */
> +     mutex_lock(&dev_priv->power_domains.lock);
> +     csr->dmc_payload = dmc_payload;
> +     mutex_unlock(&dev_priv->power_domains.lock);
> +
> +     if (csr->dmc_payload) {
>               intel_csr_load_program(dev_priv);
>  
>               intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
>  
>               DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n",
> -                      dev_priv->csr.fw_path,
> +                      csr->fw_path,
>                        CSR_VERSION_MAJOR(csr->version),
>                        CSR_VERSION_MINOR(csr->version));
>       } else {
> @@ -431,8 +440,6 @@ static void csr_load_work_fn(struct work_struct *work)
>               dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s",
>                          INTEL_UC_FIRMWARE_URL);
>       }
> -
> -     release_firmware(fw);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
> b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 8315499452dc..915124a2e945 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum 
> intel_display_power_domain domain)
>               return "INIT";
>       case POWER_DOMAIN_MODESET:
>               return "MODESET";
> +     case POWER_DOMAIN_GT_IRQ:
> +             return "GT_IRQ";
>       default:
>               MISSING_CASE(domain);
>               return "?";
> @@ -1448,6 +1450,9 @@ __intel_display_power_get_domain(struct 
> drm_i915_private *dev_priv,
>       struct i915_power_domains *power_domains = &dev_priv->power_domains;
>       struct i915_power_well *power_well;
>  
> +     if (domain == POWER_DOMAIN_GT_IRQ && !NEEDS_CSR_GT_PERF_WA(dev_priv))
> +             return;
> +
>       for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain))
>               intel_power_well_get(dev_priv, power_well);
>  
> @@ -1518,6 +1523,19 @@ bool intel_display_power_get_if_enabled(struct 
> drm_i915_private *dev_priv,
>       return is_enabled;
>  }
>  
> +static void
> +__intel_display_power_put_domain(struct drm_i915_private *dev_priv,
> +                              enum intel_display_power_domain domain)
> +{
> +     struct i915_power_domains *power_domains = &dev_priv->power_domains;
> +     struct i915_power_well *power_well;
> +
> +     power_domains->domain_use_count[domain]--;
> +
> +     for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
> +             intel_power_well_put(dev_priv, power_well);
> +}
> +
>  /**
>   * intel_display_power_put - release a power domain reference
>   * @dev_priv: i915 device instance
> @@ -1530,21 +1548,30 @@ bool intel_display_power_get_if_enabled(struct 
> drm_i915_private *dev_priv,
>  void intel_display_power_put(struct drm_i915_private *dev_priv,
>                            enum intel_display_power_domain domain)
>  {
> -     struct i915_power_domains *power_domains;
> -     struct i915_power_well *power_well;
> -
> -     power_domains = &dev_priv->power_domains;
> +     struct i915_power_domains *power_domains = &dev_priv->power_domains;
>  
>       mutex_lock(&power_domains->lock);
>  
> +     if (domain == POWER_DOMAIN_GT_IRQ) {
> +             /*
> +              * To handle asynchronous DMC loading we have to be careful to
> +              * keep the use count on POWER_DOMAIN_GT_IRQ balanced.
> +              *
> +              * If there was GT activity before the DMC was loaded, we will
> +              * have skipped obtaining the power domain so must not decrement
> +              * it now.
> +              */
> +             if (!power_domains->domain_use_count[domain])
> +                     goto out;
> +     }

Is it possible to have any GT activity before
intel_power_domains_init_hw() / intel_csr_ucode_init()? I couldn't spot
anything at least. Calling into the power well code before those isn't
correct in any case. If so, the INIT power ref taken in
intel_csr_ucode_init() makes sure the above scenario can't happen and
then we don't need dmc_payload check in NEEDS_CSR_GT_PERF_WA() either.

> +
>       WARN(!power_domains->domain_use_count[domain],
>            "Use count on domain %s is already zero\n",
>            intel_display_power_domain_str(domain));
> -     power_domains->domain_use_count[domain]--;
>  
> -     for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
> -             intel_power_well_put(dev_priv, power_well);
> +     __intel_display_power_put_domain(dev_priv, domain);
>  
> +out:
>       mutex_unlock(&power_domains->lock);
>  
>       intel_runtime_pm_put(dev_priv);
> @@ -1705,6 +1732,7 @@ void intel_display_power_put(struct drm_i915_private 
> *dev_priv,
>       BIT_ULL(POWER_DOMAIN_INIT))
>  #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (           \
>       SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |         \
> +     BIT_ULL(POWER_DOMAIN_GT_IRQ) |                  \
>       BIT_ULL(POWER_DOMAIN_MODESET) |                 \
>       BIT_ULL(POWER_DOMAIN_AUX_A) |                   \
>       BIT_ULL(POWER_DOMAIN_INIT))
> @@ -1727,6 +1755,7 @@ void intel_display_power_put(struct drm_i915_private 
> *dev_priv,
>       BIT_ULL(POWER_DOMAIN_INIT))
>  #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (           \
>       BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |         \
> +     BIT_ULL(POWER_DOMAIN_GT_IRQ) |                  \
>       BIT_ULL(POWER_DOMAIN_MODESET) |                 \
>       BIT_ULL(POWER_DOMAIN_AUX_A) |                   \
>       BIT_ULL(POWER_DOMAIN_INIT))
> @@ -1785,6 +1814,7 @@ void intel_display_power_put(struct drm_i915_private 
> *dev_priv,
>       BIT_ULL(POWER_DOMAIN_INIT))
>  #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS (           \
>       GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |         \
> +     BIT_ULL(POWER_DOMAIN_GT_IRQ) |                  \
>       BIT_ULL(POWER_DOMAIN_MODESET) |                 \
>       BIT_ULL(POWER_DOMAIN_AUX_A) |                   \
>       BIT_ULL(POWER_DOMAIN_INIT))
> -- 
> 2.14.1
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to