On Fri, Sep 01, 2017 at 01:16:06PM +0100, Chris Wilson wrote:
> Since runtime suspend is very harsh on GTT mmappings (they all get
> zapped on suspend) keep the device awake while the buffer remains in
> the GTT domain. However, userspace can control the domain and
> although there is a soft contract that writes must be flushed (for e.g.
> flushing scanouts and fbc), we are intentionally lax with respect to read
> domains, allowing them to persist for as long as is feasible.
> 
> We acquire a wakeref when using the buffer in the GEM domain so that all
> subsequent operations on that object are fast, trying to avoid
> suspending while the GTT is still in active use by userspace.  To ensure
> that the device can eventually suspend, we install a timer and expire the
> GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
> to estimate the cost of restoring actively used GTT mmapping.

Please tag with for-CI or something like that when throwing patches at
the shards :-) At least that's what I'm assuming, given the lack of an
S-o-b (Signed-off-by) and any revision of the changes ...

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |   2 +
>  drivers/gpu/drm/i915/i915_drv.h          |  11 ++++
>  drivers/gpu/drm/i915/i915_gem.c          | 103 
> ++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_object.h   |   5 ++
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |   4 +-
>  drivers/gpu/drm/i915/intel_lrc.c         |   1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +
>  7 files changed, 118 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..dbb07612aa5a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, 
> void *unused)
>       if (!HAS_RUNTIME_PM(dev_priv))
>               seq_puts(m, "Runtime power management not supported\n");
>  
> +     seq_printf(m, "GTT wakeref count: %d\n",
> +                atomic_read(&dev_priv->mm.gtt_wakeref.count));
>       seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
>       seq_printf(m, "IRQs disabled: %s\n",
>                  yesno(!intel_irqs_enabled(dev_priv)));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0383e879a315..14dcf6614f3c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,17 @@ struct i915_gem_mm {
>        */
>       struct list_head userfault_list;
>  
> +     /* List of all objects in gtt domain, holding a wakeref.
> +      * The list is reaped periodically, and protected by its own mutex.
> +      */
> +     struct {
> +             struct mutex lock;
> +             struct list_head list;
> +             atomic_t count;
> +
> +             struct delayed_work work;
> +     } gtt_wakeref;
> +
>       /**
>        * List of objects which are pending destruction.
>        */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e4cc08bc518c..09baf80889e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object 
> *obj)
>  
>  static void __start_cpu_write(struct drm_i915_gem_object *obj)
>  {
> +     GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>       if (cpu_write_needs_clflush(obj))
> @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, 
> unsigned int domain)
>               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
>  }
>  
> -static void
> +void
>  flush_write_domain(struct drm_i915_gem_object *obj, unsigned int 
> flush_domains)
>  {
>       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
> +     lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
>       if (!(obj->base.write_domain & flush_domains))
>               return;
>  
> @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, 
> unsigned int flush_domains)
>  
>       switch (obj->base.write_domain) {
>       case I915_GEM_DOMAIN_GTT:
> -             if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> -                     intel_runtime_pm_get(dev_priv);
> -                     spin_lock_irq(&dev_priv->uncore.lock);
> -                     
> POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> -                     spin_unlock_irq(&dev_priv->uncore.lock);
> -                     intel_runtime_pm_put(dev_priv);
> -             }
> +             mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +             if (!list_empty(&obj->mm.gtt_wakeref_link)) {
> +                     if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +                             spin_lock_irq(&dev_priv->uncore.lock);
> +                             
> POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +                             spin_unlock_irq(&dev_priv->uncore.lock);
> +                     }
>  
> -             intel_fb_obj_flush(obj,
> -                                fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
> +                     intel_fb_obj_flush(obj, fb_write_origin(obj, 
> I915_GEM_DOMAIN_GTT));
> +
> +                     list_del_init(&obj->mm.gtt_wakeref_link);
> +             }
> +             mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
>               break;
>  
>       case I915_GEM_DOMAIN_CPU:
> @@ -3425,6 +3431,7 @@ static void __i915_gem_object_flush_for_display(struct 
> drm_i915_gem_object *obj)
>       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>       if (obj->cache_dirty)
>               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
> +     GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>       obj->base.write_domain = 0;
>  }
>  
> @@ -3501,6 +3508,49 @@ i915_gem_object_set_to_wc_domain(struct 
> drm_i915_gem_object *obj, bool write)
>       return 0;
>  }
>  
> +static void wakeref_timeout(struct work_struct *work)
> +{
> +     struct drm_i915_private *dev_priv =
> +             container_of(work, typeof(*dev_priv), mm.gtt_wakeref.work.work);
> +     struct drm_i915_gem_object *obj, *on;
> +     unsigned int count;
> +
> +     mutex_lock(&dev_priv->mm.gtt_wakeref.lock);
> +     count = atomic_xchg(&dev_priv->mm.gtt_wakeref.count, 0);
> +     if (count) {
> +             unsigned long timeout;
> +
> +             GEM_BUG_ON(count == -1);
> +
> +             if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> +                     spin_lock_irq(&dev_priv->uncore.lock);
> +                     
> POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> +                     spin_unlock_irq(&dev_priv->uncore.lock);
> +             }
> +
> +             count = 0;
> +             list_for_each_entry_safe(obj, on,
> +                                      &dev_priv->mm.gtt_wakeref.list,
> +                                      mm.gtt_wakeref_link) {
> +                     list_del_init(&obj->mm.gtt_wakeref_link);
> +                     if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> +                             intel_fb_obj_flush(obj, fb_write_origin(obj, 
> I915_GEM_DOMAIN_GTT));
> +                             obj->base.write_domain = 0;
> +                     }
> +                     count++;
> +             }
> +
> +             timeout = HZ * (ilog2(count) + 1) / 2;
> +             mod_delayed_work(system_wq,
> +                              &dev_priv->mm.gtt_wakeref.work,
> +                              round_jiffies_up_relative(timeout));
> +     } else {
> +             intel_runtime_pm_put(dev_priv);
> +             atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +     }
> +     mutex_unlock(&dev_priv->mm.gtt_wakeref.lock);
> +}
> +
>  /**
>   * Moves a single object to the GTT read, and possibly write domain.
>   * @obj: object to act on
> @@ -3512,6 +3562,7 @@ i915_gem_object_set_to_wc_domain(struct 
> drm_i915_gem_object *obj, bool write)
>  int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool 
> write)
>  {
> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
>       int ret;
>  
>       lockdep_assert_held(&obj->base.dev->struct_mutex);
> @@ -3525,6 +3576,17 @@ i915_gem_object_set_to_gtt_domain(struct 
> drm_i915_gem_object *obj, bool write)
>       if (ret)
>               return ret;
>  
> +     mutex_lock(&i915->mm.gtt_wakeref.lock);
> +     if (list_empty(&obj->mm.gtt_wakeref_link)) {
> +             if (atomic_inc_and_test(&i915->mm.gtt_wakeref.count)) {
> +                     intel_runtime_pm_get(i915);
> +                     schedule_delayed_work(&i915->mm.gtt_wakeref.work,
> +                                           round_jiffies_up_relative(HZ));
> +             }
> +             list_add(&obj->mm.gtt_wakeref_link, &i915->mm.gtt_wakeref.list);
> +     }
> +     mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +
>       if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
>               return 0;
>  
> @@ -4257,6 +4319,7 @@ void i915_gem_object_init(struct drm_i915_gem_object 
> *obj,
>  
>       INIT_LIST_HEAD(&obj->global_link);
>       INIT_LIST_HEAD(&obj->userfault_link);
> +     INIT_LIST_HEAD(&obj->mm.gtt_wakeref_link);
>       INIT_LIST_HEAD(&obj->vma_list);
>       INIT_LIST_HEAD(&obj->lut_list);
>       INIT_LIST_HEAD(&obj->batch_pool_link);
> @@ -4394,6 +4457,14 @@ static void __i915_gem_free_objects(struct 
> drm_i915_private *i915,
>  
>               trace_i915_gem_object_destroy(obj);
>  
> +             flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
> +
> +             if (!list_empty_careful(&obj->mm.gtt_wakeref_link)) {
> +                     mutex_lock(&i915->mm.gtt_wakeref.lock);
> +                     list_del(&obj->mm.gtt_wakeref_link);
> +                     mutex_unlock(&i915->mm.gtt_wakeref.lock);
> +             }
> +
>               GEM_BUG_ON(i915_gem_object_is_active(obj));
>               list_for_each_entry_safe(vma, vn,
>                                        &obj->vma_list, obj_link) {
> @@ -4548,6 +4619,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>       int ret;
>  
>       intel_runtime_pm_get(dev_priv);
> +     while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +             ;
> +     WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
>       intel_suspend_gt_powersave(dev_priv);
>  
>       mutex_lock(&dev->struct_mutex);
> @@ -4932,6 +5006,11 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>       if (err)
>               goto err_priorities;
>  
> +     INIT_LIST_HEAD(&dev_priv->mm.gtt_wakeref.list);
> +     INIT_DELAYED_WORK(&dev_priv->mm.gtt_wakeref.work, wakeref_timeout);
> +     mutex_init(&dev_priv->mm.gtt_wakeref.lock);
> +     atomic_set(&dev_priv->mm.gtt_wakeref.count, -1);
> +
>       INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
>       init_llist_head(&dev_priv->mm.free_list);
>       INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> @@ -4978,6 +5057,10 @@ void i915_gem_load_cleanup(struct drm_i915_private 
> *dev_priv)
>       WARN_ON(!list_empty(&dev_priv->gt.timelines));
>       mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +     while (flush_delayed_work(&dev_priv->mm.gtt_wakeref.work))
> +             ;
> +     WARN_ON(atomic_read(&dev_priv->mm.gtt_wakeref.count) != -1);
> +
>       kmem_cache_destroy(dev_priv->priorities);
>       kmem_cache_destroy(dev_priv->dependencies);
>       kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h 
> b/drivers/gpu/drm/i915/i915_gem_object.h
> index c30d8f808185..3ca13edf32b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -177,6 +177,8 @@ struct drm_i915_gem_object {
>                       struct mutex lock; /* protects this cache */
>               } get_page;
>  
> +             struct list_head gtt_wakeref_link;
> +
>               /**
>                * Advice: are the backing pages purgeable?
>                */
> @@ -421,5 +423,8 @@ void i915_gem_object_set_cache_coherency(struct 
> drm_i915_gem_object *obj,
>                                        unsigned int cache_level);
>  void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
>  
> +void
> +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int 
> flush_domains);
> +
>  #endif
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c 
> b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 77fb39808131..71110b7d3ca0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -127,8 +127,10 @@ static bool can_release_pages(struct drm_i915_gem_object 
> *obj)
>  
>  static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
>  {
> -     if (i915_gem_object_unbind(obj) == 0)
> +     if (i915_gem_object_unbind(obj) == 0) {
> +             flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
>               __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +     }
>       return !READ_ONCE(obj->mm.pages);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index d89e1b8e1cc5..357eee6f907c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -866,6 +866,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
>               i915_ggtt_offset(ce->ring->vma);
>  
>       ce->state->obj->mm.dirty = true;
> +     flush_write_domain(ce->state->obj, ~0);
>  
>       i915_gem_context_get(ctx);
>  out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index cdf084ef5aae..571a5b1f4f54 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1321,6 +1321,8 @@ int intel_ring_pin(struct intel_ring *ring,
>       if (IS_ERR(addr))
>               goto err;
>  
> +     flush_write_domain(vma->obj, ~0);
> +
>       ring->vaddr = addr;
>       return 0;
>  
> @@ -1516,6 +1518,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
>                       goto err;
>  
>               ce->state->obj->mm.dirty = true;
> +             flush_write_domain(ce->state->obj, ~0);
>       }
>  
>       /* The kernel context is only used as a placeholder for flushing the
> -- 
> 2.14.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to