Re: [Intel-gfx] [PATCH v3] drm/i915: Keep the device awake whilst in the GTT domain

2017-09-04 Thread Daniel Vetter
On Fri, Sep 01, 2017 at 01:16:06PM +0100, Chris Wilson wrote:
> Since runtime suspend is very harsh on GTT mmappings (they all get
> zapped on suspend) keep the device awake while the buffer remains in
> the GTT domain. However, userspace can control the domain and
> although there is a soft contract that writes must be flushed (for e.g.
> flushing scanouts and fbc), we are intentionally lax with respect to read
> domains, allowing them to persist for as long as is feasible.
> 
> We acquire a wakeref when using the buffer in the GEM domain so that all
> subsequent operations on that object are fast, trying to avoid
> suspending while the GTT is still in active use by userspace.  To ensure
> that the device can eventually suspend, we install a timer and expire the
> GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
> to estimate the cost of restoring actively used GTT mmapping.

Please tag with for-CI or something like that when throwing patches at
the shards :-) At least that's what I assuming given lack of sob and
revision of changes ...

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  |   2 +
>  drivers/gpu/drm/i915/i915_drv.h  |  11 
>  drivers/gpu/drm/i915/i915_gem.c  | 103 
> ---
>  drivers/gpu/drm/i915/i915_gem_object.h   |   5 ++
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |   4 +-
>  drivers/gpu/drm/i915/intel_lrc.c |   1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +
>  7 files changed, 118 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..dbb07612aa5a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, 
> void *unused)
>   if (!HAS_RUNTIME_PM(dev_priv))
>   seq_puts(m, "Runtime power management not supported\n");
>  
> + seq_printf(m, "GTT wakeref count: %d\n",
> +atomic_read(_priv->mm.gtt_wakeref.count));
>   seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
>   seq_printf(m, "IRQs disabled: %s\n",
>  yesno(!intel_irqs_enabled(dev_priv)));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0383e879a315..14dcf6614f3c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,17 @@ struct i915_gem_mm {
>*/
>   struct list_head userfault_list;
>  
> + /* List of all objects in gtt domain, holding a wakeref.
> +  * The list is reaped periodically, and protected by its own mutex.
> +  */
> + struct {
> + struct mutex lock;
> + struct list_head list;
> + atomic_t count;
> +
> + struct delayed_work work;
> + } gtt_wakeref;
> +
>   /**
>* List of objects which are pending destruction.
>*/
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e4cc08bc518c..09baf80889e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object 
> *obj)
>  
>  static void __start_cpu_write(struct drm_i915_gem_object *obj)
>  {
> + GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
>   obj->base.read_domains = I915_GEM_DOMAIN_CPU;
>   obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>   if (cpu_write_needs_clflush(obj))
> @@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, 
> unsigned int domain)
>   obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
>  }
>  
> -static void
> +void
>  flush_write_domain(struct drm_i915_gem_object *obj, unsigned int 
> flush_domains)
>  {
>   struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>  
> + lockdep_assert_held(_priv->drm.struct_mutex);
> +
>   if (!(obj->base.write_domain & flush_domains))
>   return;
>  
> @@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, 
> unsigned int flush_domains)
>  
>   switch (obj->base.write_domain) {
>   case I915_GEM_DOMAIN_GTT:
> - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> - intel_runtime_pm_get(dev_priv);
> - spin_lock_irq(_priv->uncore.lock);
> - 
> POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
> - spin_unlock_irq(_priv->uncore.lock);
> - intel_runtime_pm_put(dev_priv);
> - }
> + mutex_lock(_priv->mm.gtt_wakeref.lock);
> + if (!list_empty(>mm.gtt_wakeref_link)) {
> + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
> + spin_lock_irq(_priv->uncore.lock);
> + 
> 

[Intel-gfx] [PATCH v3] drm/i915: Keep the device awake whilst in the GTT domain

2017-09-01 Thread Chris Wilson
Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend) keep the device awake while the buffer remains in
the GTT domain. However, userspace can control the domain and
although there is a soft contract that writes must be flushed (for e.g.
flushing scanouts and fbc), we are intentionally lax with respect to read
domains, allowing them to persist for as long as is feasible.

We acquire a wakeref when using the buffer in the GEM domain so that all
subsequent operations on that object are fast, trying to avoid
suspending while the GTT is still in active use by userspace.  To ensure
that the device can eventually suspend, we install a timer and expire the
GTT wakeref. So in effect we have just a fancy pm autosuspend that tries
to estimate the cost of restoring actively used GTT mmapping.
---
 drivers/gpu/drm/i915/i915_debugfs.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h  |  11 
 drivers/gpu/drm/i915/i915_gem.c  | 103 ---
 drivers/gpu/drm/i915/i915_gem_object.h   |   5 ++
 drivers/gpu/drm/i915/i915_gem_shrinker.c |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c  |   3 +
 7 files changed, 118 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..dbb07612aa5a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2809,6 +2809,8 @@ static int i915_runtime_pm_status(struct seq_file *m, 
void *unused)
if (!HAS_RUNTIME_PM(dev_priv))
seq_puts(m, "Runtime power management not supported\n");
 
+   seq_printf(m, "GTT wakeref count: %d\n",
+  atomic_read(_priv->mm.gtt_wakeref.count));
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
seq_printf(m, "IRQs disabled: %s\n",
   yesno(!intel_irqs_enabled(dev_priv)));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0383e879a315..14dcf6614f3c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,17 @@ struct i915_gem_mm {
 */
struct list_head userfault_list;
 
+   /* List of all objects in gtt domain, holding a wakeref.
+* The list is reaped periodically, and protected by its own mutex.
+*/
+   struct {
+   struct mutex lock;
+   struct list_head list;
+   atomic_t count;
+
+   struct delayed_work work;
+   } gtt_wakeref;
+
/**
 * List of objects which are pending destruction.
 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4cc08bc518c..09baf80889e8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object 
*obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+   GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned 
int domain)
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
+void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+   lockdep_assert_held(_priv->drm.struct_mutex);
+
if (!(obj->base.write_domain & flush_domains))
return;
 
@@ -694,16 +697,19 @@ flush_write_domain(struct drm_i915_gem_object *obj, 
unsigned int flush_domains)
 
switch (obj->base.write_domain) {
case I915_GEM_DOMAIN_GTT:
-   if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-   intel_runtime_pm_get(dev_priv);
-   spin_lock_irq(_priv->uncore.lock);
-   
POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-   spin_unlock_irq(_priv->uncore.lock);
-   intel_runtime_pm_put(dev_priv);
-   }
+   mutex_lock(_priv->mm.gtt_wakeref.lock);
+   if (!list_empty(>mm.gtt_wakeref_link)) {
+   if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+   spin_lock_irq(_priv->uncore.lock);
+   
POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+   spin_unlock_irq(_priv->uncore.lock);
+   }
 
-   intel_fb_obj_flush(obj,
-  fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+   intel_fb_obj_flush(obj, fb_write_origin(obj, 
I915_GEM_DOMAIN_GTT));
+
+ 

[Intel-gfx] [PATCH v3] drm/i915: Keep the device awake whilst in the GTT domain

2017-08-31 Thread Chris Wilson
Since runtime suspend is very harsh on GTT mmappings (they all get
zapped on suspend) keep the device awake while the buffer remains in
the GTT domain. However, userspace can control the domain and
although there is a soft contract that writes must be flushed (for e.g.
flushing scanouts and fbc), we are intentionally lax with respect to read
domains, allowing them to persist for as long as is feasible. To ensure
that the device can eventually suspend, we install a timer. So in effect
we have just a fancy pm autosuspend that tries to estimate the cost of
restoring actively used GTT mmappings.
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  2 +
 drivers/gpu/drm/i915/i915_drv.h  |  8 +++
 drivers/gpu/drm/i915/i915_gem.c  | 88 
 drivers/gpu/drm/i915/i915_gem_object.h   |  5 ++
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++
 7 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..1432392fb2f8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2812,6 +2812,8 @@ static int i915_runtime_pm_status(struct seq_file *m, 
void *unused)
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
seq_printf(m, "IRQs disabled: %s\n",
   yesno(!intel_irqs_enabled(dev_priv)));
+   seq_printf(m, "GTT wakeref count: %d\n",
+  atomic_read(_priv->mm.wakeref_count));
 #ifdef CONFIG_PM
seq_printf(m, "Usage count: %d\n",
   atomic_read(_priv->drm.dev->power.usage_count));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0383e879a315..799ab57cf040 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,14 @@ struct i915_gem_mm {
 */
struct list_head userfault_list;
 
+   /** List of all objects in gtt domain, holding a wakeref.
+* The list is reaped periodically.
+*/
+   struct list_head wakeref_list;
+   struct timer_list wakeref_timer;
+   spinlock_t wakeref_lock;
+   atomic_t wakeref_count;
+
/**
 * List of objects which are pending destruction.
 */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4cc08bc518c..659fda483f7e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -235,6 +235,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object 
*obj)
 
 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 {
+   GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
if (cpu_write_needs_clflush(obj))
@@ -667,11 +668,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned 
int domain)
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
-static void
+void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+   lockdep_assert_held(_priv->drm.struct_mutex);
+
if (!(obj->base.write_domain & flush_domains))
return;
 
@@ -694,16 +697,20 @@ flush_write_domain(struct drm_i915_gem_object *obj, 
unsigned int flush_domains)
 
switch (obj->base.write_domain) {
case I915_GEM_DOMAIN_GTT:
-   if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-   intel_runtime_pm_get(dev_priv);
-   spin_lock_irq(_priv->uncore.lock);
-   
POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-   spin_unlock_irq(_priv->uncore.lock);
-   intel_runtime_pm_put(dev_priv);
-   }
+   spin_lock_bh(_priv->mm.wakeref_lock);
+   if (!list_empty(>mm.wakeref_link)) {
+   if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+   spin_lock_irq(_priv->uncore.lock);
+   
POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+   spin_unlock_irq(_priv->uncore.lock);
+   }
+
+   intel_fb_obj_flush(obj,
+   fb_write_origin(obj, 
I915_GEM_DOMAIN_GTT));
 
-   intel_fb_obj_flush(obj,
-  fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+   list_del_init(>mm.wakeref_link);
+   }
+   spin_unlock_bh(_priv->mm.wakeref_lock);
break;
 
case I915_GEM_DOMAIN_CPU:
@@ -3425,6 +3432,7 @@ static void __i915_gem_object_flush_for_display(struct