On Tue, May 20, 2014 at 08:28:43AM +0100, Chris Wilson wrote:
> Before the process killer is invoked, oom-notifiers are executed for one
> last try at recovering pages. We can hook into this notifier chain to make
> sure that everything that can be purged is purged from our page lists, and
> to give a summary of how much memory is still pinned by the GPU in the
> case of an oom.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=72742
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Tested-by: lu hua <huax...@intel.com>

Dave, can you please have a look at this and ack it from a core mm
perspective? I'd just like your ack since you've worked together with Chris
on these issues.

Thanks, Daniel
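
For context on the mechanism (not part of the patch): callbacks registered
with register_oom_notifier() are run from out_of_memory() just before a
victim process is chosen, and the void *ptr argument points at a running
count of pages freed by the chain; if that count ends up non-zero the OOM
killer backs off and the allocation is retried. A bare-bones sketch of the
pattern, with hypothetical my_dev/my_reclaim_pages placeholders rather than
the actual i915 code:

/*
 * Minimal sketch of the oom-notifier pattern. The my_dev and
 * my_reclaim_pages names are hypothetical placeholders, not i915 code.
 */
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/oom.h>

struct my_dev {
	struct notifier_block oom_notifier;
};

/* Hypothetical reclaim hook: a real driver would drop its page caches here. */
static unsigned long my_reclaim_pages(struct my_dev *dev)
{
	return 0;
}

static int my_oom_notify(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct my_dev *dev = container_of(nb, struct my_dev, oom_notifier);

	/*
	 * ptr points at the core's running count of pages freed by the
	 * notifier chain; if it ends up non-zero, out_of_memory() returns
	 * without killing anything and the allocation is retried.
	 */
	*(unsigned long *)ptr += my_reclaim_pages(dev);
	return NOTIFY_DONE;
}

static void my_dev_init(struct my_dev *dev)
{
	dev->oom_notifier.notifier_call = my_oom_notify;
	register_oom_notifier(&dev->oom_notifier);
}

static void my_dev_fini(struct my_dev *dev)
{
	unregister_oom_notifier(&dev->oom_notifier);
}

Note that the i915 hunk below only shows the register side, in
i915_gem_load(); unregister_oom_notifier() would pair with it on teardown.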

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_gem.c | 74 +++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 72 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e69cb51de738..389204d44431 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1057,6 +1057,7 @@ struct i915_gem_mm {
>       /** PPGTT used for aliasing the PPGTT with the GTT */
>       struct i915_hw_ppgtt *aliasing_ppgtt;
>  
> +     struct notifier_block oom_notifier;
>       struct shrinker shrinker;
>       bool shrinker_no_lock_stealing;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ea93898d51bc..dc8e1ef50bfb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -31,6 +31,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include <linux/oom.h>
>  #include <linux/shmem_fs.h>
>  #include <linux/slab.h>
>  #include <linux/swap.h>
> @@ -61,6 +62,9 @@ static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker,
>                                            struct shrink_control *sc);
>  static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker,
>                                           struct shrink_control *sc);
> +static int i915_gem_shrinker_oom(struct notifier_block *nb,
> +                              unsigned long event,
> +                              void *ptr);
>  static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
>  static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
>  
> @@ -4759,6 +4763,9 @@ i915_gem_load(struct drm_device *dev)
>       dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
>       dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
>       register_shrinker(&dev_priv->mm.shrinker);
> +
> +     dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
> +     register_oom_notifier(&dev_priv->mm.oom_notifier);
>  }
>  
>  /*
> @@ -5154,15 +5161,76 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>               freed += __i915_gem_shrink(dev_priv,
>                                          sc->nr_to_scan - freed,
>                                          false);
> -     if (freed < sc->nr_to_scan)
> -             freed += i915_gem_shrink_all(dev_priv);
> -
>       if (unlock)
>               mutex_unlock(&dev->struct_mutex);
>  
>       return freed;
>  }
>  
> +static int
> +i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
> +{
> +     struct drm_i915_private *dev_priv =
> +             container_of(nb, struct drm_i915_private, mm.oom_notifier);
> +     struct drm_device *dev = dev_priv->dev;
> +     struct drm_i915_gem_object *obj;
> +     unsigned long timeout = msecs_to_jiffies(5000) + 1;
> +     unsigned long pinned, bound, unbound, freed;
> +     bool was_interruptible;
> +     bool unlock;
> +
> +     while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout)
> +             schedule_timeout_killable(1);
> +     if (timeout == 0) {
> +             pr_err("Unable to purge GPU memory due to lock contention.\n");
> +             return NOTIFY_DONE;
> +     }
> +
> +     was_interruptible = dev_priv->mm.interruptible;
> +     dev_priv->mm.interruptible = false;
> +
> +     freed = i915_gem_shrink_all(dev_priv);
> +
> +     dev_priv->mm.interruptible = was_interruptible;
> +
> +     /* Because we may be allocating inside our own driver, we cannot
> +      * assert that there are no objects with pinned pages that are not
> +      * being pointed to by hardware.
> +      */
> +     unbound = bound = pinned = 0;
> +     list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
> +             if (!obj->base.filp) /* not backed by a freeable object */
> +                     continue;
> +
> +             if (obj->pages_pin_count)
> +                     pinned += obj->base.size;
> +             else
> +                     unbound += obj->base.size;
> +     }
> +     list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +             if (!obj->base.filp)
> +                     continue;
> +
> +             if (obj->pages_pin_count)
> +                     pinned += obj->base.size;
> +             else
> +                     bound += obj->base.size;
> +     }
> +
> +     if (unlock)
> +             mutex_unlock(&dev->struct_mutex);
> +
> +     pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n",
> +             freed, pinned);
> +     if (unbound | bound)
> +             pr_err("%lu and %lu bytes still available in the "
> +                    "bound and unbound GPU page lists.\n",
> +                    bound, unbound);
> +
> +     *(unsigned long *)ptr += freed;
> +     return NOTIFY_DONE;
> +}
> +
>  struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
>  {
>       struct i915_vma *vma;
> -- 
> 2.0.0.rc2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
