On Tue, Sep 11, 2018 at 04:14:39PM +0100, Chris Wilson wrote:
> Whilst reviewing another new user of stolen memory, Ville made the
> observation that we should try to ensure that all permanent allocations
> within stolen memory are clustered together at either end of the stolen
> region, in order to reduce fragmentation. In the depths of
> i915_gem_stolen.c it is not always clear what manner of allocation we
> need, so expose the drm_mm search parameter and push the decision to our
> callers.
> 
> Suggested-by: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 15 +++++++++------
>  drivers/gpu/drm/i915/i915_gem_stolen.c  | 19 ++++++++++++-------
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  3 ++-
>  drivers/gpu/drm/i915/intel_fbc.c        | 13 ++++++++-----
>  drivers/gpu/drm/i915/intel_fbdev.c      |  3 ++-
>  drivers/gpu/drm/i915/intel_overlay.c    |  3 ++-
>  drivers/gpu/drm/i915/intel_pm.c         |  3 ++-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
>  8 files changed, 38 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7ea442033a57..e68102141067 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3302,19 +3302,22 @@ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
>  
>  /* i915_gem_stolen.c */
>  int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
> -                             struct drm_mm_node *node, u64 size,
> -                             unsigned alignment);
> +                             struct drm_mm_node *node,
> +                             u64 size, unsigned int alignment,
> +                             unsigned int search);
>  int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
> -                                      struct drm_mm_node *node, u64 size,
> -                                      unsigned alignment, u64 start,
> -                                      u64 end);
> +                                      struct drm_mm_node *node,
> +                                      u64 size, unsigned int alignment,
> +                                      u64 start, u64 end,
> +                                      unsigned int search);
>  void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
>                                struct drm_mm_node *node);
>  int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
>  void i915_gem_cleanup_stolen(struct drm_device *dev);
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
> -                           resource_size_t size);
> +                           resource_size_t size,
> +                           unsigned int search);
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
>                                              resource_size_t stolen_offset,
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 53440bf87650..ed440e280dd0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -43,8 +43,10 @@
>   */
>  
>  int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
> -                                      struct drm_mm_node *node, u64 size,
> -                                      unsigned alignment, u64 start, u64 end)
> +                                      struct drm_mm_node *node,
> +                                      u64 size, unsigned int alignment,
> +                                      u64 start, u64 end,
> +                                      unsigned int search)
>  {
>       int ret;
>  
> @@ -58,7 +60,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
>       mutex_lock(&dev_priv->mm.stolen_lock);
>       ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
>                                         size, alignment, 0,
> -                                       start, end, DRM_MM_INSERT_BEST);
> +                                       start, end, search);
>       mutex_unlock(&dev_priv->mm.stolen_lock);
>  
>       return ret;
> @@ -66,10 +68,12 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
>  
>  int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
>                               struct drm_mm_node *node, u64 size,
> -                             unsigned alignment)
> +                             unsigned int alignment,
> +                             unsigned int search)
>  {
>       return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
> -                                                 alignment, 0, U64_MAX);
> +                                                 alignment, 0, U64_MAX,
> +                                                 search);
>  }
>  
>  void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
> @@ -591,7 +595,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
>  
>  struct drm_i915_gem_object *
>  i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
> -                           resource_size_t size)
> +                           resource_size_t size,
> +                           unsigned int search)
>  {
>       struct drm_i915_gem_object *obj;
>       struct drm_mm_node *stolen;
> @@ -607,7 +612,7 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
>       if (!stolen)
>               return NULL;
>  
> -     ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
> +     ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096, search);
>       if (ret) {
>               kfree(stolen);
>               return NULL;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 10cd051ba29e..c945a9fb54ae 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -499,7 +499,8 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine,
>  
>       WARN_ON(engine->scratch);
>  
> -     obj = i915_gem_object_create_stolen(engine->i915, size);
> +     obj = i915_gem_object_create_stolen(engine->i915,
> +                                         size, DRM_MM_INSERT_LOW);
>       if (!obj)
>               obj = i915_gem_object_create_internal(engine->i915, size);
>       if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index 01d1d2088f04..bf62d3d08e82 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -457,8 +457,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv,
>        */
>  
>       /* Try to over-allocate to reduce reallocations and fragmentation. */
> -     ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1,
> -                                                4096, 0, end);
> +     ret = i915_gem_stolen_insert_node_in_range(dev_priv, node,
> +                                                size <<= 1, 4096,
> +                                                0, end, DRM_MM_INSERT_BEST);
>       if (ret == 0)
>               return compression_threshold;
>  
> @@ -468,8 +469,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv,
>           (fb_cpp == 2 && compression_threshold == 2))
>               return 0;
>  
> -     ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1,
> -                                                4096, 0, end);
> +     ret = i915_gem_stolen_insert_node_in_range(dev_priv, node,
> +                                                size >>= 1, 4096,
> +                                                0, end, DRM_MM_INSERT_BEST);
>       if (ret && INTEL_GEN(dev_priv) <= 4) {
>               return 0;
>       } else if (ret) {
> @@ -513,7 +515,8 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
>                       goto err_fb;
>  
>               ret = i915_gem_stolen_insert_node(dev_priv, compressed_llb,
> -                                               4096, 4096);
> +                                               4096, 4096,
> +                                               DRM_MM_INSERT_LOW);

We seem to allocate and free the line length buffer alongside the CFB,
so should this use DRM_MM_INSERT_BEST instead?

>               if (ret)
>                       goto err_fb;
>  
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index fb2f9fce34cd..0e1ddbf1c5a0 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -140,7 +140,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>        * features. */
>       obj = NULL;
>       if (size * 2 < dev_priv->stolen_usable_size)
> -             obj = i915_gem_object_create_stolen(dev_priv, size);
> +             obj = i915_gem_object_create_stolen(dev_priv,
> +                                                 size, DRM_MM_INSERT_LOW);
>       if (obj == NULL)
>               obj = i915_gem_object_create(dev_priv, size);
>       if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 72eb7e48e8bc..b134b9cabf93 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1306,7 +1306,8 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
>       struct i915_vma *vma;
>       int err;
>  
> -     obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE);
> +     obj = i915_gem_object_create_stolen(overlay->i915,
> +                                         PAGE_SIZE, DRM_MM_INSERT_LOW);
>       if (obj == NULL)
>               obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE);
>       if (IS_ERR(obj))
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index d99e5fabe93c..5d18301ba079 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7382,7 +7382,8 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
>        * overlap with other ranges, such as the frame buffer, protected
>        * memory, or any other relevant ranges.
>        */
> -     pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
> +     pctx = i915_gem_object_create_stolen(dev_priv,
> +                                          pctx_size, DRM_MM_INSERT_LOW);

I guess there was no special requirement for the placement of this.
AFAIK the BIOS always allocates it just below the wopcm, but I suppose
it doesn't matter if we take a different approach.

>       if (!pctx) {
>               DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
>               goto out;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 472939f5c18f..e6a23a241cf3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1104,7 +1104,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
>       struct drm_i915_gem_object *obj;
>       struct i915_vma *vma;
>  
> -     obj = i915_gem_object_create_stolen(dev_priv, size);
> +     obj = i915_gem_object_create_stolen(dev_priv, size, DRM_MM_INSERT_BEST);

Should these use DRM_MM_INSERT_LOW instead? We never reallocate them, right?

>       if (!obj)
>               obj = i915_gem_object_create_internal(dev_priv, size);
>       if (IS_ERR(obj))
> -- 
> 2.19.0.rc2

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to