On 18/11/21 12:09 am, Matthew Auld wrote:
> On 16/11/2021 20:18, Arunpravin wrote:
>> - Make drm_buddy_alloc a single function to handle
>>    range allocation and non-range allocation demands
>>
>> - Implemented a new function alloc_range() which allocates
>>    the requested power-of-two block comply with range limitations
>>
>> - Moved order computation and memory alignment logic from
>>    i915 driver to drm buddy
>>
>> v2:
>>    merged below changes to keep the build unbroken
>>     - drm_buddy_alloc_range() becomes obsolete and may be removed
>>     - enable ttm range allocation (fpfn / lpfn) support in i915 driver
>>     - apply enhanced drm_buddy_alloc() function to i915 driver
>>
>> v3(Matthew Auld):
>>    - Fix alignment issues and remove unnecessary list_empty check
>>    - add more validation checks for input arguments
>>    - make alloc_range() block allocations as bottom-up
>>    - optimize order computation logic
>>    - replace uint64_t with u64, which is preferred in the kernel
>>
>> Signed-off-by: Arunpravin <arunpravin.paneersel...@amd.com>
>> ---
>>   drivers/gpu/drm/drm_buddy.c                   | 259 ++++++++++--------
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  69 ++---
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
>>   include/drm/drm_buddy.h                       |  22 +-
>>   4 files changed, 203 insertions(+), 149 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index 39eb1d224bec..c9b18a29f8d1 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -274,63 +274,6 @@ void drm_buddy_free_list(struct drm_buddy_mm *mm, 
>> struct list_head *objects)
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>   
>> -/**
>> - * drm_buddy_alloc - allocate power-of-two blocks
>> - *
>> - * @mm: DRM buddy manager to allocate from
>> - * @order: size of the allocation
>> - *
>> - * The order value here translates to:
>> - *
>> - * 0 = 2^0 * mm->chunk_size
>> - * 1 = 2^1 * mm->chunk_size
>> - * 2 = 2^2 * mm->chunk_size
>> - *
>> - * Returns:
>> - * allocated ptr to the &drm_buddy_block on success
>> - */
>> -struct drm_buddy_block *
>> -drm_buddy_alloc(struct drm_buddy_mm *mm, unsigned int order)
>> -{
>> -    struct drm_buddy_block *block = NULL;
>> -    unsigned int i;
>> -    int err;
>> -
>> -    for (i = order; i <= mm->max_order; ++i) {
>> -            block = list_first_entry_or_null(&mm->free_list[i],
>> -                                             struct drm_buddy_block,
>> -                                             link);
>> -            if (block)
>> -                    break;
>> -    }
>> -
>> -    if (!block)
>> -            return ERR_PTR(-ENOSPC);
>> -
>> -    BUG_ON(!drm_buddy_block_is_free(block));
>> -
>> -    while (i != order) {
>> -            err = split_block(mm, block);
>> -            if (unlikely(err))
>> -                    goto out_free;
>> -
>> -            /* Go low */
>> -            block = block->left;
>> -            i--;
>> -    }
>> -
>> -    mark_allocated(block);
>> -    mm->avail -= drm_buddy_block_size(mm, block);
>> -    kmemleak_update_trace(block);
>> -    return block;
>> -
>> -out_free:
>> -    if (i != order)
>> -            __drm_buddy_free(mm, block);
>> -    return ERR_PTR(err);
>> -}
>> -EXPORT_SYMBOL(drm_buddy_alloc);
>> -
>>   static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>>   {
>>      return s1 <= e2 && e1 >= s2;
>> @@ -341,52 +284,22 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, 
>> u64 e2)
>>      return s1 <= s2 && e1 >= e2;
>>   }
>>   
>> -/**
>> - * drm_buddy_alloc_range - allocate range
>> - *
>> - * @mm: DRM buddy manager to allocate from
>> - * @blocks: output list head to add allocated blocks
>> - * @start: start of the allowed range for this block
>> - * @size: size of the allocation
>> - *
>> - * Intended for pre-allocating portions of the address space, for example to
>> - * reserve a block for the initial framebuffer or similar, hence the 
>> expectation
>> - * here is that drm_buddy_alloc() is still the main vehicle for
>> - * allocations, so if that's not the case then the drm_mm range allocator is
>> - * probably a much better fit, and so you should probably go use that 
>> instead.
>> - *
>> - * Note that it's safe to chain together multiple alloc_ranges
>> - * with the same blocks list
>> - *
>> - * Returns:
>> - * 0 on success, error code on failure.
>> - */
>> -int drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>> -                      struct list_head *blocks,
>> -                      u64 start, u64 size)
>> +static struct drm_buddy_block *
>> +alloc_range(struct drm_buddy_mm *mm,
>> +        u64 start, u64 end,
>> +        unsigned int order)
>>   {
>>      struct drm_buddy_block *block;
>>      struct drm_buddy_block *buddy;
>> -    LIST_HEAD(allocated);
>>      LIST_HEAD(dfs);
>> -    u64 end;
>>      int err;
>>      int i;
>>   
>> -    if (size < mm->chunk_size)
>> -            return -EINVAL;
>> -
>> -    if (!IS_ALIGNED(size | start, mm->chunk_size))
>> -            return -EINVAL;
>> -
>> -    if (range_overflows(start, size, mm->size))
>> -            return -EINVAL;
>> +    end = end - 1;
>>   
>>      for (i = 0; i < mm->n_roots; ++i)
>>              list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>   
>> -    end = start + size - 1;
>> -
>>      do {
>>              u64 block_start;
>>              u64 block_end;
>> @@ -399,26 +312,26 @@ int drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>>   
>>              list_del(&block->tmp_link);
>>   
>> +            if (drm_buddy_block_order(block) < order)
>> +                    continue;
>> +
>>              block_start = drm_buddy_block_offset(block);
>>              block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>>   
>>              if (!overlaps(start, end, block_start, block_end))
>>                      continue;
>>   
>> -            if (drm_buddy_block_is_allocated(block)) {
>> -                    err = -ENOSPC;
>> -                    goto err_free;
>> -            }
>> +            if (drm_buddy_block_is_allocated(block))
>> +                    continue;
>>   
>> -            if (contains(start, end, block_start, block_end)) {
>> -                    if (!drm_buddy_block_is_free(block)) {
>> -                            err = -ENOSPC;
>> -                            goto err_free;
>> -                    }
>> +            if (contains(start, end, block_start, block_end) &&
>> +                order == drm_buddy_block_order(block)) {
>> +                    /*
>> +                     * Find the free block within the range.
>> +                     */
>> +                    if (drm_buddy_block_is_free(block))
>> +                            return block;
> 
> AFAIK this is still randomly ordered when dealing with actual range 
> allocation. Maybe I'm missing something, but it looks like we are just 
> repeatedly picking the largest order that fits in the remaining number 
> of pages, but it might be the case that the smallest block is for 
> example at the start of the range, depending on the layout of the 
> address space, which would then only get allocated last.
> 
> I think I mentioned this before, but most of the block_trim() looks to 
> already do what we want for this case, and could maybe be factored out, 
> since that is also just another range allocation, except we start with 
> one root node for our "dfs" search.
> 
> Maybe something like:
> 
> drm_buddy_trim(block, new_size...)
> {
>      ....
>      mark_free(block);
>      list_add(&block->tmp_link, &dfs);
>      err = __alloc_range(mm, &dfs, block->start, new_size, ...);
>      if (err)
>      ....
> }
> 
> __drm_buddy_alloc_range(start, size, blocks)
> {
>      for_each_root()
>          list_add(&root->tmp_link, &dfs);
> 
>      err = __alloc_range(mm, &dfs, start, size, ...);
>      if (err)
>      ....
> }
> 
> drm_buddy_alloc(...)
> {
>      /* Actual range allocation */
>      if (start + size == end)
>              return __drm_buddy_alloc_range(...);
>     ....
> }
> 
> And then have something different for the alloc_range_bias()?
> 
Yes, we will have a common function __alloc_range() which does the range
allocation for block_trim() and __drm_buddy_alloc_range() (actual range
allocation).

And, I understood that we keep the alloc_range() implementation for end
bias allocations?

>>   
>> -                    mark_allocated(block);
>> -                    mm->avail -= drm_buddy_block_size(mm, block);
>> -                    list_add_tail(&block->link, &allocated);
>>                      continue;
>>              }
>>   
>> @@ -432,8 +345,7 @@ int drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>>              list_add(&block->left->tmp_link, &dfs);
>>      } while (1);
>>   
>> -    list_splice_tail(&allocated, blocks);
>> -    return 0;
>> +    return ERR_PTR(-ENOSPC);
>>   
>>   err_undo:
>>      /*
>> @@ -446,12 +358,145 @@ int drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>>          (drm_buddy_block_is_free(block) &&
>>           drm_buddy_block_is_free(buddy)))
>>              __drm_buddy_free(mm, block);
>> +    return ERR_PTR(err);
>> +}
>> +
>> +static struct drm_buddy_block *
>> +alloc_from_freelist(struct drm_buddy_mm *mm,
>> +                unsigned int order,
>> +                unsigned long flags)
>> +{
>> +    struct drm_buddy_block *block = NULL;
>> +    unsigned int i;
>> +    int err;
>> +
>> +    for (i = order; i <= mm->max_order; ++i) {
>> +            block = list_first_entry_or_null(&mm->free_list[i],
>> +                                             struct drm_buddy_block,
>> +                                             link);
>> +            if (block)
>> +                    break;
>> +    }
>> +
>> +    if (!block)
>> +            return ERR_PTR(-ENOSPC);
>> +
>> +    BUG_ON(!drm_buddy_block_is_free(block));
>> +
>> +    while (i != order) {
>> +            err = split_block(mm, block);
>> +            if (unlikely(err))
>> +                    goto err_undo;
>> +
>> +            block = block->right;
>> +            i--;
>> +    }
>> +    return block;
>> +
>> +err_undo:
>> +    if (i != order)
>> +            __drm_buddy_free(mm, block);
>> +    return ERR_PTR(err);
>> +}
>> +
>> +/**
>> + * drm_buddy_alloc - allocate power-of-two blocks
>> + *
>> + * @mm: DRM buddy manager to allocate from
>> + * @start: start of the allowed range for this block
>> + * @end: end of the allowed range for this block
>> + * @size: size of the allocation
>> + * @min_page_size: alignment of the allocation
>> + * @blocks: output list head to add allocated blocks
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>> + *
>> + * alloc_range() called on range limitations, which traverses
>> + * the tree and returns the desired block.
>> + *
>> + * alloc_from_freelist() called when *no* range restrictions
>> + * are enforced, which picks the block from the freelist.
>> + *
>> + * blocks are allocated in order, the order value here translates to:
>> + *
>> + * 0 = 2^0 * mm->chunk_size
>> + * 1 = 2^1 * mm->chunk_size
>> + * 2 = 2^2 * mm->chunk_size
>> + *
>> + * Returns:
>> + * 0 on success, error code on failure.
>> + */
>> +int drm_buddy_alloc(struct drm_buddy_mm *mm,
>> +                u64 start, u64 end, u64 size,
>> +                u64 min_page_size,
>> +                struct list_head *blocks,
>> +                unsigned long flags)
>> +{
>> +    struct drm_buddy_block *block = NULL;
>> +    unsigned int min_order, order;
>> +    unsigned long pages;
>> +    LIST_HEAD(allocated);
>> +    int err;
>> +
>> +    if (size < mm->chunk_size)
>> +            return -EINVAL;
>> +
>> +    if (min_page_size < mm->chunk_size)
>> +            return -EINVAL;
>> +
>> +    if (!is_power_of_2(min_page_size))
>> +            return -EINVAL;
>> +
>> +    if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>> +            return -EINVAL;
>> +
>> +    if (check_range_overflow(start, end, size, mm->size))
>> +            return -EINVAL;
>> +
>> +    pages = size >> ilog2(mm->chunk_size);
>> +    order = fls(pages) - 1;
>> +    min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>> +    do {
>> +            order = min(order, (unsigned int)fls(pages) - 1);
>> +            BUG_ON(order > mm->max_order);
>> +            BUG_ON(order < min_order);
>> +
>> +            do {
>> +                    if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>> +                            /* Allocate traversing within the range */
>> +                            block = alloc_range(mm, start, end, order);
>> +                    else
>> +                            /* Allocate from freelist */
>> +                            block = alloc_from_freelist(mm, order, flags);
>> +
>> +                    if (!IS_ERR(block))
>> +                            break;
>> +
>> +                    if (order-- == min_order) {
>> +                            err = -ENOSPC;
>> +                            goto err_free;
>> +                    }
>> +            } while (1);
>> +
>> +            mark_allocated(block);
>> +            mm->avail -= drm_buddy_block_size(mm, block);
>> +            kmemleak_update_trace(block);
>> +            list_add_tail(&block->link, &allocated);
>> +
>> +            pages -= BIT(order);
>> +
>> +            if (!pages)
>> +                    break;
>> +    } while (1);
>> +
>> +    list_splice_tail(&allocated, blocks);
>> +    return 0;
>>   
>>   err_free:
>>      drm_buddy_free_list(mm, &allocated);
>>      return err;
>>   }
>> -EXPORT_SYMBOL(drm_buddy_alloc_range);
>> +EXPORT_SYMBOL(drm_buddy_alloc);
>>   
>>   /**
>>    * drm_buddy_block_print - print block information
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> index c4b70cb8c248..9e3d130c3f42 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>      struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>      struct i915_ttm_buddy_resource *bman_res;
>>      struct drm_buddy_mm *mm = &bman->mm;
>> -    unsigned long n_pages;
>> -    unsigned int min_order;
>> +    unsigned long n_pages, lpfn;
>>      u64 min_page_size;
>>      u64 size;
>>      int err;
>>   
>> -    GEM_BUG_ON(place->fpfn || place->lpfn);
>> +    lpfn = place->lpfn;
>> +    if (!lpfn)
>> +            lpfn = man->size;
>>   
>>      bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
>>      if (!bman_res)
>> @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>      INIT_LIST_HEAD(&bman_res->blocks);
>>      bman_res->mm = mm;
>>   
>> +    if (place->fpfn || lpfn != man->size)
>> +            bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>      GEM_BUG_ON(!bman_res->base.num_pages);
>>      size = bman_res->base.num_pages << PAGE_SHIFT;
>>   
>> @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>              min_page_size = bo->page_alignment << PAGE_SHIFT;
>>   
>>      GEM_BUG_ON(min_page_size < mm->chunk_size);
>> -    min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>> +
>>      if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>> +            unsigned long pages;
>> +
>>              size = roundup_pow_of_two(size);
>> -            min_order = ilog2(size) - ilog2(mm->chunk_size);
>> +            min_page_size = size;
>> +
>> +            pages = size >> ilog2(mm->chunk_size);
>> +            if (pages > lpfn)
>> +                    lpfn = pages;
>>      }
>>   
>>      if (size > mm->size) {
>> @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct 
>> ttm_resource_manager *man,
>>   
>>      n_pages = size >> ilog2(mm->chunk_size);
>>   
>> -    do {
>> -            struct drm_buddy_block *block;
>> -            unsigned int order;
>> -
>> -            order = fls(n_pages) - 1;
>> -            GEM_BUG_ON(order > mm->max_order);
>> -            GEM_BUG_ON(order < min_order);
>> -
>> -            do {
>> -                    mutex_lock(&bman->lock);
>> -                    block = drm_buddy_alloc(mm, order);
>> -                    mutex_unlock(&bman->lock);
>> -                    if (!IS_ERR(block))
>> -                            break;
>> -
>> -                    if (order-- == min_order) {
>> -                            err = -ENOSPC;
>> -                            goto err_free_blocks;
>> -                    }
>> -            } while (1);
>> -
>> -            n_pages -= BIT(order);
>> -
>> -            list_add_tail(&block->link, &bman_res->blocks);
>> -
>> -            if (!n_pages)
>> -                    break;
>> -    } while (1);
>> +    mutex_lock(&bman->lock);
>> +    err = drm_buddy_alloc(mm, (u64)place->fpfn << PAGE_SHIFT,
>> +                    (u64)place->lpfn << PAGE_SHIFT,
>> +                    (u64)n_pages << PAGE_SHIFT,
>> +                     min_page_size,
>> +                     &bman_res->blocks,
>> +                     bman_res->flags);
>> +    mutex_unlock(&bman->lock);
>> +    if (unlikely(err))
>> +            goto err_free_blocks;
>>   
>>      *res = &bman_res->base;
>>      return 0;
>> @@ -266,10 +258,19 @@ int i915_ttm_buddy_man_reserve(struct 
>> ttm_resource_manager *man,
>>   {
>>      struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>      struct drm_buddy_mm *mm = &bman->mm;
>> +    unsigned long flags = 0;
>> +    u64 min_size;
>>      int ret;
>>   
>> +    min_size = size;
>> +    flags |= DRM_BUDDY_RANGE_ALLOCATION;
>> +
>>      mutex_lock(&bman->lock);
>> -    ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
>> +    ret = drm_buddy_alloc(mm, start,
>> +                    start + size,
>> +                    size, min_size,
> 
> min_page_size = min_size = size. I don't think we want that here. Also 
> the size here might not be power of two. I guess we need to use 
> chunk_size here instead?
ok, we will use chunk_size here.
> 
>> +                    &bman->reserved,
>> +                    flags);
>>      mutex_unlock(&bman->lock);
>>   
>>      return ret;
>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h 
>> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> index fa644b512c2e..5ba490875f66 100644
>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>> @@ -20,6 +20,7 @@ struct drm_buddy_mm;
>>    *
>>    * @base: struct ttm_resource base class we extend
>>    * @blocks: the list of struct i915_buddy_block for this 
>> resource/allocation
>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>>    * @mm: the struct i915_buddy_mm for this resource
>>    *
>>    * Extends the struct ttm_resource to manage an address space allocation 
>> with
>> @@ -28,6 +29,7 @@ struct drm_buddy_mm;
>>   struct i915_ttm_buddy_resource {
>>      struct ttm_resource base;
>>      struct list_head blocks;
>> +    unsigned long flags;
>>      struct drm_buddy_mm *mm;
>>   };
>>   
>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>> index 5ce3fc702f80..2ec3cbc9d5d7 100644
>> --- a/include/drm/drm_buddy.h
>> +++ b/include/drm/drm_buddy.h
>> @@ -13,15 +13,22 @@
>>   
>>   #include <drm/drm_print.h>
>>   
>> -#define range_overflows(start, size, max) ({ \
>> +#define check_range_overflow(start, end, size, max) ({ \
>>      typeof(start) start__ = (start); \
>> +    typeof(end) end__ = (end);\
>>      typeof(size) size__ = (size); \
>>      typeof(max) max__ = (max); \
>>      (void)(&start__ == &size__); \
>>      (void)(&start__ == &max__); \
>> -    start__ >= max__ || size__ > max__ - start__; \
>> +    (void)(&start__ == &end__); \
>> +    (void)(&end__ == &size__); \
>> +    (void)(&end__ == &max__); \
>> +    start__ >= max__ || end__ > max__ || \
>> +    size__ > end__ - start__; \
>>   })
>>   
>> +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
>> +
>>   struct drm_buddy_block {
>>   #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
>>   #define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
>> @@ -131,12 +138,11 @@ int drm_buddy_init(struct drm_buddy_mm *mm, u64 size, 
>> u64 chunk_size);
>>   
>>   void drm_buddy_fini(struct drm_buddy_mm *mm);
>>   
>> -struct drm_buddy_block *
>> -drm_buddy_alloc(struct drm_buddy_mm *mm, unsigned int order);
>> -
>> -int drm_buddy_alloc_range(struct drm_buddy_mm *mm,
>> -                      struct list_head *blocks,
>> -                      u64 start, u64 size);
>> +int drm_buddy_alloc(struct drm_buddy_mm *mm,
>> +                u64 start, u64 end, u64 size,
>> +                u64 min_page_size,
>> +                struct list_head *blocks,
>> +                unsigned long flags);
>>   
>>   void drm_buddy_free(struct drm_buddy_mm *mm, struct drm_buddy_block 
>> *block);
>>   
>>

Reply via email to