Large alignment requests previously forced the buddy allocator to search by alignment order, which often caused higher-order free blocks to be split even when a suitably aligned smaller region already existed within them. This led to excessive fragmentation, especially for workloads requesting small sizes with large alignment constraints.
This change prioritizes the requested allocation size during the search and
uses an augmented RB-tree field (subtree_max_alignment) to efficiently locate
free blocks that satisfy both size and offset-alignment requirements. As a
result, the allocator can directly select an aligned sub-region without
splitting larger blocks unnecessarily.

A practical example is the VKCTS test
dEQP-VK.memory.allocation.basic.size_8KiB.reverse.count_4000, which repeatedly
allocates 8 KiB buffers with a 256 KiB alignment. Previously, such allocations
caused large blocks to be split aggressively, despite smaller aligned regions
being sufficient. With this change, those aligned regions are reused directly,
significantly reducing fragmentation.

This improvement is visible in the amdgpu VRAM buddy allocator state
(/sys/kernel/debug/dri/1/amdgpu_vram_mm). After the change, higher-order
blocks are preserved and the number of low-order fragments is substantially
reduced.

Before:
order- 5 free: 1936 MiB, blocks: 15490
order- 4 free: 967 MiB, blocks: 15486
order- 3 free: 483 MiB, blocks: 15485
order- 2 free: 241 MiB, blocks: 15486
order- 1 free: 241 MiB, blocks: 30948

After:
order- 5 free: 493 MiB, blocks: 3949
order- 4 free: 246 MiB, blocks: 3949
order- 3 free: 123 MiB, blocks: 3947
order- 2 free: 61 MiB, blocks: 3949
order- 1 free: 61 MiB, blocks: 7878

By avoiding unnecessary splits, this change improves allocator efficiency and
helps maintain larger contiguous free regions under heavy offset-aligned
allocation workloads.

Signed-off-by: Arunpravin Paneer Selvam <[email protected]>
Suggested-by: Christian König <[email protected]>
---
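A note for reviewers (illustration only, not part of the patch): below is a
minimal user-space sketch of the offset-alignment rule the new search path is
built on. can_offset_align() mirrors drm_buddy_can_offset_align() from the
patch; offset_alignment(), the sample offsets and the main() harness are made
up for demonstration, and __builtin_ctzll() stands in for the kernel's __ffs().

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /*
   * A free block at byte offset `offset` can serve any power-of-two alignment
   * up to 1 << ctz(offset); the patch derives the same value with __ffs().
   * Offset 0 is treated here as maximally aligned.
   */
  static unsigned int offset_alignment(uint64_t offset)
  {
          return offset ? (unsigned int)__builtin_ctzll(offset) : 63;
  }

  /* Only small power-of-two sizes take the offset-aligned path. */
  static bool can_offset_align(uint64_t size, uint64_t min_block_size)
  {
          return size < min_block_size && size && !(size & (size - 1));
  }

  int main(void)
  {
          uint64_t size = 8ull << 10;             /* 8 KiB request */
          uint64_t min_block_size = 256ull << 10; /* used as a 256 KiB alignment */
          unsigned int want = offset_alignment(min_block_size); /* order 18 */
          /* hypothetical free-block offsets: 256 KiB, 288 KiB, 512 KiB, 648 KiB */
          uint64_t offsets[] = { 0x40000, 0x48000, 0x80000, 0xa2000 };
          size_t i;

          if (!can_offset_align(size, min_block_size))
                  return 0;

          for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
                  printf("offset 0x%llx: alignment order %u -> %s\n",
                         (unsigned long long)offsets[i],
                         offset_alignment(offsets[i]),
                         offset_alignment(offsets[i]) >= want ? "usable" : "skip");
          return 0;
  }

The subtree_max_alignment field added by the patch caches the best such
alignment order within each free-tree subtree, which is what lets
drm_buddy_find_block_aligned() skip whole subtrees that cannot reach the
requested order.
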
 drivers/gpu/drm/drm_buddy.c | 261 +++++++++++++++++++++++++++++-------
 include/drm/drm_buddy.h     |   3 +
 2 files changed, 219 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f2c92902e4a3..285757a151d1 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -23,6 +23,16 @@ static struct kmem_cache *slab_blocks;
 #define for_each_free_tree(tree) \
 	for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
 
+static unsigned int drm_buddy_block_offset_alignment(struct drm_buddy_block *block)
+{
+	return __ffs(drm_buddy_block_offset(block));
+}
+
+RB_DECLARE_CALLBACKS_MAX(static, drm_buddy_augment_cb,
+			 struct drm_buddy_block, rb,
+			 unsigned int, subtree_max_alignment,
+			 drm_buddy_block_offset_alignment);
+
 static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 					       struct drm_buddy_block *parent,
 					       unsigned int order,
@@ -40,6 +50,9 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
 	block->header |= order;
 	block->parent = parent;
 
+	block->subtree_max_alignment =
+		drm_buddy_block_offset_alignment(block);
+	RB_CLEAR_NODE(&block->rb);
+
 	BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
@@ -76,26 +89,32 @@ static bool rbtree_is_empty(struct rb_root *root)
 	return RB_EMPTY_ROOT(root);
 }
 
-static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
-					const struct drm_buddy_block *node)
-{
-	return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
-}
-
-static bool rbtree_block_offset_less(struct rb_node *block,
-				     const struct rb_node *node)
-{
-	return drm_buddy_block_offset_less(rbtree_get_free_block(block),
-					   rbtree_get_free_block(node));
-}
-
 static void rbtree_insert(struct drm_buddy *mm,
 			  struct drm_buddy_block *block,
 			  enum drm_buddy_free_tree tree)
 {
-	rb_add(&block->rb,
-	       &mm->free_trees[tree][drm_buddy_block_order(block)],
-	       rbtree_block_offset_less);
+	struct rb_node **link, *parent = NULL;
+	struct drm_buddy_block *node;
+	struct rb_root *root;
+	unsigned int order;
+
+	order = drm_buddy_block_order(block);
+
+	root = &mm->free_trees[tree][order];
+	link = &root->rb_node;
+
+	while (*link) {
+		parent = *link;
+		node = rbtree_get_free_block(parent);
+
+		if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&block->rb, parent, link);
+	rb_insert_augmented(&block->rb, root, &drm_buddy_augment_cb);
 }
 
 static void rbtree_remove(struct drm_buddy *mm,
@@ -108,7 +127,7 @@ static void rbtree_remove(struct drm_buddy *mm,
 	tree = get_block_tree(block);
 	root = &mm->free_trees[tree][order];
 
-	rb_erase(&block->rb, root);
+	rb_erase_augmented(&block->rb, root, &drm_buddy_augment_cb);
 	RB_CLEAR_NODE(&block->rb);
 }
 
@@ -798,6 +817,132 @@ alloc_from_freetree(struct drm_buddy *mm,
 	return ERR_PTR(err);
 }
 
+static bool
+drm_buddy_can_offset_align(u64 size, u64 min_block_size)
+{
+	return size < min_block_size && is_power_of_2(size);
+}
+
+static bool drm_buddy_subtree_can_satisfy(struct rb_node *node,
+					  unsigned int alignment)
+{
+	struct drm_buddy_block *block;
+
+	if (!node)
+		return false;
+
+	block = rbtree_get_free_block(node);
+	return block->subtree_max_alignment >= alignment;
+}
+
+static struct drm_buddy_block *
+drm_buddy_find_block_aligned(struct drm_buddy *mm,
+			     enum drm_buddy_free_tree tree,
+			     unsigned int order,
+			     unsigned int tmp,
+			     unsigned int alignment,
+			     unsigned long flags)
+{
+	struct rb_root *root = &mm->free_trees[tree][tmp];
+	struct rb_node *rb = root->rb_node;
+
+	while (rb) {
+		struct drm_buddy_block *block = rbtree_get_free_block(rb);
+		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;
+
+		if (right_node) {
+			if (drm_buddy_subtree_can_satisfy(right_node, alignment)) {
+				rb = right_node;
+				continue;
+			}
+		}
+
+		if (drm_buddy_block_order(block) >= order &&
+		    __ffs(drm_buddy_block_offset(block)) >= alignment)
+			return block;
+
+		if (left_node) {
+			if (drm_buddy_subtree_can_satisfy(left_node, alignment)) {
+				rb = left_node;
+				continue;
+			}
+		}
+
+		break;
+	}
+
+	return NULL;
+}
+
+static struct drm_buddy_block *
+drm_buddy_offset_aligned_allocation(struct drm_buddy *mm,
+				    u64 size,
+				    u64 min_block_size,
+				    unsigned long flags)
+{
+	struct drm_buddy_block *block = NULL;
+	unsigned int order, tmp, alignment;
+	struct drm_buddy_block *buddy;
+	enum drm_buddy_free_tree tree;
+	unsigned long pages;
+	int err;
+
+	alignment = ilog2(min_block_size);
+	pages = size >> ilog2(mm->chunk_size);
+	order = fls(pages) - 1;
+
+	tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
+		DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+
+	for (tmp = order; tmp <= mm->max_order; ++tmp) {
+		block = drm_buddy_find_block_aligned(mm, tree, order,
+						     tmp, alignment, flags);
+		if (!block) {
+			tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
+				DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+			block = drm_buddy_find_block_aligned(mm, tree, order,
+							     tmp, alignment, flags);
+		}
+
+		if (block)
+			break;
+	}
+
+	if (!block)
+		return ERR_PTR(-ENOSPC);
+
+	while (drm_buddy_block_order(block) > order) {
+		struct drm_buddy_block *left, *right;
+
+		err = split_block(mm, block);
+		if (unlikely(err))
+			goto err_undo;
+
+		left = block->left;
+		right = block->right;
+
+		if (__ffs(drm_buddy_block_offset(right)) >= alignment)
+			block = right;
+		else
+			block = left;
+	}
+
+	return block;
+
+err_undo:
+	/*
+	 * We really don't want to leave around a bunch of split blocks, since
+	 * bigger is better, so make sure we merge everything back before we
+	 * free the allocated blocks.
+	 */
+	buddy = __get_buddy(block);
+	if (buddy &&
+	    (drm_buddy_block_is_free(block) &&
+	     drm_buddy_block_is_free(buddy)))
+		__drm_buddy_free(mm, block, false);
+	return ERR_PTR(err);
+}
+
 static int __alloc_range(struct drm_buddy *mm,
 			 struct list_head *dfs,
 			 u64 start, u64 size,
@@ -1067,6 +1212,7 @@ EXPORT_SYMBOL(drm_buddy_block_trim);
 static struct drm_buddy_block *
 __drm_buddy_alloc_blocks(struct drm_buddy *mm,
 			 u64 start, u64 end,
+			 u64 size,
 			 u64 min_block_size,
 			 unsigned int order,
 			 unsigned long flags)
 {
@@ -1074,6 +1220,11 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
 		/* Allocate traversing within the range */
 		return __drm_buddy_alloc_range_bias(mm, start, end,
 						    order, flags);
+	else if (size < min_block_size)
+		/* Allocate from an offset-aligned region without size rounding */
+		return drm_buddy_offset_aligned_allocation(mm, size,
+							   min_block_size,
+							   flags);
 	else
 		/* Allocate from freetree */
 		return alloc_from_freetree(mm, order, flags);
@@ -1145,8 +1296,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
 		size = roundup_pow_of_two(size);
 		min_block_size = size;
-	/* Align size value to min_block_size */
-	} else if (!IS_ALIGNED(size, min_block_size)) {
+	/*
+	 * Normalize the requested size to min_block_size for regular allocations.
+	 * Offset-aligned allocations intentionally skip size rounding.
+	 */
+	} else if (!drm_buddy_can_offset_align(size, min_block_size)) {
 		size = round_up(size, min_block_size);
 	}
 
@@ -1157,43 +1311,60 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 	do {
 		order = min(order, (unsigned int)fls(pages) - 1);
 		BUG_ON(order > mm->max_order);
-		BUG_ON(order < min_order);
+		/*
+		 * Regular allocations must not allocate blocks smaller than min_block_size.
+		 * Offset-aligned allocations deliberately bypass this constraint.
+		 */
+		BUG_ON(size >= min_block_size && order < min_order);
 
 		do {
+			unsigned int fallback_order;
+
 			block = __drm_buddy_alloc_blocks(mm, start,
 							 end,
+							 size,
+							 min_block_size,
 							 order,
 							 flags);
 			if (!IS_ERR(block))
 				break;
 
-			if (order-- == min_order) {
-				/* Try allocation through force merge method */
-				if (mm->clear_avail &&
-				    !__force_merge(mm, start, end, min_order)) {
-					block = __drm_buddy_alloc_blocks(mm, start,
-									 end,
-									 min_order,
-									 flags);
-					if (!IS_ERR(block)) {
-						order = min_order;
-						break;
-					}
-				}
+			if (size < min_block_size) {
+				fallback_order = order;
+			} else if (order == min_order) {
+				fallback_order = min_order;
+			} else {
+				order--;
+				continue;
+			}
 
-				/*
-				 * Try contiguous block allocation through
-				 * try harder method.
-				 */
-				if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
-				    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
-					return __alloc_contig_try_harder(mm,
-									 original_size,
-									 original_min_size,
-									 blocks);
-				err = -ENOSPC;
-				goto err_free;
+			/* Try allocation through force merge method */
+			if (mm->clear_avail &&
+			    !__force_merge(mm, start, end, fallback_order)) {
+				block = __drm_buddy_alloc_blocks(mm, start,
+								 end,
+								 size,
+								 min_block_size,
+								 fallback_order,
+								 flags);
+				if (!IS_ERR(block)) {
+					order = fallback_order;
+					break;
+				}
 			}
+
+			/*
+			 * Try contiguous block allocation through
+			 * try harder method.
+			 */
+			if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
+			    !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+				return __alloc_contig_try_harder(mm,
+								 original_size,
+								 original_min_size,
+								 blocks);
+			err = -ENOSPC;
+			goto err_free;
 		} while (1);
 
 		mark_allocated(mm, block);
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index d7891d08f67a..da6a40fb4763 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 
 #include <drm/drm_print.h>
 
@@ -60,6 +61,8 @@ struct drm_buddy_block {
 	};
 
 	struct list_head tmp_link;
+
+	unsigned int subtree_max_alignment;
 };
 
 /* Order-zero must be at least SZ_4K */
-- 
2.34.1
