[PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Arunpravin Paneer Selvam
Move the fallback and block incompatible checks
above, so that we dont unnecessarily split the blocks
and leaving the unmerged. This resolves the unnecessary
warn on's thrown during force_merge call.

v2:(Matthew)
  - Move the fallback and block incompatible checks above
the contains check.

Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240517135015.17565-1-arunpravin.paneersel...@amd.com/
---
 drivers/gpu/drm/drm_buddy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
}
 
+   if (!fallback && block_incompatible(block, flags))
+   continue;
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
-   if (!fallback && block_incompatible(block, flags))
-   continue;
-
/*
 * Find the free block within the range.
 */
-- 
2.25.1



[PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Arunpravin Paneer Selvam
Move the fallback and block incompatible checks
above, so that we dont unnecessarily split the blocks
and leaving the unmerged. This resolves the unnecessary
warn on's thrown during force_merge call.

v2:(Matthew)
  - Move the fallback and block incompatible checks above
the contains check.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
 drivers/gpu/drm/drm_buddy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
}
 
+   if (!fallback && block_incompatible(block, flags))
+   continue;
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
-   if (!fallback && block_incompatible(block, flags))
-   continue;
-
/*
 * Find the free block within the range.
 */
-- 
2.25.1



[PATCH] drm/buddy: Merge back blocks in bias range function

2024-05-17 Thread Arunpravin Paneer Selvam
In bias range allocation, when we don't find the required
blocks (i.e) on returning the -ENOSPC, we should merge back the
split blocks. Otherwise, during force_merge we are flooded with
warn on's due to block and its buddy are in same clear state
(dirty or clear).

Hence, renamed the force_merge with merge_blocks and passed a
force_merge as a bool function parameter. Based on the requirement,
say, in any normal situation we can call the merge_blocks passing
the force_merge variable as false. And, in any memory cruch situation,
we can call the merge_blocks passing the force_merge as true. This
resolves the unnecessary warn on's thrown during force_merge call.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
 drivers/gpu/drm/drm_buddy.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..111f602f1359 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -161,10 +161,11 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
return order;
 }
 
-static int __force_merge(struct drm_buddy *mm,
-u64 start,
-u64 end,
-unsigned int min_order)
+static int __merge_blocks(struct drm_buddy *mm,
+ u64 start,
+ u64 end,
+ unsigned int min_order,
+ bool force_merge)
 {
unsigned int order;
int i;
@@ -195,8 +196,9 @@ static int __force_merge(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
continue;
 
-   WARN_ON(drm_buddy_block_is_clear(block) ==
-   drm_buddy_block_is_clear(buddy));
+   if (force_merge)
+   WARN_ON(drm_buddy_block_is_clear(block) ==
+   drm_buddy_block_is_clear(buddy));
 
/*
 * If the prev block is same as buddy, don't access the
@@ -210,7 +212,7 @@ static int __force_merge(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail -= drm_buddy_block_size(mm, 
block);
 
-   order = __drm_buddy_free(mm, block, true);
+   order = __drm_buddy_free(mm, block, force_merge);
if (order >= min_order)
return 0;
}
@@ -332,7 +334,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
 
for (i = 0; i < mm->n_roots; ++i) {
order = ilog2(size) - ilog2(mm->chunk_size);
-   __force_merge(mm, 0, size, order);
+   __merge_blocks(mm, 0, size, order, true);
 
WARN_ON(!drm_buddy_block_is_free(mm->roots[i]));
drm_block_free(mm, mm->roots[i]);
@@ -479,7 +481,7 @@ __alloc_range_bias(struct drm_buddy *mm,
   unsigned long flags,
   bool fallback)
 {
-   u64 req_size = mm->chunk_size << order;
+   u64 size, root_size, req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -487,6 +489,7 @@ __alloc_range_bias(struct drm_buddy *mm,
int i;
 
end = end - 1;
+   size = mm->size;
 
for (i = 0; i < mm->n_roots; ++i)
list_add_tail(>roots[i]->tmp_link, );
@@ -548,6 +551,15 @@ __alloc_range_bias(struct drm_buddy *mm,
list_add(>left->tmp_link, );
} while (1);
 
+   /* Merge back the split blocks */
+   for (i = 0; i < mm->n_roots; ++i) {
+   order = ilog2(size) - ilog2(mm->chunk_size);
+   __merge_blocks(mm, start, end, order, false);
+
+   root_size = mm->chunk_size << order;
+   size -= root_size;
+   }
+
return ERR_PTR(-ENOSPC);
 
 err_undo:
@@ -1026,7 +1038,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
if (order-- == min_order) {
/* Try allocation through force merge method */
if (mm->clear_avail &&
-   !__force_merge(mm, start, end, min_order)) {
+   !__merge_blocks(mm, start, end, min_order, 
true)) {
block = __drm_buddy_alloc_blocks(mm, 
start,
 end,
 
min_order,
-- 
2.25.1



[PATCH v3 1/2] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-14 Thread Arunpravin Paneer Selvam
Problem statement: During the system boot time, an application request
for the bulk volume of cleared range bias memory when the clear_avail
is zero, we dont fallback into normal allocation method as we had an
unnecessary clear_avail check which prevents the fallback method leads
to fb allocation failure following system goes into unresponsive state.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

v2: add a kunit for this corner case (Daniel Vetter)

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..1daf778cf6fa 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -249,6 +249,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 
chunk_size)
 
mm->size = size;
mm->avail = size;
+   mm->clear_avail = 0;
mm->chunk_size = chunk_size;
mm->max_order = ilog2(size) - ilog2(chunk_size);
 
@@ -574,7 +575,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 
block = __alloc_range_bias(mm, start, end, order,
   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
 
-- 
2.25.1



[PATCH v3 2/2] drm/tests: Add a unit test for range bias allocation

2024-05-14 Thread Arunpravin Paneer Selvam
Allocate cleared blocks in the bias range when the DRM
buddy's clear avail is zero. This will validate the bias
range allocation in scenarios like system boot when no
cleared blocks are available and exercise the fallback
path too. The resulting blocks should always be dirty.

v1:(Matthew)
  - move the size to the variable declaration section.
  - move the mm.clear_avail init to allocator init.

Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 36 +-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index e3b50e240d36..b3be68b03610 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -23,9 +23,11 @@ static inline u64 get_size(int order, u64 chunk_size)
 
 static void drm_test_buddy_alloc_range_bias(struct kunit *test)
 {
-   u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+   u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
DRM_RND_STATE(prng, random_seed);
unsigned int i, count, *order;
+   struct drm_buddy_block *block;
+   unsigned long flags;
struct drm_buddy mm;
LIST_HEAD(allocated);
 
@@ -222,6 +224,38 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
 
drm_buddy_free_list(, , 0);
drm_buddy_fini();
+
+   /*
+* Allocate cleared blocks in the bias range when the DRM buddy's clear 
avail is
+* zero. This will validate the bias range allocation in scenarios like 
system boot
+* when no cleared blocks are available and exercise the fallback path 
too. The resulting
+* blocks should always be dirty.
+*/
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+
+   bias_start = round_up(prandom_u32_state() % (mm_size - ps), ps);
+   bias_end = round_up(bias_start + prandom_u32_state() % (mm_size - 
bias_start), ps);
+   bias_end = max(bias_end, bias_start + ps);
+   bias_rem = bias_end - bias_start;
+
+   flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+   size = max(round_up(prandom_u32_state() % bias_rem, ps), ps);
+
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, size, ps,
+ ,
+ flags),
+  "buddy_alloc failed with bias(%x-%x), size=%u, 
ps=%u\n",
+  bias_start, bias_end, size, ps);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
 }
 
 static void drm_test_buddy_alloc_clear(struct kunit *test)
-- 
2.25.1



[PATCH v2 1/2] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-12 Thread Arunpravin Paneer Selvam
Problem statement: During the system boot time, an application request
for the bulk volume of cleared range bias memory when the clear_avail
is zero, we dont fallback into normal allocation method as we had an
unnecessary clear_avail check which prevents the fallback method leads
to fb allocation failure following system goes into unresponsive state.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

v2: add a kunit for this corner case (Daniel Vetter)

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..831929ac95eb 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -574,7 +574,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 
block = __alloc_range_bias(mm, start, end, order,
   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
 
-- 
2.25.1



[PATCH v2 2/2] drm/tests: Add a unit test for range bias allocation

2024-05-12 Thread Arunpravin Paneer Selvam
Allocate cleared blocks in the bias range when the DRM
buddy's clear avail is zero. This will validate the bias
range allocation in scenarios like system boot when no
cleared blocks are available and exercise the fallback
path too. The resulting blocks should always be dirty.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 35 ++
 1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index e3b50e240d36..a194f271bc55 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -26,6 +26,8 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
DRM_RND_STATE(prng, random_seed);
unsigned int i, count, *order;
+   struct drm_buddy_block *block;
+   unsigned long flags;
struct drm_buddy mm;
LIST_HEAD(allocated);
 
@@ -222,6 +224,39 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
 
drm_buddy_free_list(, , 0);
drm_buddy_fini();
+
+   /*
+* Allocate cleared blocks in the bias range when the DRM buddy's clear 
avail is
+* zero. This will validate the bias range allocation in scenarios like 
system boot
+* when no cleared blocks are available and exercise the fallback path 
too. The resulting
+* blocks should always be dirty.
+*/
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+   mm.clear_avail = 0;
+
+   bias_start = round_up(prandom_u32_state() % (mm_size - ps), ps);
+   bias_end = round_up(bias_start + prandom_u32_state() % (mm_size - 
bias_start), ps);
+   bias_end = max(bias_end, bias_start + ps);
+   bias_rem = bias_end - bias_start;
+
+   flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+   u32 size = max(round_up(prandom_u32_state() % bias_rem, ps), ps);
+
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, size, ps,
+ ,
+ flags),
+  "buddy_alloc failed with bias(%x-%x), size=%u, 
ps=%u\n",
+  bias_start, bias_end, size, ps);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
 }
 
 static void drm_test_buddy_alloc_clear(struct kunit *test)
-- 
2.25.1



[PATCH] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-08 Thread Arunpravin Paneer Selvam
Problem statement: During the system boot time, an application request
for the bulk volume of cleared range bias memory when the clear_avail
is zero, we dont fallback into normal allocation method as we had an
unnecessary clear_avail check which prevents the fallback method leads
to fb allocation failure following system goes into unresponsive state.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..831929ac95eb 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -574,7 +574,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 
block = __alloc_range_bias(mm, start, end, order,
   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
 
-- 
2.25.1



[PATCH] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-06 Thread Arunpravin Paneer Selvam
Problem statement: During the system boot time, an application request
for the bulk volume of cleared range bias memory when the clear_avail
is zero, we dont fallback into normal allocation method as we had an
unnecessary clear_avail check which prevents the fallback method leads
to fb allocation failure following system goes into unresponsive state.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
 drivers/gpu/drm/drm_buddy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..831929ac95eb 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -574,7 +574,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
 
block = __alloc_range_bias(mm, start, end, order,
   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
 
-- 
2.25.1



[PATCH v11 3/3] drm/tests: Add a test case for drm buddy clear allocation

2024-04-14 Thread Arunpravin Paneer Selvam
Add a new test case for the drm buddy clear and dirty
allocation.

v2:(Matthew)
  - make size as u32
  - rename PAGE_SIZE with SZ_4K
  - dont fragment the address space for all the order allocation
iterations. we can do it once and just increment and allocate
the size.
  - create new mm with non power-of-two size to ensure the multi-root
force_merge during fini.

v3:
  - add randomness in size calculation(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 143 +
 1 file changed, 143 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 4621a860cb05..e3b50e240d36 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -224,6 +224,148 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
drm_buddy_fini();
 }
 
+static void drm_test_buddy_alloc_clear(struct kunit *test)
+{
+   unsigned long n_pages, total, i = 0;
+   DRM_RND_STATE(prng, random_seed);
+   const unsigned long ps = SZ_4K;
+   struct drm_buddy_block *block;
+   const int max_order = 12;
+   LIST_HEAD(allocated);
+   struct drm_buddy mm;
+   unsigned int order;
+   u32 mm_size, size;
+   LIST_HEAD(dirty);
+   LIST_HEAD(clean);
+
+   mm_size = SZ_4K << max_order;
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+   /*
+* Idea is to allocate and free some random portion of the address 
space,
+* returning those pages as non-dirty and randomly alternate between
+* requesting dirty and non-dirty pages (not going over the limit
+* we freed as non-dirty), putting that into two separate lists.
+* Loop over both lists at the end checking that the dirty list
+* is indeed all dirty pages and vice versa. Free it all again,
+* keeping the dirty/clear status.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   5 * ps, ps, 
,
+   
DRM_BUDDY_TOPDOWN_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 5 * ps);
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   n_pages = 10;
+   do {
+   unsigned long flags;
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0) {
+   list = 
+   flags = 0;
+   } else {
+   list = 
+   flags = DRM_BUDDY_CLEAR_ALLOCATION;
+   }
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,
+   ps, ps, 
list,
+   flags),
+   "buddy_alloc hit an error size=%lu\n", 
ps);
+   } while (++i < n_pages);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   /*
+* Trying to go over the clear limit for some allocation.
+* The allocation should never fail with reasonable page-size.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   10 * ps, ps, ,
+   
DRM_BUDDY_CLEAR_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 10 * ps);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Create a new mm. Intentionally fragment the address space by creating
+* two alternating lists. Free both lists, one as dirty the other as 
clean.
+* Try to allocate double the previous size with matching 
min_page_size. The
+* allocation should never fail as it calls the force_merge. Also check 
that
+* the page is always dirty after force_merge. Free the page as dirty, 
then
+* repeat the whole thing, increment the order until we hit the 
max_order.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0)
+   list = 
+ 

[PATCH v11 1/3] drm/buddy: Implement tracking clear page feature

2024-04-14 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list(Christian)
  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
  - Defragment the memory beginning from min_order
till the required memory space is available.

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.
  - add a warning if the two blocks are incompatible on
defragmentation
  - call full defragmentation in the fini() function
  - place a condition to test if min_order is equal to 0
  - replace the list with safe_reverse() variant as we might
remove the block from the list.

v3:
  - fix Gitlab user reported lockup issue.
  - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
  - modify to pass the root order instead max_order in fini()
function(Matthew)
  - change bool 1 to true(Matthew)
  - add check if min_block_size is power of 2(Matthew)
  - modify the min_block_size datatype to u64(Matthew)

v4:
  - rename the function drm_buddy_defrag with __force_merge.
  - Include __force_merge directly in drm buddy file and remove
the defrag use in amdgpu driver.
  - Remove list_empty() check(Matthew)
  - Remove unnecessary space, headers and placement of new variables(Matthew)
  - Add a unit test case(Matthew)

v5:
  - remove force merge support to actual range allocation and not to bail
out when contains && split(Matthew)
  - add range support to force merge function.

v6:
  - modify the alloc_range() function clear page non merged blocks
allocation(Matthew)
  - correct the list_insert function name(Matthew).

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 427 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  28 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  16 +-
 6 files changed, 363 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..284ebae71cc4 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 stati

[PATCH v11 2/3] drm/amdgpu: Enable clear page functionality

2024-04-14 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

v4(Christian):
  - vres flag setting move to vram manager file
  - use dma_fence_get_stub in amdgpu_ttm_clear_buffer function
  - make fence a mandatory parameter and drop the if and the get/put dance

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|  9 +--
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 70 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  | 10 +++
 6 files changed, 116 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..e011a326fe86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -634,7 +634,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
@@ -1365,8 +1365,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   amdgpu_vram_mgr_set_cleared(bo->resource);
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fc418e670fda..f0b42b567357 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -378,11 +378,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, _fence,
-   false);
+   r = 

[PATCH v10 3/3] drm/tests: Add a test case for drm buddy clear allocation

2024-04-08 Thread Arunpravin Paneer Selvam
Add a new test case for the drm buddy clear and dirty
allocation.

v2:(Matthew)
  - make size as u32
  - rename PAGE_SIZE with SZ_4K
  - dont fragment the address space for all the order allocation
iterations. we can do it once and just increment and allocate
the size.
  - create new mm with non power-of-two size to ensure the multi-root
force_merge during fini.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 141 +
 1 file changed, 141 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 4621a860cb05..b07f132f2835 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -224,6 +224,146 @@ static void drm_test_buddy_alloc_range_bias(struct kunit 
*test)
drm_buddy_fini();
 }
 
+static void drm_test_buddy_alloc_clear(struct kunit *test)
+{
+   unsigned long n_pages, total, i = 0;
+   const unsigned long ps = SZ_4K;
+   struct drm_buddy_block *block;
+   const int max_order = 12;
+   LIST_HEAD(allocated);
+   struct drm_buddy mm;
+   unsigned int order;
+   u32 mm_size, size;
+   LIST_HEAD(dirty);
+   LIST_HEAD(clean);
+
+   mm_size = SZ_4K << max_order;
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+   /*
+* Idea is to allocate and free some random portion of the address 
space,
+* returning those pages as non-dirty and randomly alternate between
+* requesting dirty and non-dirty pages (not going over the limit
+* we freed as non-dirty), putting that into two separate lists.
+* Loop over both lists at the end checking that the dirty list
+* is indeed all dirty pages and vice versa. Free it all again,
+* keeping the dirty/clear status.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   5 * ps, ps, 
,
+   
DRM_BUDDY_TOPDOWN_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 5 * ps);
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   n_pages = 10;
+   do {
+   unsigned long flags;
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0) {
+   list = 
+   flags = 0;
+   } else {
+   list = 
+   flags = DRM_BUDDY_CLEAR_ALLOCATION;
+   }
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,
+   ps, ps, 
list,
+   flags),
+   "buddy_alloc hit an error size=%lu\n", 
ps);
+   } while (++i < n_pages);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   /*
+* Trying to go over the clear limit for some allocation.
+* The allocation should never fail with reasonable page-size.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   10 * ps, ps, ,
+   
DRM_BUDDY_CLEAR_ALLOCATION),
+   "buddy_alloc hit an error size=%lu\n", 10 * ps);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Create a new mm. Intentionally fragment the address space by creating
+* two alternating lists. Free both lists, one as dirty the other as 
clean.
+* Try to allocate double the previous size with matching 
min_page_size. The
+* allocation should never fail as it calls the force_merge. Also check 
that
+* the page is always dirty after force_merge. Free the page as dirty, 
then
+* repeat the whole thing, increment the order until we hit the 
max_order.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0)
+   list = 
+   else
+   list = 
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,
+  

[PATCH v10 1/3] drm/buddy: Implement tracking clear page feature

2024-04-08 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list(Christian)
  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
  - Defragment the memory beginning from min_order
till the required memory space is available.

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.
  - add a warning if the two blocks are incompatible on
defragmentation
  - call full defragmentation in the fini() function
  - place a condition to test if min_order is equal to 0
  - replace the list with safe_reverse() variant as we might
remove the block from the list.

v3:
  - fix Gitlab user reported lockup issue.
  - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
  - modify to pass the root order instead max_order in fini()
function(Matthew)
  - change bool 1 to true(Matthew)
  - add check if min_block_size is power of 2(Matthew)
  - modify the min_block_size datatype to u64(Matthew)

v4:
  - rename the function drm_buddy_defrag with __force_merge.
  - Include __force_merge directly in drm buddy file and remove
the defrag use in amdgpu driver.
  - Remove list_empty() check(Matthew)
  - Remove unnecessary space, headers and placement of new variables(Matthew)
  - Add a unit test case(Matthew)

v5:
  - remove force merge support to actual range allocation and not to bail
out when contains && split(Matthew)
  - add range support to force merge function.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 430 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  28 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  16 +-
 6 files changed, 368 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..83dbe252f727 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
 }
 
-static void list_insert_sorted(struct drm_buddy *mm,
-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
 {
struct drm_buddy_block *node;
struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void 

[PATCH v10 2/3] drm/amdgpu: Enable clear page functionality

2024-04-08 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

v4(Christian):
  - vres flag setting move to vram manager file
  - use dma_fence_get_stub in amdgpu_ttm_clear_buffer function
  - make fence a mandatory parameter and drop the if and the get/put dance

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|  9 +--
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 70 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  | 10 +++
 6 files changed, 116 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..e011a326fe86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -634,7 +634,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
@@ -1365,8 +1365,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   amdgpu_vram_mgr_set_cleared(bo->resource);
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fc418e670fda..f0b42b567357 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -378,11 +378,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, _fence,
-   false);
+   r = 

[PATCH v9 3/3] drm/tests: Add a test case for drm buddy clear allocation

2024-03-18 Thread Arunpravin Paneer Selvam
Add a new test case for the drm buddy clear and dirty
allocation.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 127 +
 1 file changed, 127 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index 454ad9952f56..d355a6e61893 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -19,6 +19,132 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_clear(struct kunit *test)
+{
+   unsigned long n_pages, total, i = 0;
+   const unsigned long ps = SZ_4K;
+   struct drm_buddy_block *block;
+   const int max_order = 12;
+   LIST_HEAD(allocated);
+   struct drm_buddy mm;
+   unsigned int order;
+   u64 mm_size, size;
+   LIST_HEAD(dirty);
+   LIST_HEAD(clean);
+
+   mm_size = PAGE_SIZE << max_order;
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
+
+   /**
+* Idea is to allocate and free some random portion of the address 
space,
+* returning those pages as non-dirty and randomly alternate between
+* requesting dirty and non-dirty pages (not going over the limit
+* we freed as non-dirty), putting that into two separate lists.
+* Loop over both lists at the end checking that the dirty list
+* is indeed all dirty pages and vice versa. Free it all again,
+* keeping the dirty/clear status.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   5 * ps, ps, 
,
+   
DRM_BUDDY_TOPDOWN_ALLOCATION),
+   "buddy_alloc hit an error size=%u\n", 5 * ps);
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   n_pages = 10;
+   do {
+   unsigned long flags;
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0) {
+   list = 
+   flags = 0;
+   } else if (slot == 1) {
+   list = 
+   flags = DRM_BUDDY_CLEAR_ALLOCATION;
+   }
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, 
mm_size,
+   ps, ps, 
list,
+   flags),
+   "buddy_alloc hit an error size=%u\n", 
ps);
+   } while (++i < n_pages);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+
+   list_for_each_entry(block, , link)
+   KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+
+   /**
+* Trying to go over the clear limit for some allocation.
+* The allocation should never fail with reasonable page-size.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   10 * ps, ps, ,
+   
DRM_BUDDY_CLEAR_ALLOCATION),
+   "buddy_alloc hit an error size=%u\n", 10 * ps);
+
+   drm_buddy_free_list(, , DRM_BUDDY_CLEARED);
+   drm_buddy_free_list(, , 0);
+   drm_buddy_fini();
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /**
+* Create a new mm. Intentionally fragment the address space by creating
+* two alternating lists. Free both lists, one as dirty the other as 
clean.
+* Try to allocate double the previous size with matching 
min_page_size. The
+* allocation should never fail as it calls the force_merge. Also check 
that
+* the page is always dirty after force_merge. Free the page as dirty, 
then
+* repeat the whole thing, increment the order until we hit the 
max_order.
+*/
+
+   order = 1;
+   do {
+   size = PAGE_SIZE << order;
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 2;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, 
mm_size,
+  

[PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-18 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/am

[PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-03-18 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list(Christian)
  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
  - Defragment the memory beginning from min_order
till the required memory space is available.

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.
  - add a warning if the two blocks are incompatible on
defragmentation
  - call full defragmentation in the fini() function
  - place a condition to test if min_order is equal to 0
  - replace the list with safe_reverse() variant as we might
remove the block from the list.

v3:
  - fix Gitlab user reported lockup issue.
  - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
  - modify to pass the root order instead max_order in fini()
function(Matthew)
  - change bool 1 to true(Matthew)
  - add check if min_block_size is power of 2(Matthew)
  - modify the min_block_size datatype to u64(Matthew)

v4:
  - rename the function drm_buddy_defrag with __force_merge.
  - Include __force_merge directly in drm buddy file and remove
the defrag use in amdgpu driver.
  - Remove list_empty() check(Matthew)
  - Remove unnecessary space, headers and placement of new variables(Matthew)
  - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 427 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  16 +-
 6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
 }
 
-static void list_insert_sorted(struct drm_buddy *mm,
-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
 {
struct drm_buddy_block *node;
struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   

[PATCH v8 2/3] drm/amdgpu: Enable clear page functionality

2024-03-04 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/am

[PATCH v8 3/3] drm/buddy: Add user for defragmentation

2024-03-04 Thread Arunpravin Paneer Selvam
Add amdgpu driver as user for the drm buddy
defragmentation.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++--
 drivers/gpu/drm/drm_buddy.c  |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
   min_block_size,
   >blocks,
   vres->flags);
-   if (unlikely(r))
-   goto error_free_blocks;
+   if (unlikely(r)) {
+   if (r == -ENOSPC) {
+   drm_buddy_defrag(mm, min_block_size);
+   r = drm_buddy_alloc_blocks(mm, fpfn,
+  lpfn,
+  size,
+  min_block_size,
+  >blocks,
+  vres->flags);
+   if (unlikely(r))
+   goto error_free_blocks;
+   } else {
+   goto error_free_blocks;
+   }
+   }
 
if (size > remaining_size)
remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 40131ed9b0cd..19440f8caec0 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -396,6 +396,7 @@ void drm_buddy_defrag(struct drm_buddy *mm,
}
}
 }
+EXPORT_SYMBOL(drm_buddy_defrag);
 
 /**
  * drm_buddy_free_block - free a block
-- 
2.25.1



[PATCH v8 1/3] drm/buddy: Implement tracking clear page feature

2024-03-04 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list(Christian)
  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
  - Defragment the memory beginning from min_order
till the required memory space is available.

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.
  - add a warning if the two blocks are incompatible on
defragmentation
  - call full defragmentation in the fini() function
  - place a condition to test if min_order is equal to 0
  - replace the list with safe_reverse() variant as we might
remove the block from the list.

v3:
  - fix Gitlab user reported lockup issue.
  - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
  - modify to pass the root order instead max_order in fini()
function(Matthew)
  - change bool 1 to true(Matthew)
  - add check if min_block_size is power of 2(Matthew)
  - modify the min_block_size datatype to u64(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 294 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  22 +-
 6 files changed, 290 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..40131ed9b0cd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -186,11 +196,21 @@ EXPORT_SYMBOL(drm_buddy_init);
  */
 void drm_buddy_fini(struct drm_buddy *mm)
 {
+   u64 root_size, size;
+   unsigned int order;
int i;
 
+   size = mm->size;
+
for (i = 0; i < mm->n_roots; ++i) {
+   order = ilog2(size) - ilog2(mm->chunk_size);
+   root_size = mm->chunk_size << order;
+   drm_buddy_defrag(mm, root_size);
+
WARN_ON(!drm_buddy_block_is_free(mm->roots[i]));
drm_block_free(mm, mm->roots[i]);
+
+

Re: [PATCH v7 3/3] drm/buddy: Add defragmentation support

2024-02-22 Thread Arunpravin Paneer Selvam

Hi Christian,

On 2/21/2024 7:58 PM, Christian König wrote:

Am 21.02.24 um 13:18 schrieb Arunpravin Paneer Selvam:

Add a function to support defragmentation.


Thinking more about it maybe you want to call this function differently.

Essentially we are force merging pages even if their cleared flag 
doesn't match, that is something different than defragmentation I think.


Maybe rename it for force_merge or something similar. Not mandatory, 
just an idea how to improve the readability of the code.


Apart from that just let me know when I can push it to drm-misc-next.

Christian.
I am fixing few issues reported by user and addressing Matthew's 
suggestions on v7 version. I will post the v8 version incorporating

all the changes. I hope we can push v8 into drm-misc-next.

Thanks,
Arun.




v1:
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2(Matthew):
   - add amdgpu user for defragmentation
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++-
  drivers/gpu/drm/drm_buddy.c  | 93 +---
  include/drm/drm_buddy.h  |  3 +
  3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 >blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   >blocks,
+   vres->flags);
+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 18e004fa39d3..56bd1560fbcd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -203,6 +203,8 @@ void drm_buddy_fini(struct drm_buddy *mm)
  drm_block_free(mm, mm->roots[i]);
  }
  +    drm_buddy_defrag(mm, mm->chunk_size << mm->max_order);
+
  WARN_ON(mm->avail != mm->size);
    kfree(mm->roots);
@@ -276,25 +278,39 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  -static void __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+ struct drm_buddy_block *block,
+ bool defrag)
  {
+    unsigned int order, block_order;
  struct drm_buddy_block *parent;
  +    block_order = drm_buddy_block_order(block);
+
  while ((parent = block->parent)) {
-    struct drm_buddy_block *buddy;
+    struct drm_buddy_block *buddy = NULL;
    buddy = __get_buddy(block);
    if (!drm_buddy_block_is_free(buddy))
  break;
  -    if (drm_buddy_block_is_clear(block) !=
-    drm_buddy_block_is_clear(buddy))
-    break;
+    if (!defrag) {
+    /*
+ * Check the block and its buddy clear state and exit
+ * the loop if they both have the dissimilar state.
+ */
+    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
  -    if (drm_buddy_block_is_clear(block))
-    mark_cleared(parent);
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+    }
+
+    WARN_ON(defrag &&
+    (drm_buddy_block_is_clear(block) ==
+ drm_buddy_block_is_clear(buddy)));
    list_del(>link);
  @@ -304,8 +320,57 @@ static void __drm_buddy_free(struct drm_buddy 
*mm,

  block = parent;
  }
  -    mark_free(mm, block);
+    order = drm_buddy_block_order(block);
+    if (block_order != order)
+    mark_free(mm, block);
+
+    return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routi

[PATCH v7 3/3] drm/buddy: Add defragmentation support

2024-02-21 Thread Arunpravin Paneer Selvam
Add a function to support defragmentation.

v1:
  - Defragment the memory beginning from min_order
till the required memory space is available.

v2(Matthew):
  - add amdgpu user for defragmentation
  - add a warning if the two blocks are incompatible on
defragmentation
  - call full defragmentation in the fini() function
  - place a condition to test if min_order is equal to 0
  - replace the list with safe_reverse() variant as we might
remove the block from the list.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++-
 drivers/gpu/drm/drm_buddy.c  | 93 +---
 include/drm/drm_buddy.h  |  3 +
 3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
   min_block_size,
   >blocks,
   vres->flags);
-   if (unlikely(r))
-   goto error_free_blocks;
+   if (unlikely(r)) {
+   if (r == -ENOSPC) {
+   drm_buddy_defrag(mm, min_block_size);
+   r = drm_buddy_alloc_blocks(mm, fpfn,
+  lpfn,
+  size,
+  min_block_size,
+  >blocks,
+  vres->flags);
+   if (unlikely(r))
+   goto error_free_blocks;
+   } else {
+   goto error_free_blocks;
+   }
+   }
 
if (size > remaining_size)
remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 18e004fa39d3..56bd1560fbcd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -203,6 +203,8 @@ void drm_buddy_fini(struct drm_buddy *mm)
drm_block_free(mm, mm->roots[i]);
}
 
+   drm_buddy_defrag(mm, mm->chunk_size << mm->max_order);
+
WARN_ON(mm->avail != mm->size);
 
kfree(mm->roots);
@@ -276,25 +278,39 @@ drm_get_buddy(struct drm_buddy_block *block)
 }
 EXPORT_SYMBOL(drm_get_buddy);
 
-static void __drm_buddy_free(struct drm_buddy *mm,
-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
 {
+   unsigned int order, block_order;
struct drm_buddy_block *parent;
 
+   block_order = drm_buddy_block_order(block);
+
while ((parent = block->parent)) {
-   struct drm_buddy_block *buddy;
+   struct drm_buddy_block *buddy = NULL;
 
buddy = __get_buddy(block);
 
if (!drm_buddy_block_is_free(buddy))
break;
 
-   if (drm_buddy_block_is_clear(block) !=
-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   /*
+* Check the block and its buddy clear state and exit
+* the loop if they both have the dissimilar state.
+*/
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
 
-   if (drm_buddy_block_is_clear(block))
-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
+
+   WARN_ON(defrag &&
+   (drm_buddy_block_is_clear(block) ==
+drm_buddy_block_is_clear(buddy)));
 
list_del(>link);
 
@@ -304,8 +320,57 @@ static void __drm_buddy_free(struct drm_buddy *mm,
block = parent;
}
 
-   mark_free(mm, block);
+   order = drm_buddy_block_order(block);
+   if (block_order != order)
+   mark_free(mm, block);
+
+   return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_block_size: minimum size in bytes to begin
+ * the defragmenta

[PATCH v7 2/3] drm/amdgpu: Enable clear page functionality

2024-02-21 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1(Christian):
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 010b0cb7693c..904d71b3393f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1359,8 +1361,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/am

[PATCH v7 1/3] drm/buddy: Implement tracking clear page feature

2024-02-21 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.

v3: (Matthew)
  - Keep DRM_BUDDY_HEADER_CLEAR define sorted.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 190 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  19 +-
 6 files changed, 186 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..18e004fa39d3 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,26 +318,59 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
-/**
- * drm_buddy_free_list - free bloc

Re: [PATCH] drm/buddy: Modify duplicate list_splice_tail call

2024-02-18 Thread Arunpravin Paneer Selvam

Hi Christian,

On 2/16/2024 5:29 PM, Christian König wrote:



Am 16.02.24 um 12:46 schrieb Arunpravin Paneer Selvam:



On 2/16/2024 4:41 PM, Matthew Auld wrote:

On 16/02/2024 10:00, Arunpravin Paneer Selvam wrote:

Remove the duplicate list_splice_tail call when the
total_allocated < size condition is true.

Cc:  # 6.7+
Fixes: 8746c6c9dfa3 ("drm/buddy: Fix alloc_range() error handling 
code")

Reported-by: Bert Karwatzki 
Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..c4222b886db7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -538,13 +538,13 @@ static int __alloc_range(struct drm_buddy *mm,
  list_add(>left->tmp_link, dfs);
  } while (1);
  -    list_splice_tail(, blocks);
-
  if (total_allocated < size) {
  err = -ENOSPC;
  goto err_free;
  }
  +    list_splice_tail(, blocks);


Sigh. Can we extend the unit test(s) to catch this?

Sure, Let me check.


In the meantime I'm going to push this one to drm-misc-fixes.

Thank you!


Regards,
Christian.



Regards,
Arun.


Reviewed-by: Matthew Auld 


+
  return 0;
    err_undo:

base-commit: a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea








Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-16 Thread Arunpravin Paneer Selvam

Hi Matthew,

Could you review the v6?

Thanks,
Arun.

On 2/8/2024 9:19 PM, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the otherhand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 192 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  18 +-
  6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, >blocks);

+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
  
  	atomic64_sub(vis_usage, >vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {

-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
  
+	if (drm_buddy_block_is_clear(block)) {

+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
  
  	return 0;

@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
  
+		if (drm_buddy_block_is_clear(block) !=

+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
  
  		drm_block_free(mm, block);

@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
  
-/**

Re: [PATCH] drm/buddy: Modify duplicate list_splice_tail call

2024-02-16 Thread Arunpravin Paneer Selvam




On 2/16/2024 4:41 PM, Matthew Auld wrote:

On 16/02/2024 10:00, Arunpravin Paneer Selvam wrote:

Remove the duplicate list_splice_tail call when the
total_allocated < size condition is true.

Cc:  # 6.7+
Fixes: 8746c6c9dfa3 ("drm/buddy: Fix alloc_range() error handling code")
Reported-by: Bert Karwatzki 
Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..c4222b886db7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -538,13 +538,13 @@ static int __alloc_range(struct drm_buddy *mm,
  list_add(>left->tmp_link, dfs);
  } while (1);
  -    list_splice_tail(, blocks);
-
  if (total_allocated < size) {
  err = -ENOSPC;
  goto err_free;
  }
  +    list_splice_tail(, blocks);


Sigh. Can we extend the unit test(s) to catch this?

Sure, Let me check.

Regards,
Arun.


Reviewed-by: Matthew Auld 


+
  return 0;
    err_undo:

base-commit: a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea




Re: [PATCH 2/6] drm/buddy: fix range bias

2024-02-16 Thread Arunpravin Paneer Selvam

Looks good.

Reviewed-by: Arunpravin Paneer Selvam 



On 2/15/2024 11:14 PM, Matthew Auld wrote:

There is a corner case here where start/end is after/before the block
range we are currently checking. If so we need to be sure that splitting
the block will eventually give use the block size we need. To do that we
should adjust the block range to account for the start/end, and only
continue with the split if the size/alignment will fit the requested
size. Not doing so can result in leaving split blocks unmerged when it
eventually fails.

Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function")
Signed-off-by: Matthew Auld
Cc: Arunpravin Paneer Selvam
Cc: Christian König
Cc:  # v5.18+
---
  drivers/gpu/drm/drm_buddy.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..d09540d4065b 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
 u64 start, u64 end,
 unsigned int order)
  {
+   u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;
  
+		if (block_start < start || block_end > end) {

+   u64 adjusted_start = max(block_start, start);
+   u64 adjusted_end = min(block_end, end);
+
+   if (round_down(adjusted_end + 1, req_size) <=
+   round_up(adjusted_start, req_size))
+   continue;
+   }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*


Re: [PATCH 4/6] drm/tests/drm_buddy: add alloc_range_bias test

2024-02-16 Thread Arunpravin Paneer Selvam

Reviewed-by: Arunpravin Paneer Selvam 



On 2/15/2024 11:14 PM, Matthew Auld wrote:

Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION.

Signed-off-by: Matthew Auld
Cc: Arunpravin Paneer Selvam
Cc: Christian König
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 218 +
  1 file changed, 218 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index edacc1adb28f..3d4b29686132 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -14,11 +14,216 @@
  
  #include "../lib/drm_random.h"
  
+static unsigned int random_seed;

+
  static inline u64 get_size(int order, u64 chunk_size)
  {
return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_range_bias(struct kunit *test)

+{
+   u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
+   DRM_RND_STATE(prng, random_seed);
+   unsigned int i, count, *order;
+   struct drm_buddy mm;
+   LIST_HEAD(allocated);
+
+   bias_size = SZ_1M;
+   ps = roundup_pow_of_two(prandom_u32_state() % bias_size);
+   ps = max(SZ_4K, ps);
+   mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */
+
+   kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
+
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(, mm_size, ps),
+  "buddy_init failed\n");
+
+   count = mm_size / bias_size;
+   order = drm_random_order(count, );
+   KUNIT_EXPECT_TRUE(test, order);
+
+   /*
+* Idea is to split the address space into uniform bias ranges, and then
+* in some random order allocate within each bias, using various
+* patterns within. This should detect if allocations leak out from a
+* given bias, for example.
+*/
+
+   for (i = 0; i < count; i++) {
+   LIST_HEAD(tmp);
+   u64 size;
+
+   bias_start = order[i] * bias_size;
+   bias_end = bias_start + bias_size;
+   bias_rem = bias_size;
+
+   /* internal round_up too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, bias_size,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc failed with bias(%x-%x), size=%u, 
ps=%u\n",
+ bias_start, bias_end, bias_size, 
bias_size);
+
+   /* size too big */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start,
+bias_end, 
bias_size + ps, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with bias(%x-%x), 
size=%u, ps=%u\n",
+ bias_start, bias_end, bias_size + ps, ps);
+
+   /* bias range too small for size */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end, 
bias_size, ps,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc didn't fail with bias(%x-%x), 
size=%u, ps=%u\n",
+ bias_start + ps, bias_end, bias_size, ps);
+
+   /* bias misaligned */
+   KUNIT_ASSERT_TRUE_MSG(test,
+ drm_buddy_alloc_blocks(, bias_start + 
ps,
+bias_end - ps,
+bias_size >> 1, 
bias_size >> 1,
+,
+
DRM_BUDDY_RANGE_ALLOCATION),
+ "buddy_alloc h didn't fail with bias(%x-%x), 
size=%u, ps=%u\n",
+ bias_start + ps, bias_end - ps, bias_size >> 1, 
bias_size >> 1);
+
+   /* single big page */
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, bias_start,
+ bias_end, 
bias_size, bias_size,
+ 

Re: [PATCH 3/6] drm/buddy: check range allocation matches alignment

2024-02-16 Thread Arunpravin Paneer Selvam

Its good to check the alignment

Reviewed-by: Arunpravin Paneer Selvam 



On 2/15/2024 11:14 PM, Matthew Auld wrote:

Likely not a big deal for real users, but for consistency we should
respect the min_page_size here. Main issue is that bias allocations
turns into normal range allocation if the range and size matches
exactly, and in the next patch we want to add some unit tests for this
part of the api.

Signed-off-by: Matthew Auld
Cc: Arunpravin Paneer Selvam
Cc: Christian König
---
  drivers/gpu/drm/drm_buddy.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d09540d4065b..ee9913016626 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
return -EINVAL;
  
  	/* Actual range allocation */

-   if (start + size == end)
+   if (start + size == end) {
+   if (!IS_ALIGNED(start | end, min_block_size))
+   return -EINVAL;
+
return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+   }
  
  	original_size = size;

original_min_size = min_block_size;


Re: [PATCH 1/6] drm/tests/drm_buddy: fix 32b build

2024-02-16 Thread Arunpravin Paneer Selvam

Reviewed-by: Arunpravin Paneer Selvam 



On 2/15/2024 11:14 PM, Matthew Auld wrote:

Doesn't seem to compile on 32b, presumably due to u64 mod/division.
Simplest is to just switch over to u32 here. Also make print modifiers
consistent with that.

Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test")
Reported-by: Geert Uytterhoeven
Signed-off-by: Matthew Auld
Cc: Arunpravin Paneer Selvam
Cc: Christian König
Cc: Maxime Ripard
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 16 
  1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index fee6bec757d1..edacc1adb28f 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 chunk_size)
  
  static void drm_test_buddy_alloc_contiguous(struct kunit *test)

  {
-   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   u32 mm_size, ps = SZ_4K, i, n_pages, total;
struct drm_buddy_block *block;
struct drm_buddy mm;
LIST_HEAD(left);
@@ -56,30 +56,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test,
   drm_buddy_alloc_blocks(, 0, mm_size,
  ps, ps, list, 0),
-  "buddy_alloc hit an error size=%d\n",
+  "buddy_alloc hit an error size=%u\n",
   ps);
} while (++i < n_pages);
  
  	KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,

   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%d\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   2 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+  "buddy_alloc didn't error size=%u\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
   3 * ps, ps, 
,
   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+  "buddy_alloc didn't error size=%u\n", 3 * ps);
/*
 * At this point we should have enough contiguous space for 2 blocks,
 * however they are never buddies (since we freed middle and right) so
@@ -88,13 +88,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit 
*test)
KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
2 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 2 * ps);
+  "buddy_alloc hit an error size=%u\n", 2 * ps);
  
  	drm_buddy_free_list(, );

KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
3 * ps, ps, 
,

DRM_BUDDY_CONTIGUOUS_ALLOCATION),
-  "buddy_alloc hit an error size=%d\n", 3 * ps);
+  "buddy_alloc hit an error size=%u\n", 3 * ps);
  
  	total = 0;

list_for_each_entry(block, , link)


[PATCH] drm/buddy: Modify duplicate list_splice_tail call

2024-02-16 Thread Arunpravin Paneer Selvam
Remove the duplicate list_splice_tail call when the
total_allocated < size condition is true.

Cc:  # 6.7+
Fixes: 8746c6c9dfa3 ("drm/buddy: Fix alloc_range() error handling code")
Reported-by: Bert Karwatzki 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..c4222b886db7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -538,13 +538,13 @@ static int __alloc_range(struct drm_buddy *mm,
list_add(>left->tmp_link, dfs);
} while (1);
 
-   list_splice_tail(, blocks);
-
if (total_allocated < size) {
err = -ENOSPC;
goto err_free;
}
 
+   list_splice_tail(, blocks);
+
return 0;
 
 err_undo:

base-commit: a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea
-- 
2.25.1



[PATCH v2 2/2] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-14 Thread Arunpravin Paneer Selvam
From: Matthew Auld 

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

v2: Fix checkpatch warnings.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
Reviewed-by: Arunpravin Paneer Selvam 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
 1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..fee6bec757d1 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   2 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
u64 mm_size

[PATCH v2 1/2] drm/buddy: Fix alloc_range() error handling code

2024-02-14 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:  # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Acked-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:

base-commit: b6ddaa63f728d26c12048aed76be99c24f435c41
-- 
2.25.1



[PATCH 2/2] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-13 Thread Arunpravin Paneer Selvam
From: Matthew Auld 

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
Reviewed-by: Arunpravin Paneer Selvam 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
 1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..4215d8b5fcf0 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
u64 mm_size, size, start = 0;
@@ -28

[PATCH 1/2] drm/buddy: Fix alloc_range() error handling code

2024-02-13 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:  # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Acked-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:

base-commit: 2c80a2b715df75881359d07dbaacff8ad411f40e
-- 
2.25.1



[PATCH 2/2] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-13 Thread Arunpravin Paneer Selvam
Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Reviewed-by: Arunpravin Paneer Selvam 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
 1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..fee6bec757d1 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
 }
 
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   2 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+   3 * ps, ps, 
,
+   
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
 static void drm_test_buddy_alloc_pathological(struct kunit *test)
 {
u64 mm_size, size, start = 0;
@@ -280,6 +368,7 @@ static struct kuni

[PATCH 1/2] drm/buddy: Fix alloc_range() error handling code

2024-02-13 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:  # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Acked-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:

base-commit: 2c80a2b715df75881359d07dbaacff8ad411f40e
-- 
2.25.1



Re: [PATCH] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-12 Thread Arunpravin Paneer Selvam

Hi Matthew,

Can I push this test case along with the bug fix patch.

Thanks,
Arun.

On 2/8/2024 8:06 PM, Matthew Auld wrote:

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

References: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld 
Cc: Arunpravin Paneer Selvam 
Cc: Limonciello 
Cc: Christian König 
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
  1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..4215d8b5fcf0 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
  
  #include 

  #include 
+#include 
  
  #include 
  
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)

return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)

+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
  static void drm_test_buddy_alloc_pathological(struct kunit *test)
  {
u64 mm_size

Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-09 Thread Arunpravin Paneer Selvam

Hi Daniel,

On 2/9/2024 11:34 PM, Daniel Vetter wrote:

On Fri, Feb 09, 2024 at 08:56:24PM +0530, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:   # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Acked-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 

New unit test for this would be most excellent - these kind of missed edge
cases is exactly what kunit is for. Can you please follow up with, since
we don't want to hold up the bugfix for longer?
Matthew Auld has added a new unit test for this case. Please let us know 
if this will suffice.

https://patchwork.freedesktop.org/patch/577497/?series=129671=1

Thanks,
Arun.

-Sima


---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
  
  	list_splice_tail(, blocks);

+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
  
  err_undo:

--
2.25.1





[PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-09 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:   # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Acked-by: Christian König 
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:
-- 
2.25.1



[PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-09 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Cc:   # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello 
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-arunpravin.paneersel...@amd.com/
Reviewed-by: Matthew Auld 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:
-- 
2.25.1



Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-09 Thread Arunpravin Paneer Selvam




On 2/8/2024 7:47 PM, Matthew Auld wrote:

On 08/02/2024 13:47, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?

In the try hard contiguous allocation, for example the requested 
memory is 1024 pages,
it might go and pick the highest and last block (of size 512 pages) 
in the freelist where
there are no more space exist in the total address range. In this 
kind of corner case,
alloc_range was returning success though the allocated size is less 
than the requested size.
Hence in try_hard_contiguous_allocation, we will not proceed to the 
LHS allocation and
we return only with the RHS allocation having only the 512 pages of 
allocation. This
leads to display corruption in many use cases (I think mainly when 
requested for contiguous huge buffer)

mainly on APU platforms.


Ok, I guess other thing is doing:

lhs_offset = drm_buddy_block_offset(block) - lhs_size;

I presume it's possible for block_offset < lhs_size here, which might 
be funny?
yes, seems it is possible, I will modify the lhs_offset calculation and 
send the patch for review.


Thanks,
Arun.




Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory 
allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:






Re: [PATCH v5 1/3] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam




On 1/31/2024 11:52 PM, Matthew Auld wrote:

On 30/01/2024 19:48, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the otherhand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(>link, node->link.prev, >link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(>link);
    drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
  BUG_ON(!drm_buddy_block_is_allocated(block));
  mm->avail += drm_buddy_block_size(mm, block);
+    if (drm_buddy_block_is_clear(block))
+    mm->clear_avail += drm_buddy_block_size(mm, block);
+
  __drm_buddy_free(mm, block);
  }
  EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
   * @mm: DRM buddy manager
   * @objects: input list head to free blocks
   */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head 
*objects)

+void drm_buddy_free_list(struct drm_buddy *mm,
+ struct list_head *objects,
+ unsigned long flags)
  {
  struct drm_buddy_block *block, *on;
  +    if (flags & DRM_BUDDY_CLEARED) {
+    list_for_each_entry(block, objects, link)
+    mark_cleared(bloc

[PATCH v6 3/3] drm/buddy: Add defragmentation support

2024-02-08 Thread Arunpravin Paneer Selvam
Add a function to support defragmentation.

v1: Defragment the memory beginning from min_order
till the required memory space is available.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 67 +++--
 include/drm/drm_buddy.h |  3 ++
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 33ad0cfbd54c..fac423d2cb73 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
 }
 EXPORT_SYMBOL(drm_get_buddy);
 
-static void __drm_buddy_free(struct drm_buddy *mm,
-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
 {
struct drm_buddy_block *parent;
+   unsigned int order;
 
while ((parent = block->parent)) {
struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
-   if (drm_buddy_block_is_clear(block) !=
-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
 
-   if (drm_buddy_block_is_clear(block))
-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
 
list_del(>link);
 
@@ -304,8 +308,49 @@ static void __drm_buddy_free(struct drm_buddy *mm,
block = parent;
}
 
+   order = drm_buddy_block_order(block);
mark_free(mm, block);
+
+   return order;
+}
+
+/**
+ * drm_buddy_defrag - Defragmentation routine
+ *
+ * @mm: DRM buddy manager
+ * @min_order: minimum order in the freelist to begin
+ * the defragmentation process
+ *
+ * Driver calls the defragmentation function when the
+ * requested memory allocation returns -ENOSPC.
+ */
+void drm_buddy_defrag(struct drm_buddy *mm,
+ unsigned int min_order)
+{
+   struct drm_buddy_block *block;
+   struct list_head *list;
+   unsigned int order;
+   int i;
+
+   if (min_order > mm->max_order)
+   return;
+
+   for (i = min_order - 1; i >= 0; i--) {
+   list = >free_list[i];
+   if (list_empty(list))
+   continue;
+
+   list_for_each_entry_reverse(block, list, link) {
+   if (!block->parent)
+   continue;
+
+   order = __drm_buddy_free(mm, block, 1);
+   if (order >= min_order)
+   return;
+   }
+   }
 }
+EXPORT_SYMBOL(drm_buddy_defrag);
 
 /**
  * drm_buddy_free_block - free a block
@@ -321,7 +366,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail += drm_buddy_block_size(mm, block);
 
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
@@ -470,7 +515,7 @@ __alloc_range_bias(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -588,7 +633,7 @@ alloc_from_freelist(struct drm_buddy *mm,
 
 err_undo:
if (tmp != order)
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -668,7 +713,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 
 err_free:
if (err == -ENOSPC && total_allocated_on_err) {
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index d81c596dfa38..d0f63e7b5915 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -166,6 +166,9 @@ void drm_buddy_free_list(struct drm_buddy *mm,
 struct list_head *objects,
 unsigned int flags);
 
+void drm_buddy_defrag(struct drm_buddy *mm,
+ unsigned int min_order);
+
 void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
 void drm_buddy_

[PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
  - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
operation within drm buddy.
  - Write a macro block_incompatible() to allocate the required blocks.
  - Update the xe driver for the drm_buddy_free_list change in arguments.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 192 ++
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
 include/drm/drm_buddy.h   |  18 +-
 6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
-/**
- * drm_buddy_free_list - free blocks
- *
- * @mm: DRM buddy manager
- * @objects: input list head to f

[PATCH v6 2/3] drm/amdgpu: Enable clear page functionality

2024-02-08 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b671b0665492..c673e070199f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1348,8 +1350,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..b

Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2024-02-08 Thread Arunpravin Paneer Selvam




On 1/31/2024 11:59 PM, Matthew Auld wrote:

On 30/01/2024 20:30, Arunpravin Paneer Selvam wrote:

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the otherhand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling 
the clear-on-free idea for VRAM in i915 and then also tracking that 
in the allocator, however that work unfortunately is not upstream. 
The code is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300 



It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable 
to me). And also the inclusion of a de-fragmentation routine, since 
buddy blocks are now not always merged back, we might choose to run 
the defrag in some cases, which also sounds reasonable. IIRC in 
amdgpu userspace can control the page-size for an allocation, so 
perhaps you would want to run it first if the allocation fails, 
before trying to evict stuff?
I checked the clear-on-free idea implemented in i915. In amdgpu 
version, we are clearing all the blocks in amdgpu free routine and 
DRM buddy expects only the DRM_BUDDY_CLEARED flag. Basically, we are 
keeping the cleared blocks ready to be allocated when the user 
request for the cleared memory. We observed that this improves the 
performance on games and resolves the stutter issues as well. I see 
i915 active fences part does the same job for i915. Could we move 
this part into i915 free routine and set the DRM_BUDDY_CLEARED flag.


On de-fragmentation , I have included a function which can be called 
at places where we get -ENOSPC. This routine will merge back the 
clear and dirty blocks together to form a larger block of requested 
size. I am wondering where we could use this routine as for the 
non-contiguous memory we have the fallback method and for the 
contiguous memory we have the try harder method which searches 
through the tree.


Don't you also want to call it from your vram manager when the 
requested page size is something large, before trying to evict stuff? 
That could now fail due to fragmention IIUC. Or am I misreading 
mdgpu_vram_mgr_new()?
Yes you are right, we can call the defragmentation routine from VRAM 
manager when there is a allocation failure.


Thanks,
Arun




I agree we can have 2 lists (clear list and dirty list) and this 
would reduce the search iterations. But we need to handle the 2 lists 
design in all the functions which might require more time for testing 
on all platforms. Could we just go ahead with 1 list (free list) for 
now and I am going to take up this work as my next task.


Sounds good.



Thanks,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 
+++---

  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, &g

Re: [PATCH] drm/tests/drm_buddy: add alloc_contiguous test

2024-02-08 Thread Arunpravin Paneer Selvam



On 2/8/2024 8:06 PM, Matthew Auld wrote:

Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION.

That's really quick :)

Reviewed-by: Arunpravin Paneer Selvam 



References:https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Signed-off-by: Matthew Auld
Cc: Arunpravin Paneer Selvam
Cc: Limonciello
Cc: Christian König
---
  drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++
  1 file changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c
index ea2af6bd9abe..4215d8b5fcf0 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -8,6 +8,7 @@
  
  #include 

  #include 
+#include 
  
  #include 
  
@@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size)

return (1 << order) * chunk_size;
  }
  
+static void drm_test_buddy_alloc_contiguous(struct kunit *test)

+{
+   u64 mm_size, ps = SZ_4K, i, n_pages, total;
+   struct drm_buddy_block *block;
+   struct drm_buddy mm;
+   LIST_HEAD(left);
+   LIST_HEAD(middle);
+   LIST_HEAD(right);
+   LIST_HEAD(allocated);
+
+   mm_size = 16 * 3 * SZ_4K;
+
+   KUNIT_EXPECT_FALSE(test, drm_buddy_init(, mm_size, ps));
+
+   /*
+* Idea is to fragment the address space by alternating block
+* allocations between three different lists; one for left, middle and
+* right. We can then free a list to simulate fragmentation. In
+* particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+* including the try_harder path.
+*/
+
+   i = 0;
+   n_pages = mm_size / ps;
+   do {
+   struct list_head *list;
+   int slot = i % 3;
+
+   if (slot == 0)
+   list = 
+   else if (slot == 1)
+   list = 
+   else
+   list = 
+   KUNIT_ASSERT_FALSE_MSG(test,
+  drm_buddy_alloc_blocks(, 0, mm_size,
+ ps, ps, list, 0),
+  "buddy_alloc hit an error size=%d\n",
+  ps);
+   } while (++i < n_pages);
+
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%d\n", 3 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc didn't error size=%llu\n", 3 * ps);
+   /*
+* At this point we should have enough contiguous space for 2 blocks,
+* however they are never buddies (since we freed middle and right) so
+* will require the try_harder logic to find them.
+*/
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  2 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 2 * ps);
+
+   drm_buddy_free_list(, );
+   KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(, 0, mm_size,
+  3 * ps, ps, 
,
+  
DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+  "buddy_alloc hit an error size=%d\n", 3 * ps);
+
+   total = 0;
+   list_for_each_entry(block, , link)
+   total += drm_buddy_block_size(, block);
+
+   KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
+
+   drm_buddy_free_list(, );
+   drm_buddy_fini();
+}
+
  static void drm_test_buddy_alloc_pathological(struct kunit *test)
  {
u64 mm_size, size, start = 0;
@@ -28

Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 2/8/2024 7:00 PM, Matthew Auld wrote:

On 07/02/2024 17:44, Arunpravin Paneer Selvam wrote:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.


Can you please give an example here?

In the try hard contiguous allocation, for example the requested memory 
is 1024 pages,
it might go and pick the highest and last block (of size 512 pages) in 
the freelist where
there are no more space exist in the total address range. In this kind 
of corner case,
alloc_range was returning success though the allocated size is less than 
the requested size.
Hence in try_hard_contiguous_allocation, we will not proceed to the LHS 
allocation and
we return only with the RHS allocation having only the 512 pages of 
allocation. This
leads to display corruption in many use cases (I think mainly when 
requested for contiguous huge buffer)

mainly on APU platforms.

Thanks,
Arun.


The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 
---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:




Re: [PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-08 Thread Arunpravin Paneer Selvam

Hi Christian,

On 2/8/2024 12:27 PM, Christian König wrote:

Am 07.02.24 um 18:44 schrieb Arunpravin Paneer Selvam:

Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - 
https://gitlab.freedesktop.org/drm/amd/-/issues/3097

Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 


Tested-by: Mario Limonciello 


Acked-by: Christian König 

CC: stable.. ?

I will check and add the stable kernel version.

Thanks,
Arun.



---
  drivers/gpu/drm/drm_buddy.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
  } while (1);
    list_splice_tail(, blocks);
+
+    if (total_allocated < size) {
+    err = -ENOSPC;
+    goto err_free;
+    }
+
  return 0;
    err_undo:






[PATCH] drm/buddy: Fix alloc_range() error handling code

2024-02-07 Thread Arunpravin Paneer Selvam
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.

The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.

Gitlab ticket link - https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Signed-off-by: Arunpravin Paneer Selvam 
Tested-by: Mario Limonciello 
---
 drivers/gpu/drm/drm_buddy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
 
list_splice_tail(, blocks);
+
+   if (total_allocated < size) {
+   err = -ENOSPC;
+   goto err_free;
+   }
+
return 0;
 
 err_undo:
-- 
2.25.1



Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2024-01-30 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the otherhand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling the 
clear-on-free idea for VRAM in i915 and then also tracking that in the 
allocator, however that work unfortunately is not upstream. The code 
is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300


It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
me). And also the inclusion of a de-fragmentation routine, since buddy 
blocks are now not always merged back, we might choose to run the 
defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
userspace can control the page-size for an allocation, so perhaps you 
would want to run it first if the allocation fails, before trying to 
evict stuff?
I checked the clear-on-free idea implemented in i915. In amdgpu version, 
we are clearing all the blocks in amdgpu free routine and DRM buddy 
expects only the DRM_BUDDY_CLEARED flag. Basically, we are keeping the 
cleared blocks ready to be allocated when the user request for the 
cleared memory. We observed that this improves the performance on games 
and resolves the stutter issues as well. I see i915 active fences part 
does the same job for i915. Could we move this part into i915 free 
routine and set the DRM_BUDDY_CLEARED flag.


On de-fragmentation , I have included a function which can be called at 
places where we get -ENOSPC. This routine will merge back the clear and 
dirty blocks together to form a larger block of requested size. I am 
wondering where we could use this routine as for the non-contiguous 
memory we have the fallback method and for the contiguous memory we have 
the try harder method which searches through the tree.


I agree we can have 2 lists (clear list and dirty list) and this would 
reduce the search iterations. But we need to handle the 2 lists design 
in all the functions which might require more time for testing on all 
platforms. Could we just go ahead with 1 list (free list) for now and I 
am going to take up this work as my next task.


Thanks,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/d

[PATCH v5 3/3] drm/buddy: Add defragmentation support

2024-01-30 Thread Arunpravin Paneer Selvam
Add a function to support defragmentation.

v5: Defragment the freelist order array beginning
from min_order.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 70 ++---
 1 file changed, 58 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d44172f23f05..8aa6d31cb826 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -276,10 +276,12 @@ drm_get_buddy(struct drm_buddy_block *block)
 }
 EXPORT_SYMBOL(drm_get_buddy);
 
-static void __drm_buddy_free(struct drm_buddy *mm,
-struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+struct drm_buddy_block *block,
+bool defrag)
 {
struct drm_buddy_block *parent;
+   unsigned int order;
 
while ((parent = block->parent)) {
struct drm_buddy_block *buddy;
@@ -289,12 +291,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
-   if (drm_buddy_block_is_clear(block) !=
-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
 
-   if (drm_buddy_block_is_clear(block))
-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
 
list_del(>link);
 
@@ -304,7 +308,37 @@ static void __drm_buddy_free(struct drm_buddy *mm,
block = parent;
}
 
+   order = drm_buddy_block_order(block);
mark_free(mm, block);
+
+   return order;
+}
+
+static void drm_buddy_defrag(struct drm_buddy *mm,
+unsigned int min_order)
+{
+   struct drm_buddy_block *block;
+   struct list_head *list;
+   unsigned int order;
+   int i;
+
+   if (min_order > mm->max_order)
+   return;
+
+   for (i = min_order - 1; i >= 0; i--) {
+   list = >free_list[i];
+   if (list_empty(list))
+   continue;
+
+   list_for_each_entry_reverse(block, list, link) {
+   if (!block->parent)
+   continue;
+
+   order = __drm_buddy_free(mm, block, 1);
+   if (order >= min_order)
+   return;
+   }
+   }
 }
 
 /**
@@ -321,7 +355,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail += drm_buddy_block_size(mm, block);
 
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
@@ -447,7 +481,7 @@ __alloc_range_bias(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -577,7 +611,7 @@ alloc_from_freelist(struct drm_buddy *mm,
 
 err_undo:
if (tmp != order)
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -657,7 +691,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 
 err_free:
if (err == -ENOSPC && total_allocated_on_err) {
@@ -903,7 +937,17 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
if (order-- == min_order) {
if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
-   !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+   !(flags & DRM_BUDDY_RANGE_ALLOCATION)) {
+   /*
+* Defragment the freelist
+*/
+   drm_buddy_defrag(mm, min_order);
+   /*
+* Try contiguous block allocation 
again!
+*/
+   block = alloc_from_freelist(mm, 
min_order, flags);
+ 

[PATCH v5 1/3] drm/buddy: Implement tracking clear page feature

2024-01-30 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+  

[PATCH v5 2/3] drm/amdgpu: Enable clear page functionality

2024-01-30 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b2e9a2f81d82..be32f9852d19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1357,8 +1359,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 46a24d2308aa..1

[PATCH v4 3/3] drm/buddy: Add defragmentation support

2024-01-25 Thread Arunpravin Paneer Selvam
Add a function to support defragmentation.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 48 -
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d44172f23f05..3cffa9cc12d7 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -277,7 +277,8 @@ drm_get_buddy(struct drm_buddy_block *block)
 EXPORT_SYMBOL(drm_get_buddy);
 
 static void __drm_buddy_free(struct drm_buddy *mm,
-struct drm_buddy_block *block)
+struct drm_buddy_block *block,
+bool defrag)
 {
struct drm_buddy_block *parent;
 
@@ -289,12 +290,14 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
-   if (drm_buddy_block_is_clear(block) !=
-   drm_buddy_block_is_clear(buddy))
-   break;
+   if (!defrag) {
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
 
-   if (drm_buddy_block_is_clear(block))
-   mark_cleared(parent);
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+   }
 
list_del(>link);
 
@@ -307,6 +310,19 @@ static void __drm_buddy_free(struct drm_buddy *mm,
mark_free(mm, block);
 }
 
+static void drm_buddy_defrag(struct drm_buddy *mm)
+{
+   struct drm_buddy_block *block;
+   struct list_head *list;
+
+   list = >free_list[0];
+   if (list_empty(list))
+   return;
+
+   list_for_each_entry_reverse(block, list, link)
+   __drm_buddy_free(mm, block, 1);
+}
+
 /**
  * drm_buddy_free_block - free a block
  *
@@ -321,7 +337,7 @@ void drm_buddy_free_block(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
mm->clear_avail += drm_buddy_block_size(mm, block);
 
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
 
@@ -447,7 +463,7 @@ __alloc_range_bias(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -577,7 +593,7 @@ alloc_from_freelist(struct drm_buddy *mm,
 
 err_undo:
if (tmp != order)
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
return ERR_PTR(err);
 }
 
@@ -657,7 +673,7 @@ static int __alloc_range(struct drm_buddy *mm,
if (buddy &&
(drm_buddy_block_is_free(block) &&
 drm_buddy_block_is_free(buddy)))
-   __drm_buddy_free(mm, block);
+   __drm_buddy_free(mm, block, 0);
 
 err_free:
if (err == -ENOSPC && total_allocated_on_err) {
@@ -903,7 +919,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
if (order-- == min_order) {
if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
-   !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+   !(flags & DRM_BUDDY_RANGE_ALLOCATION)) {
/*
 * Try contiguous block allocation 
through
 * try harder method
@@ -912,6 +928,16 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
 
original_size,
 
original_min_size,
 
blocks);
+
+   /*
+* Defragment the freelist and try 
contiguous block
+* allocation
+*/
+   drm_buddy_defrag(mm);
+   if (!IS_ERR(alloc_from_freelist(mm, 
min_order, flags)))
+   break;
+   }
+
err = -ENOSPC;
goto err_free;
}
-- 
2.25.1



[PATCH v4 2/3] drm/amdgpu: Enable clear page functionality

2024-01-25 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b2e9a2f81d82..be32f9852d19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -595,8 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -626,15 +626,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1357,8 +1359,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 46a24d2308aa..1

[PATCH v4 1/3] drm/buddy: Implement tracking clear page feature

2024-01-25 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+  

Re: [PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2023-12-21 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 12/21/2023 12:51 AM, Matthew Auld wrote:

Hi,

On 14/12/2023 13:42, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the otherhand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
   but fallback to uncleared if we can't find the cleared blocks.
   when driver requests uncleared memory we try to use uncleared but
   fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.


I was not involved, but it looks like we have also tried enabling the 
clear-on-free idea for VRAM in i915 and then also tracking that in the 
allocator, however that work unfortunately is not upstream. The code 
is open source though: 
https://github.com/intel-gpu/intel-gpu-i915-backports/blob/backport/main/drivers/gpu/drm/i915/i915_buddy.c#L300


It looks like some of the design differences there are having two 
separate free lists, so mm->clean and mm->dirty (sounds reasonable to 
me). And also the inclusion of a de-fragmentation routine, since buddy 
blocks are now not always merged back, we might choose to run the 
defrag in some cases, which also sounds reasonable. IIRC in amdgpu 
userspace can control the page-size for an allocation, so perhaps you 
would want to run it first if the allocation fails, before trying to 
evict stuff?

Thanks, I will check the code.

Regards,
Arun.




v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 169 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  include/drm/drm_buddy.h   |  18 +-
  5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
  error_fini:
  ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, >blocks);
+    drm_buddy_free_list(mm, >blocks, 0);
  mutex_unlock(>lock);
    atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, >reserved_pages, 
blocks) {

-    drm_buddy_free_list(>mm, >allocated);
+    drm_buddy_free_list(>mm, >allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(>link, node->link.prev, >link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(>li

[PATCH v3 2/2] drm/amdgpu: Enable clear page functionality

2023-12-14 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

v3:(Christian)
  - Remove buffer clear code in VRAM manager instead change the
AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
the DRM_BUDDY_CLEARED flag.
  - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..be8bf375d823 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -598,8 +599,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -629,15 +629,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
@@ -1360,8 +1362,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
 
-   r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, , true);
+   r = amdgpu_fill_buffer(abo, 0, bo->base.resv, , true);
if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..c

[PATCH v3 1/2] drm/buddy: Implement tracking clear page feature

2023-12-14 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+  

[PATCH v2 2/2] drm/amdgpu: Enable clear page functionality

2023-12-10 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

v1:(Christian)
  - Dont handle clear page as TTM flag since when moving the BO back
in from GTT again we don't need that.
  - Make a specialized version of amdgpu_fill_buffer() which only
clears the VRAM areas which are not already cleared
  - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
amdgpu_object.c

v2:
  - Modify the function name amdgpu_ttm_* (Alex)
  - Drop the delayed parameter (Christian)
  - handle amdgpu_res_cleared() just above the size
calculation (Christian)
  - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
in the free path to properly wait for fences etc.. (Christian)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 16 +++---
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 53 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 15 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..5a01b6266772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -598,8 +599,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
 
bo->tbo.bdev = >mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -629,15 +629,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, );
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..252b384194c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -,6 +,59 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, 
uint32_t src_data,
return 0;
 }
 
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+   struct dma_resv *resv,
+   struct dma_fence **fence)
+{
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+   struct amdgpu_res_cursor cursor;
+   struct dma_fence *f = NULL;
+   u64 addr;
+   int r;
+
+   if (!adev->mman.buffer_funcs_enabled)
+   return -EINVAL;
+
+   amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), );
+
+   mutex_lock(>

[PATCH v2 1/2] drm/buddy: Implement tracking clear page feature

2023-12-10 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
  - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
cleared. Else, reset the clear flag for each block in the list.

  - For merging the 2 cleared blocks compare as below,
drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+  

[PATCH 2/2] drm/amdgpu: Enable clear page functionality

2023-12-07 Thread Arunpravin Paneer Selvam
Add clear page support in vram memory region.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 13 +++--
 .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 50 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
 6 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cef920a93924..bc4ea87f8b5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
 
 /**
  * DOC: amdgpu_object
@@ -629,15 +630,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;
 
-   r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, , true);
+   r = amdgpu_clear_buffer(bo, bo->tbo.base.resv, , true);
if (unlikely(r))
goto fail_unreserve;
 
-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
}
if (!bp->resv)
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
}
 }
 
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 05991c5c8ddb..6d7514e8f40c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -,6 +,56 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, 
uint32_t src_data,
return 0;
 }
 
+int amdgpu_clear_buffer(struct amdgpu_bo *bo,
+   struct dma_resv *resv,
+   struct dma_fence **fence,
+   bool delayed)
+{
+   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+   struct amdgpu_res_cursor cursor;
+   struct dma_fence *f = NULL;
+   u64 addr;
+   int r;
+
+   if (!adev->mman.buffer_funcs_enabled)
+   return -EINVAL;
+
+   amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), );
+
+   mutex_lock(>mman.gtt_window_lock);
+   while (cursor.remaining) {
+   struct dma_fence *next = NULL;
+   u64 size;
+
+   /* Never clear more than 256MiB at once to avoid timeouts */
+   size = min(cursor.size, 256ULL << 20);
+
+   if (!amdgpu_res_cleared()) {
+   r = amdgpu_ttm_map_buffer(>tbo, bo->tbo.resource, 
,
+ 1, ring, false, , );
+   if (r)
+   goto err;
+
+   r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+   , true, delayed);
+   if (r)
+   goto err;
+   }
+   dma_fence_put(f);
+   f = next;
+
+   amdgpu_res_next(, size);
+   }
+err:
+   mutex_unlock(>mman.gtt_window_lock);
+   if (fence)
+   *fence = dma_fence_get(f);
+   dma_fence_put(f);
+
+   return r;
+}
+
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32

[PATCH 1/2] drm/buddy: Implement tracking clear page feature

2023-12-07 Thread Arunpravin Paneer Selvam
- Add tracking clear page feature.

- If driver requests cleared memory we prefer cleared memory
  but fallback to uncleared if we can't find the cleared blocks.
  when driver requests uncleared memory we try to use uncleared but
  fallback to cleared memory if necessary.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
  successfully clears the blocks in the free path. On the otherhand,
  DRM buddy marks each block as cleared.

- When a block gets freed we clear it and mark the freed block as cleared,
  when there are buddies which are cleared as well we can merge them.
  Otherwise, we prefer to keep the blocks as separated.

- Track the available cleared pages size

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
 drivers/gpu/drm/drm_buddy.c   | 169 +++---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
 drivers/gpu/drm/tests/drm_buddy_test.c|  10 +-
 include/drm/drm_buddy.h   |  18 +-
 5 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 08916538a615..d0e199cc8f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -556,7 +556,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
 
 error_free_blocks:
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 error_fini:
ttm_resource_fini(man, >base);
@@ -589,7 +589,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
 
amdgpu_vram_mgr_do_reserve(man);
 
-   drm_buddy_free_list(mm, >blocks);
+   drm_buddy_free_list(mm, >blocks, 0);
mutex_unlock(>lock);
 
atomic64_sub(vis_usage, >vis_usage);
@@ -897,7 +897,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
 
list_for_each_entry_safe(rsv, temp, >reserved_pages, blocks) {
-   drm_buddy_free_list(>mm, >allocated);
+   drm_buddy_free_list(>mm, >allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..d44172f23f05 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
__list_add(>link, node->link.prev, >link);
 }
 
+static void clear_reset(struct drm_buddy_block *block)
+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
mark_free(mm, block->left);
mark_free(mm, block->right);
 
+   if (drm_buddy_block_is_clear(block)) {
+   mark_cleared(block->left);
+   mark_cleared(block->right);
+   clear_reset(block);
+   }
+
mark_split(block);
 
return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
break;
 
+   if (drm_buddy_block_is_clear(block) !=
+   drm_buddy_block_is_clear(buddy))
+   break;
+
+   if (drm_buddy_block_is_clear(block))
+   mark_cleared(parent);
+
list_del(>link);
 
drm_block_free(mm, block);
@@ -295,6 +318,9 @@ void drm_buddy_free_block(struct drm_buddy *mm,
 {
BUG_ON(!drm_buddy_block_is_allocated(block));
mm->avail += drm_buddy_block_size(mm, block);
+   if (drm_buddy_block_is_clear(block))
+   mm->clear_avail += drm_buddy_block_size(mm, block);
+
__drm_buddy_free(mm, block);
 }
 EXPORT_SYMBOL(drm_buddy_free_block);
@@ -305,10 +331,20 @@ EXPORT_SYMBOL(drm_buddy_free_block);
  * @mm: DRM buddy manager
  * @objects: input list head to free blocks
  */
-void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
+void drm_buddy_free_list(struct drm_buddy *mm,
+struct list_head *objects,
+unsigned long flags)
 {
struct drm_buddy_block *block, *on;
 
+   if (flags & DRM_BUDDY_CLEARED) {
+   list_for_each_entry(block, objects, link)
+   mark_cleared(block);
+   } else {
+   list_for_each_entry(block, objects, link)
+   clear_reset(block);
+   }
+
list_for_each_entry_safe(block, on, o

Re: [PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation

2023-09-13 Thread Arunpravin Paneer Selvam



On 11/09/23 03:46, Matthew Auld wrote:

On 09/09/2023 17:09, Arunpravin Paneer Selvam wrote:

Problem statement: The current method roundup_power_of_two()
to allocate contiguous address triggers -ENOSPC in some cases
even though we have enough free spaces and so to help with
that we introduce a try harder mechanism.

In case of -ENOSPC, the new try harder mechanism rounddown the
original size to power of 2 and iterating over the round down
sized freelist blocks to allocate the required size traversing
RHS and LHS.

As part of the above new method implementation we moved
contiguous/alignment size computation part and trim function
to the drm buddy file.

v2: Modify the alloc_range() function to return total allocated size
 on -ENOSPC err and traverse RHS/LHS to allocate the required
 size (Matthew).

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 138 
  include/drm/drm_buddy.h |   6 +-
  2 files changed, 127 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..e909eed9cf60 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -480,10 +480,12 @@ alloc_from_freelist(struct drm_buddy *mm,
  static int __alloc_range(struct drm_buddy *mm,
   struct list_head *dfs,
   u64 start, u64 size,
- struct list_head *blocks)
+ struct list_head *blocks,
+ u64 *total_allocated_on_err)
  {
  struct drm_buddy_block *block;
  struct drm_buddy_block *buddy;
+    u64 total_allocated = 0;
  LIST_HEAD(allocated);
  u64 end;
  int err;
@@ -520,6 +522,7 @@ static int __alloc_range(struct drm_buddy *mm,
  }
    mark_allocated(block);
+    total_allocated += drm_buddy_block_size(mm, block);
  mm->avail -= drm_buddy_block_size(mm, block);
  list_add_tail(>link, );
  continue;
@@ -551,13 +554,20 @@ static int __alloc_range(struct drm_buddy *mm,
  __drm_buddy_free(mm, block);
    err_free:
-    drm_buddy_free_list(mm, );
+    if (err == -ENOSPC && total_allocated_on_err) {
+    list_splice_tail(, blocks);
+    *total_allocated_on_err = total_allocated;
+    } else {
+    drm_buddy_free_list(mm, );
+    }
+
  return err;
  }
    static int __drm_buddy_alloc_range(struct drm_buddy *mm,
 u64 start,
 u64 size,
+   u64 *total_allocated_on_err,
 struct list_head *blocks)
  {
  LIST_HEAD(dfs);
@@ -566,7 +576,62 @@ static int __drm_buddy_alloc_range(struct 
drm_buddy *mm,

  for (i = 0; i < mm->n_roots; ++i)
  list_add_tail(>roots[i]->tmp_link, );
  -    return __alloc_range(mm, , start, size, blocks);
+    return __alloc_range(mm, , start, size,
+ blocks, total_allocated_on_err);
+}
+
+static int __alloc_contig_try_harder(struct drm_buddy *mm,
+ u64 size,
+ u64 min_block_size,
+ struct list_head *blocks)
+{
+    u64 rhs_offset, lhs_offset, lhs_size, filled;
+    struct drm_buddy_block *block;
+    struct list_head *list;
+    LIST_HEAD(blocks_lhs);
+    unsigned long pages;
+    unsigned int order;
+    u64 modify_size;
+    int err;
+
+    modify_size = rounddown_pow_of_two(size);
+    pages = modify_size >> ilog2(mm->chunk_size);
+    order = fls(pages) - 1;
+    if (order == 0)
+    return -ENOSPC;
+
+    list = >free_list[order];
+    if (list_empty(list))
+    return -ENOSPC;
+
+    list_for_each_entry_reverse(block, list, link) {
+    /* Allocate blocks traversing RHS */
+    rhs_offset = drm_buddy_block_offset(block);
+    err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
+   , blocks);
+    if (!err || err != -ENOSPC)
+    return err;
+
+    lhs_size = max((size - filled), min_block_size);
+    if (!IS_ALIGNED(lhs_size, min_block_size))
+    lhs_size = round_up(lhs_size, min_block_size);
+
+    /* Allocate blocks traversing LHS */
+    lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+    err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+   NULL, _lhs);
+    if (!err) {
+    list_splice(_lhs, blocks);
+    return 0;


I guess we could attempt to trim this also (could tweak the trim to 
work on multiple nodes)? But I guess in practice should be pretty meh, 
given that the extra rhs is hopefully not too big in the corner case 
where the alignment doesn't fit the min_block_size?


Thanks for the review. good point. I will take a look into it.

Regards,

Arun.



Anyway, for patches 1-3,
Reviewed-by: Matthew Auld 


+    } else if (err != -ENOSPC) {
+    drm_buddy_free_list(mm, blocks);
+    return err;
+    }
+   

[PATCH v2 3/3] drm/i915: Move the size computations to drm buddy

2023-09-09 Thread Arunpravin Paneer Selvam
- Move roundup_power_of_two() to drm buddy file to support
  the new try harder mechanism for contiguous allocation.

- Move trim function call to drm_buddy_alloc_blocks() function.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 23 +++
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index a1bc804cfa15..0d735d5c2b35 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -59,6 +59,9 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
+   if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+   bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (place->fpfn || lpfn != man->size)
bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
@@ -72,18 +75,6 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
GEM_BUG_ON(min_page_size < mm->chunk_size);
GEM_BUG_ON(!IS_ALIGNED(size, min_page_size));
 
-   if (place->fpfn + PFN_UP(bman_res->base.size) != place->lpfn &&
-   place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   unsigned long pages;
-
-   size = roundup_pow_of_two(size);
-   min_page_size = size;
-
-   pages = size >> ilog2(mm->chunk_size);
-   if (pages > lpfn)
-   lpfn = pages;
-   }
-
if (size > lpfn << PAGE_SHIFT) {
err = -E2BIG;
goto err_free_res;
@@ -107,14 +98,6 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
if (unlikely(err))
goto err_free_blocks;
 
-   if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   u64 original_size = (u64)bman_res->base.size;
-
-   drm_buddy_block_trim(mm,
-original_size,
-_res->blocks);
-   }
-
if (lpfn <= bman->visible_size) {
bman_res->used_visible_size = PFN_UP(bman_res->base.size);
} else {
-- 
2.25.1



[PATCH v2 2/3] drm/amdgpu: Move the size computations to drm buddy

2023-09-09 Thread Arunpravin Paneer Selvam
- Move roundup_power_of_two() and IS_ALIGNED() computations to
  drm buddy file to support the new try harder mechanism for
  contiguous allocation.

- Move trim function call to drm_buddy_alloc_blocks() function.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 58 ++--
 1 file changed, 4 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c7085a747b03..18f58efc9dc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -424,9 +424,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
   const struct ttm_place *place,
   struct ttm_resource **res)
 {
-   u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+   u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
struct drm_buddy *mm = >mm;
@@ -474,6 +474,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
+   if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+   vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -496,25 +499,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
!(size & (((u64)pages_per_block << PAGE_SHIFT) 
- 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
 
-   cur_size = size;
-
-   if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
-   /*
-* Except for actual range allocation, modify the size 
and
-* min_block_size conforming to continuous flag 
enablement
-*/
-   if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   size = roundup_pow_of_two(size);
-   min_block_size = size;
-   /*
-* Modify the size value if size is not
-* aligned with min_block_size
-*/
-   } else if (!IS_ALIGNED(size, min_block_size)) {
-   size = round_up(size, min_block_size);
-   }
-   }
-
r = drm_buddy_alloc_blocks(mm, fpfn,
   lpfn,
   size,
@@ -531,40 +515,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
}
mutex_unlock(>lock);
 
-   if (cur_size != size) {
-   struct drm_buddy_block *block;
-   struct list_head *trim_list;
-   u64 original_size;
-   LIST_HEAD(temp);
-
-   trim_list = >blocks;
-   original_size = (u64)vres->base.size;
-
-   /*
-* If size value is rounded up to min_block_size, trim the last
-* block to the required size
-*/
-   if (!list_is_singular(>blocks)) {
-   block = list_last_entry(>blocks, typeof(*block), 
link);
-   list_move_tail(>link, );
-   trim_list = 
-   /*
-* Compute the original_size value by subtracting the
-* last block size with (aligned size - original size)
-*/
-   original_size = amdgpu_vram_mgr_block_size(block) - 
(size - cur_size);
-   }
-
-   mutex_lock(>lock);
-   drm_buddy_block_trim(mm,
-original_size,
-trim_list);
-   mutex_unlock(>lock);
-
-   if (!list_empty())
-   list_splice_tail(trim_list, >blocks);
-   }
-
vres->base.start = 0;
list_for_each_entry(block, >blocks, link) {
unsigned long start;
-- 
2.25.1



[PATCH v2 1/3] drm/buddy: Improve contiguous memory allocation

2023-09-09 Thread Arunpravin Paneer Selvam
Problem statement: The current method roundup_power_of_two()
to allocate contiguous address triggers -ENOSPC in some cases
even though we have enough free spaces and so to help with
that we introduce a try harder mechanism.

In case of -ENOSPC, the new try harder mechanism rounddown the
original size to power of 2 and iterating over the round down
sized freelist blocks to allocate the required size traversing
RHS and LHS.

As part of the above new method implementation we moved
contiguous/alignment size computation part and trim function
to the drm buddy file.

v2: Modify the alloc_range() function to return total allocated size
on -ENOSPC err and traverse RHS/LHS to allocate the required
size (Matthew).

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 138 
 include/drm/drm_buddy.h |   6 +-
 2 files changed, 127 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..e909eed9cf60 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -480,10 +480,12 @@ alloc_from_freelist(struct drm_buddy *mm,
 static int __alloc_range(struct drm_buddy *mm,
 struct list_head *dfs,
 u64 start, u64 size,
-struct list_head *blocks)
+struct list_head *blocks,
+u64 *total_allocated_on_err)
 {
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
+   u64 total_allocated = 0;
LIST_HEAD(allocated);
u64 end;
int err;
@@ -520,6 +522,7 @@ static int __alloc_range(struct drm_buddy *mm,
}
 
mark_allocated(block);
+   total_allocated += drm_buddy_block_size(mm, block);
mm->avail -= drm_buddy_block_size(mm, block);
list_add_tail(>link, );
continue;
@@ -551,13 +554,20 @@ static int __alloc_range(struct drm_buddy *mm,
__drm_buddy_free(mm, block);
 
 err_free:
-   drm_buddy_free_list(mm, );
+   if (err == -ENOSPC && total_allocated_on_err) {
+   list_splice_tail(, blocks);
+   *total_allocated_on_err = total_allocated;
+   } else {
+   drm_buddy_free_list(mm, );
+   }
+
return err;
 }
 
 static int __drm_buddy_alloc_range(struct drm_buddy *mm,
   u64 start,
   u64 size,
+  u64 *total_allocated_on_err,
   struct list_head *blocks)
 {
LIST_HEAD(dfs);
@@ -566,7 +576,62 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
for (i = 0; i < mm->n_roots; ++i)
list_add_tail(>roots[i]->tmp_link, );
 
-   return __alloc_range(mm, , start, size, blocks);
+   return __alloc_range(mm, , start, size,
+blocks, total_allocated_on_err);
+}
+
+static int __alloc_contig_try_harder(struct drm_buddy *mm,
+u64 size,
+u64 min_block_size,
+struct list_head *blocks)
+{
+   u64 rhs_offset, lhs_offset, lhs_size, filled;
+   struct drm_buddy_block *block;
+   struct list_head *list;
+   LIST_HEAD(blocks_lhs);
+   unsigned long pages;
+   unsigned int order;
+   u64 modify_size;
+   int err;
+
+   modify_size = rounddown_pow_of_two(size);
+   pages = modify_size >> ilog2(mm->chunk_size);
+   order = fls(pages) - 1;
+   if (order == 0)
+   return -ENOSPC;
+
+   list = >free_list[order];
+   if (list_empty(list))
+   return -ENOSPC;
+
+   list_for_each_entry_reverse(block, list, link) {
+   /* Allocate blocks traversing RHS */
+   rhs_offset = drm_buddy_block_offset(block);
+   err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
+  , blocks);
+   if (!err || err != -ENOSPC)
+   return err;
+
+   lhs_size = max((size - filled), min_block_size);
+   if (!IS_ALIGNED(lhs_size, min_block_size))
+   lhs_size = round_up(lhs_size, min_block_size);
+
+   /* Allocate blocks traversing LHS */
+   lhs_offset = drm_buddy_block_offset(block) - lhs_size;
+   err =  __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+  NULL, _lhs);
+   if (!err) {
+   list_splice(_lhs, blocks);
+   return 0;
+   } else if (err != -ENOSPC) {
+   drm_buddy_free_list(mm, blocks);
+   return err;
+  

Re: [PATCH 1/3] drm/buddy: Fix contiguous memory allocation issues

2023-08-23 Thread Arunpravin Paneer Selvam



On 22/08/23 22:52, Christian König wrote:

Am 21.08.23 um 13:16 schrieb Christian König:

Am 21.08.23 um 12:14 schrieb Arunpravin Paneer Selvam:

The way now contiguous requests are implemented such that
the size rounded up to power of 2 and the corresponding order
block picked from the freelist.

In addition to the older method, the new method will rounddown
the size to power of 2 and the corresponding order block picked
from the freelist. And for the remaining size we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size from peer/buddy
block is not free, we pick the next freelist block and repeat
the same method.


I think it's worth mentioning that Xinhui tried something similar a 
few month ago, but that didn't looked like it would work. For this 
here I'm more confident.


Of hand the implementation looks clean to me, but Matthew or others 
which have more background in how the implementation works need to 
take a look as well.


One more thing I've just noticed, not sure if Matthew already noted 
it: When you mention "fix" in the subject line people might try to 
backport it, better write "improve" and drop the "issues" at the end.


I will modify in the next version.

Thanks,
Arun.



Regards,
Christian.



Thanks,
Christian.



Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 253 
++--

  include/drm/drm_buddy.h |   6 +-
  2 files changed, 248 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
drm_buddy *mm,

  return __alloc_range(mm, , start, size, blocks);
  }
  +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
+   u64 size,
+   u64 min_block_size,
+   struct drm_buddy_block *block,
+   struct list_head *blocks)
+{
+    struct drm_buddy_block *buddy, *parent = NULL;
+    u64 start, offset = 0;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    buddy = __get_buddy(block);
+    if (!buddy)
+    return -ENOSPC;
+
+    if (drm_buddy_block_is_allocated(buddy))
+    return -ENOSPC;
+
+    parent = block->parent;
+    if (!parent)
+    return -ENOSPC;
+
+    if (block->parent->right == block) {
+    u64 remaining;
+
+    /* Compute the leftover size for allocation */
+    remaining = max((size - drm_buddy_block_size(mm, buddy)),
+    min_block_size);
+    if (!IS_ALIGNED(remaining, min_block_size))
+    remaining = round_up(remaining, min_block_size);
+
+    /* Check if remaining size is greater than buddy block size */
+    if (drm_buddy_block_size(mm, buddy) < remaining)
+    return -ENOSPC;
+
+    offset = drm_buddy_block_size(mm, buddy) - remaining;
+    }
+
+    list_add(>tmp_link, );
+    start = drm_buddy_block_offset(parent) + offset;
+
+    err = __alloc_range(mm, , start, size, blocks);
+    if (err)
+    return -ENOSPC;
+
+    return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+  u64 size,
+  u64 min_block_size,
+  struct drm_buddy_block *block,
+  struct list_head *blocks)
+{
+    struct drm_buddy_block *first, *peer, *tmp;
+    struct drm_buddy_block *parent = NULL;
+    u64 start, offset = 0;
+    unsigned int order;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    order = drm_buddy_block_order(block);
+    /* Add freelist block to dfs list */
+    list_add(>tmp_link, );
+
+    tmp = block;
+    parent = block->parent;
+    while (parent) {
+    if (block->parent->left == block) {
+    if (parent->left != tmp) {
+    peer = parent->left;
+    break;
+    }
+    } else {
+    if (parent->right != tmp) {
+    peer = parent->right;
+    break;
+    }
+    }
+
+    tmp = parent;
+    parent = tmp->parent;
+    }
+
+    if (!parent)
+    return -ENOSPC;
+
+    do {
+    if (drm_buddy_block_is_allocated(peer))
+    return -ENOSPC;
+    /* Exit loop if peer block order is equal to block order */
+    if (drm_buddy_block_order(peer) == order)
+    break;
+
+    if (drm_buddy_block_is_split(peer)) {
+    /* Traverse down to the block order level */
+    if (block->parent->left == block)
+    peer = peer->right;
+

Re: [PATCH 1/3] drm/buddy: Fix contiguous memory allocation issues

2023-08-22 Thread Arunpravin Paneer Selvam



On 21/08/23 10:46, Matthew Auld wrote:

Hi,

On 21/08/2023 11:14, Arunpravin Paneer Selvam wrote:

The way now contiguous requests are implemented such that
the size rounded up to power of 2 and the corresponding order
block picked from the freelist.

In addition to the older method, the new method will rounddown
the size to power of 2 and the corresponding order block picked
from the freelist. And for the remaining size we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size from peer/buddy
block is not free, we pick the next freelist block and repeat
the same method.

Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.


I think we should also mention somewhere what issue this is trying to 
solve. IIUC the roundup_power_of_two() might in some cases trigger 
-ENOSPC even though there might be enough free space, and so to help 
with that we introduce a try harder mechanism.
Yes, we are trying to solve the above issue. I will add the problem 
statement to the commit description.




Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 253 ++--
  include/drm/drm_buddy.h |   6 +-
  2 files changed, 248 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct 
drm_buddy *mm,

  return __alloc_range(mm, , start, size, blocks);
  }
  +static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
+   u64 size,
+   u64 min_block_size,
+   struct drm_buddy_block *block,
+   struct list_head *blocks)
+{
+    struct drm_buddy_block *buddy, *parent = NULL;
+    u64 start, offset = 0;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    buddy = __get_buddy(block);
+    if (!buddy)
+    return -ENOSPC;
+
+    if (drm_buddy_block_is_allocated(buddy))
+    return -ENOSPC;
+
+    parent = block->parent;
+    if (!parent)
+    return -ENOSPC;
+
+    if (block->parent->right == block) {
+    u64 remaining;
+
+    /* Compute the leftover size for allocation */
+    remaining = max((size - drm_buddy_block_size(mm, buddy)),
+    min_block_size);
+    if (!IS_ALIGNED(remaining, min_block_size))
+    remaining = round_up(remaining, min_block_size);
+
+    /* Check if remaining size is greater than buddy block size */
+    if (drm_buddy_block_size(mm, buddy) < remaining)
+    return -ENOSPC;
+
+    offset = drm_buddy_block_size(mm, buddy) - remaining;
+    }
+
+    list_add(>tmp_link, );
+    start = drm_buddy_block_offset(parent) + offset;
+
+    err = __alloc_range(mm, , start, size, blocks);
+    if (err)
+    return -ENOSPC;
+
+    return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+  u64 size,
+  u64 min_block_size,
+  struct drm_buddy_block *block,
+  struct list_head *blocks)
+{
+    struct drm_buddy_block *first, *peer, *tmp;
+    struct drm_buddy_block *parent = NULL;
+    u64 start, offset = 0;
+    unsigned int order;
+    LIST_HEAD(dfs);
+    int err;
+
+    if (!block)
+    return -EINVAL;
+
+    order = drm_buddy_block_order(block);
+    /* Add freelist block to dfs list */
+    list_add(>tmp_link, );
+
+    tmp = block;
+    parent = block->parent;
+    while (parent) {
+    if (block->parent->left == block) {
+    if (parent->left != tmp) {
+    peer = parent->left;
+    break;
+    }
+    } else {
+    if (parent->right != tmp) {
+    peer = parent->right;
+    break;
+    }
+    }
+
+    tmp = parent;
+    parent = tmp->parent;
+    }
+
+    if (!parent)
+    return -ENOSPC;
+
+    do {
+    if (drm_buddy_block_is_allocated(peer))
+    return -ENOSPC;
+    /* Exit loop if peer block order is equal to block order */
+    if (drm_buddy_block_order(peer) == order)
+    break;
+
+    if (drm_buddy_block_is_split(peer)) {
+    /* Traverse down to the block order level */
+    if (block->parent->left == block)
+    peer = peer->right;
+    else
+    peer = peer->left;
+    } else {
+    break;
+    }
+    } while (1);
+
+    if (block->parent->left == block) {
+    u64 remaining;
+
+    /* Compute the leftover size for allocation */
+    remaining = max((size - drm_buddy_block_size(mm, block)),
+    min_blo

Re: [PATCH 0/4] drm/amdgpu: Explicitly add a flexible array at the end of 'struct amdgpu_bo_list' and simplify amdgpu_bo_list_create()

2023-08-21 Thread Arunpravin Paneer Selvam

Hi Christian,

I ran GLMark2, unigine heaven and Tomb Raider on steam. I didn't observe 
any issues.


Regards,
Arun.

On 8/21/2023 4:33 PM, Christian König wrote:

Am 20.08.23 um 11:51 schrieb Christophe JAILLET:

This serie simplifies amdgpu_bo_list_create() and usage of the 'struct
amdgpu_bo_list'.


Oh, yes please. That's something I always wanted to cleanup as well.


It is compile tested only.


That bothers me a bit. Arun, Vitaly, Shashank can anybody with time 
run a quick test on this? Preferable the IGT tests, but this just some 
game should probably do as well.


Apart from that looks good to me,
Christian.



Christophe JAILLET (4):
   drm/amdgpu: Explicitly add a flexible array at the end of 'struct
 amdgpu_bo_list'
   drm/amdgpu: Remove a redundant sanity check
   drm/amdgpu: Remove amdgpu_bo_list_array_entry()
   drm/amdgpu: Use kvzalloc() to simplify code

  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 15 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 18 ++
  2 files changed, 8 insertions(+), 25 deletions(-)







[PATCH 3/3] drm/i915: Remove the contiguous computation and trim

2023-08-21 Thread Arunpravin Paneer Selvam
As we have implemented a new method for contiguous allocation
which requires actual size and actual min_block_size, hence we
have moved the roundup and alignment size computation to buddy
allocator. This way gpu drivers pass the required size and
alignment to buddy allocator and rest of the operations taken
care by drm_buddy_alloc_blocks() function.

We have moved the trim function call to the drm_buddy_alloc_blocks()
function as we dont have the roundup or aligned size in our driver.

Now we have all in one place and it will be easy to manage in
buddy allocator.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 23 +++
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index a1bc804cfa15..0d735d5c2b35 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -59,6 +59,9 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
+   if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+   bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (place->fpfn || lpfn != man->size)
bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
@@ -72,18 +75,6 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
GEM_BUG_ON(min_page_size < mm->chunk_size);
GEM_BUG_ON(!IS_ALIGNED(size, min_page_size));
 
-   if (place->fpfn + PFN_UP(bman_res->base.size) != place->lpfn &&
-   place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   unsigned long pages;
-
-   size = roundup_pow_of_two(size);
-   min_page_size = size;
-
-   pages = size >> ilog2(mm->chunk_size);
-   if (pages > lpfn)
-   lpfn = pages;
-   }
-
if (size > lpfn << PAGE_SHIFT) {
err = -E2BIG;
goto err_free_res;
@@ -107,14 +98,6 @@ static int i915_ttm_buddy_man_alloc(struct 
ttm_resource_manager *man,
if (unlikely(err))
goto err_free_blocks;
 
-   if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   u64 original_size = (u64)bman_res->base.size;
-
-   drm_buddy_block_trim(mm,
-original_size,
-_res->blocks);
-   }
-
if (lpfn <= bman->visible_size) {
bman_res->used_visible_size = PFN_UP(bman_res->base.size);
} else {
-- 
2.25.1



[PATCH 2/3] drm/amdgpu: Remove the contiguous computation and trim

2023-08-21 Thread Arunpravin Paneer Selvam
As we have implemented a new method for contiguous allocation
which requires actual size and actual min_block_size, hence we
have moved the roundup and alignment size computation to buddy
allocator. This way gpu drivers pass the required size and
alignment to buddy allocator and rest of the operations taken
care by drm_buddy_alloc_blocks() function.

We have moved the trim function call to the drm_buddy_alloc_blocks()
function as we dont have the roundup or aligned size in our driver.

Now we have all in one place and it will be easy to manage in
buddy allocator.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 58 ++--
 1 file changed, 4 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c7085a747b03..18f58efc9dc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -424,9 +424,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
   const struct ttm_place *place,
   struct ttm_resource **res)
 {
-   u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+   u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
struct drm_buddy *mm = >mm;
@@ -474,6 +474,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
+   if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+   vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -496,25 +499,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
!(size & (((u64)pages_per_block << PAGE_SHIFT) 
- 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
 
-   cur_size = size;
-
-   if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
-   /*
-* Except for actual range allocation, modify the size 
and
-* min_block_size conforming to continuous flag 
enablement
-*/
-   if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-   size = roundup_pow_of_two(size);
-   min_block_size = size;
-   /*
-* Modify the size value if size is not
-* aligned with min_block_size
-*/
-   } else if (!IS_ALIGNED(size, min_block_size)) {
-   size = round_up(size, min_block_size);
-   }
-   }
-
r = drm_buddy_alloc_blocks(mm, fpfn,
   lpfn,
   size,
@@ -531,40 +515,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
}
mutex_unlock(>lock);
 
-   if (cur_size != size) {
-   struct drm_buddy_block *block;
-   struct list_head *trim_list;
-   u64 original_size;
-   LIST_HEAD(temp);
-
-   trim_list = >blocks;
-   original_size = (u64)vres->base.size;
-
-   /*
-* If size value is rounded up to min_block_size, trim the last
-* block to the required size
-*/
-   if (!list_is_singular(>blocks)) {
-   block = list_last_entry(>blocks, typeof(*block), 
link);
-   list_move_tail(>link, );
-   trim_list = 
-   /*
-* Compute the original_size value by subtracting the
-* last block size with (aligned size - original size)
-*/
-   original_size = amdgpu_vram_mgr_block_size(block) - 
(size - cur_size);
-   }
-
-   mutex_lock(>lock);
-   drm_buddy_block_trim(mm,
-original_size,
-trim_list);
-   mutex_unlock(>lock);
-
-   if (!list_empty())
-   list_splice_tail(trim_list, >blocks);
-   }
-
vres->base.start = 0;
list_for_each_entry(block, >blocks, link) {
unsigned long start;
-- 
2.25.1



[PATCH 1/3] drm/buddy: Fix contiguous memory allocation issues

2023-08-21 Thread Arunpravin Paneer Selvam
The way now contiguous requests are implemented such that
the size rounded up to power of 2 and the corresponding order
block picked from the freelist.

In addition to the older method, the new method will rounddown
the size to power of 2 and the corresponding order block picked
from the freelist. And for the remaining size we traverse the
tree and try to allocate either from the freelist block's buddy
or from the peer block. If the remaining size from peer/buddy
block is not free, we pick the next freelist block and repeat
the same method.

Moved contiguous/alignment size computation part and trim
function to the drm buddy manager.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 253 ++--
 include/drm/drm_buddy.h |   6 +-
 2 files changed, 248 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 7098f125b54a..220f60c08a03 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -569,6 +569,197 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
return __alloc_range(mm, , start, size, blocks);
 }
 
+static int __alloc_contiguous_block_from_buddy(struct drm_buddy *mm,
+  u64 size,
+  u64 min_block_size,
+  struct drm_buddy_block *block,
+  struct list_head *blocks)
+{
+   struct drm_buddy_block *buddy, *parent = NULL;
+   u64 start, offset = 0;
+   LIST_HEAD(dfs);
+   int err;
+
+   if (!block)
+   return -EINVAL;
+
+   buddy = __get_buddy(block);
+   if (!buddy)
+   return -ENOSPC;
+
+   if (drm_buddy_block_is_allocated(buddy))
+   return -ENOSPC;
+
+   parent = block->parent;
+   if (!parent)
+   return -ENOSPC;
+
+   if (block->parent->right == block) {
+   u64 remaining;
+
+   /* Compute the leftover size for allocation */
+   remaining = max((size - drm_buddy_block_size(mm, buddy)),
+   min_block_size);
+   if (!IS_ALIGNED(remaining, min_block_size))
+   remaining = round_up(remaining, min_block_size);
+
+   /* Check if remaining size is greater than buddy block size */
+   if (drm_buddy_block_size(mm, buddy) < remaining)
+   return -ENOSPC;
+
+   offset = drm_buddy_block_size(mm, buddy) - remaining;
+   }
+
+   list_add(>tmp_link, );
+   start = drm_buddy_block_offset(parent) + offset;
+
+   err = __alloc_range(mm, , start, size, blocks);
+   if (err)
+   return -ENOSPC;
+
+   return 0;
+}
+
+static int __alloc_contiguous_block_from_peer(struct drm_buddy *mm,
+ u64 size,
+ u64 min_block_size,
+ struct drm_buddy_block *block,
+ struct list_head *blocks)
+{
+   struct drm_buddy_block *first, *peer, *tmp;
+   struct drm_buddy_block *parent = NULL;
+   u64 start, offset = 0;
+   unsigned int order;
+   LIST_HEAD(dfs);
+   int err;
+
+   if (!block)
+   return -EINVAL;
+
+   order = drm_buddy_block_order(block);
+   /* Add freelist block to dfs list */
+   list_add(>tmp_link, );
+
+   tmp = block;
+   parent = block->parent;
+   while (parent) {
+   if (block->parent->left == block) {
+   if (parent->left != tmp) {
+   peer = parent->left;
+   break;
+   }
+   } else {
+   if (parent->right != tmp) {
+   peer = parent->right;
+   break;
+   }
+   }
+
+   tmp = parent;
+   parent = tmp->parent;
+   }
+
+   if (!parent)
+   return -ENOSPC;
+
+   do {
+   if (drm_buddy_block_is_allocated(peer))
+   return -ENOSPC;
+   /* Exit loop if peer block order is equal to block order */
+   if (drm_buddy_block_order(peer) == order)
+   break;
+
+   if (drm_buddy_block_is_split(peer)) {
+   /* Traverse down to the block order level */
+   if (block->parent->left == block)
+   peer = peer->right;
+   else
+   peer = peer->left;
+   } else {
+   break;
+   }
+   } while (1);
+
+   if (block-&

Re: [PATCH] drm/buddy: Fix drm buddy info output format

2023-08-07 Thread Arunpravin Paneer Selvam

looks good to me as well.

Reviewed-by: Arunpravin Paneer Selvam 

On 8/7/2023 12:17 PM, Christian König wrote:

Am 04.08.23 um 08:56 schrieb Ma Jun:

[1] Change pages to blocks to avoid confusion.
[2] Fix output format to align the output info.

Signed-off-by: Ma Jun 


Of hand looks good to me, but Arun should probably judge.

Christian.


---
  drivers/gpu/drm/drm_buddy.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 3d1f50f481cf..ef3dd15c334a 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -781,15 +781,15 @@ void drm_buddy_print(struct drm_buddy *mm, 
struct drm_printer *p)

  count++;
  }
  -    drm_printf(p, "order-%d ", order);
+    drm_printf(p, "order-%2d ", order);
    free = count * (mm->chunk_size << order);
  if (free < SZ_1M)
-    drm_printf(p, "free: %lluKiB", free >> 10);
+    drm_printf(p, "free: %8llu KiB", free >> 10);
  else
-    drm_printf(p, "free: %lluMiB", free >> 20);
+    drm_printf(p, "free: %8llu MiB", free >> 20);
  -    drm_printf(p, ", pages: %llu\n", count);
+    drm_printf(p, ", blocks: %llu\n", count);
  }
  }
  EXPORT_SYMBOL(drm_buddy_print);




Re: [PATCH RFC 2/2] drm: add documentation for drm_buddy_test kUnit test

2023-07-13 Thread Arunpravin Paneer Selvam
Looks good, Feel free to add Reviewed-by: Arunpravin Paneer Selvam 



Regards,
Arun.

On 7/13/2023 12:09 PM, Christian König wrote:

[Adding Arun]

Am 12.07.23 um 16:28 schrieb Mauro Carvalho Chehab:

As an example for the new documentation tool, add a documentation
for drm_buddy_test.

I opted to place this on a completely different directory, in order
to make easier to test the feature with:

$ make SPHINXDIRS="tests" htmldocs

Signed-off-by: Mauro Carvalho Chehab 


Acked-by: Christian König 

Arun please take a look as well.

Thanks,
Christian.


---

To avoid mailbombing on a large number of people, only mailing lists 
were C/C on the cover.
See [PATCH RFC 0/2] at: 
https://lore.kernel.org/all/cover.1689171160.git.mche...@kernel.org/


  Documentation/index.rst    |  2 +-
  Documentation/tests/index.rst  |  6 ++
  Documentation/tests/kunit.rst  |  5 +
  drivers/gpu/drm/tests/drm_buddy_test.c | 12 
  4 files changed, 24 insertions(+), 1 deletion(-)
  create mode 100644 Documentation/tests/index.rst
  create mode 100644 Documentation/tests/kunit.rst

diff --git a/Documentation/index.rst b/Documentation/index.rst
index 9dfdc826618c..80a6ce14a61a 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -60,7 +60,7 @@ Various other manuals with useful information for 
all kernel developers.

 fault-injection/index
 livepatch/index
 rust/index
-
+   test/index
    User-oriented documentation
  ===
diff --git a/Documentation/tests/index.rst 
b/Documentation/tests/index.rst

new file mode 100644
index ..bfc39eb5c0aa
--- /dev/null
+++ b/Documentation/tests/index.rst
@@ -0,0 +1,6 @@
+
+Kunit documentation test
+
+
+.. toctree::
+   kunit
diff --git a/Documentation/tests/kunit.rst 
b/Documentation/tests/kunit.rst

new file mode 100644
index ..6ffc151988a0
--- /dev/null
+++ b/Documentation/tests/kunit.rst
@@ -0,0 +1,5 @@
+Kunit tests
+---
+
+.. include-test:: drivers/gpu/drm/tests/drm_buddy_test.c
+
diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index 09ee6f6af896..dd6c5afd6cd6 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -737,6 +737,18 @@ static int drm_buddy_suite_init(struct 
kunit_suite *suite)

  return 0;
  }
  +/**
+ * KTEST_SUITE: set of tests for drm buddy alloc
+ * Scope: drm subsystem
+ * Mega feature: drm
+ * Feature: buddy_alloc
+ *
+ * KTEST_TEST: drm_test_buddy_alloc_%s
+ * Description: Run DRM buddy allocation %arg[1] test
+ *
+ * arg[1].values: limit, range, optimistic, smoke, pathological
+ */
+
  static struct kunit_case drm_buddy_tests[] = {
  KUNIT_CASE(drm_test_buddy_alloc_limit),
  KUNIT_CASE(drm_test_buddy_alloc_range),




[PATCH v2] drm: Optimize drm buddy top-down allocation method

2023-01-12 Thread Arunpravin Paneer Selvam
We are observing performance drop in many usecases which include
games, 3D benchmark applications,etc.. To solve this problem, We
are strictly not allowing top down flag enabled allocations to
steal the memory space from cpu visible region.

The idea is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.

This patch improves the 3D benchmark scores and solves
fragmentation issues.

All drm buddy selftests are verfied.
drm_buddy: pass:6 fail:0 skip:0 total:6

Signed-off-by: Arunpravin Paneer Selvam 
Acked-by: Christian König 
Acked-by: Alex Deucher 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/drm_buddy.c | 81 -
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..3d1f50f481cf 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
 }
 
+static void list_insert_sorted(struct drm_buddy *mm,
+  struct drm_buddy_block *block)
+{
+   struct drm_buddy_block *node;
+   struct list_head *head;
+
+   head = >free_list[drm_buddy_block_order(block)];
+   if (list_empty(head)) {
+   list_add(>link, head);
+   return;
+   }
+
+   list_for_each_entry(node, head, link)
+   if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))
+   break;
+
+   __list_add(>link, node->link.prev, >link);
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
 
-   list_add(>link,
->free_list[drm_buddy_block_order(block)]);
+   list_insert_sorted(mm, block);
 }
 
 static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
 {
struct drm_buddy_block *max_block = NULL, *node;
+   unsigned int i;
 
-   max_block = list_first_entry_or_null(head,
-struct drm_buddy_block,
-link);
-   if (!max_block)
-   return NULL;
+   for (i = order; i <= mm->max_order; ++i) {
+   if (!list_empty(>free_list[i])) {
+   node = list_last_entry(>free_list[i],
+  struct drm_buddy_block,
+  link);
+   if (!max_block) {
+   max_block = node;
+   continue;
+   }
 
-   list_for_each_entry(node, head, link) {
-   if (drm_buddy_block_offset(node) >
-   drm_buddy_block_offset(max_block))
-   max_block = node;
+   if (drm_buddy_block_offset(node) >
+   drm_buddy_block_offset(max_block)) {
+   max_block = node;
+   }
+   }
}
 
return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
unsigned long flags)
 {
struct drm_buddy_block *block = NULL;
-   unsigned int i;
+   unsigned int tmp;
int err;
 
-   for (i = order; i <= mm->max_order; ++i) {
-   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-   block = get_maxblock(>free_list[i]);
-   if (block)
-   break;
-   } else {
-   block = list_first_entry_or_null(>free_list[i],
-struct drm_buddy_block,
-link);
-   if (block)
-   break;
+   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+   block = get_maxblock(mm, order);
+   if (block)
+   /* Store the obtained block order */
+   tmp = drm_buddy_block_order(block);
+   } else {
+   for (tmp = order; tmp <= mm->max_order; ++tmp) {
+   if (!list_empty(>free_list[tmp])) {
+   block = list_last_entry(>free_list[tmp],
+   struct drm_buddy_block,
+

Re: [PATCH] drm: Alloc high address for drm buddy topdown flag

2023-01-10 Thread Arunpravin Paneer Selvam

Hi Matthew,

On 1/10/2023 5:32 PM, Matthew Auld wrote:

On 07/01/2023 15:15, Arunpravin Paneer Selvam wrote:

As we are observing low numbers in viewperf graphics benchmark, we
are strictly not allowing the top down flag enabled allocations
to steal the memory space from cpu visible region.

The approach is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.


Did you also run the selftests? Does everything still pass and 
complete in a reasonable amount of time?

I will try giving a run


This patch improves the viewperf 3D benchmark scores.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/drm_buddy.c | 81 -
  1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..50916b2f2fc5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  +static void list_insert_sorted(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
+{
+    struct drm_buddy_block *node;
+    struct list_head *head;
+
+    head = >free_list[drm_buddy_block_order(block)];
+    if (list_empty(head)) {
+    list_add(>link, head);
+    return;
+    }
+
+    list_for_each_entry(node, head, link)
+    if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))

+    break;
+
+    __list_add(>link, node->link.prev, >link);
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
  block->header &= ~DRM_BUDDY_HEADER_STATE;
  block->header |= DRM_BUDDY_FREE;
  -    list_add(>link,
- >free_list[drm_buddy_block_order(block)]);
+    list_insert_sorted(mm, block);


One advantage of not sorting is when splitting down a large block. 
Previously the most-recently-split would be at the start of the list 
for the next order down, where potentially the next allocation could 
use it. So perhaps less fragmentation if it's all part of one BO. 
Otherwise I don't see any other downsides, other than the extra 
overhead of sorting.


Allocating from freelist is traversing through right side (i.e top most 
address range) and for TOPDOWN flag allocations we just split the top 
most large block once and the subsequent TOPDOWN low order allocations 
would get block from same already split large block
For the normal allocations, I modified to retrieve the blocks in each 
order list from the last entry which has the high probability of getting 
top most blocks as we have sorted the blocks in each order list.
Thus the bottom most large blocks are not frequently used, hence we have 
more space for the visible region on dGPU.


For APU which has small sized VRAM space, the allocations are now 
ordered and we don't allocate randomly from freelist solving 
fragmentation issues.

  }
    static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
  }
    static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
  {
  struct drm_buddy_block *max_block = NULL, *node;
+    unsigned int i;
  -    max_block = list_first_entry_or_null(head,
- struct drm_buddy_block,
- link);
-    if (!max_block)
-    return NULL;
+    for (i = order; i <= mm->max_order; ++i) {
+    if (!list_empty(>free_list[i])) {
+    node = list_last_entry(>free_list[i],
+   struct drm_buddy_block,
+   link);
+    if (!max_block) {
+    max_block = node;
+    continue;
+    }
  -    list_for_each_entry(node, head, link) {
-    if (drm_buddy_block_offset(node) >
-    drm_buddy_block_offset(max_block))
-    max_block = node;
+    if (drm_buddy_block_offset(node) >
+    drm_buddy_block_offset(max_block)) {


Formatting doesn't look right here.

I will check.


Going to test this today with some workloads with small-bar and i915 
just to see if this improves/impacts anything for us.



+    max_block = node;
+    }
+    }
  }
    return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
  unsigned long flags)
  {
  struct drm_buddy_block *block = NULL;
-    unsigned int i;
+    unsigned int tmp;
  int err;
  -    for (i = order; i <= mm->max_order; ++i) {
-    if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-    block = get_maxblock(>free_list[i]);
-    if (block)
-    break;
-   

[PATCH] drm: Alloc high address for drm buddy topdown flag

2023-01-07 Thread Arunpravin Paneer Selvam
As we are observing low numbers in viewperf graphics benchmark, we
are strictly not allowing the top down flag enabled allocations
to steal the memory space from cpu visible region.

The approach is, we are sorting each order list entries in
ascending order and compare the last entry of each order
list in the freelist and return the max block.

This patch improves the viewperf 3D benchmark scores.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/drm_buddy.c | 81 -
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..50916b2f2fc5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
 }
 
+static void list_insert_sorted(struct drm_buddy *mm,
+  struct drm_buddy_block *block)
+{
+   struct drm_buddy_block *node;
+   struct list_head *head;
+
+   head = >free_list[drm_buddy_block_order(block)];
+   if (list_empty(head)) {
+   list_add(>link, head);
+   return;
+   }
+
+   list_for_each_entry(node, head, link)
+   if (drm_buddy_block_offset(block) < 
drm_buddy_block_offset(node))
+   break;
+
+   __list_add(>link, node->link.prev, >link);
+}
+
 static void mark_allocated(struct drm_buddy_block *block)
 {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm,
block->header &= ~DRM_BUDDY_HEADER_STATE;
block->header |= DRM_BUDDY_FREE;
 
-   list_add(>link,
->free_list[drm_buddy_block_order(block)]);
+   list_insert_sorted(mm, block);
 }
 
 static void mark_split(struct drm_buddy_block *block)
@@ -387,20 +405,26 @@ alloc_range_bias(struct drm_buddy *mm,
 }
 
 static struct drm_buddy_block *
-get_maxblock(struct list_head *head)
+get_maxblock(struct drm_buddy *mm, unsigned int order)
 {
struct drm_buddy_block *max_block = NULL, *node;
+   unsigned int i;
 
-   max_block = list_first_entry_or_null(head,
-struct drm_buddy_block,
-link);
-   if (!max_block)
-   return NULL;
+   for (i = order; i <= mm->max_order; ++i) {
+   if (!list_empty(>free_list[i])) {
+   node = list_last_entry(>free_list[i],
+  struct drm_buddy_block,
+  link);
+   if (!max_block) {
+   max_block = node;
+   continue;
+   }
 
-   list_for_each_entry(node, head, link) {
-   if (drm_buddy_block_offset(node) >
-   drm_buddy_block_offset(max_block))
-   max_block = node;
+   if (drm_buddy_block_offset(node) >
+   drm_buddy_block_offset(max_block)) {
+   max_block = node;
+   }
+   }
}
 
return max_block;
@@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm,
unsigned long flags)
 {
struct drm_buddy_block *block = NULL;
-   unsigned int i;
+   unsigned int tmp;
int err;
 
-   for (i = order; i <= mm->max_order; ++i) {
-   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
-   block = get_maxblock(>free_list[i]);
-   if (block)
-   break;
-   } else {
-   block = list_first_entry_or_null(>free_list[i],
-struct drm_buddy_block,
-link);
-   if (block)
-   break;
+   if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+   block = get_maxblock(mm, order);
+   if (block)
+   /* Store the obtained block order */
+   tmp = drm_buddy_block_order(block);
+   } else {
+   for (tmp = order; tmp <= mm->max_order; ++tmp) {
+   if (!list_empty(>free_list[tmp])) {
+   block = list_last_entry(>free_list[tmp],
+   struct drm_buddy_block,
+   link);
+   if (block)
+   break;
+   }
}
}
 
@@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm,
 
  

Re: [PATCH v3] drm: Optimise for continuous memory allocation

2022-11-28 Thread Arunpravin Paneer Selvam

Hi Xinhui,

On 11/28/2022 12:04 PM, xinhui pan wrote:

Currently drm-buddy does not have full knowledge of continuous memory.

Lets consider scenario below.
order 1:L   R
order 0: LL LR  RL  RR
for order 1 allocation, it can offer L or R or LR+RL.

For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the LR+RL case.

Signed-off-by: xinhui pan 
---
change from v2:
search continuous block in nearby root if needed

change from v1:
implement top-down continuous allocation
---
  drivers/gpu/drm/drm_buddy.c | 78 +
  1 file changed, 71 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..ff58eb3136d2 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -386,6 +386,58 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
  }
  
+static struct drm_buddy_block *

+find_continuous_blocks(struct drm_buddy *mm,
+  int order,
+  unsigned long flags,
+  struct drm_buddy_block **rn)
+{
+   struct list_head *head = >free_list[order];
+   struct drm_buddy_block *node, *parent, *free_node, *max_node = NULL;
NIT: We usually name the variable as *block or ***_block for drm buddy 
and we have *node or ***_node for drm mm manager.

+   int i;
+
+   list_for_each_entry(free_node, head, link) {
+   if (max_node) {
+   if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+   break;
+
+   if (drm_buddy_block_offset(free_node) <
+   drm_buddy_block_offset(max_node))
+   continue;
+   }
+
+   parent = free_node;
+   do {
+   node = parent;
+   parent = parent->parent;
+   } while (parent && parent->right == node);
+
+   if (!parent) {
+   for (i = 0; i < mm->n_roots - 1; i++)
+   if (mm->roots[i] == node)
+   break;
+   if (i == mm->n_roots - 1)
+   continue;
+   node = mm->roots[i + 1];
+   } else {
+   node = parent->right;
+   }
+
+   while (drm_buddy_block_is_split(node))
+   node = node->left;
+
+   if (drm_buddy_block_is_free(node) &&
+   drm_buddy_block_order(node) == order) {
+   *rn = node;
+   max_node = free_node;
+   BUG_ON(drm_buddy_block_offset(node) !=
+   drm_buddy_block_offset(max_node) +
+   drm_buddy_block_size(mm, max_node));
+   }
+   }
+   return max_node;
+}
+
  static struct drm_buddy_block *
  get_maxblock(struct list_head *head)
  {
@@ -637,7 +689,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
   struct list_head *blocks,
   unsigned long flags)
  {
-   struct drm_buddy_block *block = NULL;
+   struct drm_buddy_block *block = NULL, *rblock = NULL;
unsigned int min_order, order;
unsigned long pages;
LIST_HEAD(allocated);
@@ -689,17 +741,29 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
break;
  
  			if (order-- == min_order) {

+   if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+   min_order != 0 && pages == BIT(order + 1)) {
+   block = find_continuous_blocks(mm,
+  order,
+  flags,
+  );
+   if (block)
+   break;
+   }
err = -ENOSPC;
goto err_free;
}
} while (1);
  
-		mark_allocated(block);

-   mm->avail -= drm_buddy_block_size(mm, block);
-   kmemleak_update_trace(block);
-   list_add_tail(>link, );
-
-   pages -= BIT(order);
+   do {
+   mark_allocated(block);
+   mm->avail -= drm_buddy_block_size(mm, block);
+   kmemleak_update_trace(block);
+   list_add_tail(>link, );
+   pages -= BIT(order);
+   block = rblock;
+   rblock = NULL;
+   } while (block);
I think with this 

Re: [PATCH 1/9] drm/amdgpu: generally allow over-commit during BO allocation

2022-11-27 Thread Arunpravin Paneer Selvam

Hi Christian,

Looks good to me.
Reviewed-by: Arunpravin Paneer Selvam 
for the series.

Regards,
Arun.

On 11/25/2022 3:51 PM, Christian König wrote:

We already fallback to a dummy BO with no backing store when we
allocate GDS,GWS and OA resources and to GTT when we allocate VRAM.

Drop all those workarounds and generalize this for GTT as well. This
fixes ENOMEM issues with runaway applications which try to allocate/free
GTT in a loop and are otherwise only limited by the CPU speed.

The CS will wait for the cleanup of freed up BOs to satisfy the
various domain specific limits and so effectively throttle those
buggy applications down to a sane allocation behavior again.

Signed-off-by: Christian König
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 16 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  6 +-
  2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a0780a4e3e61..62e98f1ad770 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -113,7 +113,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigned long size,
bp.resv = resv;
bp.preferred_domain = initial_domain;
bp.flags = flags;
-   bp.domain = initial_domain;
+   bp.domain = initial_domain | AMDGPU_GEM_DOMAIN_CPU;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
  
  	r = amdgpu_bo_create_user(adev, , );

@@ -332,20 +332,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void 
*data,
}
  
  	initial_domain = (u32)(0x & args->in.domains);

-retry:
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
-initial_domain,
-flags, ttm_bo_type_device, resv, );
+initial_domain, flags, ttm_bo_type_device,
+resv, );
if (r && r != -ERESTARTSYS) {
-   if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-   flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-   goto retry;
-   }
-
-   if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-   initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-   goto retry;
-   }
DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, 
%d)\n",
size, initial_domain, args->in.alignment, r);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 974e85d8b6cc..919bbea2e3ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -581,11 +581,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
  
  	bo->tbo.bdev = >mman.bdev;

-   if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
- AMDGPU_GEM_DOMAIN_GDS))
-   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-   else
-   amdgpu_bo_placement_from_domain(bo, bp->domain);
+   amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
  


Re: [PATCH 2/2] drm/gem: Remove BUG_ON in drm_gem_private_object_init

2022-11-21 Thread Arunpravin Paneer Selvam




On 11/22/2022 10:48 AM, Somalapuram, Amaranath wrote:


On 11/16/2022 2:50 PM, Arunpravin Paneer Selvam wrote:

Hi Amar,

On 11/16/2022 2:20 PM, Somalapuram Amaranath wrote:

ttm_resource allocate size in bytes i.e less than page size.

Signed-off-by: Somalapuram Amaranath 
---
  drivers/gpu/drm/drm_gem.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index b8db675e7fb5..a346e3b7f9a8 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(drm_gem_object_init);
  void drm_gem_private_object_init(struct drm_device *dev,
   struct drm_gem_object *obj, size_t size)
  {
-    BUG_ON((size & (PAGE_SIZE - 1)) != 0);
+    //BUG_ON((size & (PAGE_SIZE - 1)) != 0);

This line is added by mistake?


No this breaks when the size is less than page size.

Now we support size in bytes.


I see the line BUG_ON is commented out


Regards,
Arun

    obj->dev = dev;
  obj->filp = NULL;






Re: [PATCH 2/2] drm/gem: Remove BUG_ON in drm_gem_private_object_init

2022-11-16 Thread Arunpravin Paneer Selvam

Hi Amar,

On 11/16/2022 2:20 PM, Somalapuram Amaranath wrote:

ttm_resource allocate size in bytes i.e less than page size.

Signed-off-by: Somalapuram Amaranath 
---
  drivers/gpu/drm/drm_gem.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index b8db675e7fb5..a346e3b7f9a8 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(drm_gem_object_init);
  void drm_gem_private_object_init(struct drm_device *dev,
 struct drm_gem_object *obj, size_t size)
  {
-   BUG_ON((size & (PAGE_SIZE - 1)) != 0);
+   //BUG_ON((size & (PAGE_SIZE - 1)) != 0);

This line is added by mistake?

Regards,
Arun
  
  	obj->dev = dev;

obj->filp = NULL;




Re: [git pull] drm fixes for 6.1-rc1

2022-10-17 Thread Arunpravin Paneer Selvam

Hi Christian,

Looks like we have to exit the loop if there are no blocks to compare.
May be that's why the function returns false.

@Arthur Marsh Could you please test the attached patch.

Thanks,
Arun

On 10/17/2022 1:39 PM, Christian König wrote:

Am 17.10.22 um 10:01 schrieb Dave Airlie:
On Mon, 17 Oct 2022 at 17:07, Christian König 
 wrote:

Hi Arun,

the hw generation doesn't matter. This error message here:

amdgpu: Move buffer fallback to memcpy unavailable

indicates that the detection of linear buffers still doesn't work as
expected or that we have a bug somewhere else.

Maybe the limiting when SDMA moves are not available isn't working
correctly?
It is a CAPE_VERDE, so maybe something with the SI UVD memory 
limitations?


Yeah, good point. Could be that we try to move something into the UVD 
memory window and that something isn't allocated linearly.


Arun can you trace the allocation and make sure that all kernel 
allocations have the CONTIGUOUS flag set?


Thanks,
Christian.



Dave.


From 132ce83f893eaea743fb7f41a9dc72afea52cdaa Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam 
Date: Mon, 17 Oct 2022 13:15:21 -0700
Subject: [PATCH] drm/amdgpu: Fix for BO move issue

If there are no blocks to compare then exit
the loop.

Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dc262d2c2925..57277b1cf183 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -439,6 +439,9 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,
while (cursor.remaining) {
amdgpu_res_next(, cursor.size);
 
+   if (!cursor.remaining)
+   break;
+
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;
-- 
2.25.1



Re: [git pull] drm fixes for 6.1-rc1

2022-10-17 Thread Arunpravin Paneer Selvam

Hi Arthur,

Is this old radeon card?

Thanks,
Arun

On 10/17/2022 11:50 AM, Christian König wrote:

Arun please take a look into this ASAP.

Thanks,
Christian.

Am 17.10.22 um 03:13 schrieb Arthur Marsh:
Thanks Dave, I reverted patch 
312b4dc11d4f74bfe03ea25ffe04c1f2fdd13cb9 against 6.1-rc1 and the 
resulting kernel loaded amdgpu fine on my pc with Cape Verde GPU.


Regards,

Arthur.

On 17 October 2022 8:14:18 am ACDT, Dave Airlie  
wrote:

On Sun, 16 Oct 2022 at 18:09, Arthur Marsh
 wrote:

From: Arthur Marsh 

Hi, the "drm fixes for 6.1-rc1" commit caused the amdgpu module to 
fail

with my Cape Verde radeonsi card.

I haven't been able to bisect the problem to an individual commit, but
attach a dmesg extract below.

I'm happy to supply any other configuration information and test 
patches.



Can you try reverting: it's the only think I can spot that might
affect a card that old since most changes in that request were for
display hw you don't have.

ommit 312b4dc11d4f74bfe03ea25ffe04c1f2fdd13cb9
Author: Arunpravin Paneer Selvam 
Date:   Tue Oct 4 07:33:39 2022 -0700

    drm/amdgpu: Fix VRAM BO swap issue

    DRM buddy manager allocates the contiguous memory requests in
    a single block or multiple blocks. So for the ttm move operation
    (incase of low vram memory) we should consider all the blocks to
    compute the total memory size which compared with the struct
    ttm_resource num_pages in order to verify that the blocks are
    contiguous for the eviction process.

    v2: Added a Fixes tag
    v3: Rewrite the code to save a bit of calculations and
    variables (Christian)

    Fixes: c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu")
    Signed-off-by: Arunpravin Paneer Selvam 


    Reviewed-by: Christian König 
    Signed-off-by: Alex Deucher 


Thanks,
Dave.


Arthur.

  Linux version 6.0.0+ (root@am64) (gcc-12 (Debian 12.2.0-5) 
12.2.0, GNU ld (GNU Binutils for Debian) 2.39) #5179 SMP 
PREEMPT_DYNAMIC Fri Oct 14 17:00:40 ACDT 2022
  Command line: BOOT_IMAGE=/vmlinuz-6.0.0+ 
root=UUID=39706f53-7c27-4310-b22a-36c7b042d1a1 ro single 
amdgpu.audio=1 amdgpu.si_support=1 radeon.si_support=0 
page_owner=on amdgpu.gpu_recovery=1

...

  [drm] amdgpu kernel modesetting enabled.
  amdgpu :01:00.0: vgaarb: deactivate vga console
  Console: switching to colour dummy device 80x25
  [drm] initializing kernel modesetting (VERDE 0x1002:0x682B 
0x1458:0x22CA 0x87).

  [drm] register mmio base: 0xFE8C
  [drm] register mmio size: 262144
  [drm] add ip block number 0 
  [drm] add ip block number 1 
  [drm] add ip block number 2 
  [drm] add ip block number 3 
  [drm] add ip block number 4 
  [drm] add ip block number 5 
  [drm] add ip block number 6 
  [drm] add ip block number 7 
  [drm] BIOS signature incorrect 5b 7
  resource sanity check: requesting [mem 0x000c-0x000d], 
which spans more than PCI Bus :00 [mem 0x000d-0x000d 
window]

  caller pci_map_rom+0x68/0x1b0 mapping multiple BARs
  amdgpu :01:00.0: No more image in the PCI ROM
  amdgpu :01:00.0: amdgpu: Fetched VBIOS from ROM BAR
  amdgpu: ATOM BIOS: xxx-xxx-xxx
  amdgpu :01:00.0: amdgpu: Trusted Memory Zone (TMZ) feature 
not supported

  amdgpu :01:00.0: amdgpu: PCIE atomic ops is not supported
  [drm] PCIE gen 2 link speeds already enabled
  [drm] vm size is 64 GB, 2 levels, block size is 10-bit, fragment 
size is 9-bit
  RTL8211B Gigabit Ethernet r8169-0-300:00: attached PHY driver 
(mii_bus:phy_addr=r8169-0-300:00, irq=MAC)

  r8169 :03:00.0 eth0: Link is Down
  amdgpu :01:00.0: amdgpu: VRAM: 2048M 0x00F4 - 
0x00F47FFF (2048M used)
  amdgpu :01:00.0: amdgpu: GART: 1024M 0x00FF - 
0x00FF3FFF

  [drm] Detected VRAM RAM=2048M, BAR=256M
  [drm] RAM width 128bits DDR3
  [drm] amdgpu: 2048M of VRAM memory ready
  [drm] amdgpu: 3979M of GTT memory ready.
  [drm] GART: num cpu pages 262144, num gpu pages 262144
  amdgpu :01:00.0: amdgpu: PCIE GART of 1024M enabled (table at 
0x00F400A0).

  [drm] Internal thermal controller with fan control
  [drm] amdgpu: dpm initialized
  [drm] AMDGPU Display Connectors
  [drm] Connector 0:
  [drm]   HDMI-A-1
  [drm]   HPD1
  [drm]   DDC: 0x194c 0x194c 0x194d 0x194d 0x194e 0x194e 0x194f 0x194f
  [drm]   Encoders:
  [drm] DFP1: INTERNAL_UNIPHY
  [drm] Connector 1:
  [drm]   DVI-D-1
  [drm]   HPD2
  [drm]   DDC: 0x1950 0x1950 0x1951 0x1951 0x1952 0x1952 0x1953 0x1953
  [drm]   Encoders:
  [drm] DFP2: INTERNAL_UNIPHY
  [drm] Connector 2:
  [drm]   VGA-1
  [drm]   DDC: 0x1970 0x1970 0x1971 0x1971 0x1972 0x1972 0x1973 0x1973
  [drm]   Encoders:
  [drm] CRT1: INTERNAL_KLDSCP_DAC1
  [drm] Found UVD firmware Version: 64.0 Family ID: 13
  amdgpu: Move buffer fallback to memcpy unavailable
  [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* sw_init of IP 
block  failed -19

  amdgpu :01:00.0: amdgpu: amdgpu_device_ip_init failed
  amdgpu :01:00.0: amdgpu: Fatal e

Re: [PATCH v6 1/6] drm/ttm: Add new callbacks to ttm res mgr

2022-09-08 Thread Arunpravin Paneer Selvam




On 9/8/2022 11:34 AM, Christian König wrote:

Am 07.09.22 um 19:00 schrieb Daniel Vetter:

[SNIP]

I'm a bit confused why the bloat here ...
Drivers do have specialized implementations of the backend, e.g. 
VMWGFX have
his handle backend, amdgpu the VRAM backend with special placements, 
i915 is

completely special as well.

Here we only move the decision if resources intersect or are 
compatible into

those specialized backends. Previously we had all this in a centralized
callback for all backends of a driver.

See the switch in amdgpu_ttm_bo_eviction_valuable() for an example. 
Final
goal is to move all this stuff into the specialized backends and 
remove this

callback.

The only driver where I couldn't figure out why we have duplicated 
all this

from the standard implementation is Nouveau.

Yeah I didn't read this too carefully, apologies.

Also please document new callbacks precisely with inline kerneldoc. 
I know

ttm docs aren't great yet, but they don't get better if we don't start
somewhere. I think the in-depth comments for modeset vfuncs (e.g. in
drm_modeset_helper_vtables.h) are a good standard here.

I thought we already did that. Please be a bit more specific.

Yeah rushed this too, but the kerneldoc isn't too great yet. It's
definitely not formatted correctly (you can't do a full function
definition in a struct unfortunately, see the examples I linked). And it
would be good to specificy what the default implementation is, that 
there
is one (i.e. the hook is optional) and when exactly a driver would 
want to
overwrite this. Atm it's a one-liner that explains exactly as much as 
you

can guess from the function interface anyway, that's not super userful.
-Daniel


Arun can you take care of improving this?


Hi Christian,
Yes, I will check on this.

Thanks,
Arun


Thanks,
Christian.






Thanks,
Christian.


-Daniel


Thanks,
Arun

Regards,
Christian.







[PATCH v6 4/6] drm/i915: Implement intersect/compatible functions

2022-08-20 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback function
fetching start offset from drm buddy allocator.

v3: move the bits that are specific to buddy_man (Matthew)
v4: consider the block size /range (Matthew)

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 41 +--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 73 +++
 2 files changed, 74 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..bc9c432edffe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -361,7 +361,6 @@ static bool i915_ttm_eviction_valuable(struct 
ttm_buffer_object *bo,
   const struct ttm_place *place)
 {
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-   struct ttm_resource *res = bo->resource;
 
if (!obj)
return false;
@@ -378,45 +377,7 @@ static bool i915_ttm_eviction_valuable(struct 
ttm_buffer_object *bo,
if (!i915_gem_object_evictable(obj))
return false;
 
-   switch (res->mem_type) {
-   case I915_PL_LMEM0: {
-   struct ttm_resource_manager *man =
-   ttm_manager_type(bo->bdev, res->mem_type);
-   struct i915_ttm_buddy_resource *bman_res =
-   to_ttm_buddy_resource(res);
-   struct drm_buddy *mm = bman_res->mm;
-   struct drm_buddy_block *block;
-
-   if (!place->fpfn && !place->lpfn)
-   return true;
-
-   GEM_BUG_ON(!place->lpfn);
-
-   /*
-* If we just want something mappable then we can quickly check
-* if the current victim resource is using any of the CPU
-* visible portion.
-*/
-   if (!place->fpfn &&
-   place->lpfn == i915_ttm_buddy_man_visible_size(man))
-   return bman_res->used_visible_size > 0;
-
-   /* Real range allocation */
-   list_for_each_entry(block, _res->blocks, link) {
-   unsigned long fpfn =
-   drm_buddy_block_offset(block) >> PAGE_SHIFT;
-   unsigned long lpfn = fpfn +
-   (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
-
-   if (place->fpfn < lpfn && place->lpfn > fpfn)
-   return true;
-   }
-   return false;
-   } default:
-   break;
-   }
-
-   return true;
+   return ttm_bo_eviction_valuable(bo, place);
 }
 
 static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 427de1aaab36..e19452f0e100 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -173,6 +173,77 @@ static void i915_ttm_buddy_man_free(struct 
ttm_resource_manager *man,
kfree(bman_res);
 }
 
+static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
+   struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
+   struct drm_buddy *mm = >mm;
+   struct drm_buddy_block *block;
+
+   if (!place->fpfn && !place->lpfn)
+   return true;
+
+   GEM_BUG_ON(!place->lpfn);
+
+   /*
+* If we just want something mappable then we can quickly check
+* if the current victim resource is using any of the CPU
+* visible portion.
+*/
+   if (!place->fpfn &&
+   place->lpfn == i915_ttm_buddy_man_visible_size(man))
+   return bman_res->used_visible_size > 0;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, _res->blocks, link) {
+   unsigned long fpfn =
+   drm_buddy_block_offset(block) >> PAGE_SHIFT;
+   unsigned long lpfn = fpfn +
+   (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+   if (place->fpfn < lpfn && place->lpfn > fpfn)
+   return true;
+   }
+
+   return false;
+}
+
+static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
+   

[PATCH v6 2/6] drm/ttm: Implement intersect/compatible functions

2022-08-20 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback functions
to ttm range manager fetching start offset from drm mm range
allocator.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_range_manager.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c 
b/drivers/gpu/drm/ttm/ttm_range_manager.c
index d91666721dc6..4cfef2b3514d 100644
--- a/drivers/gpu/drm/ttm/ttm_range_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_range_manager.c
@@ -113,6 +113,37 @@ static void ttm_range_man_free(struct ttm_resource_manager 
*man,
kfree(node);
 }
 
+static bool ttm_range_man_intersects(struct ttm_resource_manager *man,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   struct drm_mm_node *node = _ttm_range_mgr_node(res)->mm_nodes[0];
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the requested placement range */
+   if (place->fpfn >= (node->start + num_pages) ||
+   (place->lpfn && place->lpfn <= node->start))
+   return false;
+
+   return true;
+}
+
+static bool ttm_range_man_compatible(struct ttm_resource_manager *man,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   struct drm_mm_node *node = _ttm_range_mgr_node(res)->mm_nodes[0];
+   u32 num_pages = PFN_UP(size);
+
+   if (node->start < place->fpfn ||
+   (place->lpfn && (node->start + num_pages) > place->lpfn))
+   return false;
+
+   return true;
+}
+
 static void ttm_range_man_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
 {
@@ -126,6 +157,8 @@ static void ttm_range_man_debug(struct ttm_resource_manager 
*man,
 static const struct ttm_resource_manager_func ttm_range_manager_func = {
.alloc = ttm_range_man_alloc,
.free = ttm_range_man_free,
+   .intersects = ttm_range_man_intersects,
+   .compatible = ttm_range_man_compatible,
.debug = ttm_range_man_debug
 };
 
-- 
2.25.1



[PATCH v6 3/6] drm/amdgpu: Implement intersect/compatible functions

2022-08-20 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback function
fetching start offset from backend drm buddy allocator.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c  | 38 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 68 
 2 files changed, 106 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8c6b2284cf56..1f3302aebeff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -204,6 +204,42 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
amdgpu_gart_invalidate_tlb(adev);
 }
 
+/**
+ * amdgpu_gtt_mgr_intersects - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test, only interesting if we need GART or not.
+ */
+static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_compatible - test for compatibility
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified compatibility test.
+ */
+static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
 /**
  * amdgpu_gtt_mgr_debug - dump VRAM table
  *
@@ -225,6 +261,8 @@ static void amdgpu_gtt_mgr_debug(struct 
ttm_resource_manager *man,
 static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.alloc = amdgpu_gtt_mgr_new,
.free = amdgpu_gtt_mgr_del,
+   .intersects = amdgpu_gtt_mgr_intersects,
+   .compatible = amdgpu_gtt_mgr_compatible,
.debug = amdgpu_gtt_mgr_debug
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 28ec5f8ac1c1..d1a2619fa89f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -720,6 +720,72 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr 
*mgr)
return atomic64_read(>vis_usage);
 }
 
+/**
+ * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for intersection for eviction decision.
+ */
+static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+   struct drm_buddy_block *block;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, >blocks, link) {
+   unsigned long fpfn =
+   amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+   unsigned long lpfn = fpfn +
+   (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+   if (place->fpfn < lpfn &&
+   (place->lpfn && place->lpfn > fpfn))
+   return true;
+   }
+
+   return false;
+}
+
+/**
+ * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for placement compatibility.
+ */
+static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+   struct drm_buddy_block *block;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, >blocks, link) {
+   unsigned long fpfn =
+   amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+   unsigned long lpfn = 

[PATCH v6 6/6] drm/ttm: Switch to using the new res callback

2022-08-20 Thread Arunpravin Paneer Selvam
Apply new intersect and compatible callback instead
of having a generic placement range verfications.

v2: Added a separate callback for compatiblilty
checks (Christian)
v3: Cleanups and removal of workarounds

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 +++--
 drivers/gpu/drm/ttm/ttm_resource.c  | 17 ++
 2 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 170935c294f5..7d25a10395c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1328,11 +1328,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
 {
-   unsigned long num_pages = bo->resource->num_pages;
struct dma_resv_iter resv_cursor;
-   struct amdgpu_res_cursor cursor;
struct dma_fence *f;
 
+   if (!amdgpu_bo_is_amdgpu_bo(bo))
+   return ttm_bo_eviction_valuable(bo, place);
+
/* Swapout? */
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -1351,40 +1352,20 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return false;
}
 
-   switch (bo->resource->mem_type) {
-   case AMDGPU_PL_PREEMPT:
-   /* Preemptible BOs don't own system resources managed by the
-* driver (pages, VRAM, GART space). They point to resources
-* owned by someone else (e.g. pageable memory in user mode
-* or a DMABuf). They are used in a preemptible context so we
-* can guarantee no deadlocks and good QoS in case of MMU
-* notifiers or DMABuf move notifiers from the resource owner.
-*/
+   /* Preemptible BOs don't own system resources managed by the
+* driver (pages, VRAM, GART space). They point to resources
+* owned by someone else (e.g. pageable memory in user mode
+* or a DMABuf). They are used in a preemptible context so we
+* can guarantee no deadlocks and good QoS in case of MMU
+* notifiers or DMABuf move notifiers from the resource owner.
+*/
+   if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
return false;
-   case TTM_PL_TT:
-   if (amdgpu_bo_is_amdgpu_bo(bo) &&
-   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
-   return false;
-   return true;
 
-   case TTM_PL_VRAM:
-   /* Check each drm MM node individually */
-   amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
-);
-   while (cursor.remaining) {
-   if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
-   && !(place->lpfn &&
-place->lpfn <= PFN_DOWN(cursor.start)))
-   return true;
-
-   amdgpu_res_next(, cursor.size);
-   }
+   if (bo->resource->mem_type == TTM_PL_TT &&
+   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
 
-   default:
-   break;
-   }
-
return ttm_bo_eviction_valuable(bo, place);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 0d1f862a582b..a729c32a1e48 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -276,17 +276,9 @@ bool ttm_resource_intersects(struct ttm_device *bdev,
if (!res)
return false;
 
-   if (!place)
-   return true;
-
man = ttm_manager_type(bdev, res->mem_type);
-   if (!man->func->intersects) {
-   if (place->fpfn >= (res->start + res->num_pages) ||
-   (place->lpfn && place->lpfn <= res->start))
-   return false;
-
+   if (!place || !man->func->intersects)
return true;
-   }
 
return man->func->intersects(man, res, place, size);
 }
@@ -314,13 +306,8 @@ bool ttm_resource_compatible(struct ttm_device *bdev,
return false;
 
man = ttm_manager_type(bdev, res->mem_type);
-   if (!man->func->compatible) {
-   if (res->start < place->fpfn ||
-   (place->lpfn && (res->start + res->num_pages) > 
place->lpfn))
-   return false;
-
+   if (!man->func->compatible)
return true;
-   }
 
return man->func->compatible(man, res, place, size);
 }
-- 
2.25.1



[PATCH v6 5/6] drm/nouveau: Implement intersect/compatible functions

2022-08-20 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback function
fetching the start offset from struct ttm_resource.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 29 +++
 drivers/gpu/drm/nouveau/nouveau_mem.h |  6 ++
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 24 ++
 3 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c 
b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 2e517cdc24c9..76f8edefa637 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -187,3 +187,32 @@ nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp,
*res = >base;
return 0;
 }
+
+bool
+nouveau_mem_intersects(struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the requested placement range */
+   if (place->fpfn >= (res->start + num_pages) ||
+   (place->lpfn && place->lpfn <= res->start))
+   return false;
+
+   return true;
+}
+
+bool
+nouveau_mem_compatible(struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   u32 num_pages = PFN_UP(size);
+
+   if (res->start < place->fpfn ||
+   (place->lpfn && (res->start + num_pages) > place->lpfn))
+   return false;
+
+   return true;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h 
b/drivers/gpu/drm/nouveau/nouveau_mem.h
index 325551eba5cd..1ee6cdb9ad9b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.h
@@ -25,6 +25,12 @@ int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp,
struct ttm_resource **);
 void nouveau_mem_del(struct ttm_resource_manager *man,
 struct ttm_resource *);
+bool nouveau_mem_intersects(struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size);
+bool nouveau_mem_compatible(struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size);
 int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page);
 int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *);
 void nouveau_mem_fini(struct nouveau_mem *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 85f1f5a0fe5d..9602c30928f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -42,6 +42,24 @@ nouveau_manager_del(struct ttm_resource_manager *man,
nouveau_mem_del(man, reg);
 }
 
+static bool
+nouveau_manager_intersects(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   return nouveau_mem_intersects(res, place, size);
+}
+
+static bool
+nouveau_manager_compatible(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   return nouveau_mem_compatible(res, place, size);
+}
+
 static int
 nouveau_vram_manager_new(struct ttm_resource_manager *man,
 struct ttm_buffer_object *bo,
@@ -73,6 +91,8 @@ nouveau_vram_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_vram_manager = {
.alloc = nouveau_vram_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
@@ -97,6 +117,8 @@ nouveau_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_gart_manager = {
.alloc = nouveau_gart_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
@@ -130,6 +152,8 @@ nv04_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nv04_gart_manager = {
.alloc = nv04_gart_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
-- 
2.25.1



[PATCH v6 1/6] drm/ttm: Add new callbacks to ttm res mgr

2022-08-20 Thread Arunpravin Paneer Selvam
We are adding two new callbacks to ttm resource manager
function to handle intersection and compatibility of
placement and resources.

v2: move the amdgpu and ttm_range_manager changes to
separate patches (Christian)
v3: rename "intersect" to "intersects" (Matthew)
v4: move !place check to the !res if and return false
in ttm_resource_compatible() function (Christian)
v5: move bits of code from patch number 6 to avoid
temporary driver breakup (Christian)

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo.c   |  9 ++--
 drivers/gpu/drm/ttm/ttm_resource.c | 77 +-
 include/drm/ttm/ttm_resource.h | 40 
 3 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c1bd006a5525..f066e8124c50 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -518,6 +518,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
 bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
  const struct ttm_place *place)
 {
+   struct ttm_resource *res = bo->resource;
+   struct ttm_device *bdev = bo->bdev;
+
dma_resv_assert_held(bo->base.resv);
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -525,11 +528,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
/* Don't evict this BO if it's outside of the
 * requested placement range
 */
-   if (place->fpfn >= (bo->resource->start + bo->resource->num_pages) ||
-   (place->lpfn && place->lpfn <= bo->resource->start))
-   return false;
-
-   return true;
+   return ttm_resource_intersects(bdev, res, place, bo->base.size);
 }
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 20f9adcc3235..0d1f862a582b 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -253,10 +253,84 @@ void ttm_resource_free(struct ttm_buffer_object *bo, 
struct ttm_resource **res)
 }
 EXPORT_SYMBOL(ttm_resource_free);
 
+/**
+ * ttm_resource_intersects - test for intersection
+ *
+ * @bdev: TTM device structure
+ * @res: The resource to test
+ * @place: The placement to test
+ * @size: How many bytes the new allocation needs.
+ *
+ * Test if @res intersects with @place and @size. Used for testing if evictions
+ * are valueable or not.
+ *
+ * Returns true if the res placement intersects with @place and @size.
+ */
+bool ttm_resource_intersects(struct ttm_device *bdev,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   struct ttm_resource_manager *man;
+
+   if (!res)
+   return false;
+
+   if (!place)
+   return true;
+
+   man = ttm_manager_type(bdev, res->mem_type);
+   if (!man->func->intersects) {
+   if (place->fpfn >= (res->start + res->num_pages) ||
+   (place->lpfn && place->lpfn <= res->start))
+   return false;
+
+   return true;
+   }
+
+   return man->func->intersects(man, res, place, size);
+}
+
+/**
+ * ttm_resource_compatible - test for compatibility
+ *
+ * @bdev: TTM device structure
+ * @res: The resource to test
+ * @place: The placement to test
+ * @size: How many bytes the new allocation needs.
+ *
+ * Test if @res compatible with @place and @size.
+ *
+ * Returns true if the res placement compatible with @place and @size.
+ */
+bool ttm_resource_compatible(struct ttm_device *bdev,
+struct ttm_resource *res,
+const struct ttm_place *place,
+size_t size)
+{
+   struct ttm_resource_manager *man;
+
+   if (!res || !place)
+   return false;
+
+   man = ttm_manager_type(bdev, res->mem_type);
+   if (!man->func->compatible) {
+   if (res->start < place->fpfn ||
+   (place->lpfn && (res->start + res->num_pages) > 
place->lpfn))
+   return false;
+
+   return true;
+   }
+
+   return man->func->compatible(man, res, place, size);
+}
+
 static bool ttm_resource_places_compat(struct ttm_resource *res,
   const struct ttm_place *places,
   unsigned num_placement)
 {
+   struct ttm_buffer_object *bo = res->bo;
+   struct ttm_device *bdev = bo->bdev;
unsigned i;
 
if (res->placement & TTM_PL_FLAG_TEMPOR

Re: [PATCH v6 1/6] drm/ttm: Add new callbacks to ttm res mgr

2022-08-15 Thread Arunpravin Paneer Selvam




On 8/15/2022 4:35 PM, Christian König wrote:

Am 12.08.22 um 15:30 schrieb Arunpravin Paneer Selvam:

We are adding two new callbacks to ttm resource manager
function to handle intersection and compatibility of
placement and resources.

v2: move the amdgpu and ttm_range_manager changes to
 separate patches (Christian)
v3: rename "intersect" to "intersects" (Matthew)
v4: move !place check to the !res if and return false
 in ttm_resource_compatible() function (Christian)
v5: move bits of code from patch number 6 to avoid
 temporary driver breakup (Christian)

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 



Patch #6 could still be cleaned up more now that we have the 
workaround code in patch #1, but that not really a must have.


Reviewed-by: Christian König  for the entire 
series.


Do you already have commit rights?


Hi Christian,
I applied for drm-misc commit rights, waiting for the project 
maintainers to approve my request.


Thanks,
Arun


Regards,
Christian.


---
  drivers/gpu/drm/ttm/ttm_bo.c   |  9 ++--
  drivers/gpu/drm/ttm/ttm_resource.c | 77 +-
  include/drm/ttm/ttm_resource.h | 40 
  3 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c1bd006a5525..f066e8124c50 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -518,6 +518,9 @@ static int ttm_bo_evict(struct ttm_buffer_object 
*bo,

  bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
    const struct ttm_place *place)
  {
+    struct ttm_resource *res = bo->resource;
+    struct ttm_device *bdev = bo->bdev;
+
  dma_resv_assert_held(bo->base.resv);
  if (bo->resource->mem_type == TTM_PL_SYSTEM)
  return true;
@@ -525,11 +528,7 @@ bool ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,

  /* Don't evict this BO if it's outside of the
   * requested placement range
   */
-    if (place->fpfn >= (bo->resource->start + 
bo->resource->num_pages) ||

-    (place->lpfn && place->lpfn <= bo->resource->start))
-    return false;
-
-    return true;
+    return ttm_resource_intersects(bdev, res, place, bo->base.size);
  }
  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c

index 20f9adcc3235..0d1f862a582b 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -253,10 +253,84 @@ void ttm_resource_free(struct ttm_buffer_object 
*bo, struct ttm_resource **res)

  }
  EXPORT_SYMBOL(ttm_resource_free);
  +/**
+ * ttm_resource_intersects - test for intersection
+ *
+ * @bdev: TTM device structure
+ * @res: The resource to test
+ * @place: The placement to test
+ * @size: How many bytes the new allocation needs.
+ *
+ * Test if @res intersects with @place and @size. Used for testing 
if evictions

+ * are valueable or not.
+ *
+ * Returns true if the res placement intersects with @place and @size.
+ */
+bool ttm_resource_intersects(struct ttm_device *bdev,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+    struct ttm_resource_manager *man;
+
+    if (!res)
+    return false;
+
+    if (!place)
+    return true;
+
+    man = ttm_manager_type(bdev, res->mem_type);
+    if (!man->func->intersects) {
+    if (place->fpfn >= (res->start + res->num_pages) ||
+    (place->lpfn && place->lpfn <= res->start))
+    return false;
+
+    return true;
+    }
+
+    return man->func->intersects(man, res, place, size);
+}
+
+/**
+ * ttm_resource_compatible - test for compatibility
+ *
+ * @bdev: TTM device structure
+ * @res: The resource to test
+ * @place: The placement to test
+ * @size: How many bytes the new allocation needs.
+ *
+ * Test if @res compatible with @place and @size.
+ *
+ * Returns true if the res placement compatible with @place and @size.
+ */
+bool ttm_resource_compatible(struct ttm_device *bdev,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+    struct ttm_resource_manager *man;
+
+    if (!res || !place)
+    return false;
+
+    man = ttm_manager_type(bdev, res->mem_type);
+    if (!man->func->compatible) {
+    if (res->start < place->fpfn ||
+    (place->lpfn && (res->start + res->num_pages) > 
place->lpfn))

+    return false;
+
+    return true;
+    }
+
+    return man->func->compatible(man, res, place, size);
+}
+
  static bool ttm_resource_places_compat(struct ttm_resource *res,
 const struct ttm_place *places,
 unsigned 

Re: [bug report] drm/ttm: Fix dummy res NULL ptr deref bug

2022-08-14 Thread Arunpravin Paneer Selvam

Hi Dan,

On 8/11/2022 5:26 PM, Dan Carpenter wrote:

On Thu, Aug 11, 2022 at 04:36:33PM +0530, Arunpravin Paneer Selvam wrote:

Hi Dan,

drm-misc-fixes doesn't have the updated ttm_bo.c file, we have the updated
ttm_bo.c version in
drm-misc-next branch. Please find below for the line number 907.

On 8/11/2022 3:25 PM, Dan Carpenter wrote:

Hello Arunpravin Paneer Selvam,

This is a semi-automatic email about new static checker warnings.

The patch cf4b7387c0a8: "drm/ttm: Fix dummy res NULL ptr deref bug"
from Aug 9, 2022, leads to the following Smatch complaint:

  drivers/gpu/drm/ttm/ttm_bo.c:915 ttm_bo_validate()
  warn: variable dereferenced before check 'bo->resource' (see line 907)

drivers/gpu/drm/ttm/ttm_bo.c
 906 */
 907if (!ttm_resource_compat(bo->resource, placement)) {
   
Unchecked dereference here inside the function.

|if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) { we
have this version in drm-misc-next Regards, Arun |


Huh...  That's very interesting.  It appears there was a bug in
drm-misc-next, we applied the fix to the wrong tree, and now both trees
are wrong.  The drm-misc-next tree still has the bug and the other tree
has a static checker warning about nonsensical NULL checks.

Eventually drm-misc-next will get merged and everything will work.  Is
it too late to remove the bogus "CC: sta...@vger.kernel.org"?

I will look into this problem.

This could have been avoided if the NULL dereference fix had a Fixes tag.

I should have added the below tag
Fixes: 347987a2cf0d ("drm/ttm: rename and cleanup ttm_bo_init")

I will check on this.

Thanks,
Arun


regards,
dan carpenter





[PATCH v6 6/6] drm/ttm: Switch to using the new res callback

2022-08-12 Thread Arunpravin Paneer Selvam
Apply new intersect and compatible callback instead
of having a generic placement range verfications.

v2: Added a separate callback for compatiblilty
checks (Christian)
v3: Cleanups and removal of workarounds

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 +++--
 drivers/gpu/drm/ttm/ttm_resource.c  | 17 ++
 2 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 170935c294f5..7d25a10395c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1328,11 +1328,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
 {
-   unsigned long num_pages = bo->resource->num_pages;
struct dma_resv_iter resv_cursor;
-   struct amdgpu_res_cursor cursor;
struct dma_fence *f;
 
+   if (!amdgpu_bo_is_amdgpu_bo(bo))
+   return ttm_bo_eviction_valuable(bo, place);
+
/* Swapout? */
if (bo->resource->mem_type == TTM_PL_SYSTEM)
return true;
@@ -1351,40 +1352,20 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
return false;
}
 
-   switch (bo->resource->mem_type) {
-   case AMDGPU_PL_PREEMPT:
-   /* Preemptible BOs don't own system resources managed by the
-* driver (pages, VRAM, GART space). They point to resources
-* owned by someone else (e.g. pageable memory in user mode
-* or a DMABuf). They are used in a preemptible context so we
-* can guarantee no deadlocks and good QoS in case of MMU
-* notifiers or DMABuf move notifiers from the resource owner.
-*/
+   /* Preemptible BOs don't own system resources managed by the
+* driver (pages, VRAM, GART space). They point to resources
+* owned by someone else (e.g. pageable memory in user mode
+* or a DMABuf). They are used in a preemptible context so we
+* can guarantee no deadlocks and good QoS in case of MMU
+* notifiers or DMABuf move notifiers from the resource owner.
+*/
+   if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
return false;
-   case TTM_PL_TT:
-   if (amdgpu_bo_is_amdgpu_bo(bo) &&
-   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
-   return false;
-   return true;
 
-   case TTM_PL_VRAM:
-   /* Check each drm MM node individually */
-   amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
-);
-   while (cursor.remaining) {
-   if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
-   && !(place->lpfn &&
-place->lpfn <= PFN_DOWN(cursor.start)))
-   return true;
-
-   amdgpu_res_next(, cursor.size);
-   }
+   if (bo->resource->mem_type == TTM_PL_TT &&
+   amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
return false;
 
-   default:
-   break;
-   }
-
return ttm_bo_eviction_valuable(bo, place);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 0d1f862a582b..a729c32a1e48 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -276,17 +276,9 @@ bool ttm_resource_intersects(struct ttm_device *bdev,
if (!res)
return false;
 
-   if (!place)
-   return true;
-
man = ttm_manager_type(bdev, res->mem_type);
-   if (!man->func->intersects) {
-   if (place->fpfn >= (res->start + res->num_pages) ||
-   (place->lpfn && place->lpfn <= res->start))
-   return false;
-
+   if (!place || !man->func->intersects)
return true;
-   }
 
return man->func->intersects(man, res, place, size);
 }
@@ -314,13 +306,8 @@ bool ttm_resource_compatible(struct ttm_device *bdev,
return false;
 
man = ttm_manager_type(bdev, res->mem_type);
-   if (!man->func->compatible) {
-   if (res->start < place->fpfn ||
-   (place->lpfn && (res->start + res->num_pages) > 
place->lpfn))
-   return false;
-
+   if (!man->func->compatible)
return true;
-   }
 
return man->func->compatible(man, res, place, size);
 }
-- 
2.25.1



[PATCH v6 5/6] drm/nouveau: Implement intersect/compatible functions

2022-08-12 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback function
fetching the start offset from struct ttm_resource.

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
---
 drivers/gpu/drm/nouveau/nouveau_mem.c | 29 +++
 drivers/gpu/drm/nouveau/nouveau_mem.h |  6 ++
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 24 ++
 3 files changed, 59 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c 
b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 2e517cdc24c9..76f8edefa637 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -187,3 +187,32 @@ nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp,
*res = >base;
return 0;
 }
+
+bool
+nouveau_mem_intersects(struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   u32 num_pages = PFN_UP(size);
+
+   /* Don't evict BOs outside of the requested placement range */
+   if (place->fpfn >= (res->start + num_pages) ||
+   (place->lpfn && place->lpfn <= res->start))
+   return false;
+
+   return true;
+}
+
+bool
+nouveau_mem_compatible(struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   u32 num_pages = PFN_UP(size);
+
+   if (res->start < place->fpfn ||
+   (place->lpfn && (res->start + num_pages) > place->lpfn))
+   return false;
+
+   return true;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h 
b/drivers/gpu/drm/nouveau/nouveau_mem.h
index 325551eba5cd..1ee6cdb9ad9b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.h
@@ -25,6 +25,12 @@ int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp,
struct ttm_resource **);
 void nouveau_mem_del(struct ttm_resource_manager *man,
 struct ttm_resource *);
+bool nouveau_mem_intersects(struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size);
+bool nouveau_mem_compatible(struct ttm_resource *res,
+   const struct ttm_place *place,
+   size_t size);
 int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page);
 int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *);
 void nouveau_mem_fini(struct nouveau_mem *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 85f1f5a0fe5d..9602c30928f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -42,6 +42,24 @@ nouveau_manager_del(struct ttm_resource_manager *man,
nouveau_mem_del(man, reg);
 }
 
+static bool
+nouveau_manager_intersects(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   return nouveau_mem_intersects(res, place, size);
+}
+
+static bool
+nouveau_manager_compatible(struct ttm_resource_manager *man,
+  struct ttm_resource *res,
+  const struct ttm_place *place,
+  size_t size)
+{
+   return nouveau_mem_compatible(res, place, size);
+}
+
 static int
 nouveau_vram_manager_new(struct ttm_resource_manager *man,
 struct ttm_buffer_object *bo,
@@ -73,6 +91,8 @@ nouveau_vram_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_vram_manager = {
.alloc = nouveau_vram_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
@@ -97,6 +117,8 @@ nouveau_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nouveau_gart_manager = {
.alloc = nouveau_gart_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
@@ -130,6 +152,8 @@ nv04_gart_manager_new(struct ttm_resource_manager *man,
 const struct ttm_resource_manager_func nv04_gart_manager = {
.alloc = nv04_gart_manager_new,
.free = nouveau_manager_del,
+   .intersects = nouveau_manager_intersects,
+   .compatible = nouveau_manager_compatible,
 };
 
 static int
-- 
2.25.1



[PATCH v6 4/6] drm/i915: Implement intersect/compatible functions

2022-08-12 Thread Arunpravin Paneer Selvam
Implemented a new intersect and compatible callback function
fetching start offset from drm buddy allocator.

v3: move the bits that are specific to buddy_man (Matthew)
v4: consider the block size /range (Matthew)

Signed-off-by: Christian König 
Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c   | 41 +--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 73 +++
 2 files changed, 74 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 5a5cf332d8a5..bc9c432edffe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -361,7 +361,6 @@ static bool i915_ttm_eviction_valuable(struct 
ttm_buffer_object *bo,
   const struct ttm_place *place)
 {
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-   struct ttm_resource *res = bo->resource;
 
if (!obj)
return false;
@@ -378,45 +377,7 @@ static bool i915_ttm_eviction_valuable(struct 
ttm_buffer_object *bo,
if (!i915_gem_object_evictable(obj))
return false;
 
-   switch (res->mem_type) {
-   case I915_PL_LMEM0: {
-   struct ttm_resource_manager *man =
-   ttm_manager_type(bo->bdev, res->mem_type);
-   struct i915_ttm_buddy_resource *bman_res =
-   to_ttm_buddy_resource(res);
-   struct drm_buddy *mm = bman_res->mm;
-   struct drm_buddy_block *block;
-
-   if (!place->fpfn && !place->lpfn)
-   return true;
-
-   GEM_BUG_ON(!place->lpfn);
-
-   /*
-* If we just want something mappable then we can quickly check
-* if the current victim resource is using any of the CPU
-* visible portion.
-*/
-   if (!place->fpfn &&
-   place->lpfn == i915_ttm_buddy_man_visible_size(man))
-   return bman_res->used_visible_size > 0;
-
-   /* Real range allocation */
-   list_for_each_entry(block, _res->blocks, link) {
-   unsigned long fpfn =
-   drm_buddy_block_offset(block) >> PAGE_SHIFT;
-   unsigned long lpfn = fpfn +
-   (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
-
-   if (place->fpfn < lpfn && place->lpfn > fpfn)
-   return true;
-   }
-   return false;
-   } default:
-   break;
-   }
-
-   return true;
+   return ttm_bo_eviction_valuable(bo, place);
 }
 
 static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c 
b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 427de1aaab36..e19452f0e100 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -173,6 +173,77 @@ static void i915_ttm_buddy_man_free(struct 
ttm_resource_manager *man,
kfree(bman_res);
 }
 
+static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+   struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
+   struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
+   struct drm_buddy *mm = >mm;
+   struct drm_buddy_block *block;
+
+   if (!place->fpfn && !place->lpfn)
+   return true;
+
+   GEM_BUG_ON(!place->lpfn);
+
+   /*
+* If we just want something mappable then we can quickly check
+* if the current victim resource is using any of the CPU
+* visible portion.
+*/
+   if (!place->fpfn &&
+   place->lpfn == i915_ttm_buddy_man_visible_size(man))
+   return bman_res->used_visible_size > 0;
+
+   /* Check each drm buddy block individually */
+   list_for_each_entry(block, _res->blocks, link) {
+   unsigned long fpfn =
+   drm_buddy_block_offset(block) >> PAGE_SHIFT;
+   unsigned long lpfn = fpfn +
+   (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+   if (place->fpfn < lpfn && place->lpfn > fpfn)
+   return true;
+   }
+
+   return false;
+}
+
+static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+  

  1   2   >