Along the lines of:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 428265046815..06b243f05edd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1824,7 +1824,7 @@ static int amdgpu_ttm_pools_init(struct amdgpu_device
*adev)
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
adev->gmc.mem_partitions[i].numa.node,
- false, false);
+ false, false, get_order(2 * SZ_1M));
}
return 0;
}
@@ -1865,7 +1865,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ dma_addressing_limited(adev->dev),
+ get_order(2 * SZ_1M));
if (r) {
dev_err(adev->dev,
"failed initializing buffer object driver(%d).\n", r);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index baf27c70a419..5d54e8373230 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -726,8 +726,12 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct
ttm_tt *tt,
page_caching = tt->caching;
allow_pools = true;
- for (order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
- alloc->remaining_pages;
+
+ order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
+ if (order > pool->max_beneficial_order)
+ gfp_flags &= ~__GFP_DIRECT_RECLAIM;
+
+ for (; alloc->remaining_pages;
order = ttm_pool_alloc_find_order(order, alloc)) {
struct ttm_pool_type *pt;
@@ -745,6 +749,8 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct
ttm_tt *tt,
if (!p) {
page_caching = ttm_cached;
allow_pools = false;
+ if (order <= pool->max_beneficial_order)
+ gfp_flags |= __GFP_DIRECT_RECLAIM;
p = ttm_pool_alloc_page(pool, gfp_flags, order);
}
/* If that fails, lower the order if possible and retry. */
@@ -1064,7 +1070,8 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt
*tt,
* Initialize the pool and its pool types.
*/
void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
- int nid, bool use_dma_alloc, bool use_dma32)
+ int nid, bool use_dma_alloc, bool use_dma32,
+ unsigned int max_beneficial_order)
{
unsigned int i, j;
@@ -1074,6 +1081,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device
*dev,
pool->nid = nid;
pool->use_dma_alloc = use_dma_alloc;
pool->use_dma32 = use_dma32;
+ pool->max_beneficial_order = max_beneficial_order;
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
for (j = 0; j < NR_PAGE_ORDERS; ++j) {
That should have the page allocator working less hard and lower the latency
with large buffers.
Then a more aggressive change on top could be:
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 5d54e8373230..152164f79927 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -726,12 +726,8 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct
ttm_tt *tt,
page_caching = tt->caching;
allow_pools = true;
-
- order = ttm_pool_alloc_find_order(MAX_PAGE_ORDER, alloc);
- if (order > pool->max_beneficial_order)
- gfp_flags &= ~__GFP_DIRECT_RECLAIM;
-
- for (; alloc->remaining_pages;
+ for (order = ttm_pool_alloc_find_order(pool->max_beneficial_order, alloc);
+ alloc->remaining_pages;
order = ttm_pool_alloc_find_order(order, alloc)) {
struct ttm_pool_type *pt;
@@ -749,8 +745,6 @@ static int __ttm_pool_alloc(struct ttm_pool *pool, struct
ttm_tt *tt,
if (!p) {
page_caching = ttm_cached;
allow_pools = false;
- if (order <= pool->max_beneficial_order)
- gfp_flags |= __GFP_DIRECT_RECLAIM;
p = ttm_pool_alloc_page(pool, gfp_flags, order);
}
/* If that fails, lower the order if possible and retry. */
I.e. don't even bother trying to allocate orders above what the driver says is
useful. This could be made a driver's choice as well.
And all could be combined with some sort of a sysfs control, as Cascardo was
suggesting, to disable direct reclaim completely if someone wants that.
Regards,
Tvrtko
Another alternative would be that we add a WARN_ONCE() when we have to fall back
to lower-order pages, but that wouldn't help the end user either. It just makes
it more obvious that you need more memory for a specific use case without
triggering the OOM killer.
Regards,
Christian.
Regards,
Tvrtko
The alternative I can offer is to disable the fallback, which in your case would
trigger the OOM killer.
Regards,
Christian.
Other drivers can later opt to use this mechanism too.
Signed-off-by: Thadeu Lima de Souza Cascardo <[email protected]>
---
Changes in v2:
- Make disabling direct reclaim an option.
- Link to v1:
https://lore.kernel.org/r/[email protected]
---
Thadeu Lima de Souza Cascardo (3):
ttm: pool: allow requests to prefer latency over throughput
ttm: pool: add a module parameter to set latency preference
drm/amdgpu: allow allocation preferences when creating GEM object
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
drivers/gpu/drm/ttm/ttm_pool.c | 23 +++++++++++++++++------
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
include/drm/ttm/ttm_bo.h | 5 +++++
include/drm/ttm/ttm_pool.h | 2 +-
include/drm/ttm/ttm_tt.h | 2 +-
include/uapi/drm/amdgpu_drm.h | 9 +++++++++
8 files changed, 38 insertions(+), 11 deletions(-)
---
base-commit: f83ec76bf285bea5727f478a68b894f5543ca76e
change-id: 20250909-ttm_pool_no_direct_reclaim-ee0807a2d3fe
Best regards,