Am 14.04.2016 um 16:23 schrieb Alex Deucher: > On Thu, Apr 14, 2016 at 8:54 AM, Christian König > <deathsimple at vodafone.de> wrote: >> From: Christian König <christian.koenig at amd.com> >> >> This allows us to have small BOs on the LRU before big ones. >> >> Signed-off-by: Christian König <christian.koenig at amd.com> > Have you done any benchmarking to see how much this helps when there > is memory contention?
Still working on this. Marek, could you help with that? You usually have the Unigine benchmarks ready at hand. Christian. > > For the series: > Reviewed-by: Alex Deucher <alexander.deucher at amd.com> > >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 ++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 >> +++++++++++++++++++++++++++++++-- >> 2 files changed, 68 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index c4a21c6..7b90323 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -391,6 +391,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring >> *ring); >> /* >> * TTM. >> */ >> + >> +#define AMDGPU_TTM_LRU_SIZE 20 >> + >> +struct amdgpu_mman_lru { >> + struct list_head *lru[TTM_NUM_MEM_TYPES]; >> + struct list_head *swap_lru; >> +}; >> + >> struct amdgpu_mman { >> struct ttm_bo_global_ref bo_global_ref; >> struct drm_global_reference mem_global_ref; >> @@ -408,6 +416,9 @@ struct amdgpu_mman { >> struct amdgpu_ring *buffer_funcs_ring; >> /* Scheduler entity for buffer moves */ >> struct amd_sched_entity entity; >> + >> + /* custom LRU management */ >> + struct amdgpu_mman_lru >> log2_size[AMDGPU_TTM_LRU_SIZE]; >> }; >> >> int amdgpu_copy_buffer(struct amdgpu_ring *ring, >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> index fefaa9b..b58a445 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> @@ -910,6 +910,50 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device >> *adev, struct ttm_tt *ttm, >> return flags; >> } >> >> +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo) >> +{ >> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); >> + unsigned i; >> + >> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { >> + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; >> + >> + if (&tbo->lru == 
lru->lru[tbo->mem.mem_type]) >> + lru->lru[tbo->mem.mem_type] = tbo->lru.prev; >> + >> + if (&tbo->swap == lru->swap_lru) >> + lru->swap_lru = tbo->swap.prev; >> + } >> +} >> + >> +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo) >> +{ >> + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); >> + unsigned log2_size = min(ilog2(tbo->num_pages), AMDGPU_TTM_LRU_SIZE); >> + >> + return &adev->mman.log2_size[log2_size]; >> +} >> + >> +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) >> +{ >> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); >> + struct list_head *res = lru->lru[tbo->mem.mem_type]; >> + >> + lru->lru[tbo->mem.mem_type] = &tbo->lru; >> + >> + return res; >> +} >> + >> +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object >> *tbo) >> +{ >> + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); >> + struct list_head *res = lru->swap_lru; >> + >> + lru->swap_lru = &tbo->swap; >> + >> + return res; >> +} >> + >> static struct ttm_bo_driver amdgpu_bo_driver = { >> .ttm_tt_create = &amdgpu_ttm_tt_create, >> .ttm_tt_populate = &amdgpu_ttm_tt_populate, >> @@ -923,12 +967,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { >> .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, >> .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, >> .io_mem_free = &amdgpu_ttm_io_mem_free, >> - .lru_tail = &ttm_bo_default_lru_tail, >> - .swap_lru_tail = &ttm_bo_default_swap_lru_tail, >> + .lru_removal = &amdgpu_ttm_lru_removal, >> + .lru_tail = &amdgpu_ttm_lru_tail, >> + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail, >> }; >> >> int amdgpu_ttm_init(struct amdgpu_device *adev) >> { >> + unsigned i, j; >> int r; >> >> r = amdgpu_ttm_global_init(adev); >> @@ -946,6 +992,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) >> DRM_ERROR("failed initializing buffer object >> driver(%d).\n", r); >> return r; >> } >> + >> + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { >> + struct amdgpu_mman_lru *lru = 
&adev->mman.log2_size[i]; >> + >> + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) >> + lru->lru[j] = &adev->mman.bdev.man[j].lru; >> + lru->swap_lru = &adev->mman.bdev.glob->swap_lru; >> + } >> + >> adev->mman.initialized = true; >> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, >> adev->mc.real_vram_size >> PAGE_SHIFT); >> -- >> 2.5.0 >>