comments inline

> On 11 Feb 2020, at 13:26, Pan, Xinhui <xinhui....@amd.com> wrote:
> 
> comments inline.
> [xh]
> 
> 
>> On 10 Feb 2020, at 23:09, Christian König <ckoenig.leichtzumer...@gmail.com> wrote:
>> 
>> This patch reworks the whole delayed deletion of BOs which aren't idle.
>> 
>> Instead of having two counters in the BO structure, we resurrect the BO
>> when we find that a deleted BO is not idle yet.
>> 
>> This has many advantages, especially that we don't need to
>> increment/decrement the BO's reference counter anymore when it
>> moves on the LRUs.
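
To make the resurrection idea concrete for other readers, here is a minimal
user-space model of the refcounting pattern (not TTM code; object,
get_unless_zero, put and release are made-up names, and the busy flag stands
in for the BO's unsignaled fences): release() runs when the count hits zero
and, if the object is still busy, re-arms the count to one and marks the
object a zombie instead of freeing it. That is also why lookups from shared
lists must use a get-unless-zero primitive, so they never revive an object
that release() is currently tearing down.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
        atomic_int kref;        /* single reference count, like bo->kref */
        bool deleted;           /* object is a zombie awaiting idle */
        bool busy;              /* models "fences not signaled yet" */
};

/* Like kref_get_unless_zero(): fail instead of reviving a dying object. */
static bool get_unless_zero(struct object *obj)
{
        int old = atomic_load(&obj->kref);

        while (old > 0)
                if (atomic_compare_exchange_weak(&obj->kref, &old, old + 1))
                        return true;
        return false;
}

static void release(struct object *obj)
{
        if (obj->busy) {
                /* Not idle: resurrect with exactly one reference and leave
                 * destruction to a delayed worker (the ddestroy list
                 * handling is omitted in this model). */
                obj->deleted = true;
                atomic_store(&obj->kref, 1);
                return;
        }
        free(obj);
}

static void put(struct object *obj)
{
        if (atomic_fetch_sub(&obj->kref, 1) == 1)
                release(obj);
}

int main(void)
{
        struct object *obj = calloc(1, sizeof(*obj));

        atomic_init(&obj->kref, 1);
        obj->busy = true;
        put(obj);               /* last ref dropped while busy: resurrected */
        printf("deleted=%d kref=%d\n", (int)obj->deleted,
               atomic_load(&obj->kref));

        if (get_unless_zero(obj))       /* a lookup taking its own reference */
                put(obj);

        obj->busy = false;              /* "fences signaled" */
        put(obj);                       /* idle now: really freed */
        return 0;
}

With that model in mind, the ttm_bo_get_unless_zero()/ttm_bo_put() pairs in
the patch below are easier to follow.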
>> 
>> Signed-off-by: Christian König <christian.koe...@amd.com>
>> ---
>>  drivers/gpu/drm/ttm/ttm_bo.c      | 217 +++++++++++++-----------------
>>  drivers/gpu/drm/ttm/ttm_bo_util.c |   1 -
>>  include/drm/ttm/ttm_bo_api.h      |  11 +-
>>  3 files changed, 97 insertions(+), 132 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index e12fc2c2d165..d0624685f5d2 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -145,26 +145,6 @@ static inline uint32_t ttm_bo_type_flags(unsigned type)
>>  	return 1 << (type);
>>  }
>> 
>> -static void ttm_bo_release_list(struct kref *list_kref)
>> -{
>> -	struct ttm_buffer_object *bo =
>> -		container_of(list_kref, struct ttm_buffer_object, list_kref);
>> -	size_t acc_size = bo->acc_size;
>> -
>> -	BUG_ON(kref_read(&bo->list_kref));
>> -	BUG_ON(kref_read(&bo->kref));
>> -	BUG_ON(bo->mem.mm_node != NULL);
>> -	BUG_ON(!list_empty(&bo->lru));
>> -	BUG_ON(!list_empty(&bo->ddestroy));
>> -	ttm_tt_destroy(bo->ttm);
>> -	atomic_dec(&ttm_bo_glob.bo_count);
>> -	dma_fence_put(bo->moving);
>> -	if (!ttm_bo_uses_embedded_gem_object(bo))
>> -		dma_resv_fini(&bo->base._resv);
>> -	bo->destroy(bo);
>> -	ttm_mem_global_free(&ttm_mem_glob, acc_size);
>> -}
>> -
>>  static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
>>  				  struct ttm_mem_reg *mem)
>>  {
>> @@ -181,21 +161,14 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
>> 
>>  	man = &bdev->man[mem->mem_type];
>>  	list_add_tail(&bo->lru, &man->lru[bo->priority]);
>> -	kref_get(&bo->list_kref);
>> 
>>  	if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
>>  	    !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
>>  				     TTM_PAGE_FLAG_SWAPPED))) {
>>  		list_add_tail(&bo->swap, &ttm_bo_glob.swap_lru[bo->priority]);
>> -		kref_get(&bo->list_kref);
>>  	}
>>  }
>> 
>> -static void ttm_bo_ref_bug(struct kref *list_kref)
>> -{
>> -	BUG();
>> -}
>> -
>>  static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>> @@ -203,12 +176,10 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
>>  	struct ttm_bo_device *bdev = bo->bdev;
>> 
>>  	if (!list_empty(&bo->swap)) {
>>  		list_del_init(&bo->swap);
>> -		kref_put(&bo->list_kref, ttm_bo_ref_bug);
>>  		notify = true;
>>  	}
>>  	if (!list_empty(&bo->lru)) {
>>  		list_del_init(&bo->lru);
>> -		kref_put(&bo->list_kref, ttm_bo_ref_bug);
>>  		notify = true;
>>  	}
>> 
>> @@ -421,8 +392,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>  	BUG_ON(!dma_resv_trylock(&bo->base._resv));
>> 
>>  	r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
>> -	if (r)
>> -		dma_resv_unlock(&bo->base._resv);
>> +	dma_resv_unlock(&bo->base._resv);
>> 
>>  	return r;
>>  }
>> @@ -449,68 +419,10 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>>  	rcu_read_unlock();
>>  }
>> 
>> -static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
>> -{
>> -	struct ttm_bo_device *bdev = bo->bdev;
>> -	int ret;
>> -
>> -	ret = ttm_bo_individualize_resv(bo);
>> -	if (ret) {
>> -		/* Last resort, if we fail to allocate memory for the
>> -		 * fences block for the BO to become idle
>> -		 */
>> -		dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
>> -					  30 * HZ);
>> -		spin_lock(&ttm_bo_glob.lru_lock);
>> -		goto error;
>> -	}
>> -
>> -	spin_lock(&ttm_bo_glob.lru_lock);
>> -	ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
>> -	if (!ret) {
>> -		if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
>> -			ttm_bo_del_from_lru(bo);
>> -			spin_unlock(&ttm_bo_glob.lru_lock);
>> -			if (bo->base.resv != &bo->base._resv)
>> -				dma_resv_unlock(&bo->base._resv);
>> -
>> -			ttm_bo_cleanup_memtype_use(bo);
>> -			dma_resv_unlock(bo->base.resv);
>> -			return;
>> -		}
>> -
>> -		ttm_bo_flush_all_fences(bo);
>> -
>> -		/*
>> -		 * Make NO_EVICT bos immediately available to
>> -		 * shrinkers, now that they are queued for
>> -		 * destruction.
>> -		 */
>> -		if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
>> -			bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
>> -			ttm_bo_move_to_lru_tail(bo, NULL);
>> -		}
>> -
>> -		dma_resv_unlock(bo->base.resv);
>> -	}
>> -	if (bo->base.resv != &bo->base._resv) {
>> -		ttm_bo_flush_all_fences(bo);
>> -		dma_resv_unlock(&bo->base._resv);
>> -	}
>> -
>> -error:
>> -	kref_get(&bo->list_kref);
>> -	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> -	spin_unlock(&ttm_bo_glob.lru_lock);
>> -
>> -	schedule_delayed_work(&bdev->wq,
>> -			      ((HZ / 100) < 1) ? 1 : HZ / 100);
>> -}
>> -
>>  /**
>>   * function ttm_bo_cleanup_refs
>> - * If bo idle, remove from delayed- and lru lists, and unref.
>> - * If not idle, do nothing.
>> + * If bo idle, remove from lru lists, and unref.
>> + * If not idle, block if possible.
>>   *
>>   * Must be called with lru_lock and reservation held, this function
>>   * will drop the lru lock and optionally the reservation lock before returning.
>> @@ -572,14 +484,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> 
>>  	ttm_bo_del_from_lru(bo);
>>  	list_del_init(&bo->ddestroy);
>> -	kref_put(&bo->list_kref, ttm_bo_ref_bug);
>> -
>>  	spin_unlock(&ttm_bo_glob.lru_lock);
>>  	ttm_bo_cleanup_memtype_use(bo);
>> 
>>  	if (unlock_resv)
>>  		dma_resv_unlock(bo->base.resv);
>> 
>> +	ttm_bo_put(bo);
>> +
>>  	return 0;
>>  }
>> 
>> @@ -601,8 +513,9 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
>> 
>>  		bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
>>  				      ddestroy);
>> -		kref_get(&bo->list_kref);
>>  		list_move_tail(&bo->ddestroy, &removed);
>> +		if (!ttm_bo_get_unless_zero(bo))
>> +			continue;
>> 
>>  		if (remove_all || bo->base.resv != &bo->base._resv) {
>>  			spin_unlock(&glob->lru_lock);
>> @@ -617,7 +530,7 @@ static bool ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
>>  			spin_unlock(&glob->lru_lock);
>>  		}
>> 
>> -		kref_put(&bo->list_kref, ttm_bo_release_list);
>> +		ttm_bo_put(bo);
>>  		spin_lock(&glob->lru_lock);
>>  	}
>>  	list_splice_tail(&removed, &bdev->ddestroy);
>> @@ -643,16 +556,68 @@ static void ttm_bo_release(struct kref *kref)
>>  		container_of(kref, struct ttm_buffer_object, kref);
>>  	struct ttm_bo_device *bdev = bo->bdev;
>>  	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
>> +	size_t acc_size = bo->acc_size;
>> +	int ret;
>> 
>> -	if (bo->bdev->driver->release_notify)
>> -		bo->bdev->driver->release_notify(bo);
>> +	if (!bo->deleted) {
>> +		if (bo->bdev->driver->release_notify)
>> +			bo->bdev->driver->release_notify(bo);
>> 
>> -	drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
>> -	ttm_mem_io_lock(man, false);
>> -	ttm_mem_io_free_vm(bo);
>> -	ttm_mem_io_unlock(man);
>> -	ttm_bo_cleanup_refs_or_queue(bo);
>> -	kref_put(&bo->list_kref, ttm_bo_release_list);
>> +		drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
>> +		ttm_mem_io_lock(man, false);
>> +		ttm_mem_io_free_vm(bo);
>> +		ttm_mem_io_unlock(man);
>> +
>> +		ret = ttm_bo_individualize_resv(bo);
>> +		if (ret) {
>> +			/* Last resort, if we fail to allocate memory for the
>> +			 * fences block for the BO to become idle
>> +			 */
>> +			dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
>> +						  30 * HZ);
>> +		}
>> +	}
>> +
>> +	if (!dma_resv_test_signaled_rcu(bo->base.resv, true)) {
>> +		/* The BO is not idle, resurrect it for delayed destroy */
>> +		ttm_bo_flush_all_fences(bo);
>> +		bo->deleted = true;
>> +
>> +		/*
>> +		 * Make NO_EVICT bos immediately available to
>> +		 * shrinkers, now that they are queued for
>> +		 * destruction.
>> +		 */
>> +		if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
>> +			bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
>> +			ttm_bo_move_to_lru_tail(bo, NULL);
> 
> [xh] this should be under lru lock.
> 
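Something like the following is presumably what is wanted here (just a
sketch against the code above, not compile-tested): fold the NO_EVICT
handling into the lru_lock section that re-arms the kref, since
ttm_bo_move_to_lru_tail() manipulates the LRU lists:

        /* Untested sketch: do the flag clearing and LRU move under the lock. */
        spin_lock(&ttm_bo_glob.lru_lock);
        if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
                bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
                ttm_bo_move_to_lru_tail(bo, NULL);
        }
        kref_init(&bo->kref);
        list_add_tail(&bo->ddestroy, &bdev->ddestroy);
        spin_unlock(&ttm_bo_glob.lru_lock);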
>> +		}
>> +
>> +		spin_lock(&ttm_bo_glob.lru_lock);
>> +		kref_init(&bo->kref);
>> +		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> +		spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>> +		schedule_delayed_work(&bdev->wq,
>> +				      ((HZ / 100) < 1) ? 1 : HZ / 100);
>> +		return;
>> +	}
>> +
>> +	spin_lock(&ttm_bo_glob.lru_lock);
>> +	ttm_bo_del_from_lru(bo);
>> +	list_del(&bo->ddestroy);
>> +	spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>> +	ttm_bo_cleanup_memtype_use(bo);
>> +
>> +	BUG_ON(bo->mem.mm_node != NULL);
>> +	ttm_tt_destroy(bo->ttm);
> 
> [xh] we already destroy it in ttm_bo_cleanup_memtype_use().
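For reference, ttm_bo_cleanup_memtype_use() in the pre-series code already
does roughly the following (quoting from memory, so please double-check
against the tree) -- it destroys the TT and clears the pointer, which would
make the extra ttm_tt_destroy() above a redundant no-op on a NULL ttm:

        static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
        {
                if (bo->bdev->driver->move_notify)
                        bo->bdev->driver->move_notify(bo, NULL);

                ttm_tt_destroy(bo->ttm);
                bo->ttm = NULL;
                ttm_bo_mem_put(bo, &bo->mem);
        }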
> 
>> +	atomic_dec(&ttm_bo_glob.bo_count);
>> +	dma_fence_put(bo->moving);
>> +	if (!ttm_bo_uses_embedded_gem_object(bo))
>> +		dma_resv_fini(&bo->base._resv);
>> +	bo->destroy(bo);
>> +	ttm_mem_global_free(&ttm_mem_glob, acc_size);
>>  }
>> 
>>  void ttm_bo_put(struct ttm_buffer_object *bo)
>> @@ -755,8 +720,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>> 
>>  	if (bo->base.resv == ctx->resv) {
>>  		dma_resv_assert_held(bo->base.resv);
>> -		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
>> -		    || !list_empty(&bo->ddestroy))
>> +		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT || bo->deleted)
>>  			ret = true;
>>  		*locked = false;
>>  		if (busy)
>> @@ -837,6 +801,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>  			dma_resv_unlock(bo->base.resv);
>>  			continue;
>>  		}
>> +		if (!ttm_bo_get_unless_zero(bo)) {
>> +			if (locked)
>> +				dma_resv_unlock(bo->base.resv);
>> +			continue;
>> +		}
>>  		break;
>>  	}
>> 
>> @@ -848,21 +817,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>  	}
>> 
>>  	if (!bo) {
>> -		if (busy_bo)
>> -			kref_get(&busy_bo->list_kref);
>> +		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>> +			busy_bo = NULL;
>>  		spin_unlock(&ttm_bo_glob.lru_lock);
>>  		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>  		if (busy_bo)
>> -			kref_put(&busy_bo->list_kref, ttm_bo_release_list);
>> +			ttm_bo_put(busy_bo);
>>  		return ret;
>>  	}
>> 
>> -	kref_get(&bo->list_kref);
>> -
>> -	if (!list_empty(&bo->ddestroy)) {
>> +	if (bo->deleted) {
>>  		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>>  					  ctx->no_wait_gpu, locked);
>> -		kref_put(&bo->list_kref, ttm_bo_release_list);
>> +		ttm_bo_put(bo);
>>  		return ret;
>>  	}
>> 
>> @@ -872,7 +839,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>>  	if (locked)
>>  		ttm_bo_unreserve(bo);
>> 
>> -	kref_put(&bo->list_kref, ttm_bo_release_list);
>> +	ttm_bo_put(bo);
>>  	return ret;
>>  }
>> 
>> @@ -1284,7 +1251,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>>  	bo->destroy = destroy ? destroy : ttm_bo_default_destroy;
>> 
>>  	kref_init(&bo->kref);
>> -	kref_init(&bo->list_kref);
>>  	INIT_LIST_HEAD(&bo->lru);
>>  	INIT_LIST_HEAD(&bo->ddestroy);
>>  	INIT_LIST_HEAD(&bo->swap);
>> @@ -1804,11 +1770,18 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>  	spin_lock(&glob->lru_lock);
>>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>  		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
>> -			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>> -							   NULL)) {
>> -				ret = 0;
>> -				break;
>> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
>> +							    NULL))
>> +				continue;
>> +
>> +			if (!ttm_bo_get_unless_zero(bo)) {
>> +				if (locked)
>> +					dma_resv_unlock(bo->base.resv);
>> +				continue;
>>  			}
> 
> [xh] As you get the bo, when do you put it?
> 
> You only put one bo just before return, BUT you get the bos in the for loop.
> Am I missing something?
> 
> thanks
> xinhui

[xh] Never mind. I missed the break;
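
For anyone else reading along: a reference is only kept on the BO we break
out of the loop with, and that single reference is put on every later path.
ttm_bo_get_unless_zero() is, if I read the header right, just a thin wrapper
around kref_get_unless_zero(), so a zombie whose refcount already dropped to
zero is simply skipped here rather than revived:

        /* From ttm_bo_api.h (modulo version drift): take a reference only
         * if the refcount has not already hit zero. */
        static inline __must_check struct ttm_buffer_object *
        ttm_bo_get_unless_zero(struct ttm_buffer_object *bo)
        {
                if (!kref_get_unless_zero(&bo->kref))
                        return NULL;
                return bo;
        }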
thanks
xinhui

>> +
>> +			ret = 0;
>> +			break;
>>  		}
>>  		if (!ret)
>>  			break;
>> @@ -1819,11 +1792,9 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>  		return ret;
>>  	}
>> 
>> -	kref_get(&bo->list_kref);
>> -
>> -	if (!list_empty(&bo->ddestroy)) {
>> +	if (bo->deleted) {
>>  		ret = ttm_bo_cleanup_refs(bo, false, false, locked);
>> -		kref_put(&bo->list_kref, ttm_bo_release_list);
>> +		ttm_bo_put(bo);
>>  		return ret;
>>  	}
>> 
>> @@ -1877,7 +1848,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>>  	 */
>>  	if (locked)
>>  		dma_resv_unlock(bo->base.resv);
>> -	kref_put(&bo->list_kref, ttm_bo_release_list);
>> +	ttm_bo_put(bo);
>>  	return ret;
>>  }
>>  EXPORT_SYMBOL(ttm_bo_swapout);
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> index 86d152472f38..c8e359ded1df 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> @@ -507,7 +507,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>>  	fbo->base.moving = NULL;
>>  	drm_vma_node_reset(&fbo->base.base.vma_node);
>> 
>> -	kref_init(&fbo->base.list_kref);
>>  	kref_init(&fbo->base.kref);
>>  	fbo->base.destroy = &ttm_transfered_destroy;
>>  	fbo->base.acc_size = 0;
>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> index 66ca49db9633..b9bc1b00142e 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -135,18 +135,14 @@ struct ttm_tt;
>>   * @num_pages: Actual number of pages.
>>   * @acc_size: Accounted size for this object.
>>   * @kref: Reference count of this buffer object. When this refcount reaches
>> - * zero, the object is put on the delayed delete list.
>> - * @list_kref: List reference count of this buffer object. This member is
>> - * used to avoid destruction while the buffer object is still on a list.
>> - * Lru lists may keep one refcount, the delayed delete list, and kref != 0
>> - * keeps one refcount. When this refcount reaches zero,
>> - * the object is destroyed.
>> + * zero, the object is destroyed or put on the delayed delete list.
>>   * @mem: structure describing current placement.
>>   * @persistent_swap_storage: Usually the swap storage is deleted for buffers
>>   * pinned in physical memory. If this behaviour is not desired, this member
>>   * holds a pointer to a persistent shmem object.
>>   * @ttm: TTM structure holding system pages.
>>   * @evicted: Whether the object was evicted without user-space knowing.
>> + * @deleted: True if the object is only a zombie and already deleted.
>>   * @lru: List head for the lru list.
>>   * @ddestroy: List head for the delayed destroy list.
>>   * @swap: List head for swap LRU list.
>> @@ -183,9 +179,7 @@ struct ttm_buffer_object {
>>  	/**
>>  	 * Members not needing protection.
>>  	 */
>> -
>>  	struct kref kref;
>> -	struct kref list_kref;
>> 
>>  	/**
>>  	 * Members protected by the bo::resv::reserved lock.
>> @@ -195,6 +189,7 @@ struct ttm_buffer_object {
>>  	struct file *persistent_swap_storage;
>>  	struct ttm_tt *ttm;
>>  	bool evicted;
>> +	bool deleted;
>> 
>>  	/**
>>  	 * Members protected by the bdev::lru_lock.
>> -- 
>> 2.17.1
>> 
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx