On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> Use a per-device LRU lock instead of a single global lock.
> 
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>  drivers/gpu/drm/qxl/qxl_release.c      |  5 +--
>  drivers/gpu/drm/ttm/ttm_bo.c           | 49 ++++++++++++--------------
>  drivers/gpu/drm/ttm/ttm_device.c       | 12 +++----
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>  drivers/gpu/drm/ttm/ttm_resource.c     |  9 +++--
>  include/drm/ttm/ttm_bo_driver.h        |  4 +--
>  include/drm/ttm/ttm_device.h           |  4 +--
>  8 files changed, 43 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>       struct amdgpu_vm_bo_base *bo_base;
>  
>       if (vm->bulk_moveable) {
> -             spin_lock(&ttm_glob.lru_lock);
> +             spin_lock(&adev->mman.bdev.lru_lock);

Could you please explain why you want per-device locks instead of the global lock?

Thanks,
Ray

>               ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&adev->mman.bdev.lru_lock);
>               return;
>       }
>  
>       memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>  
> -     spin_lock(&ttm_glob.lru_lock);
> +     spin_lock(&adev->mman.bdev.lru_lock);
>       list_for_each_entry(bo_base, &vm->idle, vm_status) {
>               struct amdgpu_bo *bo = bo_base->bo;
>  
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>                                               &bo->shadow->tbo.mem,
>                                               &vm->lru_bulk_move);
>       }
> -     spin_unlock(&ttm_glob.lru_lock);
> +     spin_unlock(&adev->mman.bdev.lru_lock);
>  
>       vm->bulk_moveable = true;
>  }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>       dma_fence_init(&release->base, &qxl_fence_ops, &qxl_device_fence_lock,
>                      release->id | 0xf0000000, release->base.seqno);
>       trace_dma_fence_emit(&release->base);
>  
> -     spin_lock(&ttm_glob.lru_lock);
> -
>       list_for_each_entry(entry, &release->bos, head) {
>               bo = entry->bo;
>  
>               dma_resv_add_shared_fence(bo->base.resv, &release->base);
> -             ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +             ttm_bo_move_to_lru_tail_unlocked(bo);
>               dma_resv_unlock(bo->base.resv);
>       }
> -     spin_unlock(&ttm_glob.lru_lock);
>       ww_acquire_fini(&release->ticket);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3673157527ff..2d2ac532987e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>                * reference it any more. The only tricky case is the trylock on
>                * the resv object while holding the lru_lock.
>                */
> -             spin_lock(&ttm_glob.lru_lock);
> +             spin_lock(&bo->bdev->lru_lock);
>               bo->base.resv = &bo->base._resv;
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&bo->bdev->lru_lock);
>       }
>  
>       return r;
> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  
>               if (unlock_resv)
>                       dma_resv_unlock(bo->base.resv);
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&bo->bdev->lru_lock);
>  
>               lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>                                                30 * HZ);
> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>               else if (lret == 0)
>                       return -EBUSY;
>  
> -             spin_lock(&ttm_glob.lru_lock);
> +             spin_lock(&bo->bdev->lru_lock);
>               if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>                       /*
>                        * We raced, and lost, someone else holds the reservation now,
> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                        * delayed destruction would succeed, so just return success
>                        * here.
>                        */
> -                     spin_unlock(&ttm_glob.lru_lock);
> +                     spin_unlock(&bo->bdev->lru_lock);
>                       return 0;
>               }
>               ret = 0;
> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>       if (ret || unlikely(list_empty(&bo->ddestroy))) {
>               if (unlock_resv)
>                       dma_resv_unlock(bo->base.resv);
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&bo->bdev->lru_lock);
>               return ret;
>       }
>  
>       ttm_bo_del_from_lru(bo);
>       list_del_init(&bo->ddestroy);
> -     spin_unlock(&ttm_glob.lru_lock);
> +     spin_unlock(&bo->bdev->lru_lock);
>       ttm_bo_cleanup_memtype_use(bo);
>  
>       if (unlock_resv)
> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>   */
>  bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>  {
> -     struct ttm_global *glob = &ttm_glob;
>       struct list_head removed;
>       bool empty;
>  
>       INIT_LIST_HEAD(&removed);
>  
> -     spin_lock(&glob->lru_lock);
> +     spin_lock(&bdev->lru_lock);
>       while (!list_empty(&bdev->ddestroy)) {
>               struct ttm_buffer_object *bo;
>  
> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>                       continue;
>  
>               if (remove_all || bo->base.resv != &bo->base._resv) {
> -                     spin_unlock(&glob->lru_lock);
> +                     spin_unlock(&bdev->lru_lock);
>                       dma_resv_lock(bo->base.resv, NULL);
>  
> -                     spin_lock(&glob->lru_lock);
> +                     spin_lock(&bdev->lru_lock);
>                       ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>  
>               } else if (dma_resv_trylock(bo->base.resv)) {
>                       ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>               } else {
> -                     spin_unlock(&glob->lru_lock);
> +                     spin_unlock(&bdev->lru_lock);
>               }
>  
>               ttm_bo_put(bo);
> -             spin_lock(&glob->lru_lock);
> +             spin_lock(&bdev->lru_lock);
>       }
>       list_splice_tail(&removed, &bdev->ddestroy);
>       empty = list_empty(&bdev->ddestroy);
> -     spin_unlock(&glob->lru_lock);
> +     spin_unlock(&bdev->lru_lock);
>  
>       return empty;
>  }
> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
>               ttm_bo_flush_all_fences(bo);
>               bo->deleted = true;
>  
> -             spin_lock(&ttm_glob.lru_lock);
> +             spin_lock(&bo->bdev->lru_lock);
>  
>               /*
>                * Make pinned bos immediately available to
> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
>  
>               kref_init(&bo->kref);
>               list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&bo->bdev->lru_lock);
>  
>               schedule_delayed_work(&bdev->wq,
>                                     ((HZ / 100) < 1) ? 1 : HZ / 100);
>               return;
>       }
>  
> -     spin_lock(&ttm_glob.lru_lock);
> +     spin_lock(&bo->bdev->lru_lock);
>       ttm_bo_del_from_lru(bo);
>       list_del(&bo->ddestroy);
> -     spin_unlock(&ttm_glob.lru_lock);
> +     spin_unlock(&bo->bdev->lru_lock);
>  
>       ttm_bo_cleanup_memtype_use(bo);
>       dma_resv_unlock(bo->base.resv);
> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>       unsigned i;
>       int ret;
>  
> -     spin_lock(&ttm_glob.lru_lock);
> +     spin_lock(&bdev->lru_lock);
>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>               list_for_each_entry(bo, &man->lru[i], lru) {
>                       bool busy;
> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>       if (!bo) {
>               if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>                       busy_bo = NULL;
> -             spin_unlock(&ttm_glob.lru_lock);
> +             spin_unlock(&bdev->lru_lock);
>               ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>               if (busy_bo)
>                       ttm_bo_put(busy_bo);
> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>               return ret;
>       }
>  
> -     spin_unlock(&ttm_glob.lru_lock);
> +     spin_unlock(&bdev->lru_lock);
>  
>       ret = ttm_bo_evict(bo, ctx);
>       if (locked)
> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>       mem->mem_type = place->mem_type;
>       mem->placement = place->flags;
>  
> -     spin_lock(&ttm_glob.lru_lock);
> +     spin_lock(&bo->bdev->lru_lock);
>       ttm_bo_move_to_lru_tail(bo, mem, NULL);
> -     spin_unlock(&ttm_glob.lru_lock);
> -
> +     spin_unlock(&bo->bdev->lru_lock);
>       return 0;
>  }
>  
> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>                  gfp_t gfp_flags)
>  {
> -     struct ttm_global *glob = &ttm_glob;
>       bool locked;
>       int ret;
>  
> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  
>       ttm_bo_del_from_lru(bo);
>       /* TODO: Cleanup the locking */
> -     spin_unlock(&glob->lru_lock);
> +     spin_unlock(&bo->bdev->lru_lock);
>  
>       /*
>        * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2096a0fd9c35..85f6975d9872 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>       ttm_pool_mgr_init(num_pages * 50 / 100);
>       ttm_tt_mgr_init();
>  
> -     spin_lock_init(&glob->lru_lock);
>       glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>  
>       if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>  long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>                       gfp_t gfp_flags)
>  {
> -     struct ttm_global *glob = &ttm_glob;
>       struct ttm_resource_manager *man;
>       struct ttm_buffer_object *bo;
>       unsigned i, j;
>       int ret;
>  
> -     spin_lock(&glob->lru_lock);
> +     spin_lock(&bdev->lru_lock);
>       for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>               man = ttm_manager_type(bdev, i);
>               if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>                       }
>               }
>       }
> -     spin_unlock(&glob->lru_lock);
> +     spin_unlock(&bdev->lru_lock);
>       return 0;
>  }
>  EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>  
>       bdev->vma_manager = vma_manager;
>       INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> +     spin_lock_init(&bdev->lru_lock);
>       INIT_LIST_HEAD(&bdev->ddestroy);
>       bdev->dev_mapping = mapping;
>       mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>  
>  void ttm_device_fini(struct ttm_device *bdev)
>  {
> -     struct ttm_global *glob = &ttm_glob;
>       struct ttm_resource_manager *man;
>       unsigned i;
>  
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>       if (ttm_bo_delayed_delete(bdev, true))
>               pr_debug("Delayed destroy list was clean\n");
>  
> -     spin_lock(&glob->lru_lock);
> +     spin_lock(&bdev->lru_lock);
>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>               if (list_empty(&man->lru[0]))
>                       pr_debug("Swap list %d was clean\n", i);
> -     spin_unlock(&glob->lru_lock);
> +     spin_unlock(&bdev->lru_lock);
>  
>       ttm_pool_fini(&bdev->pool);
>       ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>       if (list_empty(list))
>               return;
>  
> -     spin_lock(&ttm_glob.lru_lock);
>       list_for_each_entry(entry, list, head) {
>               struct ttm_buffer_object *bo = entry->bo;
>  
> -             ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +             ttm_bo_move_to_lru_tail_unlocked(bo);
>               dma_resv_unlock(bo->base.resv);
>       }
> -     spin_unlock(&ttm_glob.lru_lock);
>  
>       if (ticket)
>               ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>       if (list_empty(list))
>               return;
>  
> -     spin_lock(&ttm_glob.lru_lock);
>       list_for_each_entry(entry, list, head) {
>               struct ttm_buffer_object *bo = entry->bo;
>  
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>                       dma_resv_add_shared_fence(bo->base.resv, fence);
>               else
>                       dma_resv_add_excl_fence(bo->base.resv, fence);
> -             ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> +             ttm_bo_move_to_lru_tail_unlocked(bo);
>               dma_resv_unlock(bo->base.resv);
>       }
> -     spin_unlock(&ttm_glob.lru_lock);
>       if (ticket)
>               ww_acquire_fini(ticket);
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>               .no_wait_gpu = false,
>               .force_alloc = true
>       };
> -     struct ttm_global *glob = &ttm_glob;
>       struct dma_fence *fence;
>       int ret;
>       unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>        * Can't use standard list traversal since we're unlocking.
>        */
>  
> -     spin_lock(&glob->lru_lock);
> +     spin_lock(&bdev->lru_lock);
>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>               while (!list_empty(&man->lru[i])) {
> -                     spin_unlock(&glob->lru_lock);
> +                     spin_unlock(&bdev->lru_lock);
>                       ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>                                                 NULL);
>                       if (ret)
>                               return ret;
> -                     spin_lock(&glob->lru_lock);
> +                     spin_lock(&bdev->lru_lock);
>               }
>       }
> -     spin_unlock(&glob->lru_lock);
> +     spin_unlock(&bdev->lru_lock);
>  
>       spin_lock(&man->move_lock);
>       fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>  static inline void
>  ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>  {
> -     spin_lock(&ttm_glob.lru_lock);
> +     spin_lock(&bo->bdev->lru_lock);
>       ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> -     spin_unlock(&ttm_glob.lru_lock);
> +     spin_unlock(&bo->bdev->lru_lock);
>  }
>  
>  static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
>        */
>  
>       struct page *dummy_read_page;
> -     spinlock_t lru_lock;
>  
>       /**
>        * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
>       struct ttm_pool pool;
>  
>       /*
> -      * Protected by the global:lru lock.
> +      * Protection for the per-manager LRU and ddestroy lists.
>        */
> +     spinlock_t lru_lock;
>       struct list_head ddestroy;
>  
>       /*
> -- 
> 2.25.1
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
