I know that previously it would only issue a warning when the debug option is enabled. Removing that is OK with me. I only helped Prike draft your idea, and Prike is trying this patch on his side. The latest feedback he gave me is that first_bo is always NULL, so the code never runs into the busy path, which is very confusing to me; he said he is debugging that.
-David

-------- Original Message --------
Subject: Re: [PATCH 1/2] drm/ttm: fix busy memory to fail other user v7
From: "Koenig, Christian"
To: "Zhou, David(ChunMing)" ,"Liang, Prike" ,dri-devel@lists.freedesktop.org
CC:

I've found one more problem with this. With lockdep enabled I get a warning
because ttm_eu_reserve_buffers() has called ww_acquire_done() on the ticket
(which essentially means we are done, no more locking with that ticket).

The simplest solution is probably to just remove the call to
ww_acquire_done() from ttm_eu_reserve_buffers().

Christian.

Am 07.05.19 um 13:45 schrieb Chunming Zhou:
> A heavy GPU job can occupy memory for a long time, which leads other users
> to fail to get memory.
>
> This basically picks up Christian's idea:
>
> 1. Reserve the BO in DC using a ww_mutex ticket (trivial).
> 2. If we then run into this EBUSY condition in TTM, check if the BO we need
> memory for (or rather the ww_mutex of its reservation object) has a ticket
> assigned.
> 3. If we have a ticket, we grab a reference to the first BO on the LRU, drop
> the LRU lock and try to grab the reservation lock with the ticket.
> 4. If getting the reservation lock with the ticket succeeded, we check if the
> BO is still the first one on the LRU in question (the BO could have moved).
> 5. If the BO is still the first one on the LRU in question, we try to evict it
> as we would evict any other BO.
> 6. If any of the "if"s above fail, we just back off and return -EBUSY.
>
> v2: fix some minor checks
> v3: address Christian's v2 comments
> v4: fix some missing
> v5: handle first_bo unlock and bo_get/put
> v6: abstract a unified iterate function, and handle all possible use cases,
> not only pinned BOs
> v7: pass request bo->resv to ttm_bo_evict_first
>
> Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
> Signed-off-by: Chunming Zhou <david1.z...@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 111 +++++++++++++++++++++++++++++------
>  1 file changed, 94 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8502b3ed2d88..f5e6328e4a57 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -766,11 +766,13 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   * b. Otherwise, trylock it.
>   */
>  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> -                                        struct ttm_operation_ctx *ctx, bool *locked)
> +                                        struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
>  {
>          bool ret = false;
>  
>          *locked = false;
> +        if (busy)
> +                *busy = false;
>          if (bo->resv == ctx->resv) {
>                  reservation_object_assert_held(bo->resv);
>                  if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
> @@ -779,35 +781,46 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>          } else {
>                  *locked = reservation_object_trylock(bo->resv);
>                  ret = *locked;
> +                if (!ret && busy)
> +                        *busy = true;
>          }
>  
>          return ret;
>  }
>  
> -static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -                               uint32_t mem_type,
> -                               const struct ttm_place *place,
> -                               struct ttm_operation_ctx *ctx)
> +static struct ttm_buffer_object*
> +ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
> +                         struct ttm_mem_type_manager *man,
> +                         const struct ttm_place *place,
> +                         struct ttm_operation_ctx *ctx,
> +                         struct ttm_buffer_object **first_bo,
> +                         bool *locked)
>  {
> -        struct ttm_bo_global *glob = bdev->glob;
> -        struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>          struct ttm_buffer_object *bo = NULL;
> -        bool locked = false;
> -        unsigned i;
> -        int ret;
> +        int i;
>  
> -        spin_lock(&glob->lru_lock);
> +        if (first_bo)
> +                *first_bo = NULL;
>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                  list_for_each_entry(bo, &man->lru[i], lru) {
> -                        if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
> +                        bool busy = false;
> +
> +                        if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
> +                                                            &busy)) {
> +                                if (first_bo && !(*first_bo) && busy) {
> +                                        ttm_bo_get(bo);
> +                                        *first_bo = bo;
> +                                }
>                                  continue;
> +                        }
>  
>                          if (place && !bdev->driver->eviction_valuable(bo,
>                                                                        place)) {
> -                                if (locked)
> +                                if (*locked)
>                                          reservation_object_unlock(bo->resv);
>                                  continue;
>                          }
> +
>                          break;
>                  }
>  
> @@ -818,9 +831,67 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>                  bo = NULL;
>          }
>  
> +        return bo;
> +}
> +
> +static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> +                               uint32_t mem_type,
> +                               const struct ttm_place *place,
> +                               struct ttm_operation_ctx *ctx,
> +                               struct reservation_object *request_resv)
> +{
> +        struct ttm_bo_global *glob = bdev->glob;
> +        struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> +        struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
> +        bool locked = false;
> +        int ret;
> +
> +        spin_lock(&glob->lru_lock);
> +        bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
> +                                      &locked);
>          if (!bo) {
> +                struct ttm_operation_ctx busy_ctx;
> +
>                  spin_unlock(&glob->lru_lock);
> -                return -EBUSY;
> +                /* check if other user occupy memory too long time */
> +                if (!first_bo || !request_resv || !request_resv->lock.ctx) {
> +                        if (first_bo)
> +                                ttm_bo_put(first_bo);
> +                        return -EBUSY;
> +                }
> +                if (first_bo->resv == request_resv) {
> +                        ttm_bo_put(first_bo);
> +                        return -EBUSY;
> +                }
> +                if (ctx->interruptible)
> +                        ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
> +                                                          request_resv->lock.ctx);
> +                else
> +                        ret = ww_mutex_lock(&first_bo->resv->lock,
> +                                            request_resv->lock.ctx);
> +                if (ret) {
> +                        ttm_bo_put(first_bo);
> +                        return ret;
> +                }
> +                spin_lock(&glob->lru_lock);
> +                /* previous busy resv lock is held by above, idle now,
> +                 * so let them evictable.
> +                 */
> +                busy_ctx.interruptible = ctx->interruptible;
> +                busy_ctx.no_wait_gpu = ctx->no_wait_gpu;
> +                busy_ctx.resv = first_bo->resv;
> +                busy_ctx.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT;
> +
> +                bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
> +                                              &locked);
> +                if (bo && (bo->resv == first_bo->resv))
> +                        locked = true;
> +                else if (bo)
> +                        ww_mutex_unlock(&first_bo->resv->lock);
> +                if (!bo) {
> +                        spin_unlock(&glob->lru_lock);
> +                        ttm_bo_put(first_bo);
> +                        return -EBUSY;
> +                }
>          }
>  
>          kref_get(&bo->list_kref);
> @@ -829,11 +900,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>                  ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
>                                            ctx->no_wait_gpu, locked);
>                  kref_put(&bo->list_kref, ttm_bo_release_list);
> +                if (first_bo)
> +                        ttm_bo_put(first_bo);
>                  return ret;
>          }
>  
>          ttm_bo_del_from_lru(bo);
>          spin_unlock(&glob->lru_lock);
> +        if (first_bo)
> +                ttm_bo_put(first_bo);
>  
>          ret = ttm_bo_evict(bo, ctx);
>          if (locked) {
> @@ -907,7 +982,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>                          return ret;
>                  if (mem->mm_node)
>                          break;
> -                ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
> +                ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
>                  if (unlikely(ret != 0))
>                          return ret;
>          } while (1);
> @@ -1413,7 +1488,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                  while (!list_empty(&man->lru[i])) {
>                          spin_unlock(&glob->lru_lock);
> -                        ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> +                        ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
> +                                                  NULL);
>                          if (ret)
>                                  return ret;
>                          spin_lock(&glob->lru_lock);
> @@ -1784,7 +1860,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
>          spin_lock(&glob->lru_lock);
>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>                  list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> -                        if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
> +                        if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> +                                                           NULL)) {
>                                  ret = 0;
>                                  break;
>                          }
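For reference, here is a minimal, hypothetical caller-side sketch of step 1 of the idea above: reserving the BO with a ww_mutex ticket before validation, so that request_resv->lock.ctx is non-NULL by the time ttm_mem_evict_first() hits the EBUSY case. The helper my_validate_with_ticket() is an invented name and not part of the patch; only the reservation/ww_mutex/TTM calls are existing kernel APIs, and a real caller (DC, or ttm_eu_reserve_buffers() for a whole list of BOs) would do more than this.

/*
 * Hypothetical sketch, not part of the patch: reserve a single BO with a
 * ww_acquire_ctx ticket so TTM can use that ticket to lock a busy first_bo
 * during eviction.
 */
#include <linux/reservation.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_placement.h>

static int my_validate_with_ticket(struct ttm_buffer_object *bo,
                                   struct ttm_placement *placement)
{
        struct ttm_operation_ctx ctx = { .interruptible = true };
        struct ww_acquire_ctx ticket;
        int ret;

        ww_acquire_init(&ticket, &reservation_ww_class);

        /* Step 1: reserve the BO with the ticket; bo->resv->lock.ctx becomes &ticket. */
        ret = reservation_object_lock(bo->resv, &ticket);
        if (ret)
                goto out_fini;

        /*
         * Deliberately no ww_acquire_done() before validation: the ticket has
         * to stay open so TTM can still take first_bo->resv->lock with it in
         * the busy path of ttm_mem_evict_first().
         */
        ret = ttm_bo_validate(bo, placement, &ctx);

        reservation_object_unlock(bo->resv);
out_fini:
        ww_acquire_fini(&ticket);
        return ret;
}

This also illustrates the lockdep point above: if ww_acquire_done() had already been called on the ticket (as ttm_eu_reserve_buffers() currently does), the later ww_mutex_lock() on first_bo->resv would trigger the warning.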
_______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel