[Public] When a bo_list handle is reused across multiple command submissions, reusing the previously allocated HMM range structures avoids redundant allocate/free operations on each submission. Doing it this way benefits the amdgpu_cs_parser_bos time, especially for a large bo_list
Signed-off-by: Wang, Beyond <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h | 2 ++ 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 66fb37b64388..9c662369d292 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -51,8 +51,10 @@ static void amdgpu_bo_list_free(struct kref *ref) refcount); struct amdgpu_bo_list_entry *e; - amdgpu_bo_list_for_each_entry(e, list) + amdgpu_bo_list_for_each_entry(e, list) { + amdgpu_hmm_range_free(e->range); amdgpu_bo_unref(&e->bo); + } call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ecdfe6cb36cc..fc195fa2c0c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -891,9 +891,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - e->range = amdgpu_hmm_range_alloc(NULL); - if (unlikely(!e->range)) - return -ENOMEM; + if (!e->range) { + e->range = amdgpu_hmm_range_alloc(NULL); + if (unlikely(!e->range)) + return -ENOMEM; + } else { + amdgpu_hmm_range_reset(e->range); + } r = amdgpu_ttm_tt_get_user_pages(bo, e->range); if (r) @@ -995,8 +999,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - amdgpu_hmm_range_free(e->range); - e->range = NULL; + amdgpu_hmm_range_reset(e->range); } mutex_unlock(&p->bo_list->bo_list_mutex); return r; @@ -1327,8 +1330,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { r |= 
!amdgpu_hmm_range_valid(e->range); - amdgpu_hmm_range_free(e->range); - e->range = NULL; + amdgpu_hmm_range_reset(e->range); } if (r) { r = -EAGAIN; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 90d26d820bac..5b72ea5a3db7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -273,6 +273,25 @@ struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo) return range; } +/** + * amdgpu_hmm_range_reset - reset an AMDGPU HMM range + * @range: pointer to the range object to reset + * + * Free the hmm_pfns associated with range, but keep the allocated struct range + * for reuse, in order to avoid repeated allocation/free overhead when the same + * bo_list handle is reused across multiple command submissions. + * + * Return: void + */ +void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range) +{ + if (!range) + return; + + kvfree(range->hmm_range.hmm_pfns); + range->hmm_range.hmm_pfns = NULL; +} + /** * amdgpu_hmm_range_free - release an AMDGPU HMM range * @range: pointer to the range object to free diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index 140bc9cd57b4..558f3f22c617 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -44,6 +44,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, #if defined(CONFIG_HMM_MIRROR) bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range); struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo); +void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range); void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range); int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_hmm_unregister(struct amdgpu_bo *bo); @@ -67,6 +68,7 @@ static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo * return NULL; } +static inline void 
amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range) {} static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {} #endif -- 2.43.0
