When a bo_list handle is reused across multiple command submissions, reusing
the HMM range structures already allocated for its entries avoids redundant
allocate/free operations on each submission.
This reduces the time spent in amdgpu_cs_parser_bos, especially for large
bo_lists.

Signed-off-by: Wang, Beyond <[email protected]>
---
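The lifecycle after this change is: allocate the range the first time an
entry is submitted, drop only its pfn payload between submissions, and free
the structure itself when the bo_list is destroyed. The small userspace
sketch below models that pattern with illustrative names
(range_alloc/range_get_pages/range_reset/range_free stand in for the
amdgpu_hmm_range_* helpers and amdgpu_ttm_tt_get_user_pages()); it is a
sketch of the intended reuse, not the kernel code.

/*
 * Userspace model of the reuse pattern (illustrative names only): the
 * container is allocated once, only the per-submission pfn payload is
 * dropped by reset, and the container itself is freed when the bo_list
 * goes away.
 */
#include <stdio.h>
#include <stdlib.h>

struct range {                  /* stands in for struct amdgpu_hmm_range */
        unsigned long *pfns;    /* stands in for hmm_range.hmm_pfns */
        size_t npages;
};

static struct range *range_alloc(void)
{
        return calloc(1, sizeof(struct range));
}

static int range_get_pages(struct range *r, size_t npages)
{
        /* per-submission work, comparable to amdgpu_ttm_tt_get_user_pages() */
        r->pfns = calloc(npages, sizeof(*r->pfns));
        r->npages = npages;
        return r->pfns ? 0 : -1;
}

static void range_reset(struct range *r)
{
        if (!r)
                return;
        free(r->pfns);          /* drop the payload ...             */
        r->pfns = NULL;         /* ... but keep the container alive */
}

static void range_free(struct range *r)
{
        if (!r)
                return;
        range_reset(r);
        free(r);
}

int main(void)
{
        struct range *r = NULL;
        int submit;

        for (submit = 0; submit < 3; submit++) {
                if (!r)
                        r = range_alloc();      /* first submission: allocate */
                else
                        range_reset(r);         /* later submissions: reuse */
                if (!r || range_get_pages(r, 16)) {
                        range_free(r);
                        return 1;
                }
                printf("submission %d mapped %zu pages\n", submit, r->npages);
        }
        range_free(r);  /* only at bo_list teardown */
        return 0;
}

This mirrors the change in amdgpu_cs_parser_bos, which now calls
amdgpu_hmm_range_reset() on the submit and error paths instead of freeing,
while the final amdgpu_hmm_range_free() moves into amdgpu_bo_list_free().
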
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 16 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c     | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h     |  2 ++
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 66fb37b64388..9c662369d292 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -51,8 +51,10 @@ static void amdgpu_bo_list_free(struct kref *ref)
                           refcount);
    struct amdgpu_bo_list_entry *e;

-   amdgpu_bo_list_for_each_entry(e, list)
+   amdgpu_bo_list_for_each_entry(e, list) {
+       amdgpu_hmm_range_free(e->range);
        amdgpu_bo_unref(&e->bo);
+   }
    call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ecdfe6cb36cc..fc195fa2c0c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -891,9 +891,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        bool userpage_invalidated = false;
        struct amdgpu_bo *bo = e->bo;

-       e->range = amdgpu_hmm_range_alloc(NULL);
-       if (unlikely(!e->range))
-           return -ENOMEM;
+       if (!e->range) {
+           e->range = amdgpu_hmm_range_alloc(NULL);
+           if (unlikely(!e->range))
+               return -ENOMEM;
+       } else {
+           amdgpu_hmm_range_reset(e->range);
+       }

        r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
        if (r)
@@ -995,8 +999,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,

 out_free_user_pages:
    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-       amdgpu_hmm_range_free(e->range);
-       e->range = NULL;
+       amdgpu_hmm_range_reset(e->range);
    }
    mutex_unlock(&p->bo_list->bo_list_mutex);
    return r;
@@ -1327,8 +1330,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
    r = 0;
    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
        r |= !amdgpu_hmm_range_valid(e->range);
-       amdgpu_hmm_range_free(e->range);
-       e->range = NULL;
+       amdgpu_hmm_range_reset(e->range);
    }
    if (r) {
        r = -EAGAIN;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 90d26d820bac..5b72ea5a3db7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -273,6 +273,25 @@ struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
    return range;
 }

+/**
+ * amdgpu_hmm_range_reset - reset an AMDGPU HMM range
+ * @range: pointer to the range object to reset
+ *
+ * Free the hmm_pfns associated with @range, but keep the allocated range
+ * object for reuse, in order to avoid repeated allocation/free overhead when
+ * the same bo_list handle is reused across multiple command submissions.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range)
+{
+   if (!range)
+       return;
+
+   kvfree(range->hmm_range.hmm_pfns);
+   range->hmm_range.hmm_pfns = NULL;
+}
+
 /**
  * amdgpu_hmm_range_free - release an AMDGPU HMM range
  * @range: pointer to the range object to free
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index 140bc9cd57b4..558f3f22c617 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -44,6 +44,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 #if defined(CONFIG_HMM_MIRROR)
 bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
 struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range);
 void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
 int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
@@ -67,6 +68,7 @@ static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *
    return NULL;
 }

+static inline void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range) {}
 static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
 #endif

--
2.43.0
