Signed-off-by: Alex Xie <alexbin....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 110 +++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   1 -
 3 files changed, 76 insertions(+), 40 deletions(-)
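
The heart of the change is a read / recompute / compare-and-swap retry loop
in place of the spinlock.  As a rough userspace analogue of the accum_us loop
below (a sketch only: C11 stdatomic stands in for the kernel's atomic64_*
helpers, and US_UPPER_BOUND/accum_add_clamped are illustrative names, not part
of the amdgpu code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define US_UPPER_BOUND 200000LL         /* 200 ms budget, as in the patch */

static _Atomic int64_t accum_us;

/* Add increment_us to accum_us with the upper/lower clamps applied, without
 * a lock: read, recompute, compare-and-swap, and retry only when another
 * thread updated accum_us between our read and our cmpxchg.
 */
static int64_t accum_add_clamped(int64_t increment_us, int64_t min_us)
{
        int64_t old = atomic_load(&accum_us);
        int64_t val;

        do {
                val = old + increment_us;
                if (val > US_UPPER_BOUND)
                        val = US_UPPER_BOUND;
                if (val < min_us)
                        val = min_us;
                /* On failure, 'old' is refreshed with the racing value. */
        } while (!atomic_compare_exchange_weak(&accum_us, &old, val));

        return val;
}

int main(void)
{
        accum_add_clamped(500000, 0);   /* clamped to the 200 ms budget */
        printf("accum_us = %lld\n", (long long)accum_add_clamped(0, 0));
        return 0;
}

Retrying with the value the cmpxchg returned keeps the loop short: under
contention the next iteration usually succeeds, which is the same reasoning
the in-kernel comments give below.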

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7caf514..21d318b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1588,9 +1588,8 @@ struct amdgpu_device {
 
        /* data for buffer migration throttling */
        struct {
-               spinlock_t              lock;
-               s64                     last_update_us;
-               s64                     accum_us; /* accumulated microseconds */
+               atomic64_t              last_update_us;
+               atomic64_t              accum_us; /* accumulated microseconds */
                u32                     log2_max_MBps;
        } mm_stats;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82131d7..7b6f42e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -225,6 +225,9 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
        s64 time_us, increment_us;
        u64 max_bytes;
        u64 free_vram, total_vram, used_vram;
+       s64 old_update_us, head_time_us;
+       s64 accum_us;
+       s64 old_accum_us, head_accum_us;
 
        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
         * throttling.
@@ -242,47 +245,83 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
        used_vram = atomic64_read(&adev->vram_usage);
        free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
-       spin_lock(&adev->mm_stats.lock);
-
        /* Increase the amount of accumulated us. */
-       time_us = ktime_to_us(ktime_get());
-       increment_us = time_us - adev->mm_stats.last_update_us;
-       adev->mm_stats.last_update_us = time_us;
-       adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
-                                      us_upper_bound);
-
-       /* This prevents the short period of low performance when the VRAM
-        * usage is low and the driver is in debt or doesn't have enough
-        * accumulated us to fill VRAM quickly.
-        *
-        * The situation can occur in these cases:
-        * - a lot of VRAM is freed by userspace
-        * - the presence of a big buffer causes a lot of evictions
-        *   (solution: split buffers into smaller ones)
-        *
-        * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-        * accum_us to a positive number.
-        */
-       if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
-               s64 min_us;
-
-               /* Be more aggresive on dGPUs. Try to fill a portion of free
-                * VRAM now.
-                */
-               if (!(adev->flags & AMD_IS_APU))
-                       min_us = bytes_to_us(adev, free_vram / 4);
+       old_update_us = atomic64_read(&adev->mm_stats.last_update_us);
+       for (;;) {
+               time_us = ktime_to_us(ktime_get());
+               head_time_us = atomic64_cmpxchg(&adev->mm_stats.last_update_us,
+                                               old_update_us, time_us);
+
+               if (likely(head_time_us == old_update_us))
+                       /*
+                        * No other task modified adev->mm_stats.last_update_us.
+                        * Update was successful.
+                        */
+                       break;
                else
-                       min_us = 0; /* Reset accum_us on APUs. */
+                       /* Another task modified the value after we read it.
+                        * Such contention is rare, so simply retry; in most
+                        * cases a single retry does the job.
+                        * See atomic64_add_unless() for a similar pattern.
+                        */
+                       old_update_us = head_time_us;
+       }
+       increment_us = time_us - old_update_us;
+
+       old_accum_us = atomic64_read(&adev->mm_stats.accum_us);
+
+       for (;;) {
+               accum_us = min(old_accum_us + increment_us, us_upper_bound);
+
+               /* This prevents the short period of low performance when the
+                * VRAM usage is low and the driver is in debt or doesn't have
+                * enough accumulated us to fill VRAM quickly.
+                *
+                * The situation can occur in these cases:
+                * - a lot of VRAM is freed by userspace
+                * - the presence of a big buffer causes a lot of evictions
+                *   (solution: split buffers into smaller ones)
+                *
+                * If 128 MB or 1/8th of VRAM is free, start filling it now by
+                * setting accum_us to a positive number.
+                */
+               if (free_vram >= 128 * 1024 * 1024 ||
+                       free_vram >= total_vram / 8) {
+                       s64 min_us;
+
+                       /* Be more aggressive on dGPUs. Try to fill a portion of
+                        * free VRAM now.
+                        */
+                       if (!(adev->flags & AMD_IS_APU))
+                               min_us = bytes_to_us(adev, free_vram / 4);
+                       else
+                               min_us = 0; /* Reset accum_us on APUs. */
+
+                       accum_us = max(min_us, accum_us);
+               }
+
+               head_accum_us = atomic64_cmpxchg(&adev->mm_stats.accum_us,
+                                                       old_accum_us, accum_us);
 
-               adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+               if (likely(head_accum_us == old_accum_us))
+                       /*
+                        * No other task modified adev->mm_stats.accum_us.
+                        * Update was successful.
+                        */
+                       break;
+               else
+                       /* Another task modified the value after we read it.
+                        * Such contention is rare, so simply retry; in most
+                        * cases a single retry does the job.
+                        * See atomic64_add_unless() for a similar pattern.
+                        */
+                       old_accum_us = head_accum_us;
        }
 
        /* This returns 0 if the driver is in debt to disallow (optional)
         * buffer moves.
         */
-       max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
-
-       spin_unlock(&adev->mm_stats.lock);
+       max_bytes = us_to_bytes(adev, accum_us);
        return max_bytes;
 }
 
@@ -292,9 +331,8 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
  */
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
 {
-       spin_lock(&adev->mm_stats.lock);
-       adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
-       spin_unlock(&adev->mm_stats.lock);
+       s64 i = bytes_to_us(adev, num_bytes);
+       atomic64_sub(i, &adev->mm_stats.accum_us);
 }
 
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ff90f78..9e9d592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2117,7 +2117,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        spin_lock_init(&adev->didt_idx_lock);
        spin_lock_init(&adev->gc_cac_idx_lock);
        spin_lock_init(&adev->audio_endpt_idx_lock);
-       spin_lock_init(&adev->mm_stats.lock);
 
        INIT_LIST_HEAD(&adev->shadow_list);
        mutex_init(&adev->shadow_list_lock);
-- 
2.7.4
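
For the last_update_us half of the change the shape is the same, except the
new value is simply the current timestamp and the interesting output is the
elapsed time.  A self-contained userspace sketch of that pattern (C11
stdatomic and POSIX clock_gettime() standing in for atomic64_cmpxchg() and
ktime_get(); now_us/advance_timestamp are illustrative names only):

#define _POSIX_C_SOURCE 199309L
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static _Atomic int64_t last_update_us;

static int64_t now_us(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

/* Publish the current timestamp and return the time elapsed since the
 * previous successful update, retrying only if another thread raced us.
 */
static int64_t advance_timestamp(void)
{
        int64_t old = atomic_load(&last_update_us);
        int64_t now;

        do {
                now = now_us();
                /* On failure, 'old' is refreshed with the racing value. */
        } while (!atomic_compare_exchange_weak(&last_update_us, &old, now));

        return now - old;
}

int main(void)
{
        advance_timestamp();                    /* first call: baseline */
        printf("elapsed: %lld us\n", (long long)advance_timestamp());
        return 0;
}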
