Module: Mesa
Branch: main
Commit: a94319d29b466c48ff0e00d6f986e36b99d2ed7b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a94319d29b466c48ff0e00d6f986e36b99d2ed7b

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Sun Jan  7 15:11:26 2024 -0500

winsys/amdgpu: don't clear buffer list elements after IB submission

amdgpu_winsys_bo_reference(dst, NULL) sets *dst to NULL, but we never read
*dst again because we set num_buffers to 0. So don't touch the buffer list
elements and only decrement the BO refcount. It makes a difference when you
have 10k BOs.

The CS thread overhead in VP2020/Catia1: 9.23% -> 8.74%

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27060>

---

 src/gallium/include/winsys/radeon_winsys.h | 16 ++++++++++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h  | 14 ++++++++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c  |  8 ++++----
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h
index 22d10cbe370..02f7ee48d6d 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -792,6 +792,22 @@ radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer_lean **dst,
    *dst = src;
 }
 
+/* Same as radeon_bo_reference, but ignore the value in *dst. */
+static inline void
+radeon_bo_set_reference(struct pb_buffer_lean **dst, struct pb_buffer_lean *src)
+{
+   *dst = src;
+   pipe_reference(NULL, &src->reference); /* only increment refcount */
+}
+
+/* Unreference dst, but don't assign anything. */
+static inline void
+radeon_bo_drop_reference(struct radeon_winsys *rws, struct pb_buffer_lean *dst)
+{
+   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
+      rws->buffer_destroy(rws, dst);
+}
+
 /* The following bits describe the heaps managed by slab allocators (pb_slab) and
  * the allocation cache (pb_cache).
  */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 9d2bb04f66e..1fcb233d921 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -265,4 +265,18 @@ amdgpu_winsys_bo_reference(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo **d
                        (struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
 }
 
+/* Same as amdgpu_winsys_bo_reference, but ignore the value in *dst. */
+static inline void
+amdgpu_winsys_bo_set_reference(struct amdgpu_winsys_bo **dst, struct amdgpu_winsys_bo *src)
+{
+   radeon_bo_set_reference((struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
+}
+
+/* Unreference dst, but don't assign anything. */
+static inline void
+amdgpu_winsys_bo_drop_reference(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *dst)
+{
+   radeon_bo_drop_reference(&ws->dummy_ws.base, &dst->base);
+}
+
 #endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 3bef8a1ed28..4b52c99894a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -617,7 +617,7 @@ amdgpu_do_add_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo,
    struct amdgpu_cs_buffer *buffer = &list->buffers[idx];
 
    memset(buffer, 0, sizeof(*buffer));
-   amdgpu_winsys_bo_reference(cs->ws, &buffer->bo, bo);
+   amdgpu_winsys_bo_set_reference(&buffer->bo, bo);
    list->num_buffers++;
 
    unsigned hash = bo->unique_id & (BUFFER_HASHLIST_SIZE-1);
@@ -882,7 +882,7 @@ static void amdgpu_cs_context_cleanup_buffers(struct amdgpu_winsys *ws, struct a
       unsigned num_buffers = cs->buffer_lists[i].num_buffers;
 
       for (unsigned j = 0; j < num_buffers; j++)
-         amdgpu_winsys_bo_reference(ws, &buffers[j].bo, NULL);
+         amdgpu_winsys_bo_drop_reference(ws, buffers[j].bo);
 
       cs->buffer_lists[i].num_buffers = 0;
    }
@@ -1638,10 +1638,10 @@ cleanup:
 
       for (i = 0; i < num_dec_buffers; i++) {
          p_atomic_dec(&buffers[i].bo->num_active_ioctls);
-         amdgpu_winsys_bo_reference(ws, &buffers[i].bo, NULL);
+         amdgpu_winsys_bo_drop_reference(ws, buffers[i].bo);
       }
       for (; i < num_buffers; i++)
-         amdgpu_winsys_bo_reference(ws, &buffers[i].bo, NULL);
+         amdgpu_winsys_bo_drop_reference(ws, buffers[i].bo);
 
       cs->buffer_lists[list].num_buffers = 0;
    }

Reply via email to