Avoid constant register reloads while emitting IBs by using a local write
pointer and only updating ib->length_dw once at the end of each helper.

While at it, replace the open-coded zero-padding loop with memset32().

Signed-off-by: Tvrtko Ursulin <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 72 ++++++++++++++-------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 1f8866f3f63c..f4621c114f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -222,7 +222,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
        uint64_t addr;
-       int i, r;
+       u32 *ptr;
+       int r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
                                     AMDGPU_IB_POOL_DIRECT, &job,
@@ -231,27 +232,28 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
                return r;
 
        ib = &job->ibs[0];
+       ptr = ib->ptr;
        addr = amdgpu_bo_gpu_offset(bo);
 
-       ib->length_dw = 0;
-       ib->ptr[ib->length_dw++] = 0x00000018;
-       ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
-       ib->ptr[ib->length_dw++] = handle;
-       ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
-       ib->ptr[ib->length_dw++] = addr;
+       *ptr++ = 0x00000018;
+       *ptr++ = 0x00000001; /* session info */
+       *ptr++ = handle;
+       *ptr++ = 0x00000000;
+       *ptr++ = upper_32_bits(addr);
+       *ptr++ = addr;
 
-       ib->ptr[ib->length_dw++] = 0x00000014;
-       ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
-       ib->ptr[ib->length_dw++] = 0x0000001c;
-       ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = 0x00000000;
+       *ptr++ = 0x00000014;
+       *ptr++ = 0x00000002; /* task info */
+       *ptr++ = 0x0000001c;
+       *ptr++ = 0x00000000;
+       *ptr++ = 0x00000000;
 
-       ib->ptr[ib->length_dw++] = 0x00000008;
-       ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+       *ptr++ = 0x00000008;
+       *ptr++ = 0x08000001; /* op initialize */
 
-       for (i = ib->length_dw; i < ib_size_dw; ++i)
-               ib->ptr[i] = 0x0;
+       ib->length_dw = ptr - ib->ptr;
+
+       memset32(ptr, 0, ib_size_dw - ib->length_dw);
 
        r = amdgpu_job_submit_direct(job, ring, &f);
        if (r)
@@ -286,7 +288,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
        struct amdgpu_ib *ib;
        struct dma_fence *f = NULL;
        uint64_t addr;
-       int i, r;
+       u32 *ptr;
+       int r;
 
        r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
                                     AMDGPU_IB_POOL_DIRECT, &job,
@@ -295,27 +298,28 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
                return r;
 
        ib = &job->ibs[0];
+       ptr = ib->ptr;
        addr = amdgpu_bo_gpu_offset(bo);
 
-       ib->length_dw = 0;
-       ib->ptr[ib->length_dw++] = 0x00000018;
-       ib->ptr[ib->length_dw++] = 0x00000001;
-       ib->ptr[ib->length_dw++] = handle;
-       ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
-       ib->ptr[ib->length_dw++] = addr;
+       *ptr++ = 0x00000018;
+       *ptr++ = 0x00000001;
+       *ptr++ = handle;
+       *ptr++ = 0x00000000;
+       *ptr++ = upper_32_bits(addr);
+       *ptr++ = addr;
 
-       ib->ptr[ib->length_dw++] = 0x00000014;
-       ib->ptr[ib->length_dw++] = 0x00000002;
-       ib->ptr[ib->length_dw++] = 0x0000001c;
-       ib->ptr[ib->length_dw++] = 0x00000000;
-       ib->ptr[ib->length_dw++] = 0x00000000;
+       *ptr++ = 0x00000014;
+       *ptr++ = 0x00000002;
+       *ptr++ = 0x0000001c;
+       *ptr++ = 0x00000000;
+       *ptr++ = 0x00000000;
 
-       ib->ptr[ib->length_dw++] = 0x00000008;
-       ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+       *ptr++ = 0x00000008;
+       *ptr++ = 0x08000002; /* op close session */
 
-       for (i = ib->length_dw; i < ib_size_dw; ++i)
-               ib->ptr[i] = 0x0;
+       ib->length_dw = ptr - ib->ptr;
+
+       memset32(ptr, 0, ib_size_dw - ib->length_dw);
 
        r = amdgpu_job_submit_direct(job, ring, &f);
        if (r)
-- 
2.48.0

Reply via email to