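Avoid constant register reloads while emitting IBs by using a local write pointer and only updating ib->length_dw once, at the end of each helper. While at it, zero the unused tail of the IB with memset32() instead of an open-coded loop.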
Signed-off-by: Tvrtko Ursulin <[email protected]> --- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 72 ++++++++++++++------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1f8866f3f63c..f4621c114f08 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -222,7 +222,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint64_t addr; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job, @@ -231,27 +232,28 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, return r; ib = &job->ibs[0]; + ptr = ib->ptr; addr = amdgpu_bo_gpu_offset(bo); - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x00000018; - ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ - ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + *ptr++ = 0x00000018; + *ptr++ = 0x00000001; /* session info */ + *ptr++ = handle; + *ptr++ = 0x00000000; + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; - ib->ptr[ib->length_dw++] = 0x00000014; - ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ - ib->ptr[ib->length_dw++] = 0x0000001c; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000014; + *ptr++ = 0x00000002; /* task info */ + *ptr++ = 0x0000001c; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; - ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + *ptr++ = 0x00000008; + *ptr++ = 0x08000001; /* op initialize */ - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); r = 
amdgpu_job_submit_direct(job, ring, &f); if (r) @@ -286,7 +288,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint64_t addr; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job, @@ -295,27 +298,28 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, return r; ib = &job->ibs[0]; + ptr = ib->ptr; addr = amdgpu_bo_gpu_offset(bo); - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x00000018; - ib->ptr[ib->length_dw++] = 0x00000001; - ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + *ptr++ = 0x00000018; + *ptr++ = 0x00000001; + *ptr++ = handle; + *ptr++ = 0x00000000; + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; - ib->ptr[ib->length_dw++] = 0x00000014; - ib->ptr[ib->length_dw++] = 0x00000002; - ib->ptr[ib->length_dw++] = 0x0000001c; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000014; + *ptr++ = 0x00000002; + *ptr++ = 0x0000001c; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; - ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ + *ptr++ = 0x00000008; + *ptr++ = 0x08000002; /* op close session */ - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); r = amdgpu_job_submit_direct(job, ring, &f); if (r) -- 2.48.0
