Avoid constant register reloads while emitting UVD 6 IBs by using a local write pointer and only updating the size at the end of each helper.
Signed-off-by: Tvrtko Ursulin <[email protected]> --- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 72 ++++++++++++++------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index ceb94bbb03a4..0175bdae6f5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -214,7 +214,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint64_t addr; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job, @@ -223,27 +224,28 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle return r; ib = &job->ibs[0]; + ptr = ib->ptr; addr = amdgpu_bo_gpu_offset(bo); - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x00000018; - ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ - ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + *ptr++ = 0x00000018; + *ptr++ = 0x00000001; /* session info */ + *ptr++ = handle; + *ptr++ = 0x00010000; + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; - ib->ptr[ib->length_dw++] = 0x00000014; - ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ - ib->ptr[ib->length_dw++] = 0x0000001c; - ib->ptr[ib->length_dw++] = 0x00000001; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000014; + *ptr++ = 0x00000002; /* task info */ + *ptr++ = 0x0000001c; + *ptr++ = 0x00000001; + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; - ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + *ptr++ = 0x00000008; + *ptr++ = 0x08000001; /* op initialize */ - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); r = 
amdgpu_job_submit_direct(job, ring, &f); if (r) @@ -279,7 +281,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint64_t addr; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job, @@ -288,27 +291,28 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, return r; ib = &job->ibs[0]; + ptr = ib->ptr; addr = amdgpu_bo_gpu_offset(bo); - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x00000018; - ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ - ib->ptr[ib->length_dw++] = handle; - ib->ptr[ib->length_dw++] = 0x00010000; - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; + *ptr++ = 0x00000018; + *ptr++ = 0x00000001; /* session info */ + *ptr++ = handle; + *ptr++ = 0x00010000; + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; - ib->ptr[ib->length_dw++] = 0x00000014; - ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ - ib->ptr[ib->length_dw++] = 0x0000001c; - ib->ptr[ib->length_dw++] = 0x00000001; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000014; + *ptr++ = 0x00000002; /* task info */ + *ptr++ = 0x0000001c; + *ptr++ = 0x00000001; + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; - ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ + *ptr++ = 0x00000008; + *ptr++ = 0x08000002; /* op close session */ - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); r = amdgpu_job_submit_direct(job, ring, &f); if (r) -- 2.48.0
