Avoid repeatedly reloading ib->ptr and ib->length_dw while emitting IBs by writing through a local pointer and only updating ib->length_dw once at the end of each helper.
Signed-off-by: Tvrtko Ursulin <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 96 +++++++++++++------------ 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ce318f5de047..62f549dbf827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -444,7 +444,8 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_ib ib_msg; struct dma_fence *f = NULL; uint64_t addr; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, @@ -462,45 +463,46 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; ib = &job->ibs[0]; + ptr = ib->ptr; /* let addr point to page boundary */ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr); /* stitch together an VCE create msg */ - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ - ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ - ib->ptr[ib->length_dw++] = handle; + *ptr++ = 0x0000000c; /* len */ + *ptr++ = 0x00000001; /* session cmd */ + *ptr++ = handle; if ((ring->adev->vce.fw_version >> 24) >= 52) - ib->ptr[ib->length_dw++] = 0x00000040; /* len */ + *ptr++ = 0x00000040; /* len */ else - ib->ptr[ib->length_dw++] = 0x00000030; /* len */ - ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000042; - ib->ptr[ib->length_dw++] = 0x0000000a; - ib->ptr[ib->length_dw++] = 0x00000001; - ib->ptr[ib->length_dw++] = 0x00000080; - ib->ptr[ib->length_dw++] = 0x00000060; - ib->ptr[ib->length_dw++] = 0x00000100; - ib->ptr[ib->length_dw++] = 0x00000100; - ib->ptr[ib->length_dw++] = 0x0000000c; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000030; /* len */ + *ptr++ = 0x01000001; /* create cmd */ + *ptr++ = 0x00000000; + *ptr++ = 
0x00000042; + *ptr++ = 0x0000000a; + *ptr++ = 0x00000001; + *ptr++ = 0x00000080; + *ptr++ = 0x00000060; + *ptr++ = 0x00000100; + *ptr++ = 0x00000100; + *ptr++ = 0x0000000c; + *ptr++ = 0x00000000; if ((ring->adev->vce.fw_version >> 24) >= 52) { - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; } - ib->ptr[ib->length_dw++] = 0x00000014; /* len */ - ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; - ib->ptr[ib->length_dw++] = 0x00000001; + *ptr++ = 0x00000014; /* len */ + *ptr++ = 0x05000005; /* feedback buffer */ + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; + *ptr++ = 0x00000001; - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); r = amdgpu_job_submit_direct(job, ring, &f); amdgpu_ib_free(&ib_msg, f); @@ -534,7 +536,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; - int i, r; + u32 *ptr; + int r; r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, @@ -546,27 +549,28 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, return r; ib = &job->ibs[0]; + ptr = ib->ptr; /* stitch together an VCE destroy msg */ - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ - ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ - ib->ptr[ib->length_dw++] = handle; + *ptr++ = 0x0000000c; /* len */ + *ptr++ = 0x00000001; /* session cmd */ + *ptr++ = handle; - ib->ptr[ib->length_dw++] = 0x00000020; /* len */ - ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ - 
ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */ - ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */ - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000020; /* len */ + *ptr++ = 0x00000002; /* task info */ + *ptr++ = 0xffffffff; /* next task info, set to 0xffffffff if no */ + *ptr++ = 0x00000001; /* destroy session */ + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; /* len */ - ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */ + *ptr++ = 0x00000008; /* len */ + *ptr++ = 0x02000001; /* destroy cmd */ - for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + ib->length_dw = ptr - ib->ptr; + + memset32(ptr, 0, ib_size_dw - ib->length_dw); if (direct) r = amdgpu_job_submit_direct(job, ring, &f); -- 2.48.0
