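Avoid constantly reloading registers while emitting IBs by using a local write pointer and only updating the IB length (ib->length_dw) once, at the end of each helper. While at it, replace the open-coded loop that copies the compute shader into the IB with memcpy().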
Signed-off-by: Tvrtko Ursulin <[email protected]> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 51 +++++++++++++------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 8058ea91ecaf..884c091f40ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -354,6 +354,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, int r, i; uint32_t total_size, shader_offset; u64 gpu_addr; + u32 *ptr; total_size = (regs_size * 3 + 4 + 5 + 5) * 4; total_size = ALIGN(total_size, 256); @@ -370,43 +371,43 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, } /* load the compute shaders */ - for (i = 0; i < shader_size/sizeof(u32); i++) - ib->ptr[i + (shader_offset / 4)] = shader_ptr[i]; + memcpy(&ib->ptr[shader_offset / sizeof(u32)], shader_ptr, shader_size); - /* init the ib length to 0 */ - ib->length_dw = 0; + ptr = ib->ptr; /* write the register state for the compute dispatch */ for (i = 0; i < regs_size; i++) { - ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) - - PACKET3_SET_SH_REG_START; - ib->ptr[ib->length_dw++] = init_regs[i].reg_value; + *ptr++ = PACKET3(PACKET3_SET_SH_REG, 1); + *ptr++ = SOC15_REG_ENTRY_OFFSET(init_regs[i]) - + PACKET3_SET_SH_REG_START; + *ptr++ = init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8; - ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); - ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) - - PACKET3_SET_SH_REG_START; - ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr); - ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr); + *ptr++ = PACKET3(PACKET3_SET_SH_REG, 2); + *ptr++ = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + *ptr++ = 
lower_32_bits(gpu_addr); + *ptr++ = upper_32_bits(gpu_addr); /* write the wb buffer address */ - ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3); - ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) - - PACKET3_SET_SH_REG_START; - ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr); - ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr); - ib->ptr[ib->length_dw++] = pattern; + *ptr++ = PACKET3(PACKET3_SET_SH_REG, 3); + *ptr++ = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) - + PACKET3_SET_SH_REG_START; + *ptr++ = lower_32_bits(wb_gpu_addr); + *ptr++ = upper_32_bits(wb_gpu_addr); + *ptr++ = pattern; /* write dispatch packet */ - ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib->ptr[ib->length_dw++] = compute_dim_x; /* x */ - ib->ptr[ib->length_dw++] = 1; /* y */ - ib->ptr[ib->length_dw++] = 1; /* z */ - ib->ptr[ib->length_dw++] = - REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + *ptr++ = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + *ptr++ = compute_dim_x; /* x */ + *ptr++ = 1; /* y */ + *ptr++ = 1; /* z */ + *ptr++ = REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, + 1); + + ib->length_dw = ptr - ib->ptr; /* shedule the ib on the ring */ r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); -- 2.48.0
