From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_compute.c | 32 ++++++++++++++---------
 src/gallium/drivers/radeonsi/si_pipe.h    |  3 +++
 2 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 42caac66884..5ec0c0a5699 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -752,18 +752,14 @@ static void si_setup_tgsi_user_data(struct si_context *sctx, } } -static void si_emit_dispatch_packets(struct si_context *sctx, - const struct pipe_grid_info *info) +unsigned si_get_compute_resource_limits(struct si_screen *sscreen, + unsigned waves_per_threadgroup, + unsigned max_waves_per_sh) { - struct si_screen *sscreen = sctx->screen; - struct radeon_cmdbuf *cs = sctx->gfx_cs; - bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; - unsigned waves_per_threadgroup = - DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64); unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (sctx->chip_class >= CIK) { + if (sscreen->info.chip_class >= CIK) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; @@ -774,17 +770,29 @@ static void si_emit_dispatch_packets(struct si_context *sctx, if (num_cu_per_se % 4 && waves_per_threadgroup == 1) compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1); - compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); + compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh); } else { /* SI */ - if (sctx->cs_max_waves_per_sh) { - unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); + if (max_waves_per_sh) { + unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16); compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); } } + return compute_resource_limits; +} + +static void si_emit_dispatch_packets(struct si_context *sctx, + const struct pipe_grid_info *info) +{ + struct si_screen *sscreen = sctx->screen; + struct radeon_cmdbuf *cs = sctx->gfx_cs; + bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off; 
+ unsigned waves_per_threadgroup = + DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64); radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, - compute_resource_limits); + si_get_compute_resource_limits(sscreen, waves_per_threadgroup, + sctx->cs_max_waves_per_sh)); unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index bd39e13b381..81faf4c66e8 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1291,6 +1291,9 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin); /* si_compute.c */ +unsigned si_get_compute_resource_limits(struct si_screen *sscreen, + unsigned waves_per_threadgroup, + unsigned max_waves_per_sh); void si_init_compute_functions(struct si_context *sctx); /* si_perfcounters.c */ -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev