From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_pipe.c | 38 ++++++++++++++++- src/gallium/drivers/radeonsi/si_pipe.h | 3 ++ src/gallium/drivers/radeonsi/si_state_shaders.c | 56 ++++++------------------- 3 files changed, 52 insertions(+), 45 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f07ec50..83133cb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -742,25 +742,59 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } si_handle_env_var_force_family(sscreen); if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); + /* Determine tessellation ring info. */ + bool double_offchip_buffers = sscreen->info.chip_class >= CIK && + sscreen->info.family != CHIP_CARRIZO && + sscreen->info.family != CHIP_STONEY; + /* This must be one less than the maximum number due to a hw limitation. + * Various hardware bugs in SI, CIK, and GFX9 need this. + */ + unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; + unsigned max_offchip_buffers = max_offchip_buffers_per_se * + sscreen->info.max_se; + unsigned offchip_granularity; + /* Hawaii has a bug with offchip buffers > 256 that can be worked * around by setting 4K granularity. */ - sscreen->tess_offchip_block_dw_size = - sscreen->info.family == CHIP_HAWAII ? 4096 : 8192; + if (sscreen->info.family == CHIP_HAWAII) { + sscreen->tess_offchip_block_dw_size = 4096; + offchip_granularity = V_03093C_X_4K_DWORDS; + } else { + sscreen->tess_offchip_block_dw_size = 8192; + offchip_granularity = V_03093C_X_8K_DWORDS; + } + + sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se; + assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0); + sscreen->tess_offchip_ring_size = max_offchip_buffers * + sscreen->tess_offchip_block_dw_size * 4; + + if (sscreen->info.chip_class >= CIK) { + if (sscreen->info.chip_class >= VI) + --max_offchip_buffers; + sscreen->vgt_hs_offchip_param = + S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | + S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); + } else { + assert(offchip_granularity == V_03093C_X_8K_DWORDS); + sscreen->vgt_hs_offchip_param = + S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); + } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs * on SI. */ sscreen->has_clear_state = sscreen->info.chip_class >= CIK; sscreen->has_distributed_tess = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3a959f9..7b23e8c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -97,20 +97,23 @@ struct si_screen { struct pipe_screen b; struct radeon_winsys *ws; struct disk_cache *disk_shader_cache; struct radeon_info info; uint64_t debug_flags; char renderer_string[100]; unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; + unsigned tess_offchip_ring_size; + unsigned tess_factor_ring_size; + unsigned vgt_hs_offchip_param; bool has_clear_state; bool has_distributed_tess; bool has_draw_indirect_multi; bool has_out_of_order_rast; bool assume_no_z_fights; bool commutative_blend_add; bool clear_db_cache_before_clear; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; bool dpbb_allowed; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 2cd48f5..9c505ff 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2945,102 +2945,72 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); if (spi_tmpring_size != sctx->spi_tmpring_size) { sctx->spi_tmpring_size = spi_tmpring_size; si_mark_atom_dirty(sctx, &sctx->scratch_state); } return true; } static void si_init_tess_factor_ring(struct si_context *sctx) { - bool double_offchip_buffers = sctx->b.chip_class >= CIK && - sctx->b.family != CHIP_CARRIZO && - sctx->b.family != CHIP_STONEY; - /* This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. - */ - unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; - unsigned max_offchip_buffers = max_offchip_buffers_per_se * - sctx->screen->info.max_se; - unsigned offchip_granularity; - - switch (sctx->screen->tess_offchip_block_dw_size) { - default: - assert(0); - /* fall through */ - case 8192: - offchip_granularity = V_03093C_X_8K_DWORDS; - break; - case 4096: - offchip_granularity = V_03093C_X_4K_DWORDS; - break; - } - assert(!sctx->tf_ring); + /* Use 64K alignment for both rings, so that we can pass the address * to shaders as one SGPR containing bits [16:47]. */ sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen, - R600_RESOURCE_FLAG_UNMAPPABLE, - PIPE_USAGE_DEFAULT, - 32768 * sctx->screen->info.max_se, - 64 * 1024); + R600_RESOURCE_FLAG_UNMAPPABLE, + PIPE_USAGE_DEFAULT, + sctx->screen->tess_factor_ring_size, + 64 * 1024); if (!sctx->tf_ring) return; - assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0); - sctx->tess_offchip_ring = si_aligned_buffer_create(sctx->b.b.screen, - R600_RESOURCE_FLAG_UNMAPPABLE, - PIPE_USAGE_DEFAULT, - max_offchip_buffers * - sctx->screen->tess_offchip_block_dw_size * 4, - 64 * 1024); + R600_RESOURCE_FLAG_UNMAPPABLE, + PIPE_USAGE_DEFAULT, + sctx->screen->tess_offchip_ring_size, + 64 * 1024); if (!sctx->tess_offchip_ring) return; si_init_config_add_vgt_flush(sctx); uint64_t offchip_va = r600_resource(sctx->tess_offchip_ring)->gpu_address; uint64_t factor_va = r600_resource(sctx->tf_ring)->gpu_address; assert((offchip_va & 0xffff) == 0); assert((factor_va & 0xffff) == 0); si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_offchip_ring), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tf_ring), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { - if (sctx->b.chip_class >= VI) - --max_offchip_buffers; - si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, - S_030938_SIZE(sctx->tf_ring->width0 / 4)); + S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, factor_va >> 8); if (sctx->b.chip_class >= GFX9) si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI, factor_va >> 40); si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM, - S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | - S_03093C_OFFCHIP_GRANULARITY(offchip_granularity)); + sctx->screen->vgt_hs_offchip_param); } else { - assert(offchip_granularity == V_03093C_X_8K_DWORDS); si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE, - S_008988_SIZE(sctx->tf_ring->width0 / 4)); + S_008988_SIZE(sctx->screen->tess_factor_ring_size / 4)); si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8); si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM, - S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers)); + sctx->screen->vgt_hs_offchip_param); } if (sctx->b.chip_class >= GFX9) { si_pm4_set_reg(sctx->init_config, R_00B430_SPI_SHADER_USER_DATA_LS_0 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K * 4, offchip_va >> 16); si_pm4_set_reg(sctx->init_config, R_00B430_SPI_SHADER_USER_DATA_LS_0 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K * 4, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev