On Wed, Aug 6, 2014 at 9:32 AM, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > The code is rewritten to take known constraints into account, while always > using 0 by default. > > This should improve performance for multi-SE parts in theory. > > A debug option is also added for easier debugging. (If there are hangs, > use the option. If the hangs go away, you have found the problem.)
Just one comment below. With that addressed: Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> > --- > src/gallium/drivers/radeon/r600_pipe_common.c | 2 +- > src/gallium/drivers/radeon/r600_pipe_common.h | 1 + > src/gallium/drivers/radeonsi/si_state_draw.c | 33 > ++++++++++++++++------- > src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 17 ++++++++++++ > 4 files changed, 43 insertions(+), 10 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c > b/src/gallium/drivers/radeon/r600_pipe_common.c > index 3476021..eb44d72 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.c > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c > @@ -239,7 +239,6 @@ static const struct debug_named_value > common_debug_options[] = { > { "vm", DBG_VM, "Print virtual addresses when creating resources" }, > { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file > with faulty cs" }, > > - > /* shaders */ > { "fs", DBG_FS, "Print fetch shaders" }, > { "vs", DBG_VS, "Print vertex shaders" }, > @@ -254,6 +253,7 @@ static const struct debug_named_value > common_debug_options[] = { > { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of > INVALIDATE_RANGE map flags" }, > { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, > { "notiling", DBG_NO_TILING, "Disable tiling" }, > + { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on > end-of-packet." }, > > DEBUG_NAMED_VALUE_END /* must be last */ > }; > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index dcec2bb..ac69d5b 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -93,6 +93,7 @@ > #define DBG_NO_DISCARD_RANGE (1 << 12) > #define DBG_NO_2D_TILING (1 << 13) > #define DBG_NO_TILING (1 << 14) > +#define DBG_SWITCH_ON_EOP (1 << 15) > /* The maximum allowed bit is 15. */ > > #define R600_MAP_BUFFER_ALIGNMENT 64 > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index 4e808a3..ae839ba 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -401,25 +401,40 @@ static bool si_update_draw_info_state(struct si_context > *sctx, > > if (sctx->b.chip_class >= CIK) { > struct si_state_rasterizer *rs = > sctx->queued.named.rasterizer; > - bool wd_switch_on_eop = prim == V_008958_DI_PT_POLYGON || > - prim == V_008958_DI_PT_LINELOOP || > - prim == V_008958_DI_PT_TRIFAN || > - prim == V_008958_DI_PT_TRISTRIP_ADJ || > - info->primitive_restart || > - (rs ? rs->line_stipple_enable : > false); > - /* If the WD switch is false, the IA switch must be false > too. */ > - bool ia_switch_on_eop = wd_switch_on_eop; > unsigned primgroup_size = 64; > > + /* SWITCH_ON_EOP(0) is always preferable. */ > + bool wd_switch_on_eop = false; > + bool ia_switch_on_eop = false; > + > + /* WD_SWITCH_ON_EOP has no effect on GPUs with less than > + * 4 shader engines. Set 1 to pass the assertion below. > + * The other cases are hardware requirements. */ > + if (sctx->b.screen->info.max_se < 4 || > + prim == V_008958_DI_PT_POLYGON || > + prim == V_008958_DI_PT_LINELOOP || > + prim == V_008958_DI_PT_TRIFAN || > + prim == V_008958_DI_PT_TRISTRIP_ADJ || > + info->primitive_restart) > + wd_switch_on_eop = true; > + > /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP > is 0. > * We don't know that for indirect drawing, so treat it as > * always problematic. */ > if (sctx->b.family == CHIP_HAWAII && > - (info->indirect || info->instance_count > 1)) { > + (info->indirect || info->instance_count > 1)) > wd_switch_on_eop = true; > + > + /* This is a hardware requirement. */ > + if ((rs && rs->line_stipple_enable) || > + (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { > ia_switch_on_eop = true; > + wd_switch_on_eop = true; > } > > + /* If the WD switch is false, the IA switch must be false > too. */ > + assert(wd_switch_on_eop || !ia_switch_on_eop); > + > si_pm4_set_reg(pm4, R_028B74_VGT_DISPATCH_DRAW_INDEX, > ib->index_size == 4 ? 0xFC000000 : 0xFC00); > > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > index 910d06b..aecd0fd 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c > @@ -392,6 +392,23 @@ static boolean do_winsys_init(struct radeon_drm_winsys > *ws) > radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, > &ws->info.max_se); > > + if (!ws->info.max_se) { > + switch (ws->info.family) { > + default: > + ws->info.max_se = 1; > + break; > + case CAYMAN: Shouldn't this be CHIP_CAYMAN? Also, FWIW, Cypress and Hemlock have 2 SEs as well. > + case CHIP_TAHITI: > + case CHIP_PITCAIRN: > + case CHIP_BONAIRE: > + ws->info.max_se = 2; > + break; > + case CHIP_HAWAII: > + ws->info.max_se = 4; > + break; > + } > + } > + > radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, > &ws->info.max_sh_per_se); > > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev