On Wed, Aug 6, 2014 at 9:32 AM, Marek Olšák <mar...@gmail.com> wrote:
> From: Marek Olšák <marek.ol...@amd.com>
>
> The code is rewritten to take known constraints into account, while always
> using 0 by default.
>
> This should improve performance for multi-SE parts in theory.
>
> A debug option is also added for easier debugging. (If there are hangs,
> use the option. If the hangs go away, you have found the problem.)

Just one comment below.  With that addressed:

Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>

> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c     |  2 +-
>  src/gallium/drivers/radeon/r600_pipe_common.h     |  1 +
>  src/gallium/drivers/radeonsi/si_state_draw.c      | 33 
> ++++++++++++++++-------
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 17 ++++++++++++
>  4 files changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 3476021..eb44d72 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -239,7 +239,6 @@ static const struct debug_named_value 
> common_debug_options[] = {
>         { "vm", DBG_VM, "Print virtual addresses when creating resources" },
>         { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file 
> with faulty cs" },
>
> -
>         /* shaders */
>         { "fs", DBG_FS, "Print fetch shaders" },
>         { "vs", DBG_VS, "Print vertex shaders" },
> @@ -254,6 +253,7 @@ static const struct debug_named_value 
> common_debug_options[] = {
>         { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of 
> INVALIDATE_RANGE map flags" },
>         { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
>         { "notiling", DBG_NO_TILING, "Disable tiling" },
> +       { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on 
> end-of-packet." },
>
>         DEBUG_NAMED_VALUE_END /* must be last */
>  };
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index dcec2bb..ac69d5b 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -93,6 +93,7 @@
>  #define DBG_NO_DISCARD_RANGE   (1 << 12)
>  #define DBG_NO_2D_TILING       (1 << 13)
>  #define DBG_NO_TILING          (1 << 14)
> +#define DBG_SWITCH_ON_EOP      (1 << 15)
>  /* The maximum allowed bit is 15. */
>
>  #define R600_MAP_BUFFER_ALIGNMENT 64
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 4e808a3..ae839ba 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -401,25 +401,40 @@ static bool si_update_draw_info_state(struct si_context 
> *sctx,
>
>         if (sctx->b.chip_class >= CIK) {
>                 struct si_state_rasterizer *rs = 
> sctx->queued.named.rasterizer;
> -               bool wd_switch_on_eop = prim == V_008958_DI_PT_POLYGON ||
> -                                       prim == V_008958_DI_PT_LINELOOP ||
> -                                       prim == V_008958_DI_PT_TRIFAN ||
> -                                       prim == V_008958_DI_PT_TRISTRIP_ADJ ||
> -                                       info->primitive_restart ||
> -                                       (rs ? rs->line_stipple_enable : 
> false);
> -               /* If the WD switch is false, the IA switch must be false 
> too. */
> -               bool ia_switch_on_eop = wd_switch_on_eop;
>                 unsigned primgroup_size = 64;
>
> +               /* SWITCH_ON_EOP(0) is always preferable. */
> +               bool wd_switch_on_eop = false;
> +               bool ia_switch_on_eop = false;
> +
> +               /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
> +                * 4 shader engines. Set 1 to pass the assertion below.
> +                * The other cases are hardware requirements. */
> +               if (sctx->b.screen->info.max_se < 4 ||
> +                   prim == V_008958_DI_PT_POLYGON ||
> +                   prim == V_008958_DI_PT_LINELOOP ||
> +                   prim == V_008958_DI_PT_TRIFAN ||
> +                   prim == V_008958_DI_PT_TRISTRIP_ADJ ||
> +                   info->primitive_restart)
> +                       wd_switch_on_eop = true;
> +
>                 /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP 
> is 0.
>                  * We don't know that for indirect drawing, so treat it as
>                  * always problematic. */
>                 if (sctx->b.family == CHIP_HAWAII &&
> -                   (info->indirect || info->instance_count > 1)) {
> +                   (info->indirect || info->instance_count > 1))
>                         wd_switch_on_eop = true;
> +
> +               /* This is a hardware requirement. */
> +               if ((rs && rs->line_stipple_enable) ||
> +                   (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
>                         ia_switch_on_eop = true;
> +                       wd_switch_on_eop = true;
>                 }
>
> +               /* If the WD switch is false, the IA switch must be false 
> too. */
> +               assert(wd_switch_on_eop || !ia_switch_on_eop);
> +
>                 si_pm4_set_reg(pm4, R_028B74_VGT_DISPATCH_DRAW_INDEX,
>                                ib->index_size == 4 ? 0xFC000000 : 0xFC00);
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
> b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 910d06b..aecd0fd 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -392,6 +392,23 @@ static boolean do_winsys_init(struct radeon_drm_winsys 
> *ws)
>      radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
>                           &ws->info.max_se);
>
> +    if (!ws->info.max_se) {
> +        switch (ws->info.family) {
> +        default:
> +            ws->info.max_se = 1;
> +            break;
> +        case CAYMAN:

Shouldn't this be CHIP_CAYMAN?

Also, FWIW, Cypress and Hemlock have 2 SEs as well.

> +        case CHIP_TAHITI:
> +        case CHIP_PITCAIRN:
> +        case CHIP_BONAIRE:
> +            ws->info.max_se = 2;
> +            break;
> +        case CHIP_HAWAII:
> +            ws->info.max_se = 4;
> +            break;
> +        }
> +    }
> +
>      radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
>                           &ws->info.max_sh_per_se);
>
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to