From: Marek Olšák <marek.ol...@amd.com> "64 / max * 4" is less than "64 * 4 / max". --- src/gallium/drivers/radeonsi/si_state_draw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 942cb3c7994..e7f8389caf3 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -139,21 +139,22 @@ static bool si_emit_derived_tess_state(struct si_context *sctx, input_patch_size = num_tcs_input_cp * input_vertex_size; pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; /* Ensure that we only need one wave per SIMD so we don't need to check * resource usage. Also ensures that the number of tcs in and out * vertices per threadgroup are at most 256. */ - *num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; + unsigned max_verts_per_patch = MAX2(num_tcs_input_cp, num_tcs_output_cp); + *num_patches = 256 / max_verts_per_patch; /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. * * While CIK can use 64K per threadgroup, there is a hang on Stoney * with 2 CUs if we use more than 32K. The closed Vulkan driver also * uses 32K at most on all GCN chips. */ hardware_lds_size = 32768; *num_patches = MIN2(*num_patches, hardware_lds_size / (input_patch_size + @@ -166,21 +167,21 @@ static bool si_emit_derived_tess_state(struct si_context *sctx, /* Not necessary for correctness, but improves performance. The * specific value is taken from the proprietary driver. */ *num_patches = MIN2(*num_patches, 40); if (sctx->chip_class == SI) { /* SI bug workaround, related to power management. Limit LS-HS * threadgroups to only one wave. */ - unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + unsigned one_wave = 64 / max_verts_per_patch; *num_patches = MIN2(*num_patches, one_wave); } /* The VGT HS block increments the patch ID unconditionally * within a single threadgroup. This results in incorrect * patch IDs when instanced draws are used. * * The intended solution is to restrict threadgroups to * a single instance by setting SWITCH_ON_EOI, which * should cause IA to split instances up. However, this -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev