Module: Mesa
Branch: main
Commit: 71edf4de5e81c56816129aacca5e2441a96589fd
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=71edf4de5e81c56816129aacca5e2441a96589fd

Author: Georg Lehmann <dadschoo...@gmail.com>
Date:   Mon Dec 25 15:42:16 2023 +0100

aco/gfx12: implement broadcast dmask shrink behavior

Reviewed-by: Daniel Schürmann <dan...@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26897>

---

 src/amd/compiler/aco_instruction_selection.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp
index a7059f38ed4..4bcf85f8ed6 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6411,15 +6411,23 @@ visit_image_store(isel_context* ctx, 
nir_intrinsic_instr* instr)
               ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && 
ctx->program->gfx_level < GFX11);
 
    uint32_t dmask = BITFIELD_MASK(num_components);
-   /* remove zero/undef elements from data, components which aren't in dmask
-    * are zeroed anyway
-    */
    if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) 
{
       for (uint32_t i = 0; i < instr->num_components; i++) {
+         /* components not in dmask receive:
+          * GFX6-11.5:  zero
+          * GFX12+: first component in dmask
+          */
          nir_scalar comp = nir_scalar_resolved(instr->src[3].ssa, i);
-         if ((nir_scalar_is_const(comp) && nir_scalar_as_uint(comp) == 0) ||
-             nir_scalar_is_undef(comp))
+         if (nir_scalar_is_undef(comp)) {
             dmask &= ~BITFIELD_BIT(i);
+         } else if (ctx->options->gfx_level <= GFX11_5) {
+            if (nir_scalar_is_const(comp) && nir_scalar_as_uint(comp) == 0)
+               dmask &= ~BITFIELD_BIT(i);
+         } else {
+            unsigned first = dim == GLSL_SAMPLER_DIM_BUF ? 0 : ffs(dmask) - 1;
+            if (i != first && 
nir_scalar_equal(nir_scalar_resolved(instr->src[3].ssa, first), comp))
+               dmask &= ~BITFIELD_BIT(i);
+         }
       }
 
       /* dmask cannot be 0, at least one vgpr is always read */

Reply via email to