Module: Mesa
Branch: staging/23.2
Commit: edf482aca271130e5a83b61b135443bb03393f85
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=edf482aca271130e5a83b61b135443bb03393f85

Author: Mike Blumenkrantz <[email protected]>
Date:   Wed Aug  2 13:34:36 2023 -0400

nir/lower_io_to_scalar: fix 64bit io splitting

this was creating broken 64bit loads/stores using 32bit component
size

cc: mesa-stable

Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24458>
(cherry picked from commit 58ba2bcc8c185f8ebb74b863b76c1b65eb78206f)

---

 .pick_status.json                         |  2 +-
 src/compiler/nir/nir_lower_io_to_scalar.c | 26 +++++++++++++++++++++++---
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 78bef1745a2..b310c740062 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -10084,7 +10084,7 @@
         "description": "nir/lower_io_to_scalar: fix 64bit io splitting",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/compiler/nir/nir_lower_io_to_scalar.c 
b/src/compiler/nir/nir_lower_io_to_scalar.c
index 083f8a140a0..d7fd2e29711 100644
--- a/src/compiler/nir/nir_lower_io_to_scalar.c
+++ b/src/compiler/nir/nir_lower_io_to_scalar.c
@@ -50,6 +50,9 @@ lower_load_input_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intr)
    nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];
 
    for (unsigned i = 0; i < intr->num_components; i++) {
+      bool is_64bit = (nir_intrinsic_instr_dest_type(intr) & 
NIR_ALU_TYPE_SIZE_MASK) == 64;
+      unsigned newi = is_64bit ? i * 2 : i;
+      unsigned newc = nir_intrinsic_component(intr);
       nir_intrinsic_instr *chan_intr =
          nir_intrinsic_instr_create(b->shader, intr->intrinsic);
       nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, 1,
@@ -57,12 +60,19 @@ lower_load_input_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intr)
       chan_intr->num_components = 1;
 
       nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
-      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + 
i);
+      nir_intrinsic_set_component(chan_intr, (newc + newi) % 4);
       nir_intrinsic_set_dest_type(chan_intr, nir_intrinsic_dest_type(intr));
       set_io_semantics(chan_intr, intr, i);
       /* offset and vertex (if needed) */
       for (unsigned j = 0; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; 
++j)
          nir_src_copy(&chan_intr->src[j], &intr->src[j], &chan_intr->instr);
+      if (newc + newi > 3) {
+         nir_ssa_def *offset = nir_imm_int(b, (newc + newi) / 4);
+         nir_src *src = nir_get_io_offset_src(chan_intr);
+         nir_src new_src = nir_src_for_ssa(offset);
+         offset = nir_iadd_imm(b, src->ssa, (newc + newi) / 4);
+         nir_src_copy(src, &new_src, &chan_intr->instr);
+      }
 
       nir_builder_instr_insert(b, &chan_intr->instr);
 
@@ -131,13 +141,16 @@ lower_store_output_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intr)
       if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
          continue;
 
+      bool is_64bit = (nir_intrinsic_instr_src_type(intr, 0) & 
NIR_ALU_TYPE_SIZE_MASK) == 64;
+      unsigned newi = is_64bit ? i * 2 : i;
+      unsigned newc = nir_intrinsic_component(intr);
       nir_intrinsic_instr *chan_intr =
          nir_intrinsic_instr_create(b->shader, intr->intrinsic);
       chan_intr->num_components = 1;
 
       nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr));
       nir_intrinsic_set_write_mask(chan_intr, 0x1);
-      nir_intrinsic_set_component(chan_intr, nir_intrinsic_component(intr) + 
i);
+      nir_intrinsic_set_component(chan_intr, (newc + newi) % 4);
       nir_intrinsic_set_src_type(chan_intr, nir_intrinsic_src_type(intr));
       set_io_semantics(chan_intr, intr, i);
 
@@ -153,7 +166,7 @@ lower_store_output_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intr)
                nir_io_xfb scalar_xfb;
 
                memset(&scalar_xfb, 0, sizeof(scalar_xfb));
-               scalar_xfb.out[component % 2].num_components = 1;
+               scalar_xfb.out[component % 2].num_components = is_64bit ? 2 :  
1;
                scalar_xfb.out[component % 2].buffer = xfb.out[c % 2].buffer;
                scalar_xfb.out[component % 2].offset = xfb.out[c % 2].offset +
                                                       component - c;
@@ -171,6 +184,13 @@ lower_store_output_to_scalar(nir_builder *b, 
nir_intrinsic_instr *intr)
       /* offset and vertex (if needed) */
       for (unsigned j = 1; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; 
++j)
          nir_src_copy(&chan_intr->src[j], &intr->src[j], &chan_intr->instr);
+      if (newc + newi > 3) {
+         nir_ssa_def *offset = nir_imm_int(b, (newc + newi) / 4);
+         nir_src *src = nir_get_io_offset_src(chan_intr);
+         offset = nir_iadd_imm(b, src->ssa, (newc + newi) / 4);
+         nir_src new_src = nir_src_for_ssa(offset);
+         nir_src_copy(src, &new_src, &chan_intr->instr);
+      }
 
       nir_builder_instr_insert(b, &chan_intr->instr);
    }

Reply via email to