xe2+: Update for new layout of vertex setup data in PS payload.

GitLab Mirror Thu, 28 Dec 2023 14:36:34 -0800

Module: Mesa
Branch: main
Commit: 702eabaaae3ae559ba495488148139d506c1edcb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=702eabaaae3ae559ba495488148139d506c1edcb


Author: Francisco Jerez <[email protected]>
Date:   Fri Dec  1 16:23:11 2023 -0800

intel/fs/xe2+: Update for new layout of vertex setup data in PS payload.

The interpolation deltas of PS inputs now show up as a 12B vec3 (A0,
A1-A0, A2-A0) in the ATTR file, instead of the previously used 16B
format with an unused component.

Reviewed-by: Jordan Justen <[email protected]>
Reviewed-by: Caio Oliveira <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26606>

---

 src/intel/compiler/brw_fs.cpp     | 52 ++++++++++++++++++++++++++++-----------
 src/intel/compiler/brw_fs_nir.cpp | 25 ++++++++++++++++---
 2 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index dedcfd56952..24e1e5662f0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1742,10 +1742,10 @@ fs_visitor::assign_urb_setup()
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == ATTR) {
             /* ATTR fs_reg::nr in the FS is in units of logical scalar
-             * inputs each of which consumes half of a GRF register on
-             * current platforms.  In single polygon mode this leads
-             * to the following layout of the vertex setup plane
-             * parameters in the ATTR register file:
+             * inputs each of which consumes 16B on Gfx4-Gfx12.  In
+             * single polygon mode this leads to the following layout
+             * of the vertex setup plane parameters in the ATTR
+             * register file:
              *
              *  fs_reg::nr   Input   Comp0  Comp1  Comp2  Comp3
              *      0       Attr0.x  a1-a0  a2-a0   N/A    a0
@@ -1782,27 +1782,49 @@ fs_visitor::assign_urb_setup()
              * The latter layout corresponds to a param_width equal to
              * dispatch_width, while the former (scalar parameter)
              * layout has a param_width of 1.
+             *
+             * Gfx20+ represent plane parameters in a format similar
+             * to the above, except the parameters are packed in 12B
+             * and ordered like "a0, a1-a0, a2-a0" instead of the
+             * above vec4 representation with a missing component.
              */
             const unsigned param_width = (max_polygons > 1 ? dispatch_width : 1);
-            assert(inst->src[i].offset / param_width < REG_SIZE / 2);
-            assert(max_polygons > 0);
 
             /* Size of a single scalar component of a plane parameter
              * in bytes.
              */
             const unsigned chan_sz = 4;
+            struct brw_reg reg;
+            assert(max_polygons > 0);
 
             /* Translate the offset within the param_width-wide
-             * representation described above into an offset into grf,
-             * which contains plane parameters for the first polygon
-             * handled by the thread.
+             * representation described above into an offset and a
+             * grf, which contains the plane parameters for the first
+             * polygon processed by the thread.
              */
-            const unsigned grf = urb_start + inst->src[i].nr / 2 * max_polygons;
-            const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
-               inst->src[i].offset / (param_width * chan_sz) * chan_sz +
-               inst->src[i].offset % chan_sz;
-            struct brw_reg reg =
-               byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), delta);
+            if (devinfo->ver >= 20) {
+               /* Gfx20+ is able to pack 5 logical input components
+                * per 64B register.
+                */
+               const unsigned grf = urb_start + inst->src[i].nr / 5 * 2;
+               assert(inst->src[i].offset / param_width < 12);
+               const unsigned delta = inst->src[i].nr % 5 * 12 +
+                  inst->src[i].offset / (param_width * chan_sz) * chan_sz +
+                  inst->src[i].offset % chan_sz;
+               reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                 delta);
+            } else {
+               /* Earlier platforms pack 2 logical input components
+                * per 32B register.
+                */
+               const unsigned grf = urb_start + inst->src[i].nr / 2 * max_polygons;
+               assert(inst->src[i].offset / param_width < REG_SIZE / 2);
+               const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
+                  inst->src[i].offset / (param_width * chan_sz) * chan_sz +
+                  inst->src[i].offset % chan_sz;
+               reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                 delta);
+            }
 
             if (max_polygons > 1) {
                assert(devinfo->ver == 12);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 70ee4ecae82..088542dc717 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4237,9 +4237,14 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
                     retype(s.per_primitive_reg(bld, base, comp + i), dest.type));
          }
       } else {
+         /* Gfx20+ packs the plane parameters of a single logical
+          * input in a vec3 format instead of the previously used vec4
+          * format.
+          */
+         const unsigned k = devinfo->ver >= 20 ? 0 : 3;
          for (unsigned int i = 0; i < num_components; i++) {
             bld.MOV(offset(dest, bld, i),
-                    retype(s.interp_reg(bld, base, comp + i, 3), dest.type));
+                    retype(s.interp_reg(bld, base, comp + i, k), dest.type));
          }
       }
       break;
@@ -4251,9 +4256,21 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
       const unsigned base = nir_intrinsic_base(instr);
       const unsigned comp = nir_intrinsic_component(instr);
       dest.type = BRW_REGISTER_TYPE_F;
-      bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 3));
-      bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 1));
-      bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 0));
+
+      /* Gfx20+ packs the plane parameters of a single logical
+       * input in a vec3 format instead of the previously used vec4
+       * format.
+       */
+      if (devinfo->ver >= 20) {
+         bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 0));
+         bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 2));
+         bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 1));
+      } else {
+         bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 3));
+         bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 1));
+         bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 0));
+      }
+
       break;
    }

Mesa (main): intel/fs/xe2+: Update for new layout of vertex setup data in PS payload.

Reply via email to