Module: Mesa
Branch: main
Commit: bd634bef1237b3429731d70f508680ebd26e56f1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd634bef1237b3429731d70f508680ebd26e56f1

Author: Francisco Jerez <[email protected]>
Date:   Fri Dec  1 17:01:36 2023 -0800

intel/fs/xe2+: Implement layout of mesh shading per-primitive inputs in PS thread payloads.

This is based on a previous patch by Marcin Ślusarz addressing the
same issue, though it's largely rewritten, simplified and includes
additional fixes.

Reviewed-by: Caio Oliveira <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26606>

---

 src/intel/compiler/brw_fs.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 992abc5dffd..bdccc215be8 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1797,29 +1797,43 @@ fs_visitor::assign_urb_setup()
             struct brw_reg reg;
             assert(max_polygons > 0);
 
+            /* Calculate the base register on the thread payload of
+             * either the block of vertex setup data or the block of
+             * per-primitive constant data depending on whether we're
+             * accessing a primitive or vertex input.  Also calculate
+             * the index of the input within that block.
+             */
+            const bool per_prim = inst->src[i].nr < 
prog_data->num_per_primitive_inputs;
+            const unsigned base = urb_start +
+               (per_prim ? 0 :
+                ALIGN(prog_data->num_per_primitive_inputs / 2,
+                      reg_unit(devinfo)) * max_polygons);
+            const unsigned idx = per_prim ? inst->src[i].nr :
+               inst->src[i].nr - prog_data->num_per_primitive_inputs;
+
             /* Translate the offset within the param_width-wide
              * representation described above into an offset and a
              * grf, which contains the plane parameters for the first
              * polygon processed by the thread.
              */
-            if (devinfo->ver >= 20) {
+            if (devinfo->ver >= 20 && !per_prim) {
                /* Gfx20+ is able to pack 5 logical input components
-                * per 64B register.
+                * per 64B register for vertex setup data.
                 */
-               const unsigned grf = urb_start + inst->src[i].nr / 5 * 2 * 
max_polygons;
+               const unsigned grf = base + idx / 5 * 2 * max_polygons;
                assert(inst->src[i].offset / param_width < 12);
-               const unsigned delta = inst->src[i].nr % 5 * 12 +
+               const unsigned delta = idx % 5 * 12 +
                   inst->src[i].offset / (param_width * chan_sz) * chan_sz +
                   inst->src[i].offset % chan_sz;
                reg = byte_offset(retype(brw_vec8_grf(grf, 0), 
inst->src[i].type),
                                  delta);
             } else {
-               /* Earlier platforms pack 2 logical input components
-                * per 32B register.
+               /* Earlier platforms and per-primitive block pack 2 logical
+                * input components per 32B register.
                 */
-               const unsigned grf = urb_start + inst->src[i].nr / 2 * 
max_polygons;
+               const unsigned grf = base + idx / 2 * max_polygons;
                assert(inst->src[i].offset / param_width < REG_SIZE / 2);
-               const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
+               const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
                   inst->src[i].offset / (param_width * chan_sz) * chan_sz +
                   inst->src[i].offset % chan_sz;
                reg = byte_offset(retype(brw_vec8_grf(grf, 0), 
inst->src[i].type),

Reply via email to