Module: Mesa
Branch: main
Commit: 98ea540158f819b14e69ebde812f012c6fe9da59
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=98ea540158f819b14e69ebde812f012c6fe9da59

Author: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Date:   Wed Dec 13 15:49:04 2023 +0100

radv: add support for MRT compaction with PS epilogs

Now that PS epilogs are always compiled during cmdbuf recording, we
have all the information needed to enable MRT compaction for optimal
performance.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26667>

---

 src/amd/vulkan/radv_cmd_buffer.c        |  9 +++++++--
 src/amd/vulkan/radv_pipeline_graphics.c | 25 +++++--------------------
 src/amd/vulkan/radv_private.h           |  5 +++--
 src/amd/vulkan/radv_shader_args.c       |  4 +++-
 4 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 6be6d1ca3c5..b6c4bd16f4e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1849,7 +1849,8 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer 
*cmd_buffer, struct radv_shader
    if (cmd_buffer->state.emitted_ps_epilog == ps_epilog)
       return;
 
-   uint32_t col_format = ps_epilog->spi_shader_col_format;
+   uint32_t col_format = radv_compact_spi_shader_col_format(ps_shader, 
ps_epilog->spi_shader_col_format);
+
    bool need_null_export_workaround =
       radv_needs_null_export_workaround(device, ps_shader, 
cmd_buffer->state.custom_blend_mode);
    if (need_null_export_workaround && !col_format)
@@ -4226,7 +4227,11 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
       state.alpha_to_coverage_via_mrtz = d->vk.ms.alpha_to_coverage_enable;
    }
 
-   struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state, 
true);
+   struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state);
+
+   /* Clear color attachments that aren't exported by the FS to match IO 
shader arguments. */
+   key.spi_shader_col_format &= ps->info.ps.colors_written;
+
    return radv_shader_part_cache_get(device, &device->ps_epilogs, 
&cmd_buffer->ps_epilogs, &key);
 }
 
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c 
b/src/amd/vulkan/radv_pipeline_graphics.c
index 5dbc75fad9c..c52a42b2196 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -195,10 +195,9 @@ format_is_float32(VkFormat format)
    return channel >= 0 && desc->channel[channel].type == 
UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
 }
 
-static unsigned
-radv_compact_spi_shader_col_format(const struct radv_shader *ps, const struct 
radv_blend_state *blend)
+unsigned
+radv_compact_spi_shader_col_format(const struct radv_shader *ps, uint32_t 
spi_shader_col_format)
 {
-   unsigned spi_shader_col_format = blend->spi_shader_col_format;
    unsigned value = 0, num_mrts = 0;
    unsigned i, num_targets;
 
@@ -1677,8 +1676,7 @@ radv_graphics_shaders_link(const struct radv_device 
*device, const struct radv_p
 }
 
 struct radv_ps_epilog_key
-radv_generate_ps_epilog_key(const struct radv_device *device, const struct 
radv_ps_epilog_state *state,
-                            bool disable_mrt_compaction)
+radv_generate_ps_epilog_key(const struct radv_device *device, const struct 
radv_ps_epilog_state *state)
 {
    unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, 
z_format = 0;
    struct radv_ps_epilog_key key;
@@ -1714,19 +1712,6 @@ radv_generate_ps_epilog_key(const struct radv_device 
*device, const struct radv_
       col_format |= V_028714_SPI_SHADER_32_AR;
    }
 
-   if (disable_mrt_compaction) {
-      /* Do not compact MRTs when the pipeline uses a PS epilog because we 
can't detect color
-       * attachments without exports. Without compaction and if the i-th 
target format is set, all
-       * previous target formats must be non-zero to avoid hangs.
-       */
-      unsigned num_targets = (util_last_bit(col_format) + 3) / 4;
-      for (unsigned i = 0; i < num_targets; i++) {
-         if (!(col_format & (0xfu << (i * 4)))) {
-            col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
-         }
-      }
-   }
-
    /* The output for dual source blending should have the same format as the 
first output. */
    if (state->mrt0_is_dual_src) {
       assert(!(col_format >> 4));
@@ -1805,7 +1790,7 @@ radv_pipeline_generate_ps_epilog_key(const struct 
radv_device *device, const str
       }
    }
 
-   return radv_generate_ps_epilog_key(device, &ps_epilog, false);
+   return radv_generate_ps_epilog_key(device, &ps_epilog);
 }
 
 static struct radv_pipeline_key
@@ -3962,7 +3947,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline 
*pipeline, struct radv
    struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
    bool enable_mrt_compaction = ps && !ps->info.has_epilog && 
!ps->info.ps.mrt0_is_dual_src;
    if (enable_mrt_compaction) {
-      blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, 
&blend);
+      blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, 
blend.spi_shader_col_format);
 
       /* In presence of MRT holes (ie. the FS exports MRT1 but not MRT0), the 
compiler will remap
        * them, so that only MRT0 is exported and the driver will compact 
SPI_SHADER_COL_FORMAT to
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index e58cf56e78b..021d337b533 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2029,8 +2029,7 @@ struct radv_ps_epilog_state {
 };
 
 struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device 
*device,
-                                                      const struct 
radv_ps_epilog_state *state,
-                                                      bool 
disable_mrt_compaction);
+                                                      const struct 
radv_ps_epilog_state *state);
 
 bool radv_needs_null_export_workaround(const struct radv_device *device, const 
struct radv_shader *ps,
                                        unsigned custom_blend_mode);
@@ -3709,6 +3708,8 @@ radv_has_pops(const struct radv_physical_device *pdevice)
    return pdevice->rad_info.gfx_level >= GFX9 && !pdevice->use_llvm;
 }
 
+unsigned radv_compact_spi_shader_col_format(const struct radv_shader *ps, 
uint32_t spi_shader_col_format);
+
 /* radv_perfcounter.c */
 void radv_perfcounter_emit_shaders(struct radv_device *device, struct 
radeon_cmdbuf *cs, unsigned shaders);
 void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
diff --git a/src/amd/vulkan/radv_shader_args.c 
b/src/amd/vulkan/radv_shader_args.c
index fbc8d076706..3518ecf31a5 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -837,8 +837,10 @@ radv_declare_ps_epilog_args(const struct radv_device 
*device, const struct radv_
    for (unsigned i = 0; i < MAX_RTS; i++) {
       unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
 
-      if (col_format == V_028714_SPI_SHADER_ZERO)
+      if (col_format == V_028714_SPI_SHADER_ZERO) {
+         ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, NULL);
          continue;
+      }
 
       ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->colors[i]);
    }

Reply via email to