Module: Mesa
Branch: main
Commit: afbaeee3581ad72ba97538008a401f01c2572a47
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=afbaeee3581ad72ba97538008a401f01c2572a47

Author: Thomas H.P. Andersen <pho...@gmail.com>
Date:   Sun Nov 19 20:07:00 2023 +0100

nvk: VK_EXT_color_write_enable

A write mask based on the pipeline creation input is stored in scratch. Another
similar mask is also stored for the dynamic color_write_enable. These can then
be updated individually, and will be combined in an MME macro before use.

Each attachment has a mask for rgba. The max number of attachments is 8, so
we can fit the write mask in a single 32-bit scratch.

color_write_enable is a single bit per attachment. To make it easier to combine
with the write mask, it is stored in scratch with separate rgba bits.

The layout of both scratch values is:
Attachment index 88887777666655554444333322221111
Component        abgrabgrabgrabgrabgrabgrabgrabgr

dEQP-VK.pipeline.monolithic.color_write_enable.*

Test run totals:
  Passed:        576/576 (100.0%)
  Failed:        0/576 (0.0%)
  Not supported: 0/576 (0.0%)
  Warnings:      0/576 (0.0%)
  Waived:        0/576 (0.0%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26211>

---

 docs/features.txt                          |  2 +-
 src/nouveau/vulkan/nvk_cmd_draw.c          | 82 ++++++++++++++++++++++++++++--
 src/nouveau/vulkan/nvk_graphics_pipeline.c | 27 ++++------
 src/nouveau/vulkan/nvk_mme.c               |  1 +
 src/nouveau/vulkan/nvk_mme.h               |  4 ++
 src/nouveau/vulkan/nvk_physical_device.c   |  4 ++
 6 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index cb79b043bbd..002fd534898 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -548,7 +548,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_EXT_border_color_swizzle                           DONE (anv, hasvk, lvp, 
nvk, radv/gfx10+, tu, v3dv, vn)
   VK_EXT_buffer_device_address                          DONE (anv/gen8+, 
hasvk, nvk, radv)
   VK_EXT_calibrated_timestamps                          DONE (anv, hasvk, lvp, 
radv, vn)
-  VK_EXT_color_write_enable                             DONE (anv, hasvk, lvp, 
radv, tu, v3dv, vn)
+  VK_EXT_color_write_enable                             DONE (anv, hasvk, lvp, 
nvk, radv, tu, v3dv, vn)
   VK_EXT_conditional_rendering                          DONE (anv, hasvk, lvp, 
nvk, radv, tu, vn)
   VK_EXT_conservative_rasterization                     DONE (anv/gen9+, radv, 
vn)
   VK_EXT_custom_border_color                            DONE (anv, hasvk, lvp, 
nvk, panvk, radv, tu, v3dv, vn)
diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c 
b/src/nouveau/vulkan/nvk_cmd_draw.c
index 0769bda0d82..e2d48af682f 100644
--- a/src/nouveau/vulkan/nvk_cmd_draw.c
+++ b/src/nouveau/vulkan/nvk_cmd_draw.c
@@ -1519,14 +1519,72 @@ vk_to_nv9097_logic_op(VkLogicOp vk_op)
    return nv9097_op;
 }
 
+/* MME macro: ANDs the pipeline and dynamic write masks from scratch, emits
+ * SET_CT_WRITE for `count` attachments, then SET_SINGLE_CT_WRITE_CONTROL.
+ */
+void
+nvk_mme_set_write_mask(struct mme_builder *b)
+{
+   struct mme_value count = mme_load(b);
+   struct mme_value pipeline = nvk_mme_load_scratch(b, WRITE_MASK_PIPELINE);
+   struct mme_value dynamic = nvk_mme_load_scratch(b, WRITE_MASK_DYN);
+
+   /*
+      dynamic and pipeline are both bit fields
+
+      attachment index 88887777666655554444333322221111
+      component        abgrabgrabgrabgrabgrabgrabgrabgr
+   */
+   struct mme_value mask = mme_and(b, pipeline, dynamic);
+   mme_free_reg(b, pipeline);
+   mme_free_reg(b, dynamic);
+
+   struct mme_value common_mask = mme_mov(b, mme_imm(1));
+   struct mme_value first = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+   struct mme_value i = mme_mov(b, mme_zero());
+
+   mme_while(b, ine, i, count) {
+      /*
+         We call NV9097_SET_CT_WRITE per attachment. It needs a value as:
+         0b0000 0000 0000 0000 000a 000b 000g 000r
+
+         So for i=0 a mask of
+         0b0000 0000 0000 0000 0000 0000 0000 1111
+         becomes
+         0b0000 0000 0000 0000 0001 0001 0001 0001
+      */
+      struct mme_value val = mme_merge(b, mme_zero(), mask, 0, 1, 0);
+      mme_merge_to(b, val, val, mask, 4, 1, 1);
+      mme_merge_to(b, val, val, mask, 8, 1, 2);
+      mme_merge_to(b, val, val, mask, 12, 1, 3);
+
+      mme_mthd_arr(b, NV9097_SET_CT_WRITE(0), i);
+      mme_emit(b, val);
+      mme_free_reg(b, val);
+
+      /* Check if all masks are common (compare this nibble with the first) */
+      struct mme_value temp = mme_and(b, mask, mme_imm(BITFIELD_RANGE(0, 4)));
+      mme_if(b, ine, first, temp) {
+         mme_mov_to(b, common_mask, mme_zero());
+      }
+      mme_free_reg(b, temp);
+
+      mme_srl_to(b, mask, mask, mme_imm(4));
+      mme_add_to(b, i, i, mme_imm(1));
+   }
+
+   mme_mthd(b, NV9097_SET_SINGLE_CT_WRITE_CONTROL);
+   mme_emit(b, common_mask);
+}
+
 static void
 nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
 {
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
-
    const struct vk_dynamic_graphics_state *dyn =
       &cmd->vk.dynamic_graphics_state;
 
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9 + 4 * NVK_MAX_RTS);
+
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE))
       P_IMMD(p, NV9097, SET_LOGIC_OP, dyn->cb.logic_op_enable);
 
@@ -1535,7 +1593,25 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd)
       P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func);
    }
 
-   /* MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES */
+   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
+      /* We intentionally ignore cb.attachment_count here and just fill out
+       * whatever is in the mask.  This ensures that what we set to the MME
+       * scratch reg exactly matches the CPU side state.
+       *
+       * If attachment count is wrong (or changes), that will show up in the
+       * pipeline and the MME_SET_WRITE_MASK will be invoked again with the
+       * correct write mask.
+       */
+      uint32_t color_write_enables = 0x0;
+      u_foreach_bit(a, dyn->cb.color_write_enables)
+         color_write_enables |= 0xf << (4 * a);
+
+      P_IMMD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_DYN),
+             color_write_enables);
+
+      P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+      P_INLINE_DATA(p, dyn->cb.attachment_count);
+   }
 
    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
       P_MTHD(p, NV9097, SET_BLEND_CONST_RED);
diff --git a/src/nouveau/vulkan/nvk_graphics_pipeline.c 
b/src/nouveau/vulkan/nvk_graphics_pipeline.c
index 809fa8444df..f8ca5f41ec3 100644
--- a/src/nouveau/vulkan/nvk_graphics_pipeline.c
+++ b/src/nouveau/vulkan/nvk_graphics_pipeline.c
@@ -6,6 +6,7 @@
 
 #include "nvk_cmd_buffer.h"
 #include "nvk_device.h"
+#include "nvk_mme.h"
 #include "nvk_physical_device.h"
 #include "nvk_shader.h"
 
@@ -172,37 +173,31 @@ emit_pipeline_ct_write_state(struct nv_push *p,
                              const struct vk_color_blend_state *cb,
                              const struct vk_render_pass_state *rp)
 {
-   uint32_t att_write_masks[8] = {};
+   uint32_t write_mask = 0;
    uint32_t att_count = 0;
 
    if (rp != NULL) {
       att_count = rp->color_attachment_count;
       for (uint32_t a = 0; a < rp->color_attachment_count; a++) {
          VkFormat att_format = rp->color_attachment_formats[a];
-         att_write_masks[a] = att_format == VK_FORMAT_UNDEFINED ? 0 : 0xf;
+         if (att_format != VK_FORMAT_UNDEFINED)
+            write_mask |= 0xf << (4 * a);
       }
    }
 
    if (cb != NULL) {
       assert(cb->attachment_count == att_count);
+      uint32_t wm = 0;
       for (uint32_t a = 0; a < cb->attachment_count; a++)
-         att_write_masks[a] &= cb->attachments[a].write_mask;
+         wm |= cb->attachments[a].write_mask << (a * 4);
+      write_mask &= wm;
    }
 
-   bool indep_color_masks = true;
-   for (uint32_t a = 0; a < att_count; a++) {
-      P_IMMD(p, NV9097, SET_CT_WRITE(a), {
-         .r_enable = (att_write_masks[a] & BITFIELD_BIT(0)) != 0,
-         .g_enable = (att_write_masks[a] & BITFIELD_BIT(1)) != 0,
-         .b_enable = (att_write_masks[a] & BITFIELD_BIT(2)) != 0,
-         .a_enable = (att_write_masks[a] & BITFIELD_BIT(3)) != 0,
-      });
-
-      if (att_write_masks[a] != att_write_masks[0])
-         indep_color_masks = false;
-   }
+   P_IMMD(p, NV9097, 
SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_WRITE_MASK_PIPELINE),
+          write_mask);
 
-   P_IMMD(p, NV9097, SET_SINGLE_CT_WRITE_CONTROL, indep_color_masks);
+   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_WRITE_MASK));
+   P_INLINE_DATA(p, att_count);
 }
 
 static void
diff --git a/src/nouveau/vulkan/nvk_mme.c b/src/nouveau/vulkan/nvk_mme.c
index 8e733dafcee..ee3e9ccf908 100644
--- a/src/nouveau/vulkan/nvk_mme.c
+++ b/src/nouveau/vulkan/nvk_mme.c
@@ -21,6 +21,7 @@ static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] 
= {
    [NVK_MME_XFB_COUNTER_LOAD]            = nvk_mme_xfb_counter_load,
    [NVK_MME_XFB_DRAW_INDIRECT]           = nvk_mme_xfb_draw_indirect,
    [NVK_MME_SET_PRIV_REG]                = nvk_mme_set_priv_reg,
+   [NVK_MME_SET_WRITE_MASK]              = nvk_mme_set_write_mask,
 };
 
 uint32_t *
diff --git a/src/nouveau/vulkan/nvk_mme.h b/src/nouveau/vulkan/nvk_mme.h
index 9fcfc5b06ae..d73e6925e29 100644
--- a/src/nouveau/vulkan/nvk_mme.h
+++ b/src/nouveau/vulkan/nvk_mme.h
@@ -24,6 +24,7 @@ enum nvk_mme {
    NVK_MME_XFB_COUNTER_LOAD,
    NVK_MME_XFB_DRAW_INDIRECT,
    NVK_MME_SET_PRIV_REG,
+   NVK_MME_SET_WRITE_MASK,
    NVK_MME_COUNT,
 };
 
@@ -35,6 +36,8 @@ enum nvk_mme_scratch {
    NVK_MME_SCRATCH_DRAW_PAD_DW,
    NVK_MME_SCRATCH_DRAW_IDX,
    NVK_MME_SCRATCH_VIEW_MASK,
+   NVK_MME_SCRATCH_WRITE_MASK_DYN,
+   NVK_MME_SCRATCH_WRITE_MASK_PIPELINE,
 
    /* Must be at the end */
    NVK_MME_NUM_SCRATCH,
@@ -122,5 +125,6 @@ void nvk_mme_copy_queries(struct mme_builder *b);
 void nvk_mme_xfb_counter_load(struct mme_builder *b);
 void nvk_mme_xfb_draw_indirect(struct mme_builder *b);
 void nvk_mme_set_priv_reg(struct mme_builder *b);
+void nvk_mme_set_write_mask(struct mme_builder *b);
 
 #endif /* NVK_MME_H */
diff --git a/src/nouveau/vulkan/nvk_physical_device.c 
b/src/nouveau/vulkan/nvk_physical_device.c
index c3501f6484b..739e8137237 100644
--- a/src/nouveau/vulkan/nvk_physical_device.c
+++ b/src/nouveau/vulkan/nvk_physical_device.c
@@ -139,6 +139,7 @@ nvk_get_device_extensions(const struct nv_device_info *info,
       .EXT_border_color_swizzle = true,
       .EXT_buffer_device_address = true,
       .EXT_conditional_rendering = true,
+      .EXT_color_write_enable = true,
       .EXT_custom_border_color = true,
       .EXT_depth_bias_control = true,
       .EXT_depth_clip_control = true,
@@ -348,6 +349,9 @@ nvk_get_device_features(const struct nv_device_info *info,
       /* VK_EXT_buffer_device_address */
       .bufferDeviceAddressCaptureReplayEXT = true,
 
+      /* VK_EXT_color_write_enable */
+      .colorWriteEnable = true,
+
       /* VK_EXT_conditional_rendering */
       .conditionalRendering = true,
       .inheritedConditionalRendering = true,

Reply via email to