Module: Mesa
Branch: master
Commit: 61a1a385d3034be659b2a1ba5956f69b7300c60e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=61a1a385d3034be659b2a1ba5956f69b7300c60e

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Tue Apr 13 11:21:36 2021 +0200

radv: Re-enable retiling.

Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10037>

---

 src/amd/vulkan/radv_cmd_buffer.c      |  14 +++-
 src/amd/vulkan/radv_meta.c            |   8 --
 src/amd/vulkan/radv_meta.h            |   1 -
 src/amd/vulkan/radv_meta_dcc_retile.c | 148 +++++++++++++++++++++-------------
 4 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f7933c85ef6..3ec09e33533 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -5983,6 +5983,14 @@ radv_retile_transition(struct radv_cmd_buffer 
*cmd_buffer, struct radv_image *im
         (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
       radv_retile_dcc(cmd_buffer, image);
 }
+
+static bool
+radv_image_need_retile(const struct radv_image *image)
+{
+   return image->planes[0].surface.display_dcc_offset &&
+          image->planes[0].surface.display_dcc_offset != 
image->planes[0].surface.meta_offset;
+}
+
 /**
  * Handle color image transitions for DCC/FMASK/CMASK.
  */
@@ -6003,7 +6011,7 @@ radv_handle_color_image_transition(struct radv_cmd_buffer 
*cmd_buffer, struct ra
       radv_init_color_image_metadata(cmd_buffer, image, src_layout, 
src_render_loop, dst_layout,
                                      dst_render_loop, src_queue_mask, 
dst_queue_mask, range);
 
-      if (0)
+      if (radv_image_need_retile(image))
          radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, 
dst_queue_mask);
       return;
    }
@@ -6025,8 +6033,8 @@ radv_handle_color_image_transition(struct radv_cmd_buffer 
*cmd_buffer, struct ra
          fast_clear_flushed = true;
       }
 
-      /*if (image->retile_map)
-         radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, 
dst_queue_mask);*/
+      if (radv_image_need_retile(image))
+         radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, 
dst_queue_mask);
    } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
       if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, 
src_render_loop,
                                      src_queue_mask) &&
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index a38c7911601..28a266f0824 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -460,16 +460,8 @@ radv_device_init_meta(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto fail_fmask_expand;
 
-   if (!on_demand) {
-      result = radv_device_init_meta_dcc_retile_state(device);
-      if (result != VK_SUCCESS)
-         goto fail_dcc_retile;
-   }
-
    return VK_SUCCESS;
 
-fail_dcc_retile:
-   radv_device_finish_meta_fmask_expand_state(device);
 fail_fmask_expand:
    radv_device_finish_meta_resolve_fragment_state(device);
 fail_resolve_fragment:
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index f20e0b07a02..0e351702470 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -125,7 +125,6 @@ void radv_device_finish_meta_resolve_fragment_state(struct 
radv_device *device);
 VkResult radv_device_init_meta_fmask_expand_state(struct radv_device *device);
 void radv_device_finish_meta_fmask_expand_state(struct radv_device *device);
 
-VkResult radv_device_init_meta_dcc_retile_state(struct radv_device *device);
 void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
 
 void radv_meta_save(struct radv_meta_saved_state *saved_state, struct 
radv_cmd_buffer *cmd_buffer,
diff --git a/src/amd/vulkan/radv_meta_dcc_retile.c 
b/src/amd/vulkan/radv_meta_dcc_retile.c
index 92d76fce4bf..816fc7f36ef 100644
--- a/src/amd/vulkan/radv_meta_dcc_retile.c
+++ b/src/amd/vulkan/radv_meta_dcc_retile.c
@@ -21,51 +21,69 @@
  * IN THE SOFTWARE.
  */
 
+#define AC_SURFACE_INCLUDE_NIR
+#include "ac_surface.h"
+
 #include "radv_meta.h"
 #include "radv_private.h"
 
+static nir_ssa_def *
+get_global_ids(nir_builder *b, unsigned num_components)
+{
+   unsigned mask = BITFIELD_MASK(num_components);
+
+   nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), 
mask);
+   nir_ssa_def *block_ids = nir_channels(b, nir_load_work_group_id(b, 32), 
mask);
+   nir_ssa_def *block_size = nir_channels(
+      b,
+      nir_imm_ivec4(b, b->shader->info.cs.local_size[0], 
b->shader->info.cs.local_size[1],
+                    b->shader->info.cs.local_size[2], 0),
+      mask);
+
+   return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
+}
+
 static nir_shader *
-build_dcc_retile_compute_shader(struct radv_device *dev)
+build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf 
*surf)
 {
    const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, 
false, GLSL_TYPE_UINT);
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, 
"dcc_retile_compute");
 
-   b.shader->info.cs.local_size[0] = 256;
-   b.shader->info.cs.local_size[1] = 1;
+   b.shader->info.cs.local_size[0] = 8;
+   b.shader->info.cs.local_size[1] = 8;
    b.shader->info.cs.local_size[2] = 1;
 
-   nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform, 
buf_type, "indices_in");
-   indices->data.descriptor_set = 0;
-   indices->data.binding = 0;
+   nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, 
nir_imm_int(&b, 0), .range = 8);
+   nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
+   nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
+
+   nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, 
nir_imm_int(&b, 8), .range = 8);
+   nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
+   nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
    nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, 
buf_type, "dcc_in");
    input_dcc->data.descriptor_set = 0;
-   input_dcc->data.binding = 1;
+   input_dcc->data.binding = 0;
    nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, 
buf_type, "dcc_out");
    output_dcc->data.descriptor_set = 0;
-   output_dcc->data.binding = 2;
+   output_dcc->data.binding = 1;
 
-   nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
    nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
    nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, 
output_dcc)->dest.ssa;
 
-   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-   nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
-   nir_ssa_def *block_size = nir_imm_ivec4(&b, 
b.shader->info.cs.local_size[0], 0, 0, 0);
-
-   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), 
invoc_id);
-
-   nir_intrinsic_instr *index_vals =
-      nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
-   index_vals->num_components = 2;
-   index_vals->src[0] = nir_src_for_ssa(indices_ref);
-   index_vals->src[1] = nir_src_for_ssa(global_id);
-   index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
-   index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
-   nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
-   nir_builder_instr_insert(&b, &index_vals->instr);
-
-   nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
-   nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
+   nir_ssa_def *coord = get_global_ids(&b, 2);
+   nir_ssa_def *zero = nir_imm_int(&b, 0);
+   coord = nir_imul(
+      &b, coord,
+      nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, 
surf->u.gfx9.color.dcc_block_height));
+
+   nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, 
&dev->physical_device->rad_info, surf->bpe,
+                                                 
&surf->u.gfx9.color.dcc_equation, src_dcc_pitch,
+                                                 src_dcc_height, zero, 
nir_channel(&b, coord, 0),
+                                                 nir_channel(&b, coord, 1), 
zero, zero, zero);
+   nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
+      &b, &dev->physical_device->rad_info, surf->bpe, 
&surf->u.gfx9.color.display_dcc_equation,
+      dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), 
nir_channel(&b, coord, 1),
+      zero, zero, zero);
 
    nir_intrinsic_instr *dcc_val =
       nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
@@ -105,16 +123,26 @@ radv_device_finish_meta_dcc_retile_state(struct 
radv_device *device)
    memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
 }
 
-VkResult
-radv_device_init_meta_dcc_retile_state(struct radv_device *device)
+/*
+ * This takes a surface, but the only things used are:
+ * - BPE
+ * - DCC equations
+ * - DCC block size
+ *
+ * BPE is always 4 at the moment and the rest is derived from the tilemode,
+ * and ac_surface limits displayable DCC to at most 1 tiling mode. So in effect
+ * this shader is independent of the surface.
+ */
+static VkResult
+radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct 
radeon_surf *surf)
 {
    VkResult result = VK_SUCCESS;
-   nir_shader *cs = build_dcc_retile_compute_shader(device);
+   nir_shader *cs = build_dcc_retile_compute_shader(device, surf);
 
    VkDescriptorSetLayoutCreateInfo ds_create_info = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
-      .bindingCount = 3,
+      .bindingCount = 2,
       .pBindings = (VkDescriptorSetLayoutBinding[]){
          {.binding = 0,
           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
@@ -126,11 +154,6 @@ radv_device_init_meta_dcc_retile_state(struct radv_device 
*device)
           .descriptorCount = 1,
           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
           .pImmutableSamplers = NULL},
-         {.binding = 2,
-          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
-          .descriptorCount = 1,
-          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-          .pImmutableSamplers = NULL},
       }};
 
    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), 
&ds_create_info,
@@ -143,7 +166,8 @@ radv_device_init_meta_dcc_retile_state(struct radv_device 
*device)
       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
       .setLayoutCount = 1,
       .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
-      .pushConstantRangeCount = 0,
+      .pushConstantRangeCount = 1,
+      .pPushConstantRanges = 
&(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
    };
 
    result =
@@ -198,24 +222,26 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, 
struct radv_image *image)
 
    /* Compile pipelines if not already done so. */
    if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
-      VkResult ret = 
radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
+      VkResult ret =
+         radv_device_init_meta_dcc_retile_state(cmd_buffer->device, 
&image->planes[0].surface);
       if (ret != VK_SUCCESS) {
          cmd_buffer->record_result = ret;
          return;
       }
    }
 
-   radv_meta_save(&saved_state, cmd_buffer,
-                  RADV_META_SAVE_DESCRIPTORS | 
RADV_META_SAVE_COMPUTE_PIPELINE);
+   radv_meta_save(
+      &saved_state, cmd_buffer,
+      RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | 
RADV_META_SAVE_CONSTANTS);
 
    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 
VK_PIPELINE_BIND_POINT_COMPUTE,
                         device->meta_state.dcc_retile.pipeline);
 
    struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset 
= image->offset};
 
-   struct radv_buffer_view views[3];
-   VkBufferView view_handles[3];
-   radv_buffer_view_init(views + 1, cmd_buffer->device,
+   struct radv_buffer_view views[2];
+   VkBufferView view_handles[2];
+   radv_buffer_view_init(views, cmd_buffer->device,
                          &(VkBufferViewCreateInfo){
                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                             .buffer = radv_buffer_to_handle(&buffer),
@@ -223,7 +249,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct 
radv_image *image)
                             .range = image->planes[0].surface.meta_size,
                             .format = VK_FORMAT_R8_UINT,
                          });
-   radv_buffer_view_init(views + 2, cmd_buffer->device,
+   radv_buffer_view_init(views + 1, cmd_buffer->device,
                          &(VkBufferViewCreateInfo){
                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                             .buffer = radv_buffer_to_handle(&buffer),
@@ -231,12 +257,12 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, 
struct radv_image *image)
                             .range = 
image->planes[0].surface.u.gfx9.color.display_dcc_size,
                             .format = VK_FORMAT_R8_UINT,
                          });
-   for (unsigned i = 0; i < 3; ++i)
+   for (unsigned i = 0; i < 2; ++i)
       view_handles[i] = radv_buffer_view_to_handle(&views[i]);
 
    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                  device->meta_state.dcc_retile.p_layout, 0, /* 
set */
-                                 3, /* descriptorWriteCount */
+                                 2, /* descriptorWriteCount */
                                  (VkWriteDescriptorSet[]){
                                     {
                                        .sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
@@ -254,20 +280,26 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, 
struct radv_image *image)
                                        .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                        .pTexelBufferView = &view_handles[1],
                                     },
-                                    {
-                                       .sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-                                       .dstBinding = 2,
-                                       .dstArrayElement = 0,
-                                       .descriptorCount = 1,
-                                       .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
-                                       .pTexelBufferView = &view_handles[2],
-                                    },
                                  });
 
-   /* src+dst pairs count double, so the number of DCC bytes we move is
-    * actually half of dcc_retile_num_elements. */
-   /*radv_unaligned_dispatch(cmd_buffer, 
image->planes[0].surface.u.gfx9.color.dcc_retile_num_elements / 2,
-                           1, 1);*/
+   unsigned width = DIV_ROUND_UP(image->info.width, 
vk_format_get_blockwidth(image->vk_format));
+   unsigned height = DIV_ROUND_UP(image->info.height, 
vk_format_get_blockheight(image->vk_format));
+
+   unsigned dcc_width = DIV_ROUND_UP(width, 
image->planes[0].surface.u.gfx9.color.dcc_block_width);
+   unsigned dcc_height =
+      DIV_ROUND_UP(height, 
image->planes[0].surface.u.gfx9.color.dcc_block_height);
+
+   uint32_t constants[] = {
+      image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,
+      image->planes[0].surface.u.gfx9.color.dcc_height,
+      image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,
+      image->planes[0].surface.u.gfx9.color.display_dcc_height,
+   };
+   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+                         device->meta_state.dcc_retile.p_layout, 
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+                         constants);
+
+   radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);
 
    radv_meta_restore(&saved_state, cmd_buffer);
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to