Module: Mesa
Branch: main
Commit: bd3f2567cc017f4bb87b418ec862f03d0862f95b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd3f2567cc017f4bb87b418ec862f03d0862f95b

Author: Timur Kristóf <timur.kris...@gmail.com>
Date:   Sat Dec  9 22:42:46 2023 +0100

radv: Implement T2T scanline copy workaround.

The built-in tiled-to-tiled copy packet doesn't support copying
between images that don't meet certain criteria, such as alignment,
micro tile mode, compression state, etc.

To work around this, we copy the image piece by piece to a
temporary buffer that we know is supported,
and then copy it to the intended destination.

The implementation assumes that at least one pixel row of the
image fits into the temporary buffer, and will try to copy as
many rows as fit.

Signed-off-by: Timur Kristóf <timur.kris...@gmail.com>
Reviewed-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913>

---

 src/amd/vulkan/meta/radv_meta_copy.c |   9 ++-
 src/amd/vulkan/radv_sdma.c           | 132 +++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_sdma.h           |  22 ++++--
 3 files changed, 155 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c
index 3fe1151a646..cc70b615b4b 100644
--- a/src/amd/vulkan/meta/radv_meta_copy.c
+++ b/src/amd/vulkan/meta/radv_meta_copy.c
@@ -406,7 +406,14 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, 
struct radv_image *src_i
          radv_sdma_get_surf(device, dst_image, region->dstSubresource, 
region->dstOffset, dst_aspect_mask);
       const VkExtent3D extent = radv_sdma_get_copy_extent(src_image, 
region->srcSubresource, region->extent);
 
-      radv_sdma_copy_image(device, cs, &src, &dst, extent);
+      if (radv_sdma_use_t2t_scanline_copy(device, &src, &dst, extent)) {
+         if (!alloc_transfer_temp_bo(cmd_buffer))
+            return;
+
+         radv_sdma_copy_image_t2t_scanline(device, cs, &src, &dst, extent, 
cmd_buffer->transfer.copy_temp);
+      } else {
+         radv_sdma_copy_image(device, cs, &src, &dst, extent);
+      }
    }
 }
 
diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c
index b0f86f618f0..c086104d673 100644
--- a/src/amd/vulkan/radv_sdma.c
+++ b/src/amd/vulkan/radv_sdma.c
@@ -37,6 +37,22 @@ struct radv_sdma_chunked_copy_info {
    unsigned num_rows_per_copy;
 };
 
+static const VkExtent3D radv_sdma_t2t_alignment_2d_and_planar[] = {
+   {16, 16, 1}, /* 1 bpp  (index 0; the table is indexed by log2 of the surface bpp) */
+   {16, 8, 1},  /* 2 bpp  (index 1) */
+   {8, 8, 1},   /* 4 bpp  (index 2) */
+   {8, 4, 1},   /* 8 bpp  (index 3) */
+   {4, 4, 1},   /* 16 bpp (index 4; values are compared against block-unit extents/offsets) */
+};
+
+static const VkExtent3D radv_sdma_t2t_alignment_3d[] = {
+   {8, 4, 8}, /* 1 bpp  (index 0; the table is indexed by log2 of the surface bpp) */
+   {4, 4, 8}, /* 2 bpp  (index 1) */
+   {4, 4, 4}, /* 4 bpp  (index 2) */
+   {4, 2, 4}, /* 8 bpp  (index 3) */
+   {2, 2, 4}, /* 16 bpp (index 4; values are compared against block-unit extents/offsets) */
+};
+
 ALWAYS_INLINE static unsigned
 radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp)
 {
@@ -265,7 +281,10 @@ radv_sdma_get_surf(const struct radv_device *const device, 
const struct radv_ima
       .bpp = surf->bpe,
       .blk_w = surf->blk_w,
       .blk_h = surf->blk_h,
+      .mip_levels = image->vk.mip_levels,
+      .micro_tile_mode = surf->micro_tile_mode,
       .is_linear = surf->is_linear,
+      .is_3d = surf->u.gfx9.resource_type == RADEON_RESOURCE_3D,
    };
 
    if (surf->is_linear) {
@@ -664,3 +683,116 @@ radv_sdma_copy_image(const struct radv_device *device, 
struct radeon_cmdbuf *cs,
       }
    }
 }
+/**
+ * Decides whether a copy between two images must take the T2T scanline
+ * workaround (returns true) instead of the regular image copy (returns false).
+ *
+ * Returns false if either surface is linear. Otherwise returns true when any
+ * of the following holds:
+ *  - the SDMA IP version is older than SDMA_5_0 and either image has more
+ *    than one mip level;
+ *  - src and dst have different micro tile modes;
+ *  - both src and dst have a non-zero meta_va;
+ *  - the copy extent, the src offset or the dst offset (each converted to
+ *    blocks) is not a multiple of the required alignment.
+ *
+ * The required alignment is looked up by log2(src->bpp). The 3D table is used
+ * only when src is 3D and its micro tile mode is DISPLAY or STANDARD; the
+ * 2D/planar table is used otherwise.
+ * NOTE(review): dst->is_3d is never consulted here; confirm that this is
+ * intended for copies where only one of the images is 3D.
+ *
+ * src->bpp and dst->bpp are asserted to be equal for non-linear surfaces.
+ */
+bool
+radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct 
radv_sdma_surf *src,
+                                const struct radv_sdma_surf *dst, const 
VkExtent3D extent)
+{
+   /* These need a linear-to-linear / linear-to-tiled copy. */
+   if (src->is_linear || dst->is_linear)
+      return false;
+
+   /* SDMA can't do format conversion. */
+   assert(src->bpp == dst->bpp);
+
+   const enum sdma_version ver = 
device->physical_device->rad_info.sdma_ip_version;
+   if (ver < SDMA_5_0) {
+      /* SDMA v4.x and older doesn't support proper mip level selection. */
+      if (src->mip_levels > 1 || dst->mip_levels > 1)
+         return true;
+   }
+
+   /* The two images can have a different block size,
+    * but must have the same swizzle mode.
+    * NOTE(review): the field compared below is micro_tile_mode; confirm that
+    * this is what "swizzle mode" refers to here.
+    */
+   if (src->micro_tile_mode != dst->micro_tile_mode)
+      return true;
+
+   /* The T2T subwindow copy packet only has fields for one metadata configuration.
+    * It can either compress or decompress, or copy uncompressed images, but it
+    * can't copy from a compressed image to another.
+    */
+   if (src->meta_va && dst->meta_va)
+      return true;
+
+   const bool needs_3d_alignment = src->is_3d && (src->micro_tile_mode == 
RADEON_MICRO_MODE_DISPLAY ||
+                                                  src->micro_tile_mode == 
RADEON_MICRO_MODE_STANDARD);
+   const unsigned log2bpp = util_logbase2(src->bpp);
+   const VkExtent3D *const alignment =
+      needs_3d_alignment ? &radv_sdma_t2t_alignment_3d[log2bpp] : 
&radv_sdma_t2t_alignment_2d_and_planar[log2bpp];
+
+   const VkExtent3D copy_extent_blk = radv_sdma_pixel_extent_to_blocks(extent, 
src->blk_w, src->blk_h);
+   const VkOffset3D src_offset_blk = 
radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h);
+   const VkOffset3D dst_offset_blk = 
radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h);
+
+   if (!radv_is_aligned(copy_extent_blk.width, alignment->width) ||
+       !radv_is_aligned(copy_extent_blk.height, alignment->height) ||
+       !radv_is_aligned(copy_extent_blk.depth, alignment->depth))
+      return true;
+
+   if (!radv_is_aligned(src_offset_blk.x, alignment->width) || 
!radv_is_aligned(src_offset_blk.y, alignment->height) ||
+       !radv_is_aligned(src_offset_blk.z, alignment->depth))
+      return true;
+
+   if (!radv_is_aligned(dst_offset_blk.x, alignment->width) || 
!radv_is_aligned(dst_offset_blk.y, alignment->height) ||
+       !radv_is_aligned(dst_offset_blk.z, alignment->depth))
+      return true;
+
+   return false;
+}
+/**
+ * Copies between two images by bouncing the data through temp_bo.
+ *
+ * For every depth slice of the copy, the rows are processed in chunks of at
+ * most info.num_rows_per_copy block rows. Each chunk is emitted as two tiled
+ * sub-window copies, each followed by a NOP: first from src into the
+ * temporary buffer, then from the temporary buffer into dst.
+ *
+ * The chunking information (row pitch, rows per copy, extent in blocks) is
+ * computed from src and extent only. Per chunk only offset.y and offset.z of
+ * the image surfaces are advanced; offset.x is taken unchanged from src/dst.
+ * The temporary surface always starts at temp_bo->va; its size is not
+ * checked in this function.
+ *
+ * NOTE(review): the NOP after each copy presumably orders the write to the
+ * temporary buffer against the following read - confirm.
+ */
+void
+radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct 
radeon_cmdbuf *cs,
+                                  const struct radv_sdma_surf *src, const 
struct radv_sdma_surf *dst,
+                                  const VkExtent3D extent, struct 
radeon_winsys_bo *temp_bo)
+{
+   const struct radv_sdma_chunked_copy_info info = 
radv_sdma_get_chunked_copy_info(device, src, extent);
+   struct radv_sdma_surf t2l_src = *src;
+   struct radv_sdma_surf t2l_dst = {
+      .va = temp_bo->va,
+      .bpp = src->bpp,
+      .blk_w = src->blk_w,
+      .blk_h = src->blk_h,
+      .pitch = info.aligned_row_pitch * src->blk_w,
+   };
+   struct radv_sdma_surf l2t_dst = *dst;
+   struct radv_sdma_surf l2t_src = {
+      .va = temp_bo->va,
+      .bpp = dst->bpp,
+      .blk_w = dst->blk_w,
+      .blk_h = dst->blk_h,
+      .pitch = info.aligned_row_pitch * dst->blk_w,
+   };
+
+   for (unsigned slice = 0; slice < extent.depth; ++slice) {
+      for (unsigned row = 0; row < info.extent_vertical_blocks; row += 
info.num_rows_per_copy) {
+         const unsigned rows = MIN2(info.extent_vertical_blocks - row, 
info.num_rows_per_copy);
+         /* Step 1: copy this chunk of rows from the source image into the temporary buffer. */
+         const VkExtent3D t2l_extent = {
+            .width = info.extent_horizontal_blocks * src->blk_w,
+            .height = rows * src->blk_h,
+            .depth = 1,
+         };
+
+         t2l_src.offset.y = src->offset.y + row * src->blk_h;
+         t2l_src.offset.z = src->offset.z + slice;
+         t2l_dst.slice_pitch = t2l_dst.pitch * t2l_extent.height;
+
+         radv_sdma_emit_copy_tiled_sub_window(device, cs, &t2l_src, &t2l_dst, 
t2l_extent, true);
+         radv_sdma_emit_nop(device, cs);
+         /* Step 2: copy the same chunk from the temporary buffer into the destination image. */
+         const VkExtent3D l2t_extent = {
+            .width = info.extent_horizontal_blocks * dst->blk_w,
+            .height = rows * dst->blk_h,
+            .depth = 1,
+         };
+
+         l2t_dst.offset.y = dst->offset.y + row * dst->blk_h;
+         l2t_dst.offset.z = dst->offset.z + slice;
+         l2t_src.slice_pitch = l2t_src.pitch * l2t_extent.height;
+
+         radv_sdma_emit_copy_tiled_sub_window(device, cs, &l2t_dst, &l2t_src, 
l2t_extent, false);
+         radv_sdma_emit_nop(device, cs);
+      }
+   }
+}
diff --git a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h
index e089618b407..bcc95919c97 100644
--- a/src/amd/vulkan/radv_sdma.h
+++ b/src/amd/vulkan/radv_sdma.h
@@ -31,13 +31,16 @@ extern "C" {
 #endif
 
 struct radv_sdma_surf {
-   VkExtent3D extent; /* Image extent. */
-   VkOffset3D offset; /* Image offset. */
-   uint64_t va;       /* Virtual address of image data. */
-   unsigned bpp;      /* Bytes per pixel. */
-   unsigned blk_w;    /* Image format block width in pixels. */
-   unsigned blk_h;    /* Image format block height in pixels. */
-   bool is_linear;    /* Whether the image is linear. */
+   VkExtent3D extent;       /* Image extent. */
+   VkOffset3D offset;       /* Image offset. */
+   uint64_t va;             /* Virtual address of image data. */
+   unsigned bpp;            /* Bytes per pixel. */
+   unsigned blk_w;          /* Image format block width in pixels. */
+   unsigned blk_h;          /* Image format block height in pixels. */
+   unsigned mip_levels;     /* Mip levels in the image. */
+   uint8_t micro_tile_mode; /* Micro tile mode of the image. */
+   bool is_linear;          /* Whether the image is linear. */
+   bool is_3d;              /* Whether the image is 3-dimensional. */
 
    union {
       /* linear images only */
@@ -83,6 +86,11 @@ void radv_sdma_copy_buffer_image_unaligned(const struct 
radv_device *device, str
                                            bool to_image);
 void radv_sdma_copy_image(const struct radv_device *device, struct 
radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
                           const struct radv_sdma_surf *dst, const VkExtent3D 
extent);
+bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const 
struct radv_sdma_surf *src,
+                                     const struct radv_sdma_surf *dst, const 
VkExtent3D extent);
+void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, 
struct radeon_cmdbuf *cs,
+                                       const struct radv_sdma_surf *src, const 
struct radv_sdma_surf *dst,
+                                       const VkExtent3D extent, struct 
radeon_winsys_bo *temp_bo);
 void radv_sdma_copy_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
                            uint64_t size);
 void radv_sdma_fill_buffer(const struct radv_device *device, struct 
radeon_cmdbuf *cs, const uint64_t va,

Reply via email to