Module: Mesa
Branch: main
Commit: d8f3060bd915e6ba6cc01086978d126e70bfea92
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8f3060bd915e6ba6cc01086978d126e70bfea92

Author: Dave Airlie <[email protected]>
Date:   Thu Mar 16 13:24:45 2023 +1000

radv/video: start adding gfx11 vcn decoder

On gfx11 the vcn decoder moved into the vcn encoder ring,
now known as the unified vcn ring.

Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21980>

---

 src/amd/vulkan/radv_private.h |   4 ++
 src/amd/vulkan/radv_video.c   | 150 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 147 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index dd0d0f39365..e8a0d29461f 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -79,6 +79,7 @@
 #include "ac_spm.h"
 #include "ac_sqtt.h"
 #include "ac_surface.h"
+#include "ac_vcn.h"
 #include "radv_constants.h"
 #include "radv_descriptor_set.h"
 #include "radv_radeon_winsys.h"
@@ -248,6 +249,7 @@ radv_float_to_ufixed(float value, unsigned frac_bits)
 
 struct radv_image_view;
 struct radv_instance;
+struct rvcn_decode_buffer_s;
 
 /* A non-fatal assert.  Useful for debugging. */
 #ifdef NDEBUG
@@ -1808,6 +1810,8 @@ struct radv_cmd_buffer {
    struct {
       struct radv_video_session *vid;
       struct radv_video_session_params *params;
+      struct rvcn_sq_var sq;
+      struct rvcn_decode_buffer_s *decode_buffer;
    } video;
 
    uint64_t shader_upload_seq;
diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c
index fbed8bf64ef..e99d79bb19f 100644
--- a/src/amd/vulkan/radv_video.c
+++ b/src/amd/vulkan/radv_video.c
@@ -50,6 +50,50 @@ radv_vid_buffer_upload_alloc(struct radv_cmd_buffer 
*cmd_buffer, unsigned size,
                                                out_offset, ptr);
 }
 
+/* vcn unified queue (sq) ib header: emits the signature and engine-info dwords and records in sq the two slots radv_vcn_sq_tail() fills in */
+static void
+radv_vcn_sq_header(struct radeon_cmdbuf *cs,
+                   struct rvcn_sq_var *sq,
+                   bool enc)
+{
+   /* vcn ib signature */
+   radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
+   radeon_emit(cs, RADEON_VCN_SIGNATURE);
+   sq->ib_checksum = &cs->buf[cs->cdw]; /* NOTE(review): raw pointer into cs->buf; assumes buf is not reallocated before radv_vcn_sq_tail() runs - confirm */
+   radeon_emit(cs, 0); /* 0 for now; radv_vcn_sq_tail() writes the checksum through sq->ib_checksum */
+   sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
+   radeon_emit(cs, 0); /* 0 for now; radv_vcn_sq_tail() writes the dword count through sq->ib_total_size_in_dw */
+
+   /* vcn ib engine info */
+   radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
+   radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
+   radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE
+                       : RADEON_VCN_ENGINE_TYPE_DECODE);
+   radeon_emit(cs, 0); /* presumably the dword radv_vcn_sq_tail() patches at ib_total_size_in_dw + 4 with a byte size - verify radeon_emit advances cdw by one */
+}
+/* Finish a unified-queue ib: back-patch the size fields and checksum through the slot pointers stored in sq. */
+static void
+radv_vcn_sq_tail(struct radeon_cmdbuf *cs,
+                 struct rvcn_sq_var *sq)
+{
+   uint32_t *end;
+   uint32_t size_in_dw;
+   uint32_t checksum = 0;
+
+   if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL)
+      return; /* no slots recorded in sq, nothing to patch */
+
+   end = &cs->buf[cs->cdw];
+   size_in_dw = end - sq->ib_total_size_in_dw - 1; /* dwords between the total-size slot (exclusive) and the current cs->cdw position */
+   *sq->ib_total_size_in_dw = size_in_dw;
+   *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t); /* same count expressed in bytes, stored 4 dwords past the total-size slot */
+
+   for (int i = 0; i < size_in_dw; i++)
+      checksum += *(sq->ib_checksum + 2 + i); /* plain 32-bit sum, starting 2 dwords past the checksum slot */
+
+   *sq->ib_checksum = checksum;
+}
+
 /* generate an stream handle */
 static unsigned si_vid_alloc_stream_handle()
 {
@@ -68,7 +112,10 @@ static unsigned si_vid_alloc_stream_handle()
 void
 radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
 {
-   if (radv_has_uvd(pdevice))
+   if (pdevice->rad_info.family >= CHIP_GFX1100 ||
+       pdevice->rad_info.family == CHIP_GFX940)
+      pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
+   else if (radv_has_uvd(pdevice))
       pdevice->vid_decode_ip = AMD_IP_UVD;
    else
       pdevice->vid_decode_ip = AMD_IP_VCN_DEC;
@@ -114,6 +161,16 @@ radv_init_physical_device_decoder(struct 
radv_physical_device *pdevice)
       pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
       pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
       break;
+   case CHIP_GFX940:
+      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
+      break;
+   case CHIP_GFX1100:
+   case CHIP_GFX1101:
+   case CHIP_GFX1102:
+   case CHIP_GFX1103_R1:
+   case CHIP_GFX1103_R2:
+      pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
+      break;
    default:
       if (radv_has_uvd(pdevice)) {
          pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
@@ -560,9 +617,62 @@ static void send_cmd(struct radv_cmd_buffer *cmd_buffer, 
unsigned cmd,
    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
    addr = radv_buffer_get_va(bo);
    addr += offset;
-   set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
-   set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
-   set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != 
AMD_IP_VCN_UNIFIED) {
+      set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
+      set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
+      set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
+      return;
+   }
+   switch(cmd) {
+   case RDECODE_CMD_MSG_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
+      cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_DPB_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
+      cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_DECODING_TARGET_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
+      cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
+      cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_FEEDBACK_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
+      cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 
32);
+      cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_PROB_TBL_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
+      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 
32);
+      cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
+      cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = 
(addr >> 32);
+      cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = 
(addr);
+      break;
+   case RDECODE_CMD_BITSTREAM_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
+      cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 
32);
+      cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
+      break;
+   case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
+      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr 
>> 32);
+      cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = 
(addr);
+      break;
+   case RDECODE_CMD_CONTEXT_BUFFER:
+      cmd_buffer->video.decode_buffer->valid_buf_flag |= 
(RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
+      cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 
32);
+      cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
+      break;
+   default:
+      assert(0);
+   }
 }
 
 static void rvcn_dec_message_create(struct radv_video_session *vid,
@@ -1482,6 +1592,22 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer 
commandBuffer,
 
    cmd_buffer->video.vid = vid;
    cmd_buffer->video.params = params;
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip == 
AMD_IP_VCN_UNIFIED) {
+      radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
+      rvcn_decode_ib_package_t *ib_header =
+         (rvcn_decode_ib_package_t 
*)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+      ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
+         sizeof(struct rvcn_decode_ib_package_s);
+      cmd_buffer->cs->cdw++;
+      ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
+      cmd_buffer->cs->cdw++;
+      cmd_buffer->video.decode_buffer =
+         (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
+      cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
+      memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct 
rvcn_decode_buffer_s));
+   }
+
 }
 
 static void
@@ -1499,8 +1625,11 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, 
vid->sessionctx.mem->bo, vid->sessionctx.offset);
    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, 
out_offset);
    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be 
unhappy */
-   for (unsigned i = 0; i < 8; i++)
-      radeon_emit(cmd_buffer->cs, 0x81ff);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != 
AMD_IP_VCN_UNIFIED) {
+      for (unsigned i = 0; i < 8; i++)
+         radeon_emit(cmd_buffer->cs, 0x81ff);
+   }
 }
 
 static void
@@ -1539,6 +1668,12 @@ void
 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,
                           const VkVideoEndCodingInfoKHR *pEndCodingInfo)
 {
+   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (cmd_buffer->device->physical_device->vid_decode_ip != 
AMD_IP_VCN_UNIFIED)
+      return;
+
+   radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
 }
 
 static void
@@ -1662,7 +1797,8 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer,
    if (have_it(vid))
       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_bo, 
it_offset);
 
-   set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 
1);
+   if (cmd_buffer->device->physical_device->vid_decode_ip != 
AMD_IP_VCN_UNIFIED)
+      set_reg(cmd_buffer, 
cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
 }
 
 void

Reply via email to