Module: Mesa
Branch: main
Commit: 3e2c768aa860f96074df73cd3171960e76f5c312
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e2c768aa860f96074df73cd3171960e76f5c312

Author: Dave Airlie <[email protected]>
Date:   Tue Dec  6 15:19:08 2022 +1000

radv/vcn: enable dynamic dpb tier 2 for h264/h265 on navi21+

Navi21 and newer can decode into separate DPB reference images by using
tier 2 dynamic DPB messages.

This enables that mode for H.264 and H.265 decode in the Vulkan video driver.

Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20388>

---

 src/amd/vulkan/radv_video.c | 109 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 99 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c
index aa4013f2d59..c348fac3af4 100644
--- a/src/amd/vulkan/radv_video.c
+++ b/src/amd/vulkan/radv_video.c
@@ -213,13 +213,18 @@ radv_CreateVideoSessionKHR(VkDevice _device,
    }
 
    vid->interlaced = false;
+   vid->dpb_type = DPB_MAX_RES;
 
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
       vid->stream_type = RDECODE_CODEC_H264_PERF;
+      if (device->physical_device->rad_info.family >= CHIP_NAVI21)
+         vid->dpb_type = DPB_DYNAMIC_TIER_2;
       break;
    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
       vid->stream_type = RDECODE_CODEC_H265;
+      if (device->physical_device->rad_info.family >= CHIP_NAVI21)
+         vid->dpb_type = DPB_DYNAMIC_TIER_2;
       break;
    default:
       return VK_ERROR_FEATURE_NOT_PRESENT;
@@ -227,7 +232,6 @@ radv_CreateVideoSessionKHR(VkDevice _device,
 
    vid->stream_handle = si_vid_alloc_stream_handle();
    vid->dbg_frame_cnt = 0;
-   vid->dpb_type = DPB_MAX_RES;
    vid->db_alignment = (device->physical_device->rad_info.family >= 
CHIP_RENOIR &&
                         vid->vk.max_coded.width > 32 &&
                         (vid->stream_type == RDECODE_CODEC_H265 &&
@@ -334,6 +338,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice 
physicalDevice,
       pCapabilities->maxDpbSlots = NUM_H264_REFS;
       pCapabilities->maxActiveReferencePictures = NUM_H264_REFS;
 
+      /* for h264 on navi21+ separate dpb images should work */
+      if (pdevice->rad_info.family >= CHIP_NAVI21)
+         pCapabilities->flags |= 
VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
       ext->fieldOffsetGranularity.x = 0;
       ext->fieldOffsetGranularity.y = 0;
       ext->maxLevelIdc = 51;
@@ -346,6 +353,9 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice 
physicalDevice,
          vk_find_struct(pCapabilities->pNext, 
VIDEO_DECODE_H265_CAPABILITIES_KHR);
       pCapabilities->maxDpbSlots = NUM_H264_REFS;
       pCapabilities->maxActiveReferencePictures = NUM_H265_REFS;
+      /* for h265 on navi21+ separate dpb images should work */
+      if (pdevice->rad_info.family >= CHIP_NAVI21)
+         pCapabilities->flags |= 
VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
       ext->maxLevelIdc = 51;
       strcpy(pCapabilities->stdHeaderVersion.extensionName, 
VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
       pCapabilities->stdHeaderVersion.specVersion = 
VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
@@ -625,7 +635,8 @@ static rvcn_dec_message_avc_t get_h264_msg(struct 
radv_video_session *vid,
    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
-   result.sps_info_flags |= 1 << 
RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
+   if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
+      result.sps_info_flags |= 1 << 
RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
 
    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
@@ -676,6 +687,11 @@ static rvcn_dec_message_avc_t get_h264_msg(struct 
radv_video_session *vid,
    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
 
    result.num_ref_frames = sps->max_num_ref_frames;
+   result.non_existing_frame_flags = 0;
+   result.used_for_reference_flags = 0;
+
+   memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
+   memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
       int idx = frame_info->pReferenceSlots[i].slotIndex;
       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
@@ -684,7 +700,23 @@ static rvcn_dec_message_avc_t get_h264_msg(struct 
radv_video_session *vid,
       result.frame_num_list[idx] = idx;
       result.field_order_cnt_list[idx][0] = 
dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
       result.field_order_cnt_list[idx][1] = 
dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
+
+      result.ref_frame_list[idx] = idx;
+
+      if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
+         result.used_for_reference_flags |= (1 << (2 * idx));
+      if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
+         result.used_for_reference_flags |= (1 << (2 * idx + 1));
+      else
+         result.used_for_reference_flags |= (3 << (2 * idx));
+
+      if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
+        result.ref_frame_list[idx] |= 0x80;
+      if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
+         result.non_existing_frame_flags |= 1 << idx;
+
    }
+   result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
 
    return result;
@@ -841,7 +873,7 @@ static rvcn_dec_message_hevc_t get_h265_msg(struct 
radv_device *device,
    return result;
 }
 
-static bool rvcn_dec_message_decode(struct radv_device *device,
+static bool rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer,
                                     struct radv_video_session *vid,
                                     struct radv_video_session_params *params,
                                     void *ptr,
@@ -849,11 +881,14 @@ static bool rvcn_dec_message_decode(struct radv_device 
*device,
                                     uint32_t *slice_offset,
                                     const struct VkVideoDecodeInfoKHR 
*frame_info)
 {
+   struct radv_device *device = cmd_buffer->device;
    rvcn_dec_message_header_t *header;
    rvcn_dec_message_index_t *index_codec;
    rvcn_dec_message_decode_t *decode;
+   rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
+   rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
    void *codec;
-   unsigned sizes = 0, offset_decode, offset_codec;
+   unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
    struct radv_image_view *dst_iv = 
radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
    struct radv_image *img = dst_iv->image;
    struct radv_image_plane *luma = &img->planes[0];
@@ -867,10 +902,21 @@ static bool rvcn_dec_message_decode(struct radv_device 
*device,
    index_codec = (void *)((char *)header + sizes);
    sizes += sizeof(rvcn_dec_message_index_t);
 
+   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
+      index_dynamic_dpb = (void *)((char *)header + sizes);
+      sizes += sizeof(rvcn_dec_message_index_t);
+   }
+
    offset_decode = sizes;
    decode = (void *)((char*)header + sizes);
    sizes += sizeof(rvcn_dec_message_decode_t);
 
+   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
+      offset_dynamic_dpb = sizes;
+      dynamic_dpb_t2 = (void*)((char *)header + sizes);
+      sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
+   }
+
    offset_codec = sizes;
    codec = (void *)((char *)header + sizes);
 
@@ -893,6 +939,14 @@ static bool rvcn_dec_message_decode(struct radv_device 
*device,
    index_codec->filled = 0;
    ++header->num_buffers;
 
+   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
+      index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
+      index_dynamic_dpb->offset = offset_dynamic_dpb;
+      index_dynamic_dpb->filled = 0;
+      ++header->num_buffers;
+      index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
+   }
+
    decode->stream_type = vid->stream_type;
    decode->decode_flags = 0;
    decode->width_in_samples = dst_iv->image->vk.extent.width;
@@ -915,8 +969,6 @@ static bool rvcn_dec_message_decode(struct radv_device 
*device,
         vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
       decode->db_aligned_height = 
align(frame_info->dstPictureResource.codedExtent.height, 64);
 
-   decode->db_surf_tile_config = 0;
-
    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * 
chroma->surface.blk_w;
 
@@ -965,6 +1017,39 @@ static bool rvcn_dec_message_decode(struct radv_device 
*device,
 
    decode->hw_ctxt_size = vid->ctx.size;
 
+   if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
+      return true;
+
+   uint64_t addr;
+   for (int i = 0; i < frame_info->referenceSlotCount; i++) {
+      struct radv_image_view *f_dpb_iv = 
radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
+      struct radv_image *dpb_img = f_dpb_iv->image;
+      int idx = frame_info->pReferenceSlots[i].slotIndex;
+
+      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 
dpb_img->bindings[0].bo);
+      addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + 
dpb_img->bindings[0].offset;
+
+      dynamic_dpb_t2->dpbAddrLo[idx] = addr;
+      dynamic_dpb_t2->dpbAddrHi[idx] = addr >> 32;
+      ++dynamic_dpb_t2->dpbArraySize;
+   }
+
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 
dpb->bindings[0].bo);
+   addr = radv_buffer_get_va(dpb->bindings[0].bo);
+
+   dynamic_dpb_t2->dpbCurrLo = addr;
+   dynamic_dpb_t2->dpbCurrHi = addr >> 32;
+
+   decode->decode_flags = 1;
+   dynamic_dpb_t2->dpbConfigFlags = 0;
+   dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch;
+   dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height;
+   dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size;
+
+   dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch;
+   dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height;
+   dynamic_dpb_t2->dpbChromaAlignedSize = 
chroma->surface.u.gfx9.surf_slice_size;
+
    return true;
 }
 
@@ -1509,9 +1594,13 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer,
    uint32_t out_offset, fb_offset, it_offset = 0;
    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_bo = NULL;
 
-   size += sizeof(rvcn_dec_message_header_t);
-   size += sizeof(rvcn_dec_message_index_t);
-   size += sizeof(rvcn_dec_message_decode_t);
+   size += sizeof(rvcn_dec_message_header_t); /* header */
+   size += sizeof(rvcn_dec_message_index_t);  /* codec */
+   if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
+      size += sizeof(rvcn_dec_message_index_t);
+      size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
+   }
+   size += sizeof(rvcn_dec_message_decode_t);   /* decode */
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
       size += sizeof(rvcn_dec_message_avc_t);
@@ -1537,7 +1626,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer,
    msg_bo = cmd_buffer->upload.upload_bo;
 
    uint32_t slice_offset;        
-   rvcn_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_ptr, 
&slice_offset, frame_info);
+   rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_ptr, 
&slice_offset, frame_info);
    rvcn_dec_message_feedback(fb_ptr);
    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, 
vid->sessionctx.mem->bo, vid->sessionctx.offset);
    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);

Reply via email to