Because of the way the hardware operates, DPB (decoded picture buffer) references must stay in a fixed slot for their entire lifetime.
Signed-off-by: averne <averne...@gmail.com> --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/h264_slice.c | 6 +- libavcodec/h264dec.c | 3 + libavcodec/hwaccels.h | 1 + libavcodec/nvtegra_h264.c | 506 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 518 insertions(+), 1 deletion(-) create mode 100644 libavcodec/nvtegra_h264.c diff --git a/configure b/configure index 952e3aef7d..930cd3c9bd 100755 --- a/configure +++ b/configure @@ -3193,6 +3193,8 @@ h264_videotoolbox_hwaccel_deps="videotoolbox" h264_videotoolbox_hwaccel_select="h264_decoder" h264_vulkan_hwaccel_deps="vulkan" h264_vulkan_hwaccel_select="h264_decoder" +h264_nvtegra_hwaccel_deps="nvtegra" +h264_nvtegra_hwaccel_select="h264_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index e102d03e7d..2cb0ec21a8 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1013,6 +1013,7 @@ OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o +OBJS-$(CONFIG_H264_NVTEGRA_HWACCEL) += nvtegra_h264.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_D3D12VA_HWACCEL) += dxva2_hevc.o d3d12va_hevc.o diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index ce2c4caca1..dc4c5545c8 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -784,7 +784,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ CONFIG_H264_VDPAU_HWACCEL + \ - CONFIG_H264_VULKAN_HWACCEL) + CONFIG_H264_VULKAN_HWACCEL + \ + CONFIG_H264_NVTEGRA_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = 
pix_fmts; switch (h->ps.sps->bit_depth_luma) { @@ -888,6 +889,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #endif #if CONFIG_H264_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; +#endif +#if CONFIG_H264_NVTEGRA_HWACCEL + *fmt++ = AV_PIX_FMT_NVTEGRA; #endif if (h->avctx->color_range == AVCOL_RANGE_JPEG) *fmt++ = AV_PIX_FMT_YUVJ420P; diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index fd23e367b4..51f53f07a9 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1160,6 +1160,9 @@ const FFCodec ff_h264_decoder = { #endif #if CONFIG_H264_VULKAN_HWACCEL HWACCEL_VULKAN(h264), +#endif +#if CONFIG_H264_NVTEGRA_HWACCEL + HWACCEL_NVTEGRA(h264), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index a69e6a1977..463fd333a1 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -37,6 +37,7 @@ extern const struct FFHWAccel ff_h264_nvdec_hwaccel; extern const struct FFHWAccel ff_h264_vaapi_hwaccel; extern const struct FFHWAccel ff_h264_vdpau_hwaccel; extern const struct FFHWAccel ff_h264_videotoolbox_hwaccel; +extern const struct FFHWAccel ff_h264_nvtegra_hwaccel; extern const struct FFHWAccel ff_h264_vulkan_hwaccel; extern const struct FFHWAccel ff_hevc_d3d11va_hwaccel; extern const struct FFHWAccel ff_hevc_d3d11va2_hwaccel; diff --git a/libavcodec/nvtegra_h264.c b/libavcodec/nvtegra_h264.c new file mode 100644 index 0000000000..63073c44a6 --- /dev/null +++ b/libavcodec/nvtegra_h264.c @@ -0,0 +1,506 @@ +/* + * Copyright (c) 2024 averne <averne...@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "config_components.h"
+
+#include "avcodec.h"
+#include "hwaccel_internal.h"
+#include "internal.h"
+#include "hwconfig.h"
+#include "h264dec.h"
+#include "decode.h"
+#include "nvtegra_decode.h"
+
+#include "libavutil/pixdesc.h"
+#include "libavutil/nvtegra_host1x.h"
+
+/**
+ * Private state of the H.264 NVTEGRA hwaccel.
+ *
+ * Each tracked picture lives in one entry of refs[] and keeps that entry,
+ * its pic id and its ordered-DPB position until it is dropped as stale.
+ */
+typedef struct NVTegraH264DecodeContext {
+    FFNVTegraDecodeContext core;
+
+    /* Single map holding the coloc, mbhist and history regions */
+    AVNVTegraMap common_map;
+    uint32_t coloc_off, mbhist_off, history_off;
+    uint32_t mbhist_size, history_size;
+
+    /* Tracked pictures: up to 16 references plus the current frame */
+    struct NVTegraH264RefFrame {
+        AVNVTegraMap *map;
+        uint32_t chroma_off;
+        int16_t frame_num;
+        int16_t pic_id;
+    } refs[16+1];
+
+    /*
+     * ordered_dpb_map[slot] and pic_id_map[pic_id] both hold an index into refs[];
+     * scratch_ref and cur_frame are indices into refs[] as well.
+     */
+    uint8_t ordered_dpb_map[16+1],
+            pic_id_map[16+1], scratch_ref, cur_frame;
+
+    /* Bit n set means entry n of the matching array is in use */
+    uint64_t refs_mask, ordered_dpb_mask, pic_id_mask;
+} NVTegraH264DecodeContext;
+
+/* Size (width, height) of a macroblock */
+#define MB_SIZE 16
+
+/* Sequence handed to ff_nvtegra_end_frame() to be placed after the slice data */
+static const uint8_t bitstream_end_sequence[16] = {
+    0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0x00, 0x00,
+};
+
+/**
+ * Release the common map and the shared decoder state.
+ *
+ * Both teardown steps always run, so a failure to destroy the map cannot
+ * leak the core decoder state.
+ *
+ * @return 0 on success, otherwise the first negative error code encountered
+ */
+static int nvtegra_h264_decode_uninit(AVCodecContext *avctx) {
+    NVTegraH264DecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    int err, ret;
+
+    av_log(avctx, AV_LOG_DEBUG, "Deinitializing NVTEGRA H264 decoder\n");
+
+    ret = av_nvtegra_map_destroy(&ctx->common_map);
+
+    err = ff_nvtegra_decode_uninit(avctx, &ctx->core);
+    if (ret >= 0 && err < 0)
+        ret = err;
+
+    return (ret < 0) ? ret : 0;
+}
+
+/**
+ * Compute the layout of the per-frame input map and of the common map,
+ * initialize the shared decoder state and allocate the common map.
+ *
+ * @return 0 on success, a negative error code on failure (after tearing
+ *         down whatever had been set up)
+ */
+static int nvtegra_h264_decode_init(AVCodecContext *avctx) {
+    H264Context *h = avctx->priv_data;
+    const SPS *sps = h->ps.sps;
+    NVTegraH264DecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    AVHWDeviceContext *hw_device_ctx;
+    AVNVTegraDeviceContext *device_hwctx;
+    uint32_t aligned_width, aligned_height,
+             width_in_mbs, height_in_mbs, num_slices,
+             coloc_size, mbhist_size, history_size, common_map_size;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Initializing NVTEGRA H264 decoder\n");
+
+    aligned_width  = FFALIGN(avctx->coded_width,  MB_SIZE);
+    aligned_height = FFALIGN(avctx->coded_height, MB_SIZE);
+    width_in_mbs   = aligned_width  / MB_SIZE;
+    height_in_mbs  = aligned_height / MB_SIZE;
+
+    /* Room for one slice offset per macroblock */
+    num_slices = width_in_mbs * height_in_mbs;
+
+    /* Ignored: histogram map, size 0x400 */
+    ctx->core.pic_setup_off     = 0;
+    ctx->core.status_off        = FFALIGN(ctx->core.pic_setup_off     + sizeof(nvdec_h264_pic_s),
+                                          AV_NVTEGRA_MAP_ALIGN);
+    ctx->core.cmdbuf_off        = FFALIGN(ctx->core.status_off        + sizeof(nvdec_status_s),
+                                          AV_NVTEGRA_MAP_ALIGN);
+    ctx->core.slice_offsets_off = FFALIGN(ctx->core.cmdbuf_off        + 3*AV_NVTEGRA_MAP_ALIGN,
+                                          AV_NVTEGRA_MAP_ALIGN);
+    ctx->core.bitstream_off     = FFALIGN(ctx->core.slice_offsets_off + num_slices * sizeof(uint32_t),
+                                          AV_NVTEGRA_MAP_ALIGN);
+    ctx->core.input_map_size    = FFALIGN(ctx->core.bitstream_off     + ff_nvtegra_decode_pick_bitstream_buffer_size(avctx),
+                                          0x1000);
+
+    /* Capacities follow from the gaps between consecutive regions */
+    ctx->core.max_cmdbuf_size    =  ctx->core.slice_offsets_off - ctx->core.cmdbuf_off;
+    ctx->core.max_num_slices     = (ctx->core.bitstream_off     - ctx->core.slice_offsets_off) / sizeof(uint32_t);
+    ctx->core.max_bitstream_size =  ctx->core.input_map_size    - ctx->core.bitstream_off;
+
+    err = ff_nvtegra_decode_init(avctx, &ctx->core);
+    if (err < 0)
+        goto fail;
+
+    hw_device_ctx = (AVHWDeviceContext *)ctx->core.hw_device_ref->data;
+    device_hwctx  = hw_device_ctx->hwctx;
+
+    coloc_size    = FFALIGN(FFALIGN(height_in_mbs, 2) * (width_in_mbs * 64) - 63, 0x100);
+    coloc_size   *= sps->ref_frame_count + 1; /* Max number of references frames, plus current frame */
+    mbhist_size   = FFALIGN(width_in_mbs * 104, 0x100);
+    history_size  = FFALIGN(width_in_mbs * 0x200 + 0x1100, 0x200);
+
+    ctx->coloc_off   = 0;
+    ctx->mbhist_off  = FFALIGN(ctx->coloc_off   + coloc_size,   AV_NVTEGRA_MAP_ALIGN);
+    ctx->history_off = FFALIGN(ctx->mbhist_off  + mbhist_size,  AV_NVTEGRA_MAP_ALIGN);
+    common_map_size  = FFALIGN(ctx->history_off + history_size, 0x1000);
+
+    err = av_nvtegra_map_create(&ctx->common_map, &device_hwctx->nvdec_channel, common_map_size, 0x100,
+                                NVMAP_HEAP_IOVMM, NVMAP_HANDLE_WRITE_COMBINE);
+    if (err < 0)
+        goto fail;
+
+    ctx->mbhist_size  = mbhist_size;
+    ctx->history_size = history_size;
+
+    /* Mark every slot as unassigned (0xff) */
+    memset(ctx->ordered_dpb_map, -1, sizeof(ctx->ordered_dpb_map));
+    memset(ctx->pic_id_map,      -1, sizeof(ctx->pic_id_map));
+
+    return 0;
+
+fail:
+    nvtegra_h264_decode_uninit(avctx);
+    return err;
+}
+
+/**
+ * Return the POC of the top (poc[0]) or bottom (poc[1]) field,
+ * or 0 when that entry holds INT_MAX.
+ */
+static inline int field_poc(int poc[2], bool top) {
+    return (poc[!top] != INT_MAX) ? poc[!top] : 0;
+}
+
+/**
+ * Fill one NVDEC DPB entry from a decoder picture.
+ *
+ * @param h      decoder context (currently unused)
+ * @param dst    DPB entry to overwrite
+ * @param src    picture to describe
+ * @param pic_id value stored as both the picture and the colocated index of the entry
+ */
+static void dpb_add(H264Context *h, nvdec_dpb_entry_s *dst,
+                    H264Picture *src, int pic_id)
+{
+    int marking;
+
+    /* 2 for long-term references, 1 otherwise */
+    marking = src->long_ref ? 2 : 1;
+    *dst = (nvdec_dpb_entry_s){
+        .index                = pic_id,
+        .col_idx              = pic_id,
+        .state                = src->reference,
+        .is_long_term         = src->long_ref,
+        .not_existing         = src->invalid_gap,
+        .is_field             = src->field_picture,
+        .top_field_marking    = (src->reference & PICT_TOP_FIELD)    ? marking : 0,
+        .bottom_field_marking = (src->reference & PICT_BOTTOM_FIELD) ? marking : 0,
+        .output_memory_layout = 0, /* NV12 */
+        .FieldOrderCnt        = {
+            field_poc(src->field_poc, true),
+            field_poc(src->field_poc, false),
+        },
+        .FrameIdx             = src->long_ref ? src->pic_id : src->frame_num,
+    };
+}
+
+/**
+ * Claim the lowest clear bit of *mask and return its index.
+ *
+ * The shift is done on a 64-bit constant so that it matches the width of
+ * the mask. The mask must have at least one clear bit.
+ */
+static inline int find_slot(uint64_t *mask) {
+    int slot = ff_ctzll(~*mask);
+    *mask |= (UINT64_C(1) << slot);
+    return slot;
+}
+
+/**
+ * Fill the picture setup structure for the current picture and update the
+ * reference bookkeeping held in ctx (refs, pic ids, ordered DPB, scratch ref).
+ *
+ * @param setup destination structure, fully overwritten
+ * @param h     decoder context; h->cur_pic_ptr must be set
+ * @param ctx   hwaccel private context
+ */
+static void nvtegra_h264_prepare_frame_setup(nvdec_h264_pic_s *setup, H264Context *h,
+                                             NVTegraH264DecodeContext *ctx)
+{
+    const PPS *pps = h->ps.pps;
+    const SPS *sps = h->ps.sps;
+
+    int dpb_size, i, j, diff;
+    H264Picture *refs [16+1] = {0};
+    uint8_t dpb_to_ref[16+1] = {0};
+
+    *setup = (nvdec_h264_pic_s){
+        .mbhist_buffer_size                     = ctx->mbhist_size,
+
+        .gptimer_timeout_value                  = 0, /* Default value */
+
+        .log2_max_pic_order_cnt_lsb_minus4      = FFMAX(sps->log2_max_poc_lsb - 4, 0),
+        .delta_pic_order_always_zero_flag       = sps->delta_pic_order_always_zero_flag,
+        .frame_mbs_only_flag                    = sps->frame_mbs_only_flag,
+
+        .PicWidthInMbs                          = h->mb_width,
+        .FrameHeightInMbs                       = h->mb_height,
+
+        .tileFormat                             = 0, /* TBL */
+        .gob_height                             = 0, /* GOB_2 */
+
+        .entropy_coding_mode_flag               = pps->cabac,
+        .pic_order_present_flag                 = pps->pic_order_present,
+        .num_ref_idx_l0_active_minus1           = pps->ref_count[0] - 1,
+        .num_ref_idx_l1_active_minus1           = pps->ref_count[1] - 1,
+        .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+        .redundant_pic_cnt_present_flag         = pps->redundant_pic_cnt_present,
+        .transform_8x8_mode_flag                = pps->transform_8x8_mode,
+
+        .pitch_luma                             = h->cur_pic_ptr->f->linesize[0],
+        .pitch_chroma                           = h->cur_pic_ptr->f->linesize[1],
+
+        .luma_top_offset                        = 0,
+        .luma_bot_offset                        = 0,
+        .luma_frame_offset                      = 0,
+        .chroma_top_offset                      = 0,
+        .chroma_bot_offset                      = 0,
+        .chroma_frame_offset                    = 0,
+
+        .HistBufferSize                         = ctx->history_size / 256,
+
+        .MbaffFrameFlag                         = sps->mb_aff && !FIELD_PICTURE(h),
+        .direct_8x8_inference_flag              = sps->direct_8x8_inference_flag,
+        .weighted_pred_flag                     = pps->weighted_pred,
+        .constrained_intra_pred_flag            = pps->constrained_intra_pred,
+        .ref_pic_flag                           = h->nal_ref_idc != 0,
+        .field_pic_flag                         = FIELD_PICTURE(h),
+        .bottom_field_flag                      = h->picture_structure == PICT_BOTTOM_FIELD,
+        .second_field                           = FIELD_PICTURE(h) && !h->first_field,
+        .log2_max_frame_num_minus4              = sps->log2_max_frame_num - 4,
+        .chroma_format_idc                      = sps->chroma_format_idc,
+        .pic_order_cnt_type                     = sps->poc_type,
+        .pic_init_qp_minus26                    = pps->init_qp - 26,
+        .chroma_qp_index_offset                 = pps->chroma_qp_index_offset[0],
+        .second_chroma_qp_index_offset          = pps->chroma_qp_index_offset[1],
+
+        .weighted_bipred_idc                    = pps->weighted_bipred_idc,
+        .frame_num                              = h->cur_pic_ptr->frame_num,
+        .output_memory_layout                   = 0, /* NV12 */
+
+        .CurrFieldOrderCnt                      = {
+            field_poc(h->cur_pic_ptr->field_poc, true),
+            field_poc(h->cur_pic_ptr->field_poc, false),
+        },
+
+        .lossless_ipred8x8_filter_enable        = true,
+        .qpprime_y_zero_transform_bypass_flag   = sps->transform_bypass,
+    };
+
+    /* Build concatenated ref list for this frame */
+    dpb_size = 0;
+    for (i = 0; i < h->short_ref_count; ++i)
+        refs[dpb_size++] = h->short_ref[i];
+
+    for (i = 0; i < 16; ++i)
+        if (h->long_ref[i])
+            refs[dpb_size++] = h->long_ref[i];
+
+    /*
+     * Remove stale references from our ref list.
+     * For every entry that is kept, dpb_to_ref[] records where the matching
+     * picture sits in the concatenated list built above.
+     */
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->refs); ++i) {
+        if (!(ctx->refs_mask & (UINT64_C(1) << i)))
+            continue;
+
+        for (j = 0; j < dpb_size; ++j) {
+            if (av_nvtegra_frame_get_fbuf_map(refs[j]->f) == ctx->refs[i].map)
+                break;
+        }
+
+        if (j == dpb_size) {
+            ctx->pic_id_mask &= ~(UINT64_C(1) << ctx->refs[i].pic_id);
+            ctx->pic_id_map[ctx->refs[i].pic_id] = -1;
+
+            ctx->refs_mask &= ~(UINT64_C(1) << i);
+            ctx->refs[i].map = NULL;
+        } else {
+            dpb_to_ref[i] = j;
+        }
+    }
+
+    /* Update the ordered DPB mask: free the slots whose frame was just dropped */
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->ordered_dpb_map); ++i) {
+        if (!(ctx->ordered_dpb_mask & (UINT64_C(1) << i)))
+            continue;
+        if (!ctx->refs[ctx->ordered_dpb_map[i]].map) {
+            ctx->ordered_dpb_mask &= ~(UINT64_C(1) << i);
+            ctx->ordered_dpb_map[i] = -1;
+        }
+    }
+
+    /* Add new frames to the ordered DPB; frames already present keep their slot */
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->refs); ++i) {
+        if (!(ctx->refs_mask & (UINT64_C(1) << i)))
+            continue;
+
+        for (j = 0; j < FF_ARRAY_ELEMS(ctx->ordered_dpb_map); ++j) {
+            if (ctx->ordered_dpb_map[j] == i)
+                break;
+        }
+
+        if (j == FF_ARRAY_ELEMS(ctx->ordered_dpb_map))
+            ctx->ordered_dpb_map[find_slot(&ctx->ordered_dpb_mask)] = i;
+    }
+
+    /*
+     * Add the current frame to our ref list
+     * In the case of interlaced video, the new frame can be the same as the last
+     */
+    if (ctx->refs[ctx->cur_frame].map != av_nvtegra_frame_get_fbuf_map(h->cur_pic_ptr->f)) {
+        /* Allocate a pic id for the current frame */
+        i = find_slot(&ctx->pic_id_mask);
+
+        /* Insert it in our ref list */
+        ctx->cur_frame = find_slot(&ctx->refs_mask);
+        ctx->pic_id_map[i] = ctx->cur_frame;
+        ctx->refs[ctx->cur_frame] = (struct NVTegraH264RefFrame){
+            .map        = av_nvtegra_frame_get_fbuf_map(h->cur_pic_ptr->f),
+            .chroma_off = h->cur_pic_ptr->f->data[1] - h->cur_pic_ptr->f->data[0],
+            .frame_num  = h->cur_pic_ptr->frame_num,
+            .pic_id     = i,
+        };
+    }
+
+    setup->CurrPicIdx = setup->CurrColIdx = ctx->refs[ctx->cur_frame].pic_id;
+
+    /*
+     * Find the temporally closest frame to be used as a scratch ref, or use the current one.
+     * The best distance seen so far is kept in diff, so that a farther frame
+     * visited later cannot replace a closer one.
+     */
+    diff = INT_MAX;
+    ctx->scratch_ref = ctx->cur_frame;
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->ordered_dpb_map); ++i) {
+        int cur_diff;
+
+        if (!(ctx->ordered_dpb_mask & (UINT64_C(1) << i)))
+            continue;
+
+        j        = ctx->ordered_dpb_map[i];
+        cur_diff = FFABS(h->cur_pic_ptr->frame_num - refs[dpb_to_ref[j]]->frame_num);
+        if (cur_diff < diff) {
+            diff             = cur_diff;
+            ctx->scratch_ref = j;
+        }
+    }
+
+    /* Build the NVDEC DPB */
+    for (i = 0; i < FF_ARRAY_ELEMS(setup->dpb); ++i) {
+        if (ctx->ordered_dpb_mask & (UINT64_C(1) << i)) {
+            j = ctx->ordered_dpb_map[i];
+            dpb_add(h, &setup->dpb[i], refs[dpb_to_ref[j]], ctx->refs[j].pic_id);
+        }
+    }
+
+    memcpy(setup->WeightScale,       pps->scaling_matrix4,    sizeof(setup->WeightScale));
+    memcpy(setup->WeightScale8x8[0], pps->scaling_matrix8[0], sizeof(setup->WeightScale8x8[0]));
+    memcpy(setup->WeightScale8x8[1], pps->scaling_matrix8[3], sizeof(setup->WeightScale8x8[1]));
+}
+
+/**
+ * Record the NVDEC command stream for the current picture.
+ *
+ * Every one of the 17 picture slots receives a surface: the frame owning
+ * that slot when there is one, the scratch reference otherwise.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int nvtegra_h264_prepare_cmdbuf(AVNVTegraCmdbuf *cmdbuf, H264Context *h,
+                                       AVFrame *cur_frame, NVTegraH264DecodeContext *ctx)
+{
+    FrameDecodeData *fdd = (FrameDecodeData *)cur_frame->private_ref->data;
+    FFNVTegraDecodeFrame *tf = fdd->hwaccel_priv;
+    AVNVTegraMap *input_map = (AVNVTegraMap *)tf->input_map_ref->data;
+
+    int err, i;
+
+    err = av_nvtegra_cmdbuf_begin(cmdbuf, HOST1X_CLASS_NVDEC);
+    if (err < 0)
+        return err;
+
+    AV_NVTEGRA_PUSH_VALUE(cmdbuf, NVC5B0_SET_APPLICATION_ID,
+                          AV_NVTEGRA_ENUM(NVC5B0_SET_APPLICATION_ID, ID, H264));
+    AV_NVTEGRA_PUSH_VALUE(cmdbuf, NVC5B0_SET_CONTROL_PARAMS,
+                          AV_NVTEGRA_ENUM (NVC5B0_SET_CONTROL_PARAMS, CODEC_TYPE, H264) |
+                          AV_NVTEGRA_VALUE(NVC5B0_SET_CONTROL_PARAMS, ERR_CONCEAL_ON, 1) |
+                          AV_NVTEGRA_VALUE(NVC5B0_SET_CONTROL_PARAMS, GPTIMER_ON, 1));
+    AV_NVTEGRA_PUSH_VALUE(cmdbuf, NVC5B0_SET_PICTURE_INDEX,
+                          AV_NVTEGRA_VALUE(NVC5B0_SET_PICTURE_INDEX, INDEX, ctx->core.frame_idx));
+
+    /* Per-frame buffers, all located in the frame's input map */
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_DRV_PIC_SETUP_OFFSET,
+                          input_map, ctx->core.pic_setup_off, NVHOST_RELOC_TYPE_DEFAULT);
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_IN_BUF_BASE_OFFSET,
+                          input_map, ctx->core.bitstream_off, NVHOST_RELOC_TYPE_DEFAULT);
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_SLICE_OFFSETS_BUF_OFFSET,
+                          input_map, ctx->core.slice_offsets_off, NVHOST_RELOC_TYPE_DEFAULT);
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_NVDEC_STATUS_OFFSET,
+                          input_map, ctx->core.status_off, NVHOST_RELOC_TYPE_DEFAULT);
+
+    /* Buffers shared by all frames, located in the common map */
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_COLOC_DATA_OFFSET,
+                          &ctx->common_map, ctx->coloc_off, NVHOST_RELOC_TYPE_DEFAULT);
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_H264_SET_MBHIST_BUF_OFFSET,
+                          &ctx->common_map, ctx->mbhist_off, NVHOST_RELOC_TYPE_DEFAULT);
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_HISTORY_OFFSET,
+                          &ctx->common_map, ctx->history_off, NVHOST_RELOC_TYPE_DEFAULT);
+
+/* Push the luma and chroma surface addresses of ref into picture slot number offset */
+#define PUSH_FRAME(ref, offset) do {                                                    \
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_PICTURE_LUMA_OFFSET0   + (offset) * 4,     \
+                          (ref).map, 0, NVHOST_RELOC_TYPE_DEFAULT);                     \
+    AV_NVTEGRA_PUSH_RELOC(cmdbuf, NVC5B0_SET_PICTURE_CHROMA_OFFSET0 + (offset) * 4,     \
+                          (ref).map, (ref).chroma_off, NVHOST_RELOC_TYPE_DEFAULT);      \
+} while (0)
+
+    for (i = 0; i < 16 + 1; ++i) {
+        if (i == ctx->cur_frame)
+            PUSH_FRAME(ctx->refs[i], i);
+        else if (ctx->pic_id_mask & (UINT64_C(1) << i))
+            PUSH_FRAME(ctx->refs[ctx->pic_id_map[i]], i);
+        else
+            PUSH_FRAME(ctx->refs[ctx->scratch_ref], i);
+    }
+
+#undef PUSH_FRAME
+
+    AV_NVTEGRA_PUSH_VALUE(cmdbuf, NVC5B0_EXECUTE,
+                          AV_NVTEGRA_ENUM(NVC5B0_EXECUTE, AWAKEN, ENABLE));
+
+    err = av_nvtegra_cmdbuf_end(cmdbuf);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * hwaccel start_frame callback: start the frame in the shared decoder code,
+ * then write the picture setup structure into the frame's input map.
+ */
+static int nvtegra_h264_start_frame(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size) {
+    H264Context *h = avctx->priv_data;
+    AVFrame *frame = h->cur_pic_ptr->f;
+    FrameDecodeData *fdd = (FrameDecodeData *)frame->private_ref->data;
+    NVTegraH264DecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+
+    FFNVTegraDecodeFrame *tf;
+    AVNVTegraMap *input_map;
+    uint8_t *mem;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Starting H264-NVTEGRA frame with pixel format %s\n",
+           av_get_pix_fmt_name(avctx->sw_pix_fmt));
+
+    err = ff_nvtegra_start_frame(avctx, frame, &ctx->core);
+    if (err < 0)
+        return err;
+
+    /* fdd->hwaccel_priv is only read once ff_nvtegra_start_frame() has succeeded */
+    tf = fdd->hwaccel_priv;
+    input_map = (AVNVTegraMap *)tf->input_map_ref->data;
+    mem = av_nvtegra_map_get_addr(input_map);
+
+    nvtegra_h264_prepare_frame_setup((nvdec_h264_pic_s *)(mem + ctx->core.pic_setup_off), h, ctx);
+
+    return 0;
+}
+
+/**
+ * hwaccel end_frame callback: complete the picture setup with the stream
+ * length and slice count, record the command buffer and hand the frame to
+ * the shared decoder code.
+ *
+ * Returns 0 without doing anything when the frame has no hwaccel data or
+ * no slice was queued.
+ */
+static int nvtegra_h264_end_frame(AVCodecContext *avctx) {
+    H264Context *h = avctx->priv_data;
+    NVTegraH264DecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    AVFrame *frame = h->cur_pic_ptr->f;
+    FrameDecodeData *fdd = (FrameDecodeData *)frame->private_ref->data;
+    FFNVTegraDecodeFrame *tf = fdd->hwaccel_priv;
+
+    nvdec_h264_pic_s *setup;
+    uint8_t *mem;
+    int err;
+
+    av_log(avctx, AV_LOG_DEBUG, "Ending H264-NVTEGRA frame with %u slices -> %u bytes\n",
+           ctx->core.num_slices, ctx->core.bitstream_len);
+
+    if (!tf || !ctx->core.num_slices)
+        return 0;
+
+    mem = av_nvtegra_map_get_addr((AVNVTegraMap *)tf->input_map_ref->data);
+
+    /* The stream length accounts for the end sequence passed to ff_nvtegra_end_frame() */
+    setup = (nvdec_h264_pic_s *)(mem + ctx->core.pic_setup_off);
+    setup->stream_len  = ctx->core.bitstream_len + sizeof(bitstream_end_sequence);
+    setup->slice_count = ctx->core.num_slices;
+
+    err = nvtegra_h264_prepare_cmdbuf(&ctx->core.cmdbuf, h, frame, ctx);
+    if (err < 0)
+        return err;
+
+    return ff_nvtegra_end_frame(avctx, frame, &ctx->core, bitstream_end_sequence,
+                                sizeof(bitstream_end_sequence));
+}
+
+/**
+ * hwaccel decode_slice callback: forward the slice data to the shared
+ * decoder code for the current picture.
+ */
+static int nvtegra_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buf,
+                                     uint32_t buf_size)
+{
+    H264Context *h = avctx->priv_data;
+    AVFrame *frame = h->cur_pic_ptr->f;
+
+    return ff_nvtegra_decode_slice(avctx, frame, buf, buf_size, true);
+}
+
+#if CONFIG_H264_NVTEGRA_HWACCEL
+const FFHWAccel ff_h264_nvtegra_hwaccel = {
+    .p.name         = "h264_nvtegra",
+    .p.type         = AVMEDIA_TYPE_VIDEO,
+    .p.id           = AV_CODEC_ID_H264,
+    .p.pix_fmt      = AV_PIX_FMT_NVTEGRA,
+    .start_frame    = &nvtegra_h264_start_frame,
+    .end_frame      = &nvtegra_h264_end_frame,
+    .decode_slice   = &nvtegra_h264_decode_slice,
+    .init           = &nvtegra_h264_decode_init,
+    .uninit         = &nvtegra_h264_decode_uninit,
+    .frame_params   = &ff_nvtegra_frame_params,
+    .priv_data_size = sizeof(NVTegraH264DecodeContext),
+    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE,
+};
+#endif
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".