[FFmpeg-devel] [PATCH] libavfilter/dnn: enable LibTorch xpu device option support
From: Wenbin Chen Add xpu device support to libtorch backend. To enable xpu support you need to add "-Wl,--no-as-needed -lintel-ext-pt-gpu -Wl,--as-needed" to "--extra-libs" when configure ffmpeg. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_torch.cpp | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp index 2557264713..ea493f5873 100644 --- a/libavfilter/dnn/dnn_backend_torch.cpp +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -250,6 +250,10 @@ static int th_start_inference(void *args) av_log(ctx, AV_LOG_ERROR, "input or output tensor is NULL\n"); return DNN_GENERIC_ERROR; } +// Transfer tensor to the same device as model +c10::Device device = (*th_model->jit_model->parameters().begin()).device(); +if (infer_request->input_tensor->device() != device) +*infer_request->input_tensor = infer_request->input_tensor->to(device); inputs.push_back(*infer_request->input_tensor); *infer_request->output = th_model->jit_model->forward(inputs).toTensor(); @@ -285,6 +289,9 @@ static void infer_completion_callback(void *args) { switch (th_model->model.func_type) { case DFT_PROCESS_FRAME: if (task->do_ioproc) { +// Post process can only deal with CPU memory. 
+if (output->device() != torch::kCPU) +*output = output->to(torch::kCPU); outputs.scale = 255; outputs.data = output->data_ptr(); if (th_model->model.frame_post_proc != NULL) { @@ -424,7 +431,13 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A th_model->ctx = ctx; c10::Device device = c10::Device(device_name); -if (!device.is_cpu()) { +if (device.is_xpu()) { +if (!at::hasXPU()) { +av_log(ctx, AV_LOG_ERROR, "No XPU device found\n"); +goto fail; +} +at::detail::getXPUHooks().initXPU(); +} else if (!device.is_cpu()) { av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name); goto fail; } @@ -432,6 +445,7 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A try { th_model->jit_model = new torch::jit::Module; (*th_model->jit_model) = torch::jit::load(ctx->model_filename); +th_model->jit_model->to(device); } catch (const c10::Error& e) { av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n"); goto fail; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] libavfilter/dnn_io_proc: Take step into consideration when crop frame
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_io_proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index e5d6edb301..d2ec9f63f5 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -350,6 +350,7 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index const AVDetectionBBoxHeader *header; const AVDetectionBBox *bbox; AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); +int max_step[4] = { 0 }; av_assert0(sd); /* (scale != 1 and scale != 0) or mean != 0 */ @@ -405,8 +406,9 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h); offsety[0] = offsety[3] = top; +av_image_fill_max_pixsteps(max_step, NULL, desc); for (int k = 0; frame->data[k]; k++) -bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k]; +bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k]; sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize, 0, height, -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] libavfilter/dnn_backend_openvino: Check bbox's height
From: Wenbin Chen Check bbox's height with frame's height rather than frame's width. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 1e2c2404c7..8907bef69b 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -1200,7 +1200,7 @@ static int contain_valid_detection_bbox(AVFrame *frame) if (bbox->x < 0 || bbox->w < 0 || bbox->x + bbox->w >= frame->width) { return 0; } -if (bbox->y < 0 || bbox->h < 0 || bbox->y + bbox->h >= frame->width) { +if (bbox->y < 0 || bbox->h < 0 || bbox->y + bbox->h >= frame->height) { return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] doc: Add libtorch backend option to dnn_processing
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/filters.texi | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/filters.texi b/doc/filters.texi index 18f0d1c5a7..bfa8ccec8b 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -12073,11 +12073,21 @@ need to build and install the OpenVINO for C library (see @code{--enable-libopenvino} (--extra-cflags=-I... --extra-ldflags=-L... might be needed if the header files and libraries are not installed into system path) +@item torch +Libtorch backend. To enable this backend you need to build and install Libtroch +for C++ library. Please download cxx11 ABI version (see +@url{https://pytorch.org/get-started/locally}) +and configure FFmpeg with @code{--enable-libtorch +--extra-cflags=-I/libtorch_root/libtorch/include +--extra-cflags=-I/libtorch_root/libtorch/include/torch/csrc/api/include +--extra-ldflags=-L/libtorch_root/libtorch/lib/} + @end table @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format. +Note that different backends use different file formats. TensorFlow, OpenVINO +and Libtorch backend can load files for only its format. @item input Set the input name of the dnn network. -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] doc: Add libtorch backend option to dnn_processing
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/filters.texi | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/filters.texi b/doc/filters.texi index 913365671d..20605e72b2 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -12069,11 +12069,21 @@ need to build and install the OpenVINO for C library (see @code{--enable-libopenvino} (--extra-cflags=-I... --extra-ldflags=-L... might be needed if the header files and libraries are not installed into system path) +@item torch +Libtorch backend. To enable this backend you need to build and install Libtroch +for C++ library. Please download cxx11 ABI version (see +@url{https://pytorch.org/get-started/locally}) +and configure FFmpeg with @code{--enable-libtorch +--extra-cflag=-I/libtorch_root/libtorch/include +--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include +--extra-ldflags=-L/libtorch_root/libtorch/lib/} + @end table @item model Set path to model file specifying network architecture and its parameters. -Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format. +Note that different backends use different file formats. TensorFlow, OpenVINO +and Libtorch backend can load files for only its format. @item input Set the input name of the dnn network. -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] Changelog: Add libtorch
From: Wenbin Chen Signed-off-by: Wenbin Chen --- Changelog | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog b/Changelog index e3ca52430c..4af55ff537 100644 --- a/Changelog +++ b/Changelog @@ -35,6 +35,7 @@ version : - AEA muxer - ffmpeg CLI loopback decoders - Support PacketTypeMetadata of PacketType in enhanced flv format +- dnn filter libtorch backend version 6.1: -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] Changelog: Add libtorch
From: Wenbin Chen Signed-off-by: Wenbin Chen --- Changelog | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog b/Changelog index e3ca52430c..d0c41887f3 100644 --- a/Changelog +++ b/Changelog @@ -35,6 +35,7 @@ version : - AEA muxer - ffmpeg CLI loopback decoders - Support PacketTypeMetadata of PacketType in enhanced flv format +- Support libtorch as DNN backend version 6.1: -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v6] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official website: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. Please download cxx11 ABI version: (libtorch-cxx11-abi-shared-with-deps-*.zip). 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch \ --extra-cflag=-I/libtorch_root/libtorch/include \ --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include \ --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf \ dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. https://pytorch.org/tutorials/advanced/cpp_export.html. This is pytorch official guide about how to convert and load torchscript model. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 597 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 15 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 624 insertions(+), 4 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index c34bdd13f5..e68b6f22b0 100755 --- a/configure +++ b/configure @@ -281,6 +281,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1905,6 +1906,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2785,7 +2787,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6888,6 +6890,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++17 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..fa9a2e6d99 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as publishe
[FFmpeg-devel] [PATCH v5] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official website: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. Please download cxx11 ABI version (libtorch-cxx11-abi-shared-with-deps-*.zip). 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch --extra-cflag=-I/libtorch_root/libtorch/include --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 597 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 15 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 624 insertions(+), 4 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index 05f8283af9..3584728464 100755 --- a/configure +++ b/configure @@ -281,6 +281,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1905,6 +1906,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2785,7 +2787,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++17 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..54d3b309a1 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in
[FFmpeg-devel] [PATCH v4] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official website: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch --extra-cflag=-I/libtorch_root/libtorch/include --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 597 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 15 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 624 insertions(+), 4 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index 2c635043dd..450ef54a80 100755 --- a/configure +++ b/configure @@ -279,6 +279,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2781,7 +2783,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++14 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..54d3b309a1 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even th
[FFmpeg-devel] [PATCH v3] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official websit: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch --extra-cflag=-I/libtorch_root/libtorch/include --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 597 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 15 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 624 insertions(+), 4 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index 2c635043dd..450ef54a80 100755 --- a/configure +++ b/configure @@ -279,6 +279,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2781,7 +2783,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++14 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..54d3b309a1 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the
[FFmpeg-devel] [PATCH v2] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official websit: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch --extra-cflag=-I/libtorch_root/libtorch/include --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 587 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 15 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 614 insertions(+), 4 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index 68f675a4bc..bc11172fe4 100755 --- a/configure +++ b/configure @@ -279,6 +279,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2776,7 +2778,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6873,6 +6875,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++14 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..b905c55175 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,587 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the
[FFmpeg-devel] [PATCH] libavfilter/dnn: add LibTorch as one of DNN backend
From: Wenbin Chen PyTorch is an open source machine learning framework that accelerates the path from research prototyping to production deployment. Official websit: https://pytorch.org/. We call the C++ library of PyTorch as LibTorch, the same below. To build FFmpeg with LibTorch, please take following steps as reference: 1. download LibTorch C++ library in https://pytorch.org/get-started/locally/, please select C++/Java for language, and other options as your need. 2. unzip the file to your own dir, with command unzip libtorch-shared-with-deps-latest.zip -d your_dir 3. export libtorch_root/libtorch/include and libtorch_root/libtorch/include/torch/csrc/api/include to $PATH export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH 4. config FFmpeg with ../configure --enable-libtorch --extra-cflag=-I/libtorch_root/libtorch/include --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include --extra-ldflags=-L/libtorch_root/libtorch/lib/ 5. make To run FFmpeg DNN inference with LibTorch backend: ./ffmpeg -i input.jpg -vf dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg The LibTorch_model.pt can be generated by Python with torch.jit.script() api. Please note, torch.jit.trace() is not recommanded, since it does not support ambiguous input size. 
Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 5 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_torch.cpp | 585 ++ libavfilter/dnn/dnn_interface.c | 5 + libavfilter/dnn_filter_common.c | 31 +- libavfilter/dnn_interface.h | 2 +- libavfilter/vf_dnn_processing.c | 3 + 7 files changed, 621 insertions(+), 11 deletions(-) create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp diff --git a/configure b/configure index c8ae0a061d..75061692b1 100755 --- a/configure +++ b/configure @@ -279,6 +279,7 @@ External library support: --enable-libtheora enable Theora encoding via libtheora [no] --enable-libtls enable LibreSSL (via libtls), needed for https support if openssl, gnutls or mbedtls is not used [no] + --enable-libtorchenable Torch as one DNN backend [no] --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] @@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST=" libtensorflow libtesseract libtheora +libtorch libtwolame libuavs3d libv4l2 @@ -2776,7 +2778,7 @@ cbs_vp9_select="cbs" deflate_wrapper_deps="zlib" dirac_parse_select="golomb" dovi_rpu_select="golomb" -dnn_suggest="libtensorflow libopenvino" +dnn_suggest="libtensorflow libopenvino libtorch" dnn_deps="avformat swscale" error_resilience_select="me_cmp" evcparse_select="golomb" @@ -6872,6 +6874,7 @@ enabled libtensorflow && require libtensorflow tensorflow/c/c_api.h TF_Versi enabled libtesseract && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg enabled libtls&& require_pkg_config libtls libtls tls.h tls_configure +enabled libtorch && check_cxxflags -std=c++14 && require_cpp libtorch torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread enabled libtwolame&& require libtwolame twolame.h twolame_init -ltwolame && { check_lib libtwolame 
twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index 5d5697ea42..3d09927c98 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o +DNN-OBJS-$(CONFIG_LIBTORCH) += dnn/dnn_backend_torch.o OBJS-$(CONFIG_DNN) += $(DNN-OBJS-yes) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp new file mode 100644 index 00..4fc76d0ce4 --- /dev/null +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -0,0 +1,585 @@ +/* + * Copyright (c) 2024 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even th
[FFmpeg-devel] [PATCH 3/3] libavfilter/vf_dnn_detect: Use class confidence to filter boxes
From: Wenbin Chen Use class confidence instead of box_score to filt boxes, which is more accurate. Class confidence is obtained by multiplying class probability distribution and box_score. Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index caccbf7a12..2bf5ed7476 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -236,9 +236,6 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out conf = post_process_raw_data( detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]); } -if (conf < conf_threshold) { -continue; -} if (is_NHWC) { x = post_process_raw_data(detection_boxes_data[0]); @@ -257,6 +254,9 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out conf = conf * post_process_raw_data( detection_boxes_data[cy * cell_w + cx + (label_id + 5) * cell_w * cell_h]); } +if (conf < conf_threshold) { +continue; +} bbox = av_mallocz(sizeof(*bbox)); if (!bbox) -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/3] libavfilter/dnn_interface: use dims to represent shapes
From: Wenbin Chen For detect and classify output, width and height make no sence, so change width, height to dims to represent the shape of tensor. Use layout and dims to get width, height and channel. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 80 ++ libavfilter/dnn/dnn_backend_tf.c | 32 +++ libavfilter/dnn/dnn_io_proc.c | 30 +++--- libavfilter/dnn_interface.h| 17 +- libavfilter/vf_dnn_classify.c | 6 +- libavfilter/vf_dnn_detect.c| 50 libavfilter/vf_dnn_processing.c| 21 --- 7 files changed, 146 insertions(+), 90 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 590ddd586c..73b42c32b1 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -253,9 +253,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ov_shape_free(&input_shape); return ov2_map_error(status, NULL); } -input.height = dims[1]; -input.width = dims[2]; -input.channels = dims[3]; +for (int i = 0; i < input_shape.rank; i++) +input.dims[i] = dims[i]; +input.layout = DL_NHWC; input.dt = precision_to_datatype(precision); #else status = ie_infer_request_get_blob(request->infer_request, task->input_name, &input_blob); @@ -278,9 +278,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) av_log(ctx, AV_LOG_ERROR, "Failed to get input blob buffer\n"); return DNN_GENERIC_ERROR; } -input.height = dims.dims[2]; -input.width = dims.dims[3]; -input.channels = dims.dims[1]; +for (int i = 0; i < input_shape.rank; i++) +input.dims[i] = dims[i]; +input.layout = DL_NCHW; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); #endif @@ -339,8 +339,8 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) av_assert0(!"should not reach here"); break; } -input.data = (uint8_t *)input.data - + input.width * input.height * input.channels * get_datatype_size(input.dt); +input.data = (uint8_t 
*)input.data + +input.dims[1] * input.dims[2] * input.dims[3] * get_datatype_size(input.dt); } #if HAVE_OPENVINO2 ov_tensor_free(tensor); @@ -403,10 +403,11 @@ static void infer_completion_callback(void *args) goto end; } outputs[i].dt = precision_to_datatype(precision); - -outputs[i].channels = output_shape.rank > 2 ? dims[output_shape.rank - 3] : 1; -outputs[i].height = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; -outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1; +outputs[i].layout = DL_NCHW; +outputs[i].dims[0] = 1; +outputs[i].dims[1] = output_shape.rank > 2 ? dims[output_shape.rank - 3] : 1; +outputs[i].dims[2] = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; +outputs[i].dims[3] = output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1; av_assert0(request->lltask_count <= dims[0]); outputs[i].layout = ctx->options.layout; outputs[i].scale= ctx->options.scale; @@ -445,9 +446,9 @@ static void infer_completion_callback(void *args) return; } output.data = blob_buffer.buffer; -output.channels = dims.dims[1]; -output.height = dims.dims[2]; -output.width= dims.dims[3]; +output.layout = DL_NCHW; +for (int i = 0; i < 4; i++) +output.dims[i] = dims.dims[i]; av_assert0(request->lltask_count <= dims.dims[0]); output.dt = precision_to_datatype(precision); output.layout = ctx->options.layout; @@ -469,8 +470,10 @@ static void infer_completion_callback(void *args) ff_proc_from_dnn_to_frame(task->out_frame, outputs, ctx); } } else { -task->out_frame->width = outputs[0].width; -task->out_frame->height = outputs[0].height; +task->out_frame->width = + outputs[0].dims[dnn_get_width_idx_by_layout(outputs[0].layout)]; +task->out_frame->height = + outputs[0].dims[dnn_get_height_idx_by_layout(outputs[0].layout)]; } break; case DFT_ANALYTICS_DETECT: @@ -501,7 +504,8 @@ static void infer_completion_callback(void *args) av_freep(&request->lltasks[i]); for (int i = 0; i < ov_model->nb_outputs; i++) outputs[i].data = (uint8_t 
*)outputs[i].data + -outputs[i].width * outputs[i].height * outputs[i].channels * get_datatype_size(outputs[i].dt); +outputs[i].dims[1] * outputs[i].dims[2] * outputs[i].dims[3] * +
[FFmpeg-devel] [PATCH 1/3] libavfilter/dnn_backend_openvino: Add automatic input/output detection
From: Wenbin Chen Now when using openvino backend, user doesn't need to set input/output names in command line. Model ports will be automatically detected. For example: ffmpeg -i input.png -vf \ dnn_detect=dnn_backend=openvino:model=model.xml:input=image:\ output=detection_out -y output.png can be simplified to: ffmpeg -i input.png -vf dnn_detect=dnn_backend=openvino:model=model.xml\ -y output.png Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 64 ++ libavfilter/dnn_filter_common.c| 21 + 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index e207d44584..590ddd586c 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -205,6 +205,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ov_tensor_t* tensor = NULL; ov_shape_t input_shape = {0}; ov_element_type_e precision; +char *port_name; #else dimensions_t dims; precision_e precision; @@ -223,11 +224,23 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ov_output_const_port_free(ov_model->input_port); ov_model->input_port = NULL; } -status = ov_model_const_input_by_name(ov_model->ov_model, task->input_name, &ov_model->input_port); +if (task->input_name) +status = ov_model_const_input_by_name(ov_model->ov_model, task->input_name, &ov_model->input_port); +else +status = ov_model_const_input(ov_model->ov_model, &ov_model->input_port); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); return ov2_map_error(status, NULL); } +status = ov_port_get_any_name(ov_model->input_port, &port_name); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port name.\n"); +return ov2_map_error(status, NULL); +} +av_log(ctx, AV_LOG_VERBOSE, "OpenVINO model input: %s\n", port_name); +ov_free(port_name); +port_name = NULL; + status = ov_const_port_get_shape(ov_model->input_port, 
&input_shape); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); @@ -620,7 +633,10 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } -status = ov_preprocess_prepostprocessor_get_input_info_by_name(ov_model->preprocess, input_name, &ov_model->input_info); +if (input_name) +status = ov_preprocess_prepostprocessor_get_input_info_by_name(ov_model->preprocess, input_name, &ov_model->input_info); +else +status = ov_preprocess_prepostprocessor_get_input_info(ov_model->preprocess, &ov_model->input_info); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to get input info from preprocess.\n"); ret = ov2_map_error(status, NULL); @@ -673,10 +689,24 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } +if (!nb_outputs) { +size_t output_size; +status = ov_model_outputs_size(ov_model->ov_model, &output_size); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get output size.\n"); +ret = ov2_map_error(status, NULL); +goto err; +} +nb_outputs = output_size; +} ov_model->nb_outputs = nb_outputs; for (int i = 0; i < nb_outputs; i++) { -status = ov_preprocess_prepostprocessor_get_output_info_by_name( -ov_model->preprocess, output_names[i], &ov_model->output_info); +if (output_names) +status = ov_preprocess_prepostprocessor_get_output_info_by_name( +ov_model->preprocess, output_names[i], &ov_model->output_info); +else +status = ov_preprocess_prepostprocessor_get_output_info_by_index( +ov_model->preprocess, i, &ov_model->output_info); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to get output info from preprocess.\n"); ret = ov2_map_error(status, NULL); @@ -758,12 +788,25 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * } for (int i = 0; i < nb_outputs; i++) { -status = ov_model_const_output_by_name(ov_model->ov_model, output_names[i], - &ov_model->output_ports[i]); +char *port_name; +if (output_names) 
+status = ov_model_const_output_by_name(ov_model->ov_model, output_names[i], +&ov_model->output_ports[i]); +else +status = ov_model_const_output_by_index(ov_model->ov_model, i, +&ov_model->output_ports[i]); if (status != OK) {
[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add two outputs ssd support
From: Wenbin Chen For this kind of model, we can directly use its output as final result just like ssd model. The difference is that it splits output into two tensors. [x_min, y_min, x_max, y_max, confidence] and [lable_id]. Model example refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/person-detection-0106 Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 64 + 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 88865c8a8e..249cbba0f7 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -359,24 +359,48 @@ static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output, return 0; } -static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) +static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs, +AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; float conf_threshold = ctx->confidence; -int proposal_count = output->height; -int detect_size = output->width; -float *detections = output->data; +int proposal_count = 0; +int detect_size = 0; +float *detections = NULL, *labels = NULL; int nb_bboxes = 0; AVDetectionBBoxHeader *header; AVDetectionBBox *bbox; - -if (output->width != 7) { +int scale_w = ctx->scale_width; +int scale_h = ctx->scale_height; + +if (nb_outputs == 1 && output->width == 7) { +proposal_count = output->height; +detect_size = output->width; +detections = output->data; +} else if (nb_outputs == 2 && output[0].width == 5) { +proposal_count = output[0].height; +detect_size = output[0].width; +detections = output[0].data; +labels = output[1].data; +} else if (nb_outputs == 2 && output[1].width == 5) { +proposal_count = output[1].height; +detect_size = output[1].width; +detections = output[1].data; +labels = output[0].data; +} else { av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd 
requirement.\n"); return AVERROR(EINVAL); } +if (proposal_count == 0) +return 0; + for (int i = 0; i < proposal_count; ++i) { -float conf = detections[i * detect_size + 2]; +float conf; +if (nb_outputs == 1) +conf = detections[i * detect_size + 2]; +else +conf = detections[i * detect_size + 4]; if (conf < conf_threshold) { continue; } @@ -398,12 +422,24 @@ static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterCon for (int i = 0; i < proposal_count; ++i) { int av_unused image_id = (int)detections[i * detect_size + 0]; -int label_id = (int)detections[i * detect_size + 1]; -float conf = detections[i * detect_size + 2]; -float x0 = detections[i * detect_size + 3]; -float y0 = detections[i * detect_size + 4]; -float x1 = detections[i * detect_size + 5]; -float y1 = detections[i * detect_size + 6]; +int label_id; +float conf, x0, y0, x1, y1; + +if (nb_outputs == 1) { +label_id = (int)detections[i * detect_size + 1]; +conf = detections[i * detect_size + 2]; +x0 = detections[i * detect_size + 3]; +y0 = detections[i * detect_size + 4]; +x1 = detections[i * detect_size + 5]; +y1 = detections[i * detect_size + 6]; +} else { +label_id = (int)labels[i]; +x0 = detections[i * detect_size] / scale_w; +y0 = detections[i * detect_size + 1] / scale_h; +x1 = detections[i * detect_size + 2] / scale_w; +y1 = detections[i * detect_size + 3] / scale_h; +conf = detections[i * detect_size + 4]; +} if (conf < conf_threshold) { continue; @@ -447,7 +483,7 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outpu switch (ctx->model_type) { case DDMT_SSD: -ret = dnn_detect_post_proc_ssd(frame, output, filter_ctx); +ret = dnn_detect_post_proc_ssd(frame, output, nb_outputs, filter_ctx); if (ret < 0) return ret; break; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] libavfilter/dnn_backend_openvino: Add dynamic output support
From: Wenbin Chen Add dynamic outputs support. Some models don't have fixed output size. Its size changes according to result. Now openvino can run these kinds of models. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 134 +++-- 1 file changed, 59 insertions(+), 75 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 671a995c70..e207d44584 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -219,31 +219,26 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) task = lltask->task; #if HAVE_OPENVINO2 -if (!ov_model_is_dynamic(ov_model->ov_model)) { -if (ov_model->input_port) { -ov_output_const_port_free(ov_model->input_port); -ov_model->input_port = NULL; -} -status = ov_model_const_input_by_name(ov_model->ov_model, task->input_name, &ov_model->input_port); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); -return ov2_map_error(status, NULL); -} -status = ov_const_port_get_shape(ov_model->input_port, &input_shape); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); -return ov2_map_error(status, NULL); -} -dims = input_shape.dims; -status = ov_port_get_element_type(ov_model->input_port, &precision); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n"); -ov_shape_free(&input_shape); -return ov2_map_error(status, NULL); -} -} else { -avpriv_report_missing_feature(ctx, "Do not support dynamic model."); -return AVERROR(ENOSYS); +if (ov_model->input_port) { +ov_output_const_port_free(ov_model->input_port); +ov_model->input_port = NULL; +} +status = ov_model_const_input_by_name(ov_model->ov_model, task->input_name, &ov_model->input_port); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); +return ov2_map_error(status, NULL); +} +status = ov_const_port_get_shape(ov_model->input_port, 
&input_shape); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); +return ov2_map_error(status, NULL); +} +dims = input_shape.dims; +status = ov_port_get_element_type(ov_model->input_port, &precision); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n"); +ov_shape_free(&input_shape); +return ov2_map_error(status, NULL); } input.height = dims[1]; input.width = dims[2]; @@ -1049,30 +1044,22 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) ov_element_type_e precision; int64_t* dims; ov_status_e status; -if (!ov_model_is_dynamic(ov_model->ov_model)) { -status = ov_model_const_input_by_name(ov_model->ov_model, input_name, &ov_model->input_port); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); -return ov2_map_error(status, NULL); -} - -status = ov_const_port_get_shape(ov_model->input_port, &input_shape); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); -return ov2_map_error(status, NULL); -} -dims = input_shape.dims; - -status = ov_port_get_element_type(ov_model->input_port, &precision); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n"); -return ov2_map_error(status, NULL); -} -} else { -avpriv_report_missing_feature(ctx, "Do not support dynamic model now."); -return AVERROR(ENOSYS); +status = ov_model_const_input_by_name(ov_model->ov_model, input_name, &ov_model->input_port); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); +return ov2_map_error(status, NULL); } - +status = ov_port_get_element_type(ov_model->input_port, &precision); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n"); +return ov2_map_error(status, NULL); +} +status = ov_const_port_get_shape(ov_model->input_port, &input_shape); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n"); 
+return ov2_map_error(status, NULL); +} +dims = input_shape.dims; if (dims[1] <= 3) { // NCHW input->channels = dims[1]; input->height = input_resizable ? -1 : dims[2]; @@ -1083,7 +1070,7 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) input->channels = dims
[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add initialized value to function pointer
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 52d5c3d798..88865c8a8e 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -157,7 +157,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out float *output_data = output[output_index].data; float *anchors = ctx->anchors; AVDetectionBBox *bbox; -float (*post_process_raw_data)(float x); +float (*post_process_raw_data)(float x) = linear; int is_NHWC = 0; if (ctx->model_type == DDMT_YOLOV1V2) { -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] libavfilter/vf_dnn_detect: Fix a control flow issue
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index fcc64118b6..52d5c3d798 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -455,11 +455,13 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outpu ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx); if (ret < 0) return ret; +break; case DDMT_YOLOV3: case DDMT_YOLOV4: ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, nb_outputs); if (ret < 0) return ret; +break; } return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 4/4] libavfilter/vf_dnn_detect: Set used pointer to NULL
From: Wenbin Chen Set used pointer to NULL in case it leaks the storage. Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 5668b8b017..3464af86c8 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -223,6 +223,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out av_freep(&bbox); return AVERROR(ENOMEM); } +bbox = NULL; } } return 0; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 3/4] libavfilter/vf_dnn_detect: Fix uninitialized variables error
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index b2e9b8d4c6..5668b8b017 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -139,7 +139,8 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out { DnnDetectContext *ctx = filter_ctx->priv; float conf_threshold = ctx->confidence; -int detection_boxes, box_size, cell_w, cell_h, scale_w, scale_h; +int detection_boxes, box_size; +int cell_w = 0, cell_h = 0, scale_w = 0, scale_h = 0; int nb_classes = ctx->nb_classes; float *output_data = output[output_index].data; float *anchors = ctx->anchors; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/4] libavfilter/vf_dnn_detect: Add NULL pointer check
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 4 1 file changed, 4 insertions(+) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index b82916ce6d..b2e9b8d4c6 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -112,6 +112,10 @@ static int dnn_detect_parse_anchors(char *anchors_str, float **anchors) } for (int i = 0; i < nb_anchor; i++) { token = av_strtok(anchors_str, "&", &saveptr); +if (!token) { +av_freep(&anchors_buf); +return 0; +} anchors_buf[i] = strtof(token, NULL); anchors_str = NULL; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/4] libavfilter/vf_dnn_detect: Fix an incorrect expression
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 7ac3bb0b58..b82916ce6d 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -106,7 +106,7 @@ static int dnn_detect_parse_anchors(char *anchors_str, float **anchors) i++; } nb_anchor++; -anchors_buf = av_mallocz(nb_anchor * sizeof(*anchors)); +anchors_buf = av_mallocz(nb_anchor * sizeof(**anchors)); if (!anchors_buf) { return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 3/4] libavfilter/vf_dnn_detect: Add yolov3 support
From: Wenbin Chen Add yolov3 support. The difference of yolov3 is that it has multiple outputs in different scale to perform better on both large and small object. The model detail refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 86f61c9907..7a32b191c3 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -35,6 +35,7 @@ typedef enum { DDMT_SSD, DDMT_YOLOV1V2, +DDMT_YOLOV3 } DNNDetectionModelType; typedef struct DnnDetectContext { @@ -73,6 +74,7 @@ static const AVOption dnn_detect_options[] = { { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, "model_type" }, { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, { "yolo","output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" }, +{ "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, "model_type" }, { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "cell_h", "cell height",OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "nb_classes", "The number of class",OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, @@ -146,6 +148,11 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out cell_h = ctx->cell_h; scale_w = cell_w; scale_h = cell_h; +} else { +cell_w = output[output_index].width; +cell_h = output[output_index].height; +scale_w = ctx->scale_width; +scale_h = ctx->scale_height; } box_size = nb_classes + 5; @@ -173,6 +180,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData 
*output, int out output[output_index].height * output[output_index].width / box_size / cell_w / cell_h; +anchors = anchors + (detection_boxes * output_index * 2); /** * find all candidate bbox * yolo output can be reshaped to [B, N*D, Cx, Cy] @@ -284,6 +292,21 @@ static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterCo return 0; } +static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output, + AVFilterContext *filter_ctx, int nb_outputs) +{ +int ret = 0; +for (int i = 0; i < nb_outputs; i++) { +ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx); +if (ret < 0) +return ret; +} +ret = dnn_detect_fill_side_data(frame, filter_ctx); +if (ret < 0) +return ret; +return 0; +} + static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; @@ -380,8 +403,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outpu ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx); if (ret < 0) return ret; +case DDMT_YOLOV3: +ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, nb_outputs); +if (ret < 0) +return ret; } - return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 1/4] libavfilter/dnn_backend_openvino: Add multiple output support
From: Wenbin Chen Add multiple output support to openvino backend. You can use '&' to split different output when you set output name using command line. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_common.c | 7 - libavfilter/dnn/dnn_backend_openvino.c | 216 + libavfilter/vf_dnn_detect.c| 11 +- 3 files changed, 150 insertions(+), 84 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c index 91a4a3c4bf..632832ec36 100644 --- a/libavfilter/dnn/dnn_backend_common.c +++ b/libavfilter/dnn/dnn_backend_common.c @@ -43,13 +43,6 @@ int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func return AVERROR(EINVAL); } -if (exec_params->nb_output != 1 && backend != DNN_TF) { -// currently, the filter does not need multiple outputs, -// so we just pending the support until we really need it. -avpriv_report_missing_feature(ctx, "multiple outputs"); -return AVERROR(ENOSYS); -} - return 0; } diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 6fe8b9c243..089e028818 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -64,7 +64,7 @@ typedef struct OVModel{ ov_compiled_model_t *compiled_model; ov_output_const_port_t* input_port; ov_preprocess_input_info_t* input_info; -ov_output_const_port_t* output_port; +ov_output_const_port_t** output_ports; ov_preprocess_output_info_t* output_info; ov_preprocess_prepostprocessor_t* preprocess; #else @@ -77,6 +77,7 @@ typedef struct OVModel{ SafeQueue *request_queue; // holds OVRequestItem Queue *task_queue; // holds TaskItem Queue *lltask_queue; // holds LastLevelTaskItem +int nb_outputs; } OVModel; // one request for one call to openvino @@ -349,7 +350,7 @@ static void infer_completion_callback(void *args) TaskItem *task = lltask->task; OVModel *ov_model = task->model; SafeQueue *requestq = ov_model->request_queue; -DNNData output; +DNNData *outputs; OVContext *ctx = 
&ov_model->ctx; #if HAVE_OPENVINO2 size_t* dims; @@ -358,45 +359,61 @@ static void infer_completion_callback(void *args) ov_shape_t output_shape = {0}; ov_element_type_e precision; -memset(&output, 0, sizeof(output)); -status = ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, &output_tensor); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, - "Failed to get output tensor."); +outputs = av_calloc(ov_model->nb_outputs, sizeof(*outputs)); +if (!outputs) { +av_log(ctx, AV_LOG_ERROR, "Failed to alloc outputs."); return; } -status = ov_tensor_data(output_tensor, &output.data); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, - "Failed to get output data."); -return; -} +for (int i = 0; i < ov_model->nb_outputs; i++) { +status = ov_infer_request_get_tensor_by_const_port(request->infer_request, + ov_model->output_ports[i], + &output_tensor); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, +"Failed to get output tensor."); +goto end; +} -status = ov_tensor_get_shape(output_tensor, &output_shape); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n"); -return; -} -dims = output_shape.dims; +status = ov_tensor_data(output_tensor, &outputs[i].data); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, +"Failed to get output data."); +goto end; +} -status = ov_port_get_element_type(ov_model->output_port, &precision); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n"); +status = ov_tensor_get_shape(output_tensor, &output_shape); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n"); +goto end; +} +dims = output_shape.dims; + +status = ov_port_get_element_type(ov_model->output_ports[i], &precision); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n"); +goto end; +} +outputs[i].dt = precision_to_datatype(precision); + +outputs[i].channels = output_shape.rank > 2 ? 
dims[output_shape.rank - 3] : 1; +outputs[i].height = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; +outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1; +av_assert0(request->lltask_count <= dims[0]); +outputs[i].layou
[FFmpeg-devel] [PATCH v2 4/4] libavfilter/vf_dnn_detect: Add yolov4 support
From: Wenbin Chen The difference of yolov4 is that sigmoid function needed to be applied on x, y coordinates. Also make it compatiple with NHWC output as the yolov4 model from openvino model zoo has NHWC output layout. Model refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 71 ++--- 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 7a32b191c3..1b04a2cb98 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -35,7 +35,8 @@ typedef enum { DDMT_SSD, DDMT_YOLOV1V2, -DDMT_YOLOV3 +DDMT_YOLOV3, +DDMT_YOLOV4 } DNNDetectionModelType; typedef struct DnnDetectContext { @@ -75,6 +76,7 @@ static const AVOption dnn_detect_options[] = { { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, { "yolo","output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" }, { "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, "model_type" }, +{ "yolov4", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 },0, 0, FLAGS, "model_type" }, { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "cell_h", "cell height",OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "nb_classes", "The number of class",OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, @@ -84,6 +86,14 @@ static const AVOption dnn_detect_options[] = { AVFILTER_DEFINE_CLASS(dnn_detect); +static inline float sigmoid(float x) { +return 1.f / (1.f + exp(-x)); +} + +static inline float linear(float x) { +return x; +} + static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data) { float max_prob = 0; @@ -142,6 +152,8 @@ static int 
dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out float *output_data = output[output_index].data; float *anchors = ctx->anchors; AVDetectionBBox *bbox; +float (*post_process_raw_data)(float x); +int is_NHWC = 0; if (ctx->model_type == DDMT_YOLOV1V2) { cell_w = ctx->cell_w; @@ -149,13 +161,30 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out scale_w = cell_w; scale_h = cell_h; } else { -cell_w = output[output_index].width; -cell_h = output[output_index].height; +if (output[output_index].height != output[output_index].width && +output[output_index].height == output[output_index].channels) { +is_NHWC = 1; +cell_w = output[output_index].height; +cell_h = output[output_index].channels; +} else { +cell_w = output[output_index].width; +cell_h = output[output_index].height; +} scale_w = ctx->scale_width; scale_h = ctx->scale_height; } box_size = nb_classes + 5; +switch (ctx->model_type) { +case DDMT_YOLOV1V2: +case DDMT_YOLOV3: +post_process_raw_data = linear; +break; +case DDMT_YOLOV4: +post_process_raw_data = sigmoid; + break; +} + if (!cell_h || !cell_w) { av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n"); return AVERROR(EINVAL); @@ -193,19 +222,36 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out float *detection_boxes_data; int label_id; -detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h; -conf = detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]; +if (is_NHWC) { +detection_boxes_data = output_data + +((cy * cell_w + cx) * detection_boxes + box_id) * box_size; +conf = post_process_raw_data(detection_boxes_data[4]); +} else { +detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h; +conf = post_process_raw_data( +detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]); +} if (conf < conf_threshold) { continue; } -x= detection_boxes_data[cy * cell_w + cx]; -y= detection_boxes_data[cy * cell_w + cx + cel
[FFmpeg-devel] [PATCH v2 2/4] libavfilter/vf_dnn_detect: Add input pad
From: Wenbin Chen Add input pad to get model input resolution. Detection models always have fixed input size. And the output coordinators are based on the input resolution, so we need to get input size to map coordinators to our real output frames. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 24 -- libavfilter/vf_dnn_detect.c| 28 +- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 089e028818..671a995c70 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -1073,9 +1073,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) return AVERROR(ENOSYS); } -input->channels = dims[1]; -input->height = input_resizable ? -1 : dims[2]; -input->width= input_resizable ? -1 : dims[3]; +if (dims[1] <= 3) { // NCHW +input->channels = dims[1]; +input->height = input_resizable ? -1 : dims[2]; +input->width= input_resizable ? -1 : dims[3]; +} else { // NHWC +input->height = input_resizable ? -1 : dims[1]; +input->width= input_resizable ? -1 : dims[2]; +input->channels = dims[3]; +} input->dt = precision_to_datatype(precision); return 0; @@ -1105,9 +,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) return DNN_GENERIC_ERROR; } -input->channels = dims.dims[1]; -input->height = input_resizable ? -1 : dims.dims[2]; -input->width= input_resizable ? -1 : dims.dims[3]; +if (dims[1] <= 3) { // NCHW +input->channels = dims[1]; +input->height = input_resizable ? -1 : dims[2]; +input->width= input_resizable ? -1 : dims[3]; +} else { // NHWC +input->height = input_resizable ? -1 : dims[1]; +input->width= input_resizable ? 
-1 : dims[2]; +input->channels = dims[3]; +} input->dt = precision_to_datatype(precision); return 0; } diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 373dda58bf..86f61c9907 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -699,13 +699,39 @@ static av_cold void dnn_detect_uninit(AVFilterContext *context) free_detect_labels(ctx); } +static int config_input(AVFilterLink *inlink) +{ +AVFilterContext *context = inlink->dst; +DnnDetectContext *ctx = context->priv; +DNNData model_input; +int ret; + +ret = ff_dnn_get_input(&ctx->dnnctx, &model_input); +if (ret != 0) { +av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n"); +return ret; +} +ctx->scale_width = model_input.width == -1 ? inlink->w : model_input.width; +ctx->scale_height = model_input.height == -1 ? inlink->h : model_input.height; + +return 0; +} + +static const AVFilterPad dnn_detect_inputs[] = { +{ +.name = "default", +.type = AVMEDIA_TYPE_VIDEO, +.config_props = config_input, +}, +}; + const AVFilter ff_vf_dnn_detect = { .name = "dnn_detect", .description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."), .priv_size = sizeof(DnnDetectContext), .init = dnn_detect_init, .uninit= dnn_detect_uninit, -FILTER_INPUTS(ff_video_default_filterpad), +FILTER_INPUTS(dnn_detect_inputs), FILTER_OUTPUTS(ff_video_default_filterpad), FILTER_PIXFMTS_ARRAY(pix_fmts), .priv_class= &dnn_detect_class, -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 4/4] libavfilter/vf_dnn_detect: Add yolov4 support
From: Wenbin Chen The difference of yolov4 is that sigmoid function needed to be applied on x, y coordinates. Also make it compatiple with NHWC output as the yolov4 model from openvino model zoo has NHWC output layout. Model refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 71 ++--- 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 7a32b191c3..1b04a2cb98 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -35,7 +35,8 @@ typedef enum { DDMT_SSD, DDMT_YOLOV1V2, -DDMT_YOLOV3 +DDMT_YOLOV3, +DDMT_YOLOV4 } DNNDetectionModelType; typedef struct DnnDetectContext { @@ -75,6 +76,7 @@ static const AVOption dnn_detect_options[] = { { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, { "yolo","output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" }, { "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, "model_type" }, +{ "yolov4", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 },0, 0, FLAGS, "model_type" }, { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "cell_h", "cell height",OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "nb_classes", "The number of class",OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, @@ -84,6 +86,14 @@ static const AVOption dnn_detect_options[] = { AVFILTER_DEFINE_CLASS(dnn_detect); +static inline float sigmoid(float x) { +return 1.f / (1.f + exp(-x)); +} + +static inline float linear(float x) { +return x; +} + static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data) { float max_prob = 0; @@ -142,6 +152,8 @@ static int 
dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out float *output_data = output[output_index].data; float *anchors = ctx->anchors; AVDetectionBBox *bbox; +float (*post_process_raw_data)(float x); +int is_NHWC = 0; if (ctx->model_type == DDMT_YOLOV1V2) { cell_w = ctx->cell_w; @@ -149,13 +161,30 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out scale_w = cell_w; scale_h = cell_h; } else { -cell_w = output[output_index].width; -cell_h = output[output_index].height; +if (output[output_index].height != output[output_index].width && +output[output_index].height == output[output_index].channels) { +is_NHWC = 1; +cell_w = output[output_index].height; +cell_h = output[output_index].channels; +} else { +cell_w = output[output_index].width; +cell_h = output[output_index].height; +} scale_w = ctx->scale_width; scale_h = ctx->scale_height; } box_size = nb_classes + 5; +switch (ctx->model_type) { +case DDMT_YOLOV1V2: +case DDMT_YOLOV3: +post_process_raw_data = linear; +break; +case DDMT_YOLOV4: +post_process_raw_data = sigmoid; + break; +} + if (!cell_h || !cell_w) { av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n"); return AVERROR(EINVAL); @@ -193,19 +222,36 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out float *detection_boxes_data; int label_id; -detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h; -conf = detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]; +if (is_NHWC) { +detection_boxes_data = output_data + +((cy * cell_w + cx) * detection_boxes + box_id) * box_size; +conf = post_process_raw_data(detection_boxes_data[4]); +} else { +detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h; +conf = post_process_raw_data( +detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]); +} if (conf < conf_threshold) { continue; } -x= detection_boxes_data[cy * cell_w + cx]; -y= detection_boxes_data[cy * cell_w + cx + cel
[FFmpeg-devel] [PATCH 3/4] libavfilter/vf_dnn_detect: Add yolov3 support
From: Wenbin Chen Add yolov3 support. The difference of yolov3 is that it has multiple outputs in different scale to perform better on both large and small object. The model detail refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 86f61c9907..7a32b191c3 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -35,6 +35,7 @@ typedef enum { DDMT_SSD, DDMT_YOLOV1V2, +DDMT_YOLOV3 } DNNDetectionModelType; typedef struct DnnDetectContext { @@ -73,6 +74,7 @@ static const AVOption dnn_detect_options[] = { { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, "model_type" }, { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, { "yolo","output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" }, +{ "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, "model_type" }, { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "cell_h", "cell height",OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, { "nb_classes", "The number of class",OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, @@ -146,6 +148,11 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int out cell_h = ctx->cell_h; scale_w = cell_w; scale_h = cell_h; +} else { +cell_w = output[output_index].width; +cell_h = output[output_index].height; +scale_w = ctx->scale_width; +scale_h = ctx->scale_height; } box_size = nb_classes + 5; @@ -173,6 +180,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData 
*output, int out output[output_index].height * output[output_index].width / box_size / cell_w / cell_h; +anchors = anchors + (detection_boxes * output_index * 2); /** * find all candidate bbox * yolo output can be reshaped to [B, N*D, Cx, Cy] @@ -284,6 +292,21 @@ static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterCo return 0; } +static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output, + AVFilterContext *filter_ctx, int nb_outputs) +{ +int ret = 0; +for (int i = 0; i < nb_outputs; i++) { +ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx); +if (ret < 0) +return ret; +} +ret = dnn_detect_fill_side_data(frame, filter_ctx); +if (ret < 0) +return ret; +return 0; +} + static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; @@ -380,8 +403,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outpu ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx); if (ret < 0) return ret; +case DDMT_YOLOV3: +ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, nb_outputs); +if (ret < 0) +return ret; } - return 0; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/4] libavfilter/vf_dnn_detect: Add input pad
From: Wenbin Chen Add input pad to get model input resolution. Detection models always have fixed input size. And the output coordinators are based on the input resolution, so we need to get input size to map coordinators to our real output frames. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 24 -- libavfilter/vf_dnn_detect.c| 28 +- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 089e028818..671a995c70 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -1073,9 +1073,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) return AVERROR(ENOSYS); } -input->channels = dims[1]; -input->height = input_resizable ? -1 : dims[2]; -input->width= input_resizable ? -1 : dims[3]; +if (dims[1] <= 3) { // NCHW +input->channels = dims[1]; +input->height = input_resizable ? -1 : dims[2]; +input->width= input_resizable ? -1 : dims[3]; +} else { // NHWC +input->height = input_resizable ? -1 : dims[1]; +input->width= input_resizable ? -1 : dims[2]; +input->channels = dims[3]; +} input->dt = precision_to_datatype(precision); return 0; @@ -1105,9 +,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name) return DNN_GENERIC_ERROR; } -input->channels = dims.dims[1]; -input->height = input_resizable ? -1 : dims.dims[2]; -input->width= input_resizable ? -1 : dims.dims[3]; +if (dims[1] <= 3) { // NCHW +input->channels = dims[1]; +input->height = input_resizable ? -1 : dims[2]; +input->width= input_resizable ? -1 : dims[3]; +} else { // NHWC +input->height = input_resizable ? -1 : dims[1]; +input->width= input_resizable ? 
-1 : dims[2]; +input->channels = dims[3]; +} input->dt = precision_to_datatype(precision); return 0; } diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 373dda58bf..86f61c9907 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -699,13 +699,39 @@ static av_cold void dnn_detect_uninit(AVFilterContext *context) free_detect_labels(ctx); } +static int config_input(AVFilterLink *inlink) +{ +AVFilterContext *context = inlink->dst; +DnnDetectContext *ctx = context->priv; +DNNData model_input; +int ret; + +ret = ff_dnn_get_input(&ctx->dnnctx, &model_input); +if (ret != 0) { +av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n"); +return ret; +} +ctx->scale_width = model_input.width == -1 ? inlink->w : model_input.width; +ctx->scale_height = model_input.height == -1 ? inlink->h : model_input.height; + +return 0; +} + +static const AVFilterPad dnn_detect_inputs[] = { +{ +.name = "default", +.type = AVMEDIA_TYPE_VIDEO, +.config_props = config_input, +}, +}; + const AVFilter ff_vf_dnn_detect = { .name = "dnn_detect", .description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."), .priv_size = sizeof(DnnDetectContext), .init = dnn_detect_init, .uninit= dnn_detect_uninit, -FILTER_INPUTS(ff_video_default_filterpad), +FILTER_INPUTS(dnn_detect_inputs), FILTER_OUTPUTS(ff_video_default_filterpad), FILTER_PIXFMTS_ARRAY(pix_fmts), .priv_class= &dnn_detect_class, -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/4] libavfilter/dnn/dnn_backend_openvino: add multiple output support
From: Wenbin Chen Add multiple output support to openvino backend. You can use '&' to split different output when you set output name using command line. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_common.c | 7 - libavfilter/dnn/dnn_backend_openvino.c | 216 + libavfilter/vf_dnn_detect.c| 11 +- 3 files changed, 150 insertions(+), 84 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c index 91a4a3c4bf..632832ec36 100644 --- a/libavfilter/dnn/dnn_backend_common.c +++ b/libavfilter/dnn/dnn_backend_common.c @@ -43,13 +43,6 @@ int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func return AVERROR(EINVAL); } -if (exec_params->nb_output != 1 && backend != DNN_TF) { -// currently, the filter does not need multiple outputs, -// so we just pending the support until we really need it. -avpriv_report_missing_feature(ctx, "multiple outputs"); -return AVERROR(ENOSYS); -} - return 0; } diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 6fe8b9c243..089e028818 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -64,7 +64,7 @@ typedef struct OVModel{ ov_compiled_model_t *compiled_model; ov_output_const_port_t* input_port; ov_preprocess_input_info_t* input_info; -ov_output_const_port_t* output_port; +ov_output_const_port_t** output_ports; ov_preprocess_output_info_t* output_info; ov_preprocess_prepostprocessor_t* preprocess; #else @@ -77,6 +77,7 @@ typedef struct OVModel{ SafeQueue *request_queue; // holds OVRequestItem Queue *task_queue; // holds TaskItem Queue *lltask_queue; // holds LastLevelTaskItem +int nb_outputs; } OVModel; // one request for one call to openvino @@ -349,7 +350,7 @@ static void infer_completion_callback(void *args) TaskItem *task = lltask->task; OVModel *ov_model = task->model; SafeQueue *requestq = ov_model->request_queue; -DNNData output; +DNNData *outputs; OVContext *ctx = 
&ov_model->ctx; #if HAVE_OPENVINO2 size_t* dims; @@ -358,45 +359,61 @@ static void infer_completion_callback(void *args) ov_shape_t output_shape = {0}; ov_element_type_e precision; -memset(&output, 0, sizeof(output)); -status = ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, &output_tensor); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, - "Failed to get output tensor."); +outputs = av_calloc(ov_model->nb_outputs, sizeof(*outputs)); +if (!outputs) { +av_log(ctx, AV_LOG_ERROR, "Failed to alloc outputs."); return; } -status = ov_tensor_data(output_tensor, &output.data); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, - "Failed to get output data."); -return; -} +for (int i = 0; i < ov_model->nb_outputs; i++) { +status = ov_infer_request_get_tensor_by_const_port(request->infer_request, + ov_model->output_ports[i], + &output_tensor); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, +"Failed to get output tensor."); +goto end; +} -status = ov_tensor_get_shape(output_tensor, &output_shape); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n"); -return; -} -dims = output_shape.dims; +status = ov_tensor_data(output_tensor, &outputs[i].data); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, +"Failed to get output data."); +goto end; +} -status = ov_port_get_element_type(ov_model->output_port, &precision); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n"); +status = ov_tensor_get_shape(output_tensor, &output_shape); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n"); +goto end; +} +dims = output_shape.dims; + +status = ov_port_get_element_type(ov_model->output_ports[i], &precision); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n"); +goto end; +} +outputs[i].dt = precision_to_datatype(precision); + +outputs[i].channels = output_shape.rank > 2 ? 
dims[output_shape.rank - 3] : 1; +outputs[i].height = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; +outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1; +av_assert0(request->lltask_count <= dims[0]); +outputs[i].layou
[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add yolo support
From: Wenbin Chen Add yolo support. Yolo model doesn't output final result. It outputs candidate boxes, so we need post-process to remove overlap boxes to get final results. Also, the box's coordinators relate to cell and anchors, so we need these information to calculate boxes as well. Model detail please refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v2-tf Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 6 +- libavfilter/vf_dnn_detect.c| 242 - 2 files changed, 244 insertions(+), 4 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index d3af8c34ce..6fe8b9c243 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -386,9 +386,9 @@ static void infer_completion_callback(void *args) ov_shape_free(&output_shape); return; } -output.channels = dims[1]; -output.height = dims[2]; -output.width= dims[3]; +output.channels = output_shape.rank > 2 ? dims[output_shape.rank - 3] : 1; +output.height = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; +output.width= output_shape.rank > 0 ? 
dims[output_shape.rank - 1] : 1; av_assert0(request->lltask_count <= dims[0]); ov_shape_free(&output_shape); #else diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 9db90ee4cf..7ac3bb0b58 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -30,9 +30,11 @@ #include "libavutil/time.h" #include "libavutil/avstring.h" #include "libavutil/detection_bbox.h" +#include "libavutil/fifo.h" typedef enum { -DDMT_SSD +DDMT_SSD, +DDMT_YOLOV1V2, } DNNDetectionModelType; typedef struct DnnDetectContext { @@ -43,6 +45,15 @@ typedef struct DnnDetectContext { char **labels; int label_count; DNNDetectionModelType model_type; +int cell_w; +int cell_h; +int nb_classes; +AVFifo *bboxes_fifo; +int scale_width; +int scale_height; +char *anchors_str; +float *anchors; +int nb_anchor; } DnnDetectContext; #define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x) @@ -61,11 +72,218 @@ static const AVOption dnn_detect_options[] = { { "labels", "path to labels file",OFFSET2(labels_filename), AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS }, { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, "model_type" }, { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, +{ "yolo","output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" }, +{ "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, +{ "cell_h", "cell height",OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, +{ "nb_classes", "The number of class",OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 },0, INTMAX_MAX, FLAGS }, +{ "anchors", "anchors, splited by '&'",OFFSET2(anchors_str), AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS }, { NULL } }; AVFILTER_DEFINE_CLASS(dnn_detect); +static int dnn_detect_get_label_id(int nb_classes, int cell_size, float 
*label_data) +{ +float max_prob = 0; +int label_id = 0; +for (int i = 0; i < nb_classes; i++) { +if (label_data[i * cell_size] > max_prob) { +max_prob = label_data[i * cell_size]; +label_id = i; +} +} +return label_id; +} + +static int dnn_detect_parse_anchors(char *anchors_str, float **anchors) +{ +char *saveptr = NULL, *token; +float *anchors_buf; +int nb_anchor = 0, i = 0; +while(anchors_str[i] != '\0') { +if(anchors_str[i] == '&') +nb_anchor++; +i++; +} +nb_anchor++; +anchors_buf = av_mallocz(nb_anchor * sizeof(*anchors)); +if (!anchors_buf) { +return 0; +} +for (int i = 0; i < nb_anchor; i++) { +token = av_strtok(anchors_str, "&", &saveptr); +anchors_buf[i] = strtof(token, NULL); +anchors_str = NULL; +} +*anchors = anchors_buf; +return nb_anchor; +} + +/* Calculate Intersection Over Union */ +static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2) +{ +float overlapping_width = FFMIN(bbox1->x + bbox1->w, bbox2->x + bbox2->w) - FFMAX(bbox1->x, bbox2->x); +float overlapping_height = FFMIN(bbox1->y + bbox1->h, bbox2->y + bbox2->h) - FFMAX(bbox1->y, bbox2->y); +float intersection_area = +(overlapping_width < 0 || overlappi
[FFmpeg-devel] [PATCH 1/2] libavfilter/vf_dnn_detect: Add model_type option.
From: Wenbin Chen There are many kinds of detection DNN model and they have different preprocess and postprocess methods. To support more models, "model_type" option is added to help to choose preprocess and postprocess function. Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 42 ++--- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index b5dae42c65..9db90ee4cf 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -31,6 +31,10 @@ #include "libavutil/avstring.h" #include "libavutil/detection_bbox.h" +typedef enum { +DDMT_SSD +} DNNDetectionModelType; + typedef struct DnnDetectContext { const AVClass *class; DnnContext dnnctx; @@ -38,6 +42,7 @@ typedef struct DnnDetectContext { char *labels_filename; char **labels; int label_count; +DNNDetectionModelType model_type; } DnnDetectContext; #define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x) @@ -54,12 +59,14 @@ static const AVOption dnn_detect_options[] = { DNN_COMMON_OPTIONS { "confidence", "threshold of confidence",OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS}, { "labels", "path to labels file",OFFSET2(labels_filename), AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS }, +{ "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, "model_type" }, +{ "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" }, { NULL } }; AVFILTER_DEFINE_CLASS(dnn_detect); -static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) +static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; float conf_threshold = ctx->confidence; @@ -67,14 +74,12 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterCont int detect_size = output->width; float 
*detections = output->data; int nb_bboxes = 0; -AVFrameSideData *sd; -AVDetectionBBox *bbox; AVDetectionBBoxHeader *header; +AVDetectionBBox *bbox; -sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); -if (sd) { -av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side data.\n"); -return -1; +if (output->width != 7) { +av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd requirement.\n"); +return AVERROR(EINVAL); } for (int i = 0; i < proposal_count; ++i) { @@ -135,6 +140,29 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterCont return 0; } +static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) +{ +AVFrameSideData *sd; +DnnDetectContext *ctx = filter_ctx->priv; +int ret = 0; + +sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); +if (sd) { +av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side data.\n"); +return -1; +} + +switch (ctx->model_type) { +case DDMT_SSD: +ret = dnn_detect_post_proc_ssd(frame, output, filter_ctx); +if (ret < 0) +return ret; +break; +} + +return 0; +} + static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx) { DnnDetectContext *ctx = filter_ctx->priv; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavfilter/dnn/openvino: Reduce redundant memory allocation
From: Wenbin Chen We can directly get data ptr from tensor, so that extral memory allocation can be removed. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 42 +- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 10520cd765..d3af8c34ce 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -204,7 +204,6 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ov_tensor_t* tensor = NULL; ov_shape_t input_shape = {0}; ov_element_type_e precision; -void *input_data_ptr = NULL; #else dimensions_t dims; precision_e precision; @@ -249,12 +248,6 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) input.width = dims[2]; input.channels = dims[3]; input.dt = precision_to_datatype(precision); -input.data = av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt)); -if (!input.data) { -ov_shape_free(&input_shape); -return AVERROR(ENOMEM); -} -input_data_ptr = input.data; #else status = ie_infer_request_get_blob(request->infer_request, task->input_name, &input_blob); if (status != OK) { @@ -297,6 +290,26 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) request->lltasks[i] = lltask; request->lltask_count = i + 1; task = lltask->task; +#if HAVE_OPENVINO2 +if (tensor) +ov_tensor_free(tensor); +status = ov_tensor_create(precision, input_shape, &tensor); +ov_shape_free(&input_shape); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to create tensor from host prt.\n"); +return ov2_map_error(status, NULL); +} +status = ov_tensor_data(tensor, &input.data); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input data.\n"); +return ov2_map_error(status, NULL); +} +status = ov_infer_request_set_input_tensor(request->infer_request, tensor); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to Set an 
input tensor for the model.\n"); +return ov2_map_error(status, NULL); +} +#endif switch (ov_model->model->func_type) { case DFT_PROCESS_FRAME: if (task->do_ioproc) { @@ -317,24 +330,11 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) av_assert0(!"should not reach here"); break; } -#if HAVE_OPENVINO2 -status = ov_tensor_create_from_host_ptr(precision, input_shape, input.data, &tensor); -ov_shape_free(&input_shape); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to create tensor from host prt.\n"); -return ov2_map_error(status, NULL); -} -status = ov_infer_request_set_input_tensor(request->infer_request, tensor); -if (status != OK) { -av_log(ctx, AV_LOG_ERROR, "Failed to Set an input tensor for the model.\n"); -return ov2_map_error(status, NULL); -} -#endif input.data = (uint8_t *)input.data + input.width * input.height * input.channels * get_datatype_size(input.dt); } #if HAVE_OPENVINO2 -av_freep(&input_data_ptr); +ov_tensor_free(tensor); #else ie_blob_free(&input_blob); #endif -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 3/3] libavfilter/dnn: Initialize DNNData variables
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_tf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index b521de7fbe..25046b58d9 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -622,7 +622,7 @@ err: } static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) { -DNNData input; +DNNData input = { 0 }; LastLevelTaskItem *lltask; TaskItem *task; TFInferRequest *infer_request = NULL; @@ -724,7 +724,7 @@ static void infer_completion_callback(void *args) { TFModel *tf_model = task->model; TFContext *ctx = &tf_model->ctx; -outputs = av_malloc_array(task->nb_output, sizeof(*outputs)); +outputs = av_calloc(task->nb_output, sizeof(*outputs)); if (!outputs) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); goto err; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2 2/3] libavfilter/dnn: Add scale and mean preprocess to openvino backend
From: Wenbin Chen Dnn models has different data preprocess requirements. Scale and mean parameters are added to preprocess input data. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 43 -- libavfilter/dnn/dnn_io_proc.c | 82 +- libavfilter/dnn_interface.h| 2 + 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 3ba5f5331a..4224600f94 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -46,6 +46,8 @@ typedef struct OVOptions{ int batch_size; int input_resizable; DNNLayout layout; +float scale; +float mean; } OVOptions; typedef struct OVContext { @@ -105,6 +107,8 @@ static const AVOption dnn_openvino_options[] = { { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"}, { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"}, { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"}, +{ "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, +{ "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, { NULL } }; @@ -209,6 +213,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ie_blob_t *input_blob = NULL; #endif +memset(&input, 0, sizeof(input)); lltask = ff_queue_peek_front(ov_model->lltask_queue); av_assert0(lltask); task = lltask->task; @@ -274,6 +279,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) // all models in openvino open model zoo use BGR as input, // change to be an option when necessary. input.order = DCO_BGR; +// We use preprocess_steps to scale input data, so disable scale and mean here. 
+input.scale = 1; +input.mean = 0; for (int i = 0; i < ctx->options.batch_size; ++i) { lltask = ff_queue_pop_front(ov_model->lltask_queue); @@ -343,6 +351,7 @@ static void infer_completion_callback(void *args) ov_shape_t output_shape = {0}; ov_element_type_e precision; +memset(&output, 0, sizeof(output)); status = ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, &output_tensor); if (status != OK) { av_log(ctx, AV_LOG_ERROR, @@ -409,6 +418,8 @@ static void infer_completion_callback(void *args) #endif output.dt = precision_to_datatype(precision); output.layout = ctx->options.layout; +output.scale= ctx->options.scale; +output.mean = ctx->options.mean; av_assert0(request->lltask_count >= 1); for (int i = 0; i < request->lltask_count; ++i) { @@ -542,7 +553,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ie_config_t config = {NULL, NULL, NULL}; char *all_dev_names = NULL; #endif - +// We scale pixel by default when do frame processing. +if (fabsf(ctx->options.scale) < 1e-6f) +ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 
255 : 1; // batch size if (ctx->options.batch_size <= 0) { ctx->options.batch_size = 1; @@ -609,15 +622,37 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } +status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); if (ov_model->model->func_type != DFT_PROCESS_FRAME) -//set precision only for detect and classify -status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); -status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) +status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +else +status |= ov_preprocess_output_set_element_type(output_tensor_info, U8); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to set input/output element type\n"); ret = ov2_map_error(status, NULL); goto err; } +// set preprocess steps. +if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) { +ov_preprocess_preprocess_steps_t* input_process_steps = NULL; +status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get preprocess steps\n"); +ret = ov2_map_error(st
[FFmpeg-devel] [PATCH v2 1/3] libavfilter/dnn: add layout option to openvino backend
From: Wenbin Chen Dnn models have different input layout (NCHW or NHWC), so a "layout" option is added Use openvino's API to do layout conversion for input data. Use swscale to do layout conversion for output data as openvino doesn't have similiar C API for output. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 47 +++- libavfilter/dnn/dnn_io_proc.c | 151 ++--- libavfilter/dnn_interface.h| 7 ++ 3 files changed, 185 insertions(+), 20 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 4922833b07..3ba5f5331a 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -45,6 +45,7 @@ typedef struct OVOptions{ uint8_t async; int batch_size; int input_resizable; +DNNLayout layout; } OVOptions; typedef struct OVContext { @@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = { DNN_BACKEND_COMMON_OPTIONS { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, +{ "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" }, +{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"}, +{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"}, +{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"}, { NULL } }; @@ -231,9 +236,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) avpriv_report_missing_feature(ctx, "Do not support dynamic model."); return AVERROR(ENOSYS); } -input.height = dims[2]; -input.width = dims[3]; -input.channels = dims[1]; +input.height = dims[1]; +input.width = dims[2]; +input.channels = dims[3]; input.dt = precision_to_datatype(precision); input.data = 
av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt)); if (!input.data) @@ -403,6 +408,7 @@ static void infer_completion_callback(void *args) av_assert0(request->lltask_count <= dims.dims[0]); #endif output.dt = precision_to_datatype(precision); +output.layout = ctx->options.layout; av_assert0(request->lltask_count >= 1); for (int i = 0; i < request->lltask_count; ++i) { @@ -521,11 +527,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * OVContext *ctx = &ov_model->ctx; #if HAVE_OPENVINO2 ov_status_e status; -ov_preprocess_input_tensor_info_t* input_tensor_info; -ov_preprocess_output_tensor_info_t* output_tensor_info; +ov_preprocess_input_tensor_info_t* input_tensor_info = NULL; +ov_preprocess_output_tensor_info_t* output_tensor_info = NULL; +ov_preprocess_input_model_info_t* input_model_info = NULL; ov_model_t *tmp_ov_model; ov_layout_t* NHWC_layout = NULL; +ov_layout_t* NCHW_layout = NULL; const char* NHWC_desc = "NHWC"; +const char* NCHW_desc = "NCHW"; const char* device = ctx->options.device_type; #else IEStatusCode status; @@ -570,6 +579,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * //set input layout status = ov_layout_create(NHWC_desc, &NHWC_layout); +status |= ov_layout_create(NCHW_desc, &NCHW_layout); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n"); ret = ov2_map_error(status, NULL); @@ -583,6 +593,22 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } +status = ov_preprocess_input_info_get_model_info(ov_model->input_info, &input_model_info); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n"); +ret = ov2_map_error(status, NULL); +goto err; +} +if (ctx->options.layout == DL_NCHW) +status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout); +else if (ctx->options.layout == DL_NHWC) +status = 
ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n"); +ret = ov2_map_error(status, NULL); +goto err; +} + if (ov_model->model->func_type != DFT_PROCESS_FRAME) //set precision only for detect and classify status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); @@ -618,6 +644,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ret = ov2_map_error(sta
[FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_tf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index b521de7fbe..e1e8cef0d2 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -629,6 +629,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) { TFContext *ctx = &tf_model->ctx; int ret = 0; +memset(&input, 0, sizeof(input)); lltask = ff_queue_pop_front(tf_model->lltask_queue); av_assert0(lltask); task = lltask->task; @@ -724,7 +725,7 @@ static void infer_completion_callback(void *args) { TFModel *tf_model = task->model; TFContext *ctx = &tf_model->ctx; -outputs = av_malloc_array(task->nb_output, sizeof(*outputs)); +outputs = av_calloc(task->nb_output, sizeof(*outputs)); if (!outputs) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n"); goto err; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess to openvino backend
From: Wenbin Chen Dnn models has different data preprocess requirements. Scale and mean parameters are added to preprocess input data. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 43 -- libavfilter/dnn/dnn_io_proc.c | 82 +- libavfilter/dnn_interface.h| 2 + 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 3ba5f5331a..4224600f94 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -46,6 +46,8 @@ typedef struct OVOptions{ int batch_size; int input_resizable; DNNLayout layout; +float scale; +float mean; } OVOptions; typedef struct OVContext { @@ -105,6 +107,8 @@ static const AVOption dnn_openvino_options[] = { { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"}, { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"}, { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"}, +{ "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, +{ "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, { NULL } }; @@ -209,6 +213,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) ie_blob_t *input_blob = NULL; #endif +memset(&input, 0, sizeof(input)); lltask = ff_queue_peek_front(ov_model->lltask_queue); av_assert0(lltask); task = lltask->task; @@ -274,6 +279,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) // all models in openvino open model zoo use BGR as input, // change to be an option when necessary. input.order = DCO_BGR; +// We use preprocess_steps to scale input data, so disable scale and mean here. 
+input.scale = 1; +input.mean = 0; for (int i = 0; i < ctx->options.batch_size; ++i) { lltask = ff_queue_pop_front(ov_model->lltask_queue); @@ -343,6 +351,7 @@ static void infer_completion_callback(void *args) ov_shape_t output_shape = {0}; ov_element_type_e precision; +memset(&output, 0, sizeof(output)); status = ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, &output_tensor); if (status != OK) { av_log(ctx, AV_LOG_ERROR, @@ -409,6 +418,8 @@ static void infer_completion_callback(void *args) #endif output.dt = precision_to_datatype(precision); output.layout = ctx->options.layout; +output.scale= ctx->options.scale; +output.mean = ctx->options.mean; av_assert0(request->lltask_count >= 1); for (int i = 0; i < request->lltask_count; ++i) { @@ -542,7 +553,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ie_config_t config = {NULL, NULL, NULL}; char *all_dev_names = NULL; #endif - +// We scale pixel by default when do frame processing. +if (fabsf(ctx->options.scale) < 1e-6f) +ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 
255 : 1; // batch size if (ctx->options.batch_size <= 0) { ctx->options.batch_size = 1; @@ -609,15 +622,37 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } +status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); if (ov_model->model->func_type != DFT_PROCESS_FRAME) -//set precision only for detect and classify -status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); -status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) +status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); +else +status |= ov_preprocess_output_set_element_type(output_tensor_info, U8); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to set input/output element type\n"); ret = ov2_map_error(status, NULL); goto err; } +// set preprocess steps. +if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) { +ov_preprocess_preprocess_steps_t* input_process_steps = NULL; +status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get preprocess steps\n"); +ret = ov2_map_error(st
[FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend
From: Wenbin Chen Dnn models have different input layout (NCHW or NHWC), so a "layout" option is added Use openvino's API to do layout conversion for input data. Use swscale to do layout conversion for output data as openvino doesn't have similiar C API for output. Signed-off-by: Wenbin Chen --- libavfilter/dnn/dnn_backend_openvino.c | 47 +++- libavfilter/dnn/dnn_io_proc.c | 151 ++--- libavfilter/dnn_interface.h| 7 ++ 3 files changed, 185 insertions(+), 20 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 4922833b07..3ba5f5331a 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -45,6 +45,7 @@ typedef struct OVOptions{ uint8_t async; int batch_size; int input_resizable; +DNNLayout layout; } OVOptions; typedef struct OVContext { @@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = { DNN_BACKEND_COMMON_OPTIONS { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, +{ "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" }, +{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"}, +{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"}, +{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"}, { NULL } }; @@ -231,9 +236,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) avpriv_report_missing_feature(ctx, "Do not support dynamic model."); return AVERROR(ENOSYS); } -input.height = dims[2]; -input.width = dims[3]; -input.channels = dims[1]; +input.height = dims[1]; +input.width = dims[2]; +input.channels = dims[3]; input.dt = precision_to_datatype(precision); input.data = 
av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt)); if (!input.data) @@ -403,6 +408,7 @@ static void infer_completion_callback(void *args) av_assert0(request->lltask_count <= dims.dims[0]); #endif output.dt = precision_to_datatype(precision); +output.layout = ctx->options.layout; av_assert0(request->lltask_count >= 1); for (int i = 0; i < request->lltask_count; ++i) { @@ -521,11 +527,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * OVContext *ctx = &ov_model->ctx; #if HAVE_OPENVINO2 ov_status_e status; -ov_preprocess_input_tensor_info_t* input_tensor_info; -ov_preprocess_output_tensor_info_t* output_tensor_info; +ov_preprocess_input_tensor_info_t* input_tensor_info = NULL; +ov_preprocess_output_tensor_info_t* output_tensor_info = NULL; +ov_preprocess_input_model_info_t* input_model_info = NULL; ov_model_t *tmp_ov_model; ov_layout_t* NHWC_layout = NULL; +ov_layout_t* NCHW_layout = NULL; const char* NHWC_desc = "NHWC"; +const char* NCHW_desc = "NCHW"; const char* device = ctx->options.device_type; #else IEStatusCode status; @@ -570,6 +579,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * //set input layout status = ov_layout_create(NHWC_desc, &NHWC_layout); +status |= ov_layout_create(NCHW_desc, &NCHW_layout); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n"); ret = ov2_map_error(status, NULL); @@ -583,6 +593,22 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } +status = ov_preprocess_input_info_get_model_info(ov_model->input_info, &input_model_info); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n"); +ret = ov2_map_error(status, NULL); +goto err; +} +if (ctx->options.layout == DL_NCHW) +status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout); +else if (ctx->options.layout == DL_NHWC) +status = 
ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout); +if (status != OK) { +av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n"); +ret = ov2_map_error(status, NULL); +goto err; +} + if (ov_model->model->func_type != DFT_PROCESS_FRAME) //set precision only for detect and classify status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8); @@ -618,6 +644,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ret = ov2_map_error(sta
[FFmpeg-devel] [PATCH] libavfilter/vf_vpp_qsv: Fix a timestamp bug when framerate is not set
From: Wenbin Chen If user doesn't set framerate when he creates a filter, the filter uses default framerate {0, 1}. This causes error when setting timebase to 1/framerate. Now change it to pass inlink->time_base to outlink when framerate is not set. This patch fixes ticket: #10476 #10468 Signed-off-by: Wenbin Chen --- libavfilter/vf_vpp_qsv.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index 92ef0f1d89..c07b45fedb 100644 --- a/libavfilter/vf_vpp_qsv.c +++ b/libavfilter/vf_vpp_qsv.c @@ -536,7 +536,10 @@ static int config_output(AVFilterLink *outlink) outlink->w = vpp->out_width; outlink->h = vpp->out_height; outlink->frame_rate = vpp->framerate; -outlink->time_base = av_inv_q(vpp->framerate); +if (vpp->framerate.num == 0 || vpp->framerate.den == 0) +outlink->time_base = inlink->time_base; +else +outlink->time_base = av_inv_q(vpp->framerate); param.filter_frame = NULL; param.set_frame_ext_params = vpp_set_frame_ext_params; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] lavfi/dnn: Add OpenVINO API 2.0 support
From: Wenbin Chen OpenVINO API 2.0 was released in March 2022, which introduced new features. This commit implements current OpenVINO features with new 2.0 APIs. And will add other features in API 2.0. Please add installation path, which include openvino.pc, to PKG_CONFIG_PATH mannually for new OpenVINO libs config. Signed-off-by: Ting Fu Signed-off-by: Wenbin Chen --- configure | 6 +- libavfilter/dnn/dnn_backend_openvino.c | 515 +++-- 2 files changed, 487 insertions(+), 34 deletions(-) diff --git a/configure b/configure index 99388e7664..90caa26107 100755 --- a/configure +++ b/configure @@ -2459,6 +2459,7 @@ HAVE_LIST=" texi2html xmllint zlib_gzip +openvino2 " # options emitted with CONFIG_ prefix but not available on the command line @@ -6767,8 +6768,9 @@ enabled libopenh264 && require_pkg_config libopenh264 openh264 wels/codec_ enabled libopenjpeg && { check_pkg_config libopenjpeg "libopenjp2 >= 2.1.0" openjpeg.h opj_version || { require_pkg_config libopenjpeg "libopenjp2 >= 2.1.0" openjpeg.h opj_version -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } } enabled libopenmpt&& require_pkg_config libopenmpt "libopenmpt >= 0.2.6557" libopenmpt/libopenmpt.h openmpt_module_create -lstdc++ && append libopenmpt_extralibs "-lstdc++" -enabled libopenvino && { check_pkg_config libopenvino openvino c_api/ie_c_api.h ie_c_api_version || - require libopenvino c_api/ie_c_api.h ie_c_api_version -linference_engine_c_api; } +enabled libopenvino && { { check_pkg_config libopenvino openvino openvino/c/openvino.h ov_core_create && enable openvino2; } || +{ check_pkg_config libopenvino openvino c_api/ie_c_api.h ie_c_api_version || + require libopenvino c_api/ie_c_api.h ie_c_api_version -linference_engine_c_api; } } enabled libopus && { enabled libopus_decoder && { require_pkg_config libopus opus opus_multistream.h opus_multistream_decoder_create diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 46cbe8270e..4922833b07 100644 --- 
a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -32,7 +32,11 @@ #include "libavutil/detection_bbox.h" #include "../internal.h" #include "safe_queue.h" +#if HAVE_OPENVINO2 +#include +#else #include +#endif #include "dnn_backend_common.h" typedef struct OVOptions{ @@ -51,9 +55,20 @@ typedef struct OVContext { typedef struct OVModel{ OVContext ctx; DNNModel *model; +#if HAVE_OPENVINO2 +ov_core_t *core; +ov_model_t *ov_model; +ov_compiled_model_t *compiled_model; +ov_output_const_port_t* input_port; +ov_preprocess_input_info_t* input_info; +ov_output_const_port_t* output_port; +ov_preprocess_output_info_t* output_info; +ov_preprocess_prepostprocessor_t* preprocess; +#else ie_core_t *core; ie_network_t *network; ie_executable_network_t *exe_network; +#endif SafeQueue *request_queue; // holds OVRequestItem Queue *task_queue; // holds TaskItem Queue *lltask_queue; // holds LastLevelTaskItem @@ -63,10 +78,15 @@ typedef struct OVModel{ // one request for one call to openvino typedef struct OVRequestItem { -ie_infer_request_t *infer_request; LastLevelTaskItem **lltasks; uint32_t lltask_count; +#if HAVE_OPENVINO2 +ov_infer_request_t *infer_request; +ov_callback_t callback; +#else ie_complete_call_back_t callback; +ie_infer_request_t *infer_request; +#endif } OVRequestItem; #define APPEND_STRING(generated_string, iterate_string) \ @@ -85,11 +105,61 @@ static const AVOption dnn_openvino_options[] = { AVFILTER_DEFINE_CLASS(dnn_openvino); +#if HAVE_OPENVINO2 +static const struct { +ov_status_e status; +int av_err; +const char *desc; +} ov2_errors[] = { +{ OK, 0, "success"}, +{ GENERAL_ERROR, AVERROR_EXTERNAL, "general error" }, +{ NOT_IMPLEMENTED,AVERROR(ENOSYS),"not implemented"}, +{ NETWORK_NOT_LOADED, AVERROR_EXTERNAL, "network not loaded" }, +{ PARAMETER_MISMATCH, AVERROR(EINVAL),"parameter mismatch" }, +{ NOT_FOUND, AVERROR_EXTERNAL, "not found" }, +{ OUT_OF_BOUNDS, AVERROR(EOVERFLOW), "out of bounds" }, +{ UNEXPECTED, 
AVERROR_EXTERNAL, "unexpected" }, +{ REQUEST_BUSY, AVERROR(EBUSY), "request busy" }, +{ RESULT_NOT_READY, AVERROR(EBUSY), "result not ready" }, +{ NOT_ALLOCATED, AVERROR(ENODATA), "not allocated" }, +{ INFER_NOT_STARTED, AVERROR_EXTERNAL,
[FFmpeg-devel] [PATCH] libavfilter/vf_dnn_detect: bbox index may be bigger than bbox number
From: Wenbin Chen Fix a bug that queried bbox index may bigger than bbox's total number. Signed-off-by: Wenbin Chen --- libavfilter/vf_dnn_detect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index 06efce02a6..6ef04e0958 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -106,12 +106,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, AVFilterCont float x1 = detections[i * detect_size + 5]; float y1 = detections[i * detect_size + 6]; -bbox = av_get_detection_bbox(header, i); - if (conf < conf_threshold) { continue; } +bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes); bbox->x = (int)(x0 * frame->width); bbox->w = (int)(x1 * frame->width) - bbox->x; bbox->y = (int)(y0 * frame->height); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] doc/encoders: Add av1 to qsv encoder's summary
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/encoders.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/encoders.texi b/doc/encoders.texi index b02737b9df..d6dddc2bd5 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3188,8 +3188,8 @@ recommended value) and do not set a size constraint. @section QSV Encoders -The family of Intel QuickSync Video encoders (MPEG-2, H.264, HEVC, JPEG/MJPEG -and VP9) +The family of Intel QuickSync Video encoders (MPEG-2, H.264, HEVC, JPEG/MJPEG, +VP9, AV1) @subsection Ratecontrol Method The ratecontrol method is selected as follows: -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] doc/examples/qsv_transcode: Fix a bug when using more than one parameter set
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/examples/qsv_transcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/examples/qsv_transcode.c b/doc/examples/qsv_transcode.c index 7ea3ef5674..48128b200c 100644 --- a/doc/examples/qsv_transcode.c +++ b/doc/examples/qsv_transcode.c @@ -88,7 +88,7 @@ static int dynamic_set_parameter(AVCodecContext *avctx) if (current_setting_number < setting_number && frame_number == dynamic_setting[current_setting_number].frame_number) { AVDictionaryEntry *e = NULL; -ret = str_to_dict(dynamic_setting[current_setting_number].optstr, &opts); +ret = str_to_dict(dynamic_setting[current_setting_number++].optstr, &opts); if (ret < 0) { fprintf(stderr, "The dynamic parameter is wrong\n"); goto fail; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Add dynamic setting support of low_delay_brc to av1_qsv
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/encoders.texi | 2 +- libavcodec/qsvenc.c | 5 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/encoders.texi b/doc/encoders.texi index b02737b9df..e9b34010ed 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3344,7 +3344,7 @@ Supported in h264_qsv. Change these value to reset qsv codec's max/min qp configuration. @item @var{low_delay_brc} -Supported in h264_qsv and hevc_qsv. +Supported in h264_qsv, hevc_qsv and av1_qsv. Change this value to reset qsv codec's low_delay_brc configuration. @item @var{framerate} diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 3607859cb8..c975302b4f 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1127,6 +1127,7 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) } else if (avctx->codec_id == AV_CODEC_ID_AV1) { if (q->low_delay_brc >= 0) q->extco3.LowDelayBRC = q->low_delay_brc ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF; +q->old_low_delay_brc = q->low_delay_brc; } if (avctx->codec_id == AV_CODEC_ID_HEVC) { @@ -2213,7 +2214,9 @@ static int update_low_delay_brc(AVCodecContext *avctx, QSVEncContext *q) { int updated = 0; -if (avctx->codec_id != AV_CODEC_ID_H264 && avctx->codec_id != AV_CODEC_ID_HEVC) +if (avctx->codec_id != AV_CODEC_ID_H264 && +avctx->codec_id != AV_CODEC_ID_HEVC && +avctx->codec_id != AV_CODEC_ID_AV1) return 0; UPDATE_PARAM(q->old_low_delay_brc, q->low_delay_brc); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc: Flush cached frames before reset encoder
From: Wenbin Chen According to https://github.com/Intel-Media-SDK/MediaSDK/blob/master/doc/mediasdk-man.md#configuration-change. Before calling MFXVideoENCODE_Reset, The application needs to retrieve any cached frames in the SDK encoder. A loop is added before MFXVideoENCODE_Reset to retrieve cached frames and add them to async_fifo, so that dynamic configuration works when async_depth > 1. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 122 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 2f0e94a914..3951f40e7b 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1600,7 +1600,7 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q) q->param.AsyncDepth = q->async_depth; -q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 0); +q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), AV_FIFO_FLAG_AUTO_GROW); if (!q->async_fifo) return AVERROR(ENOMEM); @@ -2296,58 +2296,6 @@ static int update_pic_timing_sei(AVCodecContext *avctx, QSVEncContext *q) return updated; } -static int update_parameters(AVCodecContext *avctx, QSVEncContext *q, - const AVFrame *frame) -{ -int needReset = 0, ret = 0; - -if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG) -return 0; - -needReset = update_qp(avctx, q); -needReset |= update_max_frame_size(avctx, q); -needReset |= update_gop_size(avctx, q); -needReset |= update_rir(avctx, q); -needReset |= update_low_delay_brc(avctx, q); -needReset |= update_frame_rate(avctx, q); -needReset |= update_bitrate(avctx, q); -needReset |= update_pic_timing_sei(avctx, q); -ret = update_min_max_qp(avctx, q); -if (ret < 0) -return ret; -needReset |= ret; -if (!needReset) -return 0; - -if (avctx->hwaccel_context) { -AVQSVContext *qsv = avctx->hwaccel_context; -int i, j; -q->param.ExtParam = q->extparam; -for (i = 0; i < qsv->nb_ext_buffers; i++) -q->param.ExtParam[i] = qsv->ext_buffers[i]; -q->param.NumExtParam = qsv->nb_ext_buffers; - -for (i = 0; i 
< q->nb_extparam_internal; i++) { -for (j = 0; j < qsv->nb_ext_buffers; j++) { -if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId) -break; -} -if (j < qsv->nb_ext_buffers) -continue; -q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i]; -} -} else { -q->param.ExtParam= q->extparam_internal; -q->param.NumExtParam = q->nb_extparam_internal; -} -av_log(avctx, AV_LOG_DEBUG, "Parameter change, call msdk reset.\n"); -ret = MFXVideoENCODE_Reset(q->session, &q->param); -if (ret < 0) -return ff_qsv_print_error(avctx, ret, "Error during resetting"); - -return 0; -} - static int encode_frame(AVCodecContext *avctx, QSVEncContext *q, const AVFrame *frame) { @@ -2438,7 +2386,7 @@ static int encode_frame(AVCodecContext *avctx, QSVEncContext *q, if (ret < 0) { ret = (ret == MFX_ERR_MORE_DATA) ? - 0 : ff_qsv_print_error(avctx, ret, "Error during encoding"); + AVERROR(EAGAIN) : ff_qsv_print_error(avctx, ret, "Error during encoding"); goto free; } @@ -2448,7 +2396,9 @@ static int encode_frame(AVCodecContext *avctx, QSVEncContext *q, ret = 0; if (*pkt.sync) { -av_fifo_write(q->async_fifo, &pkt, 1); +ret = av_fifo_write(q->async_fifo, &pkt, 1); +if (ret < 0) +goto free; } else { free: av_freep(&pkt.sync); @@ -2466,6 +2416,66 @@ nomem: goto free; } +static int update_parameters(AVCodecContext *avctx, QSVEncContext *q, + const AVFrame *frame) +{ +int needReset = 0, ret = 0; + +if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG) +return 0; + +needReset = update_qp(avctx, q); +needReset |= update_max_frame_size(avctx, q); +needReset |= update_gop_size(avctx, q); +needReset |= update_rir(avctx, q); +needReset |= update_low_delay_brc(avctx, q); +needReset |= update_frame_rate(avctx, q); +needReset |= update_bitrate(avctx, q); +needReset |= update_pic_timing_sei(avctx, q); +ret = update_min_max_qp(avctx, q); +if (ret < 0) +return ret; +needReset |= ret; +if (!needReset) +return 0; + +if (avctx->hwaccel_context) { +AVQSVContext *qsv = 
avctx->hwaccel_context; +int i, j; +q->param.ExtParam = q->extparam; +for (i = 0; i < qsv->nb_ext_buffers; i++) +q->param.ExtParam[i] = qsv->ext_buffers[i]; +q->param.NumExtParam = qsv->nb_ext_buffers; + +for (i = 0; i < q->nb_extparam_internal; i++) { +
[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc: Do not pass RGB colorspace to VPL/MSDK
From: Wenbin Chen When encode RGB frame, Intel driver convert RGB to YUV, so we cannot set RGB colorspace to VPL/MSDK. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 2f0e94a914..d3f7532fc0 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1185,7 +1185,12 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->extvsi.ColourDescriptionPresent = 1; q->extvsi.ColourPrimaries = avctx->color_primaries; q->extvsi.TransferCharacteristics = avctx->color_trc; -q->extvsi.MatrixCoefficients = avctx->colorspace; +if (avctx->colorspace == AVCOL_SPC_RGB) +// RGB will be converted to YUV, so RGB colorspace is not supported +q->extvsi.MatrixCoefficients = AVCOL_SPC_UNSPECIFIED; +else +q->extvsi.MatrixCoefficients = avctx->colorspace; + } if ((avctx->codec_id != AV_CODEC_ID_VP9) && (q->extvsi.VideoFullRange || q->extvsi.ColourDescriptionPresent)) { -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Do not pass RGB colorspace to VPL/MSDK
From: Wenbin Chen When encode RGB frame, Intel driver convert RGB to YUV, so we cannot set RGB colorspace to VPL/MSDK. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 2f0e94a914..944a76f4f1 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1185,7 +1185,12 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->extvsi.ColourDescriptionPresent = 1; q->extvsi.ColourPrimaries = avctx->color_primaries; q->extvsi.TransferCharacteristics = avctx->color_trc; -q->extvsi.MatrixCoefficients = avctx->colorspace; +if (avctx->colorspace == AVCOL_SPC_RGB) +//YUV will be converted to RGB, so RGB colorspace is not supported +q->extvsi.MatrixCoefficients = AVCOL_SPC_UNSPECIFIED; +else +q->extvsi.MatrixCoefficients = avctx->colorspace; + } if ((avctx->codec_id != AV_CODEC_ID_VP9) && (q->extvsi.VideoFullRange || q->extvsi.ColourDescriptionPresent)) { -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Flush cached frames before reset encoder
From: Wenbin Chen According to https://github.com/Intel-Media-SDK/MediaSDK/blob/master/doc/mediasdk-man.md#configuration-change. Before calling MFXVideoENCODE_Reset, The application needs to retrieve any cached frames in the SDK encoder. A loop is added before MFXVideoENCODE_Reset to retrieve cached frames and add them to async_fifo, so that dynamic configuration works when async_depth > 1. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 118 +++- 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 2f0e94a914..f3b488dec8 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1600,7 +1600,7 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q) q->param.AsyncDepth = q->async_depth; -q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 0); +q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), AV_FIFO_FLAG_AUTO_GROW); if (!q->async_fifo) return AVERROR(ENOMEM); @@ -2296,58 +2296,6 @@ static int update_pic_timing_sei(AVCodecContext *avctx, QSVEncContext *q) return updated; } -static int update_parameters(AVCodecContext *avctx, QSVEncContext *q, - const AVFrame *frame) -{ -int needReset = 0, ret = 0; - -if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG) -return 0; - -needReset = update_qp(avctx, q); -needReset |= update_max_frame_size(avctx, q); -needReset |= update_gop_size(avctx, q); -needReset |= update_rir(avctx, q); -needReset |= update_low_delay_brc(avctx, q); -needReset |= update_frame_rate(avctx, q); -needReset |= update_bitrate(avctx, q); -needReset |= update_pic_timing_sei(avctx, q); -ret = update_min_max_qp(avctx, q); -if (ret < 0) -return ret; -needReset |= ret; -if (!needReset) -return 0; - -if (avctx->hwaccel_context) { -AVQSVContext *qsv = avctx->hwaccel_context; -int i, j; -q->param.ExtParam = q->extparam; -for (i = 0; i < qsv->nb_ext_buffers; i++) -q->param.ExtParam[i] = qsv->ext_buffers[i]; -q->param.NumExtParam = qsv->nb_ext_buffers; - -for (i = 
0; i < q->nb_extparam_internal; i++) { -for (j = 0; j < qsv->nb_ext_buffers; j++) { -if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId) -break; -} -if (j < qsv->nb_ext_buffers) -continue; -q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i]; -} -} else { -q->param.ExtParam= q->extparam_internal; -q->param.NumExtParam = q->nb_extparam_internal; -} -av_log(avctx, AV_LOG_DEBUG, "Parameter change, call msdk reset.\n"); -ret = MFXVideoENCODE_Reset(q->session, &q->param); -if (ret < 0) -return ff_qsv_print_error(avctx, ret, "Error during resetting"); - -return 0; -} - static int encode_frame(AVCodecContext *avctx, QSVEncContext *q, const AVFrame *frame) { @@ -2438,7 +2386,7 @@ static int encode_frame(AVCodecContext *avctx, QSVEncContext *q, if (ret < 0) { ret = (ret == MFX_ERR_MORE_DATA) ? - 0 : ff_qsv_print_error(avctx, ret, "Error during encoding"); + AVERROR(EAGAIN) : ff_qsv_print_error(avctx, ret, "Error during encoding"); goto free; } @@ -2466,6 +2414,66 @@ nomem: goto free; } +static int update_parameters(AVCodecContext *avctx, QSVEncContext *q, + const AVFrame *frame) +{ +int needReset = 0, ret = 0; + +if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG) +return 0; + +needReset = update_qp(avctx, q); +needReset |= update_max_frame_size(avctx, q); +needReset |= update_gop_size(avctx, q); +needReset |= update_rir(avctx, q); +needReset |= update_low_delay_brc(avctx, q); +needReset |= update_frame_rate(avctx, q); +needReset |= update_bitrate(avctx, q); +needReset |= update_pic_timing_sei(avctx, q); +ret = update_min_max_qp(avctx, q); +if (ret < 0) +return ret; +needReset |= ret; +if (!needReset) +return 0; + +if (avctx->hwaccel_context) { +AVQSVContext *qsv = avctx->hwaccel_context; +int i, j; +q->param.ExtParam = q->extparam; +for (i = 0; i < qsv->nb_ext_buffers; i++) +q->param.ExtParam[i] = qsv->ext_buffers[i]; +q->param.NumExtParam = qsv->nb_ext_buffers; + +for (i = 0; i < q->nb_extparam_internal; i++) { +for (j = 0; j < 
qsv->nb_ext_buffers; j++) { +if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId) +break; +} +if (j < qsv->nb_ext_buffers) +continue; +q->param.ExtParam[q->param.NumExtParam++] = q->extparam_in
[FFmpeg-devel] [PATCH] libavfilter/qsvvpp: check the return value
From: Wenbin Chen Signed-off-by: Wenbin Chen --- libavfilter/qsvvpp.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index e181e7b584..54e7284234 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -441,7 +441,10 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p return NULL; } -av_frame_copy_props(qsv_frame->frame, picref); +if (av_frame_copy_props(qsv_frame->frame, picref) < 0) { +av_frame_free(&qsv_frame->frame); +return NULL; +} } else qsv_frame->frame = av_frame_clone(picref); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Enable 444 encoding for RGB input
From: Wenbin Chen MSDK/VPL uses 420 chroma format as default to encode RGB, and this is not a proper usage. Now enable 444 encoding for RGB input by default. RGB is encoded using 444 chroma format when user doesn't specify the profile or uses rext profile, otherwise, 420 is used. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 4 1 file changed, 4 insertions(+) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 514a1e8148..150fc9c729 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -1088,6 +1088,10 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->extco3.MaxFrameSizeI = q->max_frame_size_i; if (q->max_frame_size_p >= 0) q->extco3.MaxFrameSizeP = q->max_frame_size_p; +if (sw_format == AV_PIX_FMT_BGRA && +(q->profile == MFX_PROFILE_HEVC_REXT || +q->profile == MFX_PROFILE_UNKNOWN)) +q->extco3.TargetChromaFormatPlus1 = MFX_CHROMAFORMAT_YUV444 + 1; q->extco3.ScenarioInfo = q->scenario; } -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] libavcodec/qsvenc_av1: Add max_frame_size support to av1_qsv encoder
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/encoders.texi | 5 + libavcodec/qsvenc.c | 3 +++ libavcodec/qsvenc_av1.c | 1 + 3 files changed, 9 insertions(+) diff --git a/doc/encoders.texi b/doc/encoders.texi index 543b5e26a9..727f12a59d 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3855,6 +3855,11 @@ Depth of look ahead in number frames, available when extbrc option is enabled. Setting this flag turns on or off LowDelayBRC feautre in qsv plugin, which provides more accurate bitrate control to minimize the variance of bitstream size frame by frame. Value: -1-default 0-off 1-on + +@item max_frame_size +Set the allowed max size in bytes for each frame. If the frame size exceeds +the limitation, encoder will adjust the QP value to control the frame size. +Invalid in CQP rate control mode. @end table @section snow diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index f5c6a164bb..93f1862a4b 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -538,6 +538,7 @@ static void dump_video_av1_param(AVCodecContext *avctx, QSVEncContext *q, av_log(avctx, AV_LOG_VERBOSE, "WriteIVFHeaders: %s \n", print_threestate(av1_bs_param->WriteIVFHeaders)); av_log(avctx, AV_LOG_VERBOSE, "LowDelayBRC: %s\n", print_threestate(co3->LowDelayBRC)); +av_log(avctx, AV_LOG_VERBOSE, "MaxFrameSize: %d;\n", co2->MaxFrameSize); } #endif @@ -1034,6 +1035,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->extco2.AdaptiveI = q->adaptive_i ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF; if (q->adaptive_b >= 0) q->extco2.AdaptiveB = q->adaptive_b ? 
MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF; +if (q->max_frame_size >= 0) +q->extco2.MaxFrameSize = q->max_frame_size; q->extco2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2; q->extco2.Header.BufferSz = sizeof(q->extco2); diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c index 1e7801fefe..c697845d7b 100644 --- a/libavcodec/qsvenc_av1.c +++ b/libavcodec/qsvenc_av1.c @@ -111,6 +111,7 @@ static const AVOption options[] = { QSV_OPTION_ADAPTIVE_B QSV_OPTION_EXTBRC QSV_OPTION_LOW_DELAY_BRC +QSV_OPTION_MAX_FRAME_SIZE { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" }, { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, VE, "profile" }, { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AV1_MAIN }, INT_MIN, INT_MAX, VE, "profile" }, -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] libavcodec/qsvenc_av1: Add low_delay_brc support to av1_qsv encoder
From: Wenbin Chen Signed-off-by: Wenbin Chen --- doc/encoders.texi | 5 + libavcodec/qsvenc.c | 4 libavcodec/qsvenc_av1.c | 1 + 3 files changed, 10 insertions(+) diff --git a/doc/encoders.texi b/doc/encoders.texi index b8051cda3f..543b5e26a9 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3850,6 +3850,11 @@ Extended bitrate control. @item @var{look_ahead_depth} Depth of look ahead in number frames, available when extbrc option is enabled. + +@item @var{low_delay_brc} +Setting this flag turns on or off LowDelayBRC feautre in qsv plugin, which provides +more accurate bitrate control to minimize the variance of bitstream size frame +by frame. Value: -1-default 0-off 1-on @end table @section snow diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 514a1e8148..f5c6a164bb 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -537,6 +537,7 @@ static void dump_video_av1_param(AVCodecContext *avctx, QSVEncContext *q, av_log(avctx, AV_LOG_VERBOSE, "WriteIVFHeaders: %s \n", print_threestate(av1_bs_param->WriteIVFHeaders)); +av_log(avctx, AV_LOG_VERBOSE, "LowDelayBRC: %s\n", print_threestate(co3->LowDelayBRC)); } #endif @@ -1090,6 +1091,9 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->extco3.MaxFrameSizeP = q->max_frame_size_p; q->extco3.ScenarioInfo = q->scenario; +} else if (avctx->codec_id == AV_CODEC_ID_AV1) { +if (q->low_delay_brc >= 0) +q->extco3.LowDelayBRC = q->low_delay_brc ? 
MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF; } if (avctx->codec_id == AV_CODEC_ID_HEVC) { diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c index bb9ad16927..1e7801fefe 100644 --- a/libavcodec/qsvenc_av1.c +++ b/libavcodec/qsvenc_av1.c @@ -110,6 +110,7 @@ static const AVOption options[] = { QSV_OPTION_ADAPTIVE_I QSV_OPTION_ADAPTIVE_B QSV_OPTION_EXTBRC +QSV_OPTION_LOW_DELAY_BRC { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" }, { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, VE, "profile" }, { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AV1_MAIN }, INT_MIN, INT_MAX, VE, "profile" }, -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3] libavfilter/qsvvpp: Change the alignment to meet the requirement of YUV420P format
From: Wenbin Chen When process yuv420 frames, FFmpeg uses same alignment on Y/U/V planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's pitch, which makes U/V planes 16-bytes aligned. We need to set a separate alignment to meet runtime's behaviour. Now alignment is changed to 16 so that the linesizes of U/V planes meet the requirment of VPL/MSDK. Add get_buffer.video callback to qsv filters to change the default get_buffer behaviour. Now the commandline works fine: ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \ -i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \ -pix_fmt yuv420p 2466_1508.yuv Signed-off-by: Wenbin Chen --- libavfilter/qsvvpp.c | 13 + libavfilter/qsvvpp.h | 1 + libavfilter/vf_deinterlace_qsv.c | 1 + libavfilter/vf_overlay_qsv.c | 2 ++ libavfilter/vf_scale_qsv.c | 1 + libavfilter/vf_vpp_qsv.c | 1 + 6 files changed, 19 insertions(+) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index 8428ee89ab..d5cfeab402 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -1003,3 +1003,16 @@ int ff_qsvvpp_create_mfx_session(void *ctx, } #endif + +AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h) +{ +/* When process YUV420 frames, FFmpeg uses same alignment on Y/U/V + * planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's + * pitch, which makes U/V planes 16-bytes aligned. We need to set a + * separate alignment to meet runtime's behaviour. 
+*/ +return ff_default_get_video_buffer2(inlink, +FFALIGN(inlink->w, 32), +FFALIGN(inlink->h, 32), +16); +} diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h index a8cfcc565a..6f7c9bfc15 100644 --- a/libavfilter/qsvvpp.h +++ b/libavfilter/qsvvpp.h @@ -127,4 +127,5 @@ int ff_qsvvpp_print_warning(void *log_ctx, mfxStatus err, int ff_qsvvpp_create_mfx_session(void *ctx, void *loader, mfxIMPL implementation, mfxVersion *pver, mfxSession *psession); +AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h); #endif /* AVFILTER_QSVVPP_H */ diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c index 98ed7283ad..6c94923f02 100644 --- a/libavfilter/vf_deinterlace_qsv.c +++ b/libavfilter/vf_deinterlace_qsv.c @@ -581,6 +581,7 @@ static const AVFilterPad qsvdeint_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = qsvdeint_filter_frame, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c index d947a1faa1..1a2c1b1e96 100644 --- a/libavfilter/vf_overlay_qsv.c +++ b/libavfilter/vf_overlay_qsv.c @@ -399,11 +399,13 @@ static const AVFilterPad overlay_qsv_inputs[] = { .name = "main", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_main_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, { .name = "overlay", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_overlay_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_scale_qsv.c b/libavfilter/vf_scale_qsv.c index 758e730f78..36d5f3a6ec 100644 --- a/libavfilter/vf_scale_qsv.c +++ b/libavfilter/vf_scale_qsv.c @@ -641,6 +641,7 @@ static const AVFilterPad qsvscale_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = qsvscale_filter_frame, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index 4a053f9145..b26d19c3bc 100644 --- a/libavfilter/vf_vpp_qsv.c 
+++ b/libavfilter/vf_vpp_qsv.c @@ -634,6 +634,7 @@ static const AVFilterPad vpp_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] libavfilter/qsvvpp: Use different alignment for YUV420P format
From: Wenbin Chen When process yuv420 frames, FFmpeg uses same alignment on Y/U/V planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's pitch, which makes U/V planes 16-bytes aligned. We need to set a separate alignment to meet runtime's behaviour. Now alignment is changed to 16 so that the linesizes of U/V planes meet the requirment of VPL/MSDK. Add get_buffer.video callback to qsv filters to change the default get_buffer behaviour. Now the commandline works fine: ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \ -i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \ -pix_fmt yuv420p 2466_1508.yuv Signed-off-by: Wenbin Chen --- libavfilter/qsvvpp.c | 13 + libavfilter/qsvvpp.h | 1 + libavfilter/vf_deinterlace_qsv.c | 1 + libavfilter/vf_overlay_qsv.c | 2 ++ libavfilter/vf_scale_qsv.c | 1 + libavfilter/vf_vpp_qsv.c | 1 + 6 files changed, 19 insertions(+) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index 8428ee89ab..d5cfeab402 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -1003,3 +1003,16 @@ int ff_qsvvpp_create_mfx_session(void *ctx, } #endif + +AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h) +{ +/* When process YUV420 frames, FFmpeg uses same alignment on Y/U/V + * planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's + * pitch, which makes U/V planes 16-bytes aligned. We need to set a + * separate alignment to meet runtime's behaviour. 
+*/ +return ff_default_get_video_buffer2(inlink, +FFALIGN(inlink->w, 32), +FFALIGN(inlink->h, 32), +16); +} diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h index a8cfcc565a..6f7c9bfc15 100644 --- a/libavfilter/qsvvpp.h +++ b/libavfilter/qsvvpp.h @@ -127,4 +127,5 @@ int ff_qsvvpp_print_warning(void *log_ctx, mfxStatus err, int ff_qsvvpp_create_mfx_session(void *ctx, void *loader, mfxIMPL implementation, mfxVersion *pver, mfxSession *psession); +AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h); #endif /* AVFILTER_QSVVPP_H */ diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c index 98ed7283ad..6c94923f02 100644 --- a/libavfilter/vf_deinterlace_qsv.c +++ b/libavfilter/vf_deinterlace_qsv.c @@ -581,6 +581,7 @@ static const AVFilterPad qsvdeint_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = qsvdeint_filter_frame, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c index d947a1faa1..1a2c1b1e96 100644 --- a/libavfilter/vf_overlay_qsv.c +++ b/libavfilter/vf_overlay_qsv.c @@ -399,11 +399,13 @@ static const AVFilterPad overlay_qsv_inputs[] = { .name = "main", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_main_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, { .name = "overlay", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_overlay_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_scale_qsv.c b/libavfilter/vf_scale_qsv.c index 758e730f78..36d5f3a6ec 100644 --- a/libavfilter/vf_scale_qsv.c +++ b/libavfilter/vf_scale_qsv.c @@ -641,6 +641,7 @@ static const AVFilterPad qsvscale_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = qsvscale_filter_frame, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index 4a053f9145..b26d19c3bc 100644 --- a/libavfilter/vf_vpp_qsv.c 
+++ b/libavfilter/vf_vpp_qsv.c @@ -634,6 +634,7 @@ static const AVFilterPad vpp_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .config_props = config_input, +.get_buffer.video = ff_qsvvpp_get_video_buffer, }, }; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavfilter/qsvvpp: Use different alignment for YUV420P format
From: Wenbin Chen When process yuv420 frames, FFmpeg use same alignment on Y/U/V planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's pitch, which make U/V planes 16-bytes aligned. We need to set a separate alignment to meet runtime's behaviour. Now the commandline works fine: ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \ -i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \ -pix_fmt yuv420p 2466_1508.yuv Signed-off-by: Wenbin Chen --- libavfilter/qsvvpp.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index 8428ee89ab..ad09114cb7 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -408,9 +408,15 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p } else { /* make a copy if the input is not padded as libmfx requires */ if (picref->height & 31 || picref->linesize[0] & 31) { -qsv_frame->frame = ff_get_video_buffer(inlink, - FFALIGN(inlink->w, 32), - FFALIGN(inlink->h, 32)); +/* When process YUV420 frames, FFmpeg uses same alignment on Y/U/V + * planes. VPL and MSDK use Y plane's pitch / 2 as U/V planes's + * pitch, which makes U/V planes 16-bytes aligned. We need to set a + * separate alignment to meet runtime's behaviour. + */ +qsv_frame->frame = ff_default_get_video_buffer2(inlink, +FFALIGN(inlink->w, 32), +FFALIGN(inlink->h, 32), +16); if (!qsv_frame->frame) return NULL; -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc.c: Enable MFX_GOP_STRICT when adaptive gop is disabled
From: Wenbin Chen adaptive_i and adaptive_b cannot work with MFX_GOP_STRICT, so only enable MFX_GOP_STRICT when these features are disabled. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index d5e9f2d420..514a1e8148 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -644,6 +644,12 @@ static int check_enc_param(AVCodecContext *avctx, QSVEncContext *q) return 1; } +static int is_strict_gop(QSVEncContext *q) { +if (q->adaptive_b == 0 && q->adaptive_i == 0) +return 1; +return 0; +} + static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q) { enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ? @@ -755,7 +761,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->old_gop_size = avctx->gop_size; q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1; q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? - MFX_GOP_CLOSED : MFX_GOP_STRICT; + MFX_GOP_CLOSED : is_strict_gop(q) ? + MFX_GOP_STRICT : 0; q->param.mfx.IdrInterval= q->idr_interval; q->param.mfx.NumSlice = avctx->slices; q->param.mfx.NumRefFrame= FFMAX(0, avctx->refs); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc.c: Disable MFX_GOP_STRICT when encoding adaptive gop
From: Wenbin Chen b_strategy, p_strategy, adaptive_i and adaptive_b cannot work with MFX_GOP_STRICT, so disable MFX_GOP_STRICT when these features are used. Signed-off-by: Wenbin Chen --- libavcodec/qsvenc.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index d5e9f2d420..6777a6fb5f 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -644,6 +644,13 @@ static int check_enc_param(AVCodecContext *avctx, QSVEncContext *q) return 1; } +static int is_adaptive_gop(QSVEncContext *q) { +if (q->adaptive_b > 0 || q->adaptive_i > 0 || +q->b_strategy > 0 || q->p_strategy > 0) +return 1; +return 0; +} + static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q) { enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ? @@ -755,7 +762,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->old_gop_size = avctx->gop_size; q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1; q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? - MFX_GOP_CLOSED : MFX_GOP_STRICT; + MFX_GOP_CLOSED : is_adaptive_gop(q) ? + 0 : MFX_GOP_STRICT; q->param.mfx.IdrInterval= q->idr_interval; q->param.mfx.NumSlice = avctx->slices; q->param.mfx.NumRefFrame= FFMAX(0, avctx->refs); -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Add skip_frame support to qsvenc
From: Wenbin Chen Add skip_frame support to qsvenc. Use per-frame metadata "qsv_skip_frame" to control it. skip_frame option defines the behavior of qsv_skip_frame. no_skip: Frame skipping is disabled. insert_dummy: Encoder inserts into bitstream frame where all macroblocks are encoded as skipped. insert_nothing: Similar to insert_dummy, but encoder inserts nothing. The skipped frames are still used in brc. For example, gop still include skipped frames, and the frames after skipped frames will be larger in size. brc_only: skip_frame metadata indicates the number of missed frames before the current frame. Signed-off-by: Wenbin Chen --- doc/encoders.texi| 36 libavcodec/qsvenc.c | 36 libavcodec/qsvenc.h | 13 + libavcodec/qsvenc_h264.c | 1 + libavcodec/qsvenc_hevc.c | 1 + 5 files changed, 87 insertions(+) diff --git a/doc/encoders.texi b/doc/encoders.texi index 53dd02fd28..59f39d18f6 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3564,6 +3564,24 @@ bitrate, @var{target_bitrate}, within the accuracy range @var{avbr_accuracy}, after a @var{avbr_Convergence} period. This method does not follow HRD and the instant bitrate is not capped or padded. +@item @var{skip_frame} +Use per-frame metadata "qsv_skip_frame" to skip frame when encoding. This option +defines the usage of this metadata. +@table @samp +@item no_skip +Frame skipping is disabled. +@item insert_dummy +Encoder inserts into bitstream frame where all macroblocks are encoded as +skipped. +@item insert_nothing +Similar to insert_dummy, but encoder inserts nothing into bitstream. The skipped +frames are still used in brc. For example, gop still include skipped frames, and +the frames after skipped frames will be larger in size. +@item brc_only +skip_frame metadata indicates the number of missed frames before the current +frame. 
+@end table + @end table @subsection HEVC Options @@ -3742,6 +3760,24 @@ bitrate, @var{target_bitrate}, within the accuracy range @var{avbr_accuracy}, after a @var{avbr_Convergence} period. This method does not follow HRD and the instant bitrate is not capped or padded. +@item @var{skip_frame} +Use per-frame metadata "qsv_skip_frame" to skip frame when encoding. This option +defines the usage of this metadata. +@table @samp +@item no_skip +Frame skipping is disabled. +@item insert_dummy +Encoder inserts into bitstream frame where all macroblocks are encoded as +skipped. +@item insert_nothing +Similar to insert_dummy, but encoder inserts nothing into bitstream. The skipped +frames are still used in brc. For example, gop still include skipped frames, and +the frames after skipped frames will be larger in size. +@item brc_only +skip_frame metadata indicates the number of missed frames before the current +frame. +@end table + @end table @subsection MPEG2 Options diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index 0db774ea63..4bfa65c575 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -329,6 +329,22 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q, "MinQPI: %"PRIu8"; MaxQPI: %"PRIu8"; MinQPP: %"PRIu8"; MaxQPP: %"PRIu8"; MinQPB: %"PRIu8"; MaxQPB: %"PRIu8"\n", co2->MinQPI, co2->MaxQPI, co2->MinQPP, co2->MaxQPP, co2->MinQPB, co2->MaxQPB); av_log(avctx, AV_LOG_VERBOSE, "DisableDeblockingIdc: %"PRIu32" \n", co2->DisableDeblockingIdc); + +switch (co2->SkipFrame) { +case MFX_SKIPFRAME_NO_SKIP: +av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: no_skip\n"); +break; +case MFX_SKIPFRAME_INSERT_DUMMY: +av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: insert_dummy\n"); +break; +case MFX_SKIPFRAME_INSERT_NOTHING: +av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: insert_nothing\n"); +break; +case MFX_SKIPFRAME_BRC_ONLY: +av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: brc_only\n"); +break; +default: break; +} } if (co3) { @@ -991,6 +1007,8 @@ static int 
init_video_param(AVCodecContext *avctx, QSVEncContext *q) q->old_max_qp_b = q->max_qp_b; if (q->mbbrc >= 0) q->extco2.MBBRC = q->mbbrc ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF; +if (q->skip_frame >= 0) +q->extco2.SkipFrame = q->skip_frame; q->extco2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2; q->extco2.Header.BufferSz = sizeof(q->extco2); @@ -1911,6 +1929,19 @@ static int set_roi_encode_ctrl(AVCodecContext *avctx, const AVFrame *frame, return 0; } +static void set_skip_frame_encode_ctrl(AVCodecContext *avctx, const AVFrame *frame, + mfxEncodeCtrl *enc_ctrl) +{ +AVDictionaryEntry* skip_frame_dict = NULL; +if (!frame->metadata) +return; +skip_frame_dict = av_dict_get(fr
[FFmpeg-devel] [PATCH] doc/examples: Add qsv_transcode example
From: Wenbin Chen Add qsv_transcode example which shows how to use qsv to do hardware accelerated transcoding, also show how to dynamically set encoding parameters. examples: Normal usage: qsv_transcode input.mp4 h264_qsv output.mp4 "g 60" Dynamic setting usage: qsv_transcode input.mp4 hevc_qsv output.mp4 "g 60 async_depth 1" 100 "g 120" This command initializes codec with gop_size 60 and changes it to 120 after 100 frames Signed-off-by: Wenbin Chen --- configure| 2 + doc/examples/.gitignore | 1 + doc/examples/Makefile| 1 + doc/examples/qsv_transcode.c | 440 +++ 4 files changed, 444 insertions(+) create mode 100644 doc/examples/qsv_transcode.c diff --git a/configure b/configure index 70c9e41dcc..a8b4496465 100755 --- a/configure +++ b/configure @@ -1748,6 +1748,7 @@ EXAMPLE_LIST=" transcoding_example vaapi_encode_example vaapi_transcode_example +qsv_transcode_example " EXTERNAL_AUTODETECT_LIBRARY_LIST=" @@ -3811,6 +3812,7 @@ transcode_aac_example_deps="avcodec avformat swresample" transcoding_example_deps="avfilter avcodec avformat avutil" vaapi_encode_example_deps="avcodec avutil h264_vaapi_encoder" vaapi_transcode_example_deps="avcodec avformat avutil h264_vaapi_encoder" +qsv_transcode_example_deps="avcodec avformat avutil h264_qsv_encoder" # EXTRALIBS_LIST cpu_init_extralibs="pthreads_extralibs" diff --git a/doc/examples/.gitignore b/doc/examples/.gitignore index 44960e1de7..d787afdd4c 100644 --- a/doc/examples/.gitignore +++ b/doc/examples/.gitignore @@ -22,3 +22,4 @@ /transcoding /vaapi_encode /vaapi_transcode +/qsv_transcode diff --git a/doc/examples/Makefile b/doc/examples/Makefile index 81bfd34d5d..f937fbefda 100644 --- a/doc/examples/Makefile +++ b/doc/examples/Makefile @@ -21,6 +21,7 @@ EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE) += transcode_aac EXAMPLES-$(CONFIG_TRANSCODING_EXAMPLE) += transcoding EXAMPLES-$(CONFIG_VAAPI_ENCODE_EXAMPLE) += vaapi_encode EXAMPLES-$(CONFIG_VAAPI_TRANSCODE_EXAMPLE) += vaapi_transcode +EXAMPLES-$(CONFIG_QSV_TRANSCODE_EXAMPLE) += 
qsv_transcode EXAMPLES := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)$(EXESUF)) EXAMPLES_G := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)_g$(EXESUF)) diff --git a/doc/examples/qsv_transcode.c b/doc/examples/qsv_transcode.c new file mode 100644 index 00..9b37bbea9f --- /dev/null +++ b/doc/examples/qsv_transcode.c @@ -0,0 +1,440 @@ +/* + * Quick Sync Video (video transcoding) transcode sample + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/** + * @file + * Intel QSV-accelerated transcoding example. + * + * @example qsv_transcode.c + * This example shows how to do QSV-accelerated transcoding and how to + * dynamically change encoder's option. 
+ * Usage: qsv_transcode input_stream codec output_stream initial option + * { frame_number new_option } + * e.g: - qsv_transcode input.mp4 h264_qsv output_h264.mp4 "g 60" + * - qsv_transcode input.mp4 hevc_qsv output_hevc.mp4 "g 60 async_depth 1" + * 100 "g 120" + * (initialize codec with gop_size 60 and change it to 120 after 100 + * frames) + */ + +#include +#include + +#include +#include +#include +#include + +static AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL; +static AVBufferRef *hw_device_ctx = NULL; +static AVCodecContext *decoder_ctx = NULL, *encoder_ctx = NULL; +static int video_stream = -1; + +typedef struct DynamicSetting { +int frame_number; +char* optstr; +} DynamicSetting; +static DynamicSetting *dynamic_setting; +static int setting_number; +static int current_setting_number; + +static int str_to_dict(char* optstr, AVDictionary **opt) +{ +char *key, *value; +if (strlen(optstr) == 0) +return 0; +key = strtok(optstr, " "); +if (key == NULL) +return AVERROR(ENAVAIL); +value = strtok(NUL
[FFmpeg-devel] [PATCH 2/2] doc/encoders: Add doc for av1_qsv
From: Wenbin Chen Add doc for av1_qsv. Signed-off-by: Wenbin Chen --- doc/encoders.texi | 32 1 file changed, 32 insertions(+) diff --git a/doc/encoders.texi b/doc/encoders.texi index 1a5216f8eb..53dd02fd28 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3775,6 +3775,38 @@ Number of columns for tiled encoding (requires libmfx >= 1.29). Number of rows for tiled encoding (requires libmfx >= 1.29). @end table +@subsection AV1 Options +These options are used by av1_qsv (requires libvpl). +@table @option +@item @var{profile} +@table @samp +@item unknown +@item main +@end table + +@item @var{tile_cols} +Number of columns for tiled encoding. + +@item @var{tile_rows} +Number of rows for tiled encoding. + +@item @var{adaptive_i} +This flag controls insertion of I frames by the QSV encoder. Turn ON this flag +to allow changing of frame type from P and B to I. + +@item @var{adaptive_b} +This flag controls changing of frame type from B to P. + +@item @var{b_strategy} +This option controls usage of B frames as reference. + +@item @var{extbrc} +Extended bitrate control. + +@item @var{look_ahead_depth} +Depth of look ahead in number frames, available when extbrc option is enabled. +@end table + @section snow @subsection Options -- 2.34.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] libavcodec/qsvenc_av1: add av1_qsv encoder
From: Wenbin Chen It is available only when libvpl is enabled. MSDK doesn't support av1 encoding. sample command: ffmpeg -f rawvideo -pix_fmt nv12 -s 1920x1080 -i input.yuv \ -c:v av1_qsv output.ivf Signed-off-by: Wenbin Chen Signed-off-by: Haihao Xiang --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/qsvenc.c | 196 +++- libavcodec/qsvenc.h | 7 +- libavcodec/qsvenc_av1.c | 156 6 files changed, 358 insertions(+), 5 deletions(-) create mode 100644 libavcodec/qsvenc_av1.c diff --git a/configure b/configure index f3fd91f592..7c4fef6cb0 100755 --- a/configure +++ b/configure @@ -3269,6 +3269,8 @@ vp9_qsv_encoder_select="qsvenc" vp9_v4l2m2m_decoder_deps="v4l2_m2m vp9_v4l2_m2m" wmv3_crystalhd_decoder_select="crystalhd" av1_qsv_decoder_select="qsvdec" +av1_qsv_encoder_select="qsvenc" +av1_qsv_encoder_deps="libvpl" # parsers aac_parser_select="adts_header mpeg4audio" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 37b63cadc2..77deaafe98 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -244,6 +244,7 @@ OBJS-$(CONFIG_AURA_DECODER)+= cyuv.o OBJS-$(CONFIG_AURA2_DECODER) += aura.o OBJS-$(CONFIG_AV1_DECODER) += av1dec.o OBJS-$(CONFIG_AV1_CUVID_DECODER) += cuviddec.o +OBJS-$(CONFIG_AV1_QSV_ENCODER) += qsvenc_av1.o OBJS-$(CONFIG_AVRN_DECODER)+= avrndec.o OBJS-$(CONFIG_AVRP_DECODER)+= r210dec.o OBJS-$(CONFIG_AVRP_ENCODER)+= r210enc.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index cfeb01ac1c..57e53437dc 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -827,6 +827,7 @@ extern const FFCodec ff_libaom_av1_decoder; extern const FFCodec ff_av1_decoder; extern const FFCodec ff_av1_cuvid_decoder; extern const FFCodec ff_av1_qsv_decoder; +extern const FFCodec ff_av1_qsv_encoder; extern const FFCodec ff_libopenh264_encoder; extern const FFCodec ff_libopenh264_decoder; extern const FFCodec ff_h264_amf_encoder; diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index dc5479d0f3..fd3b9d5cbe 
100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -82,6 +82,14 @@ static const struct profile_names vp9_profiles[] = { { MFX_PROFILE_VP9_3,"vp9 3" }, }; +static const struct profile_names av1_profiles[] = { +#if QSV_VERSION_ATLEAST(1, 34) +{ MFX_PROFILE_AV1_MAIN, "av1 main" }, +{ MFX_PROFILE_AV1_HIGH, "av1 high" }, +{ MFX_PROFILE_AV1_PRO, "av1 professional" }, +#endif +}; + typedef struct QSVPacket { AVPacketpkt; mfxSyncPoint *sync; @@ -114,6 +122,11 @@ static const char *print_profile(enum AVCodecID codec_id, mfxU16 profile) num_profiles = FF_ARRAY_ELEMS(vp9_profiles); break; +case AV_CODEC_ID_AV1: +profiles = av1_profiles; +num_profiles = FF_ARRAY_ELEMS(av1_profiles); +break; + default: return "unknown"; } @@ -429,6 +442,88 @@ static void dump_video_mjpeg_param(AVCodecContext *avctx, QSVEncContext *q) info->FrameInfo.FrameRateExtD, info->FrameInfo.FrameRateExtN); } +#if QSV_HAVE_EXT_AV1_PARAM +static void dump_video_av1_param(AVCodecContext *avctx, QSVEncContext *q, + mfxExtBuffer **coding_opts) +{ +mfxInfoMFX *info = &q->param.mfx; +mfxExtAV1TileParam *av1_tile_param = (mfxExtAV1TileParam *)coding_opts[0]; +mfxExtAV1BitstreamParam *av1_bs_param = (mfxExtAV1BitstreamParam *)coding_opts[1]; +mfxExtCodingOption2 *co2 = (mfxExtCodingOption2*)coding_opts[2]; +mfxExtCodingOption3 *co3 = (mfxExtCodingOption3*)coding_opts[3]; + +av_log(avctx, AV_LOG_VERBOSE, "profile: %s; level: %"PRIu16"\n", + print_profile(avctx->codec_id, info->CodecProfile), info->CodecLevel); + +av_log(avctx, AV_LOG_VERBOSE, + "GopPicSize: %"PRIu16"; GopRefDist: %"PRIu16"; GopOptFlag:%s%s; IdrInterval: %"PRIu16"\n", + info->GopPicSize, info->GopRefDist, + info->GopOptFlag & MFX_GOP_CLOSED ? " closed" : "", + info->GopOptFlag & MFX_GOP_STRICT ? 
" strict" : "", + info->IdrInterval); + +av_log(avctx, AV_LOG_VERBOSE, "TargetUsage: %"PRIu16"; RateControlMethod: %s\n", + info->TargetUsage, print_ratecontrol(info->RateControlMethod)); + +if (info->RateControlMethod == MFX_RATECONTROL_CBR || +info->RateControlMethod == MFX_RATECONTROL_VBR) +av_log(avctx, AV_LOG_VERBOSE, + "BufferSizeInKB: %"PRIu16"; InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"; BRCParamMultiplier: %"PRIu16"\n", + info->BufferSizeInKB, info->InitialDelayInKB, info->TargetKbps, info->MaxKbps, info->BRCParamMultiplie