[FFmpeg-devel] [PATCH] libavfilter/dnn: enable LibTorch xpu device option support

2024-06-02 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add xpu device support to the libtorch backend.
To enable xpu support you need to add
 "-Wl,--no-as-needed -lintel-ext-pt-gpu -Wl,--as-needed" to
"--extra-libs" when configuring FFmpeg.

Signed-off-by: Wenbin Chen 
---
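Notes: a hypothetical invocation selecting the xpu device (the device
option name below is illustrative, not confirmed against the filter's
actual option table):
./ffmpeg -i input.jpg -vf \
dnn_processing=dnn_backend=torch:model=model.pt:device=xpu -y output.jpg
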
 libavfilter/dnn/dnn_backend_torch.cpp | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index 2557264713..ea493f5873 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -250,6 +250,10 @@ static int th_start_inference(void *args)
 av_log(ctx, AV_LOG_ERROR, "input or output tensor is NULL\n");
 return DNN_GENERIC_ERROR;
 }
+// Transfer tensor to the same device as model
+c10::Device device = (*th_model->jit_model->parameters().begin()).device();
+if (infer_request->input_tensor->device() != device)
+*infer_request->input_tensor = infer_request->input_tensor->to(device);
 inputs.push_back(*infer_request->input_tensor);
 
 *infer_request->output = th_model->jit_model->forward(inputs).toTensor();
@@ -285,6 +289,9 @@ static void infer_completion_callback(void *args) {
 switch (th_model->model.func_type) {
 case DFT_PROCESS_FRAME:
 if (task->do_ioproc) {
+// Post process can only deal with CPU memory.
+if (output->device() != torch::kCPU)
+*output = output->to(torch::kCPU);
 outputs.scale = 255;
 outputs.data = output->data_ptr();
 if (th_model->model.frame_post_proc != NULL) {
@@ -424,7 +431,13 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A
 th_model->ctx = ctx;
 
 c10::Device device = c10::Device(device_name);
-if (!device.is_cpu()) {
+if (device.is_xpu()) {
+if (!at::hasXPU()) {
+av_log(ctx, AV_LOG_ERROR, "No XPU device found\n");
+goto fail;
+}
+at::detail::getXPUHooks().initXPU();
+} else if (!device.is_cpu()) {
 av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name);
 goto fail;
 }
@@ -432,6 +445,7 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A
 try {
 th_model->jit_model = new torch::jit::Module;
 (*th_model->jit_model) = torch::jit::load(ctx->model_filename);
+th_model->jit_model->to(device);
 } catch (const c10::Error& e) {
 av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n");
 goto fail;
-- 
2.34.1
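
For reference, the device-handling pattern the patch implements, as a
minimal standalone LibTorch C++ sketch (model path, input shape and the
device string are placeholders; an "xpu" build additionally needs the
Intel extension library named above):

#include <torch/script.h>
#include <iostream>
#include <vector>

int main() {
    c10::Device device("cpu");                // or c10::Device("xpu")
    torch::jit::Module module = torch::jit::load("model.pt");
    module.to(device);                        // move weights to the target device

    torch::Tensor input = torch::rand({1, 3, 224, 224});
    if (input.device() != device)
        input = input.to(device);             // keep input on the model's device

    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(input);
    torch::Tensor output = module.forward(inputs).toTensor();
    if (output.device() != torch::kCPU)
        output = output.to(torch::kCPU);      // post-processing expects CPU memory

    std::cout << output.sizes() << '\n';
    return 0;
}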



[FFmpeg-devel] [PATCH 2/2] libavfilter/dnn_io_proc: Take step into consideration when cropping frames

2024-04-02 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 
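
The horizontal offset (offsetx) is counted in pixels, while the plane
pointers in frame->data[] advance in bytes; for packed pixel formats one
pixel spans several bytes, so offsetx must be scaled by the per-plane
max pixel step.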

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_io_proc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index e5d6edb301..d2ec9f63f5 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -350,6 +350,7 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index
 const AVDetectionBBoxHeader *header;
 const AVDetectionBBox *bbox;
 AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+int max_step[4] = { 0 };
 av_assert0(sd);
 
 /* (scale != 1 and scale != 0) or mean != 0 */
@@ -405,8 +406,9 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index
 offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);
 offsety[0] = offsety[3] = top;
 
+av_image_fill_max_pixsteps(max_step, NULL, desc);
 for (int k = 0; frame->data[k]; k++)
-bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k];
+bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k];
 
 sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,
0, height,
-- 
2.34.1
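
To illustrate the scaling: for a packed format such as AV_PIX_FMT_RGB24
one pixel occupies 3 bytes in plane 0, so column x starts x * 3 bytes
into the row. A minimal sketch using the same libavutil helper (the
wrapper function here is illustrative):

extern "C" {
#include <libavutil/imgutils.h>
#include <libavutil/pixdesc.h>
}

/* Return a pointer to pixel (x, y) in plane 0. */
static uint8_t *pixel_ptr(uint8_t *data, int linesize, int x, int y,
                          enum AVPixelFormat fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
    int max_step[4] = { 0 };
    av_image_fill_max_pixsteps(max_step, NULL, desc);
    /* AV_PIX_FMT_RGB24: max_step[0] == 3, so x is scaled by 3 bytes. */
    return data + y * linesize + x * max_step[0];
}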



[FFmpeg-devel] [PATCH 1/2] libavfilter/dnn_backend_openvino: Check bbox's height

2024-04-02 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Check the bbox's height against the frame's height rather than the frame's width.

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 1e2c2404c7..8907bef69b 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -1200,7 +1200,7 @@ static int contain_valid_detection_bbox(AVFrame *frame)
 if (bbox->x < 0 || bbox->w < 0 || bbox->x + bbox->w >= frame->width) {
 return 0;
 }
-if (bbox->y < 0 || bbox->h < 0 || bbox->y + bbox->h >= frame->width) {
+if (bbox->y < 0 || bbox->h < 0 || bbox->y + bbox->h >= frame->height) {
 return 0;
 }
 
-- 
2.34.1



[FFmpeg-devel] [PATCH v2] doc: Add libtorch backend option to dnn_processing

2024-03-24 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/filters.texi | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 18f0d1c5a7..bfa8ccec8b 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12073,11 +12073,21 @@ need to build and install the OpenVINO for C library (see
 @code{--enable-libopenvino} (--extra-cflags=-I... --extra-ldflags=-L... might
 be needed if the header files and libraries are not installed into system path)
 
+@item torch
+Libtorch backend. To enable this backend you need to build and install the
+LibTorch C++ library. Please download the cxx11 ABI version (see
+@url{https://pytorch.org/get-started/locally})
+and configure FFmpeg with @code{--enable-libtorch
+--extra-cflags=-I/libtorch_root/libtorch/include
+--extra-cflags=-I/libtorch_root/libtorch/include/torch/csrc/api/include
+--extra-ldflags=-L/libtorch_root/libtorch/lib/}
+
 @end table
 
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format.
+Note that different backends use different file formats. TensorFlow, OpenVINO
+and Libtorch backend can load files for only its format.
 
 @item input
 Set the input name of the dnn network.
-- 
2.34.1



[FFmpeg-devel] [PATCH] doc: Add libtorch backend option to dnn_processing

2024-03-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/filters.texi | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 913365671d..20605e72b2 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12069,11 +12069,21 @@ need to build and install the OpenVINO for C library (see
 @code{--enable-libopenvino} (--extra-cflags=-I... --extra-ldflags=-L... might
 be needed if the header files and libraries are not installed into system path)
 
+@item torch
+Libtorch backend. To enable this backend you need to build and install the
+LibTorch C++ library. Please download the cxx11 ABI version (see
+@url{https://pytorch.org/get-started/locally})
+and configure FFmpeg with @code{--enable-libtorch
+--extra-cflag=-I/libtorch_root/libtorch/include
+--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include
+--extra-ldflags=-L/libtorch_root/libtorch/lib/}
+
 @end table
 
 @item model
 Set path to model file specifying network architecture and its parameters.
-Note that different backends use different file formats. TensorFlow, OpenVINO backend can load files for only its format.
+Note that different backends use different file formats. TensorFlow, OpenVINO
+and Libtorch backend can load files for only its format.
 
 @item input
 Set the input name of the dnn network.
-- 
2.34.1



[FFmpeg-devel] [PATCH v2] Changelog: Add libtorch

2024-03-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index e3ca52430c..4af55ff537 100644
--- a/Changelog
+++ b/Changelog
@@ -35,6 +35,7 @@ version :
 - AEA muxer
 - ffmpeg CLI loopback decoders
 - Support PacketTypeMetadata of PacketType in enhanced flv format
+- dnn filter libtorch backend
 
 
 version 6.1:
-- 
2.34.1



[FFmpeg-devel] [PATCH] Changelog: Add libtorch

2024-03-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 Changelog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Changelog b/Changelog
index e3ca52430c..d0c41887f3 100644
--- a/Changelog
+++ b/Changelog
@@ -35,6 +35,7 @@ version :
 - AEA muxer
 - ffmpeg CLI loopback decoders
 - Support PacketTypeMetadata of PacketType in enhanced flv format
+- Support libtorch as DNN backend
 
 
 version 6.1:
-- 
2.34.1



[FFmpeg-devel] [PATCH v6] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-03-14 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We refer to the C++ library of PyTorch
as LibTorch below.

To build FFmpeg with LibTorch, please take the following steps as a
reference:
1. download the LibTorch C++ library from
 https://pytorch.org/get-started/locally/,
select C++/Java as the language, and other options as you need.
Please download the cxx11 ABI version
 (libtorch-cxx11-abi-shared-with-deps-*.zip).
2. unzip the file to your own directory, with the command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. configure FFmpeg with ../configure --enable-libtorch \
 --extra-cflag=-I/libtorch_root/libtorch/include \
 --extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include \
 --extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf \
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg

The LibTorch_model.pt can be generated in Python with the torch.jit.script()
API. See https://pytorch.org/tutorials/advanced/cpp_export.html, the
official PyTorch guide on converting and loading TorchScript models.
Please note, torch.jit.trace() is not recommended, since it does
not support variable input sizes.
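
For reference, a minimal LibTorch loader (adapted from the cpp_export
tutorial cited above) that can sanity-check a scripted model outside
FFmpeg; the input shape is a placeholder:

#include <torch/script.h>
#include <iostream>
#include <vector>

int main(int argc, char **argv) {
    if (argc != 2) {
        std::cerr << "usage: " << argv[0] << " model.pt\n";
        return 1;
    }
    torch::jit::Module module;
    try {
        module = torch::jit::load(argv[1]);   // deserialize the TorchScript model
    } catch (const c10::Error &e) {
        std::cerr << "failed to load model\n";
        return 1;
    }
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(torch::rand({1, 3, 224, 224}));  // placeholder input shape
    torch::Tensor output = module.forward(inputs).toTensor();
    std::cout << "output sizes: " << output.sizes() << '\n';
    return 0;
}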

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 597 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  15 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 624 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index c34bdd13f5..e68b6f22b0 100755
--- a/configure
+++ b/configure
@@ -281,6 +281,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1905,6 +1906,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2785,7 +2787,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6888,6 +6890,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++17 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..fa9a2e6d99
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as publishe

[FFmpeg-devel] [PATCH v5] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-03-10 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We call the C++ library of PyTorch as
LibTorch, the same below.

To build FFmpeg with LibTorch, please take following steps as reference:
1. download LibTorch C++ library in https://pytorch.org/get-started/locally/,
please select C++/Java for language, and other options as your need.
Please download cxx11 ABI version (libtorch-cxx11-abi-shared-with-deps-*.zip).
2. unzip the file to your own dir, with command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. config FFmpeg with ../configure --enable-libtorch 
--extra-cflag=-I/libtorch_root/libtorch/include 
--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include 
--extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf 
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg
The LibTorch_model.pt can be generated in Python with the torch.jit.script() API.
Please note, torch.jit.trace() is not recommended, since it does not support
variable input sizes.

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 597 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  15 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 624 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index 05f8283af9..3584728464 100755
--- a/configure
+++ b/configure
@@ -281,6 +281,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1905,6 +1906,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2785,7 +2787,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++17 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..54d3b309a1
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in

[FFmpeg-devel] [PATCH v4] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-02-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We call the C++ library of PyTorch as
LibTorch, the same below.

To build FFmpeg with LibTorch, please take following steps as reference:
1. download LibTorch C++ library in https://pytorch.org/get-started/locally/,
please select C++/Java for language, and other options as your need.
2. unzip the file to your own dir, with command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. config FFmpeg with ../configure --enable-libtorch 
--extra-cflag=-I/libtorch_root/libtorch/include 
--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include 
--extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf 
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg
The LibTorch_model.pt can be generated in Python with the torch.jit.script() API.
Please note, torch.jit.trace() is not recommended, since it does not support
variable input sizes.

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 597 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  15 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 624 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index 2c635043dd..450ef54a80 100755
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2781,7 +2783,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++14 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..54d3b309a1
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even th

[FFmpeg-devel] [PATCH v3] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-02-19 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We call the C++ library of PyTorch as
LibTorch, the same below.

To build FFmpeg with LibTorch, please take following steps as reference:
1. download LibTorch C++ library in https://pytorch.org/get-started/locally/,
please select C++/Java for language, and other options as your need.
2. unzip the file to your own dir, with command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. config FFmpeg with ../configure --enable-libtorch 
--extra-cflag=-I/libtorch_root/libtorch/include 
--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include 
--extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf 
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg
The LibTorch_model.pt can be generated in Python with the torch.jit.script() API.
Please note, torch.jit.trace() is not recommended, since it does not support
variable input sizes.

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 597 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  15 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 624 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index 2c635043dd..450ef54a80 100755
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2781,7 +2783,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6886,6 +6888,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++14 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..54d3b309a1
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the

[FFmpeg-devel] [PATCH v2] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-02-01 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We call the C++ library of PyTorch as
LibTorch, the same below.

To build FFmpeg with LibTorch, please take following steps as reference:
1. download LibTorch C++ library in https://pytorch.org/get-started/locally/,
please select C++/Java for language, and other options as your need.
2. unzip the file to your own dir, with command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. config FFmpeg with ../configure --enable-libtorch 
--extra-cflag=-I/libtorch_root/libtorch/include 
--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include 
--extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf 
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg
The LibTorch_model.pt can be generated in Python with the torch.jit.script() API.
Please note, torch.jit.trace() is not recommended, since it does not support
variable input sizes.

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 587 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  15 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 614 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index 68f675a4bc..bc11172fe4 100755
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2776,7 +2778,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6873,6 +6875,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++14 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..b905c55175
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,587 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the

[FFmpeg-devel] [PATCH] libavfilter/dnn: add LibTorch as one of the DNN backends

2024-01-21 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

PyTorch is an open source machine learning framework that accelerates
the path from research prototyping to production deployment. Official
website: https://pytorch.org/. We call the C++ library of PyTorch as
LibTorch, the same below.

To build FFmpeg with LibTorch, please take following steps as reference:
1. download LibTorch C++ library in https://pytorch.org/get-started/locally/,
please select C++/Java for language, and other options as your need.
2. unzip the file to your own dir, with command
unzip libtorch-shared-with-deps-latest.zip -d your_dir
3. export libtorch_root/libtorch/include and
libtorch_root/libtorch/include/torch/csrc/api/include to $PATH
export libtorch_root/libtorch/lib/ to $LD_LIBRARY_PATH
4. config FFmpeg with ../configure --enable-libtorch 
--extra-cflag=-I/libtorch_root/libtorch/include 
--extra-cflag=-I/libtorch_root/libtorch/include/torch/csrc/api/include 
--extra-ldflags=-L/libtorch_root/libtorch/lib/
5. make

To run FFmpeg DNN inference with LibTorch backend:
./ffmpeg -i input.jpg -vf 
dnn_processing=dnn_backend=torch:model=LibTorch_model.pt -y output.jpg
The LibTorch_model.pt can be generated in Python with the torch.jit.script() API.
Please note, torch.jit.trace() is not recommended, since it does not support
variable input sizes.

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure |   5 +-
 libavfilter/dnn/Makefile  |   1 +
 libavfilter/dnn/dnn_backend_torch.cpp | 585 ++
 libavfilter/dnn/dnn_interface.c   |   5 +
 libavfilter/dnn_filter_common.c   |  31 +-
 libavfilter/dnn_interface.h   |   2 +-
 libavfilter/vf_dnn_processing.c   |   3 +
 7 files changed, 621 insertions(+), 11 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_torch.cpp

diff --git a/configure b/configure
index c8ae0a061d..75061692b1 100755
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@ External library support:
   --enable-libtheora   enable Theora encoding via libtheora [no]
   --enable-libtls  enable LibreSSL (via libtls), needed for https 
support
if openssl, gnutls or mbedtls is not used [no]
+  --enable-libtorch    enable Torch as one DNN backend [no]
   --enable-libtwolame  enable MP2 encoding via libtwolame [no]
   --enable-libuavs3d   enable AVS3 decoding via libuavs3d [no]
   --enable-libv4l2 enable libv4l2/v4l-utils [no]
@@ -1901,6 +1902,7 @@ EXTERNAL_LIBRARY_LIST="
 libtensorflow
 libtesseract
 libtheora
+libtorch
 libtwolame
 libuavs3d
 libv4l2
@@ -2776,7 +2778,7 @@ cbs_vp9_select="cbs"
 deflate_wrapper_deps="zlib"
 dirac_parse_select="golomb"
 dovi_rpu_select="golomb"
-dnn_suggest="libtensorflow libopenvino"
+dnn_suggest="libtensorflow libopenvino libtorch"
 dnn_deps="avformat swscale"
 error_resilience_select="me_cmp"
 evcparse_select="golomb"
@@ -6872,6 +6874,7 @@ enabled libtensorflow && require libtensorflow 
tensorflow/c/c_api.h TF_Versi
 enabled libtesseract  && require_pkg_config libtesseract tesseract 
tesseract/capi.h TessBaseAPICreate
 enabled libtheora && require libtheora theora/theoraenc.h th_info_init 
-ltheoraenc -ltheoradec -logg
 enabled libtls&& require_pkg_config libtls libtls tls.h 
tls_configure
+enabled libtorch  && check_cxxflags -std=c++14 && require_cpp libtorch 
torch/torch.h "torch::Tensor" -ltorch -lc10 -ltorch_cpu -lstdc++ -lpthread
 enabled libtwolame&& require libtwolame twolame.h twolame_init 
-ltwolame &&
  { check_lib libtwolame twolame.h 
twolame_encode_buffer_float32_interleaved -ltwolame ||
die "ERROR: libtwolame must be installed and 
version must be >= 0.3.10"; }
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 5d5697ea42..3d09927c98 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -6,5 +6,6 @@ OBJS-$(CONFIG_DNN)   += 
dnn/dnn_backend_common.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)   += dnn/dnn_backend_openvino.o
+DNN-OBJS-$(CONFIG_LIBTORCH)  += dnn/dnn_backend_torch.o
 
 OBJS-$(CONFIG_DNN)   += $(DNN-OBJS-yes)
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
new file mode 100644
index 00..4fc76d0ce4
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -0,0 +1,585 @@
+/*
+ * Copyright (c) 2024
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even th

[FFmpeg-devel] [PATCH 3/3] libavfilter/vf_dnn_detect: Use class confidence to filter boxes

2024-01-16 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Use class confidence instead of box_score to filter boxes, which is more
accurate. Class confidence is obtained by multiplying the class probability
distribution by the box_score.
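
For example, with a confidence threshold of 0.5, a box with box_score
0.9 whose best class probability is 0.4 now gets class confidence
0.9 * 0.4 = 0.36 and is dropped, while filtering on box_score alone
would have kept it.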

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index caccbf7a12..2bf5ed7476 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -236,9 +236,6 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 conf = post_process_raw_data(
 detection_boxes_data[cy * cell_w + cx + 4 * 
cell_w * cell_h]);
 }
-if (conf < conf_threshold) {
-continue;
-}
 
 if (is_NHWC) {
 x = post_process_raw_data(detection_boxes_data[0]);
@@ -257,6 +254,9 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 conf = conf * post_process_raw_data(
 detection_boxes_data[cy * cell_w + cx + 
(label_id + 5) * cell_w * cell_h]);
 }
+if (conf < conf_threshold) {
+continue;
+}
 
 bbox = av_mallocz(sizeof(*bbox));
 if (!bbox)
-- 
2.34.1



[FFmpeg-devel] [PATCH 2/3] libavfilter/dnn_interface: use dims to represent shapes

2024-01-16 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

For detect and classify outputs, width and height make no sense, so
change width and height to dims to represent the shape of the tensor.
Use layout and dims to get the width, height and channel.
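
As a sketch of the lookup this enables, using the helpers the patch
itself calls (dnn_interface.h is libavfilter's internal header):

extern "C" {
#include "libavfilter/dnn_interface.h"
}

/* Recover the spatial sizes from dims + layout instead of fixed fields. */
static int data_width(const DNNData *d)
{
    return d->dims[dnn_get_width_idx_by_layout(d->layout)];
}

static int data_height(const DNNData *d)
{
    return d->dims[dnn_get_height_idx_by_layout(d->layout)];
}

/* e.g. DL_NCHW dims = {N, C, H, W}: width = dims[3], height = dims[2];
 *      DL_NHWC dims = {N, H, W, C}: width = dims[2], height = dims[1]. */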

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 80 ++
 libavfilter/dnn/dnn_backend_tf.c   | 32 +++
 libavfilter/dnn/dnn_io_proc.c  | 30 +++---
 libavfilter/dnn_interface.h| 17 +-
 libavfilter/vf_dnn_classify.c  |  6 +-
 libavfilter/vf_dnn_detect.c| 50 
 libavfilter/vf_dnn_processing.c| 21 ---
 7 files changed, 146 insertions(+), 90 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 590ddd586c..73b42c32b1 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -253,9 +253,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ov_shape_free(&input_shape);
 return ov2_map_error(status, NULL);
 }
-input.height = dims[1];
-input.width = dims[2];
-input.channels = dims[3];
+for (int i = 0; i < input_shape.rank; i++)
+input.dims[i] = dims[i];
+input.layout = DL_NHWC;
 input.dt = precision_to_datatype(precision);
 #else
 status = ie_infer_request_get_blob(request->infer_request, 
task->input_name, &input_blob);
@@ -278,9 +278,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 av_log(ctx, AV_LOG_ERROR, "Failed to get input blob buffer\n");
 return DNN_GENERIC_ERROR;
 }
-input.height = dims.dims[2];
-input.width = dims.dims[3];
-input.channels = dims.dims[1];
+for (int i = 0; i < input_shape.rank; i++)
+input.dims[i] = dims[i];
+input.layout = DL_NCHW;
 input.data = blob_buffer.buffer;
 input.dt = precision_to_datatype(precision);
 #endif
@@ -339,8 +339,8 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 av_assert0(!"should not reach here");
 break;
 }
-input.data = (uint8_t *)input.data
- + input.width * input.height * input.channels * 
get_datatype_size(input.dt);
+input.data = (uint8_t *)input.data +
+input.dims[1] * input.dims[2] * input.dims[3] * 
get_datatype_size(input.dt);
 }
 #if HAVE_OPENVINO2
 ov_tensor_free(tensor);
@@ -403,10 +403,11 @@ static void infer_completion_callback(void *args)
 goto end;
 }
 outputs[i].dt   = precision_to_datatype(precision);
-
-outputs[i].channels = output_shape.rank > 2 ? dims[output_shape.rank - 
3] : 1;
-outputs[i].height   = output_shape.rank > 1 ? dims[output_shape.rank - 
2] : 1;
-outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 
1] : 1;
+outputs[i].layout   = DL_NCHW;
+outputs[i].dims[0]  = 1;
+outputs[i].dims[1]  = output_shape.rank > 2 ? dims[output_shape.rank - 
3] : 1;
+outputs[i].dims[2]  = output_shape.rank > 1 ? dims[output_shape.rank - 
2] : 1;
+outputs[i].dims[3]  = output_shape.rank > 0 ? dims[output_shape.rank - 
1] : 1;
 av_assert0(request->lltask_count <= dims[0]);
 outputs[i].layout   = ctx->options.layout;
 outputs[i].scale= ctx->options.scale;
@@ -445,9 +446,9 @@ static void infer_completion_callback(void *args)
 return;
 }
 output.data = blob_buffer.buffer;
-output.channels = dims.dims[1];
-output.height   = dims.dims[2];
-output.width= dims.dims[3];
+output.layout   = DL_NCHW;
+for (int i = 0; i < 4; i++)
+output.dims[i] = dims.dims[i];
 av_assert0(request->lltask_count <= dims.dims[0]);
 output.dt   = precision_to_datatype(precision);
 output.layout   = ctx->options.layout;
@@ -469,8 +470,10 @@ static void infer_completion_callback(void *args)
 ff_proc_from_dnn_to_frame(task->out_frame, outputs, ctx);
 }
 } else {
-task->out_frame->width = outputs[0].width;
-task->out_frame->height = outputs[0].height;
+task->out_frame->width =
+
outputs[0].dims[dnn_get_width_idx_by_layout(outputs[0].layout)];
+task->out_frame->height =
+
outputs[0].dims[dnn_get_height_idx_by_layout(outputs[0].layout)];
 }
 break;
 case DFT_ANALYTICS_DETECT:
@@ -501,7 +504,8 @@ static void infer_completion_callback(void *args)
 av_freep(&request->lltasks[i]);
 for (int i = 0; i < ov_model->nb_outputs; i++)
 outputs[i].data = (uint8_t *)outputs[i].data +
-outputs[i].width * outputs[i].height * outputs[i].channels * 
get_datatype_size(outputs[i].dt);
+outputs[i].dims[1] * outputs[i].dims[2] * outputs[i].dims[3] *
+  

[FFmpeg-devel] [PATCH 1/3] libavfilter/dnn_backend_openvino: Add automatic input/output detection

2024-01-16 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Now when using the openvino backend, the user doesn't need to set input/output
names on the command line. Model ports are detected automatically.

For example:
ffmpeg -i input.png -vf \
dnn_detect=dnn_backend=openvino:model=model.xml:input=image:\
output=detection_out -y output.png

can be simplified to:
ffmpeg -i input.png -vf dnn_detect=dnn_backend=openvino:model=model.xml\
 -y output.png

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 64 ++
 libavfilter/dnn_filter_common.c| 21 +
 2 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index e207d44584..590ddd586c 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -205,6 +205,7 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ov_tensor_t* tensor = NULL;
 ov_shape_t input_shape = {0};
 ov_element_type_e precision;
+char *port_name;
 #else
 dimensions_t dims;
 precision_e precision;
@@ -223,11 +224,23 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ov_output_const_port_free(ov_model->input_port);
 ov_model->input_port = NULL;
 }
-status = ov_model_const_input_by_name(ov_model->ov_model, 
task->input_name, &ov_model->input_port);
+if (task->input_name)
+status = ov_model_const_input_by_name(ov_model->ov_model, 
task->input_name, &ov_model->input_port);
+else
+status = ov_model_const_input(ov_model->ov_model, 
&ov_model->input_port);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
 return ov2_map_error(status, NULL);
 }
+status = ov_port_get_any_name(ov_model->input_port, &port_name);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port name.\n");
+return ov2_map_error(status, NULL);
+}
+av_log(ctx, AV_LOG_VERBOSE, "OpenVINO model input: %s\n", port_name);
+ov_free(port_name);
+port_name = NULL;
+
 status = ov_const_port_get_shape(ov_model->input_port, &input_shape);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
@@ -620,7 +633,10 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
-status = 
ov_preprocess_prepostprocessor_get_input_info_by_name(ov_model->preprocess, 
input_name, &ov_model->input_info);
+if (input_name)
+status = 
ov_preprocess_prepostprocessor_get_input_info_by_name(ov_model->preprocess, 
input_name, &ov_model->input_info);
+else
+status = 
ov_preprocess_prepostprocessor_get_input_info(ov_model->preprocess, 
&ov_model->input_info);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to get input info from 
preprocess.\n");
 ret = ov2_map_error(status, NULL);
@@ -673,10 +689,24 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
+if (!nb_outputs) {
+size_t output_size;
+status = ov_model_outputs_size(ov_model->ov_model, &output_size);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get output size.\n");
+ret = ov2_map_error(status, NULL);
+goto err;
+}
+nb_outputs = output_size;
+}
 ov_model->nb_outputs = nb_outputs;
 for (int i = 0; i < nb_outputs; i++) {
-status = ov_preprocess_prepostprocessor_get_output_info_by_name(
-ov_model->preprocess, output_names[i], &ov_model->output_info);
+if (output_names)
+status = ov_preprocess_prepostprocessor_get_output_info_by_name(
+ov_model->preprocess, output_names[i], 
&ov_model->output_info);
+else
+status = ov_preprocess_prepostprocessor_get_output_info_by_index(
+ov_model->preprocess, i, &ov_model->output_info);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to get output info from 
preprocess.\n");
 ret = ov2_map_error(status, NULL);
@@ -758,12 +788,25 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 }
 
 for (int i = 0; i < nb_outputs; i++) {
-status = ov_model_const_output_by_name(ov_model->ov_model, 
output_names[i],
-   &ov_model->output_ports[i]);
+char *port_name;
+if (output_names)
+status = ov_model_const_output_by_name(ov_model->ov_model, 
output_names[i],
+&ov_model->output_ports[i]);
+else
+status = ov_model_const_output_by_index(ov_model->ov_model, i,
+&ov_model->output_ports[i]);
 if (status != OK) {

[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add two outputs ssd support

2023-12-26 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

For this kind of model, we can directly use its output as the final result,
just like the ssd model. The difference is that it splits the output into two
tensors: [x_min, y_min, x_max, y_max, confidence] and [label_id].

For a model example, refer to:
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/person-detection-0106
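
As a sketch of the per-proposal layout in the two-output case (the
helper and its names are illustrative):

/* det holds 5 floats per proposal, labels holds one entry per proposal. */
static void read_proposal(const float *det, const float *labels, int i,
                          float box[4], float *conf, int *label_id)
{
    for (int k = 0; k < 4; k++)
        box[k] = det[i * 5 + k];  /* x_min, y_min, x_max, y_max */
    *conf     = det[i * 5 + 4];
    *label_id = (int)labels[i];
}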

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 64 +
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 88865c8a8e..249cbba0f7 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -359,24 +359,48 @@ static int dnn_detect_post_proc_yolov3(AVFrame *frame, 
DNNData *output,
 return 0;
 }
 
-static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int 
nb_outputs,
+AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
 float conf_threshold = ctx->confidence;
-int proposal_count = output->height;
-int detect_size = output->width;
-float *detections = output->data;
+int proposal_count = 0;
+int detect_size = 0;
+float *detections = NULL, *labels = NULL;
 int nb_bboxes = 0;
 AVDetectionBBoxHeader *header;
 AVDetectionBBox *bbox;
-
-if (output->width != 7) {
+int scale_w = ctx->scale_width;
+int scale_h = ctx->scale_height;
+
+if (nb_outputs == 1 && output->width == 7) {
+proposal_count = output->height;
+detect_size = output->width;
+detections = output->data;
+} else if (nb_outputs == 2 && output[0].width == 5) {
+proposal_count = output[0].height;
+detect_size = output[0].width;
+detections = output[0].data;
+labels = output[1].data;
+} else if (nb_outputs == 2 && output[1].width == 5) {
+proposal_count = output[1].height;
+detect_size = output[1].width;
+detections = output[1].data;
+labels = output[0].data;
+} else {
 av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd 
requirement.\n");
 return AVERROR(EINVAL);
 }
 
+if (proposal_count == 0)
+return 0;
+
 for (int i = 0; i < proposal_count; ++i) {
-float conf = detections[i * detect_size + 2];
+float conf;
+if (nb_outputs == 1)
+conf = detections[i * detect_size + 2];
+else
+conf = detections[i * detect_size + 4];
 if (conf < conf_threshold) {
 continue;
 }
@@ -398,12 +422,24 @@ static int dnn_detect_post_proc_ssd(AVFrame *frame, 
DNNData *output, AVFilterCon
 
 for (int i = 0; i < proposal_count; ++i) {
 int av_unused image_id = (int)detections[i * detect_size + 0];
-int label_id = (int)detections[i * detect_size + 1];
-float conf   =  detections[i * detect_size + 2];
-float x0 =  detections[i * detect_size + 3];
-float y0 =  detections[i * detect_size + 4];
-float x1 =  detections[i * detect_size + 5];
-float y1 =  detections[i * detect_size + 6];
+int label_id;
+float conf, x0, y0, x1, y1;
+
+if (nb_outputs == 1) {
+label_id = (int)detections[i * detect_size + 1];
+conf = detections[i * detect_size + 2];
+x0   = detections[i * detect_size + 3];
+y0   = detections[i * detect_size + 4];
+x1   = detections[i * detect_size + 5];
+y1   = detections[i * detect_size + 6];
+} else {
+label_id = (int)labels[i];
+x0 =  detections[i * detect_size] / scale_w;
+y0 =  detections[i * detect_size + 1] / scale_h;
+x1 =  detections[i * detect_size + 2] / scale_w;
+y1 =  detections[i * detect_size + 3] / scale_h;
+conf   =  detections[i * detect_size + 4];
+}
 
 if (conf < conf_threshold) {
 continue;
@@ -447,7 +483,7 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData 
*output, int nb_outpu
 
 switch (ctx->model_type) {
 case DDMT_SSD:
-ret = dnn_detect_post_proc_ssd(frame, output, filter_ctx);
+ret = dnn_detect_post_proc_ssd(frame, output, nb_outputs, filter_ctx);
 if (ret < 0)
 return ret;
 break;
-- 
2.34.1



[FFmpeg-devel] [PATCH 1/2] libavfilter/dnn_backend_openvino: Add dynamic output support

2023-12-26 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add dynamic output support. Some models don't have a fixed output size;
it changes according to the result. Now the openvino backend can run these
kinds of models.

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 134 +++--
 1 file changed, 59 insertions(+), 75 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 671a995c70..e207d44584 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -219,31 +219,26 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 task = lltask->task;
 
 #if HAVE_OPENVINO2
-if (!ov_model_is_dynamic(ov_model->ov_model)) {
-if (ov_model->input_port) {
-ov_output_const_port_free(ov_model->input_port);
-ov_model->input_port = NULL;
-}
-status = ov_model_const_input_by_name(ov_model->ov_model, 
task->input_name, &ov_model->input_port);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
-return ov2_map_error(status, NULL);
-}
-status = ov_const_port_get_shape(ov_model->input_port, &input_shape);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
-return ov2_map_error(status, NULL);
-}
-dims = input_shape.dims;
-status = ov_port_get_element_type(ov_model->input_port, &precision);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n");
-ov_shape_free(&input_shape);
-return ov2_map_error(status, NULL);
-}
-} else {
-avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
-return AVERROR(ENOSYS);
+if (ov_model->input_port) {
+ov_output_const_port_free(ov_model->input_port);
+ov_model->input_port = NULL;
+}
+status = ov_model_const_input_by_name(ov_model->ov_model, 
task->input_name, &ov_model->input_port);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
+return ov2_map_error(status, NULL);
+}
+status = ov_const_port_get_shape(ov_model->input_port, &input_shape);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
+return ov2_map_error(status, NULL);
+}
+dims = input_shape.dims;
+status = ov_port_get_element_type(ov_model->input_port, &precision);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n");
+ov_shape_free(&input_shape);
+return ov2_map_error(status, NULL);
 }
 input.height = dims[1];
 input.width = dims[2];
@@ -1049,30 +1044,22 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 ov_element_type_e precision;
 int64_t* dims;
 ov_status_e status;
-if (!ov_model_is_dynamic(ov_model->ov_model)) {
-status = ov_model_const_input_by_name(ov_model->ov_model, input_name, 
&ov_model->input_port);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
-return ov2_map_error(status, NULL);
-}
-
-status = ov_const_port_get_shape(ov_model->input_port, &input_shape);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
-return ov2_map_error(status, NULL);
-}
-dims = input_shape.dims;
-
-status = ov_port_get_element_type(ov_model->input_port, &precision);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n");
-return ov2_map_error(status, NULL);
-}
-} else {
-avpriv_report_missing_feature(ctx, "Do not support dynamic model 
now.");
-return AVERROR(ENOSYS);
+status = ov_model_const_input_by_name(ov_model->ov_model, input_name, 
&ov_model->input_port);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
+return ov2_map_error(status, NULL);
 }
-
+status = ov_port_get_element_type(ov_model->input_port, &precision);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port data type.\n");
+return ov2_map_error(status, NULL);
+}
+status = ov_const_port_get_shape(ov_model->input_port, &input_shape);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input port shape.\n");
+return ov2_map_error(status, NULL);
+}
+dims = input_shape.dims;
 if (dims[1] <= 3) { // NCHW
 input->channels = dims[1];
 input->height   = input_resizable ? -1 : dims[2];
@@ -1083,7 +1070,7 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 input->channels = dims

[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add initialized value to function pointer

2023-12-17 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

post_process_raw_data could be read while still uninitialized if
model_type matched none of the switch cases; defaulting it to linear
keeps the non-yolov4 paths well defined.

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 52d5c3d798..88865c8a8e 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -157,7 +157,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 float *output_data = output[output_index].data;
 float *anchors = ctx->anchors;
 AVDetectionBBox *bbox;
-float (*post_process_raw_data)(float x);
+float (*post_process_raw_data)(float x) = linear;
 int is_NHWC = 0;
 
 if (ctx->model_type == DDMT_YOLOV1V2) {
-- 
2.34.1


[FFmpeg-devel] [PATCH 1/2] libavfilter/vf_dnn_detect: Fix a control flow issue

2023-12-17 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Without the break statements, the DDMT_YOLOV1V2 case fell straight
through into the yolov3/yolov4 handler and ran a second post-process on
the same frame.

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index fcc64118b6..52d5c3d798 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -455,11 +455,13 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, 
DNNData *output, int nb_outpu
 ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx);
 if (ret < 0)
 return ret;
+break;
 case DDMT_YOLOV3:
 case DDMT_YOLOV4:
 ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, 
nb_outputs);
 if (ret < 0)
 return ret;
+break;
 }
 return 0;
 }
-- 
2.34.1


[FFmpeg-devel] [PATCH 4/4] libavfilter/vf_dnn_detect: Set used pointer to NULL

2023-12-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Set the used pointer to NULL so the storage it pointed to cannot leak
or be freed twice.

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 5668b8b017..3464af86c8 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -223,6 +223,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 av_freep(&bbox);
 return AVERROR(ENOMEM);
 }
+bbox = NULL;
 }
 }
 return 0;
-- 
2.34.1


[FFmpeg-devel] [PATCH 3/4] libavfilter/vf_dnn_detect: Fix uninitialized variables error

2023-12-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index b2e9b8d4c6..5668b8b017 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -139,7 +139,8 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 {
 DnnDetectContext *ctx = filter_ctx->priv;
 float conf_threshold = ctx->confidence;
-int detection_boxes, box_size, cell_w, cell_h, scale_w, scale_h;
+int detection_boxes, box_size;
+int cell_w = 0, cell_h = 0, scale_w = 0, scale_h = 0;
 int nb_classes = ctx->nb_classes;
 float *output_data = output[output_index].data;
 float *anchors = ctx->anchors;
-- 
2.34.1


[FFmpeg-devel] [PATCH 2/4] libavfilter/vf_dnn_detect: Add NULL pointer check

2023-12-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index b82916ce6d..b2e9b8d4c6 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -112,6 +112,10 @@ static int dnn_detect_parse_anchors(char *anchors_str, 
float **anchors)
 }
 for (int i = 0; i < nb_anchor; i++) {
 token = av_strtok(anchors_str, "&", &saveptr);
+if (!token) {
+av_freep(&anchors_buf);
+return 0;
+}
 anchors_buf[i] = strtof(token, NULL);
 anchors_str = NULL;
 }
-- 
2.34.1


[FFmpeg-devel] [PATCH 1/4] libavfilter/vf_dnn_detect: Fix an incorrect expression

2023-12-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

anchors is a float **, so sizeof(*anchors) is the size of a pointer,
while the buffer actually stores floats; sizeof(**anchors) is the
element size the allocation needs.

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7ac3bb0b58..b82916ce6d 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -106,7 +106,7 @@ static int dnn_detect_parse_anchors(char *anchors_str, 
float **anchors)
 i++;
 }
 nb_anchor++;
-anchors_buf = av_mallocz(nb_anchor * sizeof(*anchors));
+anchors_buf = av_mallocz(nb_anchor * sizeof(**anchors));
 if (!anchors_buf) {
 return 0;
 }
-- 
2.34.1


[FFmpeg-devel] [PATCH v2 3/4] libavfilter/vf_dnn_detect: Add yolov3 support

2023-12-11 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add yolov3 support. The difference of yolov3 is that it has multiple
outputs at different scales, to perform better on both large and small
objects.

For model details, refer to: 
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf
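
A small standalone example of the per-output anchor indexing this patch
introduces (the anchor values are illustrative):

    #include <stdio.h>

    int main(void)
    {
        /* 3 outputs x 3 boxes per cell x (w, h): 18 floats, as parsed
         * from the "anchors" option */
        float anchors[18] = { 10, 13, 16, 30, 33, 23,
                              30, 61, 62, 45, 59, 119,
                              116, 90, 156, 198, 373, 326 };
        int detection_boxes = 3;
        for (int output_index = 0; output_index < 3; output_index++) {
            /* same offset step the patch applies per output */
            const float *s = anchors + detection_boxes * output_index * 2;
            printf("output %d first prior: %gx%g\n", output_index, s[0], s[1]);
        }
        return 0;
    }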

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 86f61c9907..7a32b191c3 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -35,6 +35,7 @@
 typedef enum {
 DDMT_SSD,
 DDMT_YOLOV1V2,
+DDMT_YOLOV3
 } DNNDetectionModelType;
 
 typedef struct DnnDetectContext {
@@ -73,6 +74,7 @@ static const AVOption dnn_detect_options[] = {
 { "model_type",  "DNN detection model type",   OFFSET2(model_type),  
AV_OPT_TYPE_INT,   { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, 
"model_type" },
 { "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
 { "yolo","output shape [1, N*Cx*Cy*DetectionBox]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" 
},
+{ "yolov3",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV3 },  0, 0, FLAGS, "model_type" 
},
 { "cell_w",  "cell width", OFFSET2(cell_w),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "cell_h",  "cell height",OFFSET2(cell_h),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "nb_classes",  "The number of class",OFFSET2(nb_classes),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
@@ -146,6 +148,11 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 cell_h = ctx->cell_h;
 scale_w = cell_w;
 scale_h = cell_h;
+} else {
+cell_w = output[output_index].width;
+cell_h = output[output_index].height;
+scale_w = ctx->scale_width;
+scale_h = ctx->scale_height;
 }
 box_size = nb_classes + 5;
 
@@ -173,6 +180,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
   output[output_index].height *
   output[output_index].width / box_size / cell_w / cell_h;
 
+anchors = anchors + (detection_boxes * output_index * 2);
 /**
  * find all candidate bbox
  * yolo output can be reshaped to [B, N*D, Cx, Cy]
@@ -284,6 +292,21 @@ static int dnn_detect_post_proc_yolo(AVFrame *frame, 
DNNData *output, AVFilterCo
 return 0;
 }
 
+static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output,
+   AVFilterContext *filter_ctx, int 
nb_outputs)
+{
+int ret = 0;
+for (int i = 0; i < nb_outputs; i++) {
+ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx);
+if (ret < 0)
+return ret;
+}
+ret = dnn_detect_fill_side_data(frame, filter_ctx);
+if (ret < 0)
+return ret;
+return 0;
+}
+
 static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
@@ -380,8 +403,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData 
*output, int nb_outpu
 ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx);
 if (ret < 0)
 return ret;
+case DDMT_YOLOV3:
+ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, 
nb_outputs);
+if (ret < 0)
+return ret;
 }
-
 return 0;
 }
 
-- 
2.34.1


[FFmpeg-devel] [PATCH v2 1/4] libavfiter/dnn_backend_openvino: Add multiple output support

2023-12-11 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add multiple output support to openvino backend. You can use '&' to
separate different outputs when you set the output names on the
command line.
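
A minimal, self-contained sketch of the name splitting this enables
(strtok_r stands in for the av_strtok used in the filter; the output
names are made up):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char names[] = "conv_a&conv_b&conv_c"; /* as given on the command line */
        char *save = NULL;
        for (char *tok = strtok_r(names, "&", &save); tok;
             tok = strtok_r(NULL, "&", &save))
            printf("output: %s\n", tok);
        return 0;
    }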

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_common.c   |   7 -
 libavfilter/dnn/dnn_backend_openvino.c | 216 +
 libavfilter/vf_dnn_detect.c|  11 +-
 3 files changed, 150 insertions(+), 84 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_common.c 
b/libavfilter/dnn/dnn_backend_common.c
index 91a4a3c4bf..632832ec36 100644
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@@ -43,13 +43,6 @@ int ff_check_exec_params(void *ctx, DNNBackendType backend, 
DNNFunctionType func
 return AVERROR(EINVAL);
 }
 
-if (exec_params->nb_output != 1 && backend != DNN_TF) {
-// currently, the filter does not need multiple outputs,
-// so we just pending the support until we really need it.
-avpriv_report_missing_feature(ctx, "multiple outputs");
-return AVERROR(ENOSYS);
-}
-
 return 0;
 }
 
diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 6fe8b9c243..089e028818 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -64,7 +64,7 @@ typedef struct OVModel{
 ov_compiled_model_t *compiled_model;
 ov_output_const_port_t* input_port;
 ov_preprocess_input_info_t* input_info;
-ov_output_const_port_t* output_port;
+ov_output_const_port_t** output_ports;
 ov_preprocess_output_info_t* output_info;
 ov_preprocess_prepostprocessor_t* preprocess;
 #else
@@ -77,6 +77,7 @@ typedef struct OVModel{
 SafeQueue *request_queue;   // holds OVRequestItem
 Queue *task_queue;  // holds TaskItem
 Queue *lltask_queue; // holds LastLevelTaskItem
+int nb_outputs;
 } OVModel;
 
 // one request for one call to openvino
@@ -349,7 +350,7 @@ static void infer_completion_callback(void *args)
 TaskItem *task = lltask->task;
 OVModel *ov_model = task->model;
 SafeQueue *requestq = ov_model->request_queue;
-DNNData output;
+DNNData *outputs;
 OVContext *ctx = &ov_model->ctx;
 #if HAVE_OPENVINO2
 size_t* dims;
@@ -358,45 +359,61 @@ static void infer_completion_callback(void *args)
 ov_shape_t output_shape = {0};
 ov_element_type_e precision;
 
-memset(&output, 0, sizeof(output));
-status = 
ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, 
&output_tensor);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR,
-   "Failed to get output tensor.");
+outputs = av_calloc(ov_model->nb_outputs, sizeof(*outputs));
+if (!outputs) {
+av_log(ctx, AV_LOG_ERROR, "Failed to alloc outputs.");
 return;
 }
 
-status = ov_tensor_data(output_tensor, &output.data);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR,
-   "Failed to get output data.");
-return;
-}
+for (int i = 0; i < ov_model->nb_outputs; i++) {
+status = 
ov_infer_request_get_tensor_by_const_port(request->infer_request,
+   
ov_model->output_ports[i],
+   &output_tensor);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR,
+"Failed to get output tensor.");
+goto end;
+}
 
-status = ov_tensor_get_shape(output_tensor, &output_shape);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n");
-return;
-}
-dims = output_shape.dims;
+status = ov_tensor_data(output_tensor, &outputs[i].data);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR,
+"Failed to get output data.");
+goto end;
+}
 
-status = ov_port_get_element_type(ov_model->output_port, &precision);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n");
+status = ov_tensor_get_shape(output_tensor, &output_shape);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n");
+goto end;
+}
+dims = output_shape.dims;
+
+status = ov_port_get_element_type(ov_model->output_ports[i], 
&precision);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get output port data 
type.\n");
+goto end;
+}
+outputs[i].dt   = precision_to_datatype(precision);
+
+outputs[i].channels = output_shape.rank > 2 ? dims[output_shape.rank - 
3] : 1;
+outputs[i].height   = output_shape.rank > 1 ? dims[output_shape.rank - 
2] : 1;
+outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 
1] : 1;
+av_assert0(request->lltask_count <= dims[0]);
+outputs[i].layou

[FFmpeg-devel] [PATCH v2 4/4] libavfilter/vf_dnn_detect: Add yolov4 support

2023-12-11 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

The difference of yolov4 is that a sigmoid function needs to be applied
to the x, y coordinates. Also make it compatible with NHWC output, as
the yolov4 model from the openvino model zoo has NHWC output layout.

For the model, refer to: 
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf
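
A minimal sketch of the decode step described above (the helper names
are illustrative, not the filter's own):

    #include <math.h>

    static float sigmoid(float x)
    {
        return 1.f / (1.f + expf(-x));
    }

    /* yolov4 emits raw network outputs, so the x/y offsets (and the
     * confidence) pass through a sigmoid before the usual yolo decode */
    static void decode_center(float raw_x, float raw_y, int cx, int cy,
                              int cell_w, int cell_h, float *x, float *y)
    {
        *x = (cx + sigmoid(raw_x)) / cell_w; /* normalized to [0, 1] */
        *y = (cy + sigmoid(raw_y)) / cell_h;
    }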

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 71 ++---
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7a32b191c3..1b04a2cb98 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -35,7 +35,8 @@
 typedef enum {
 DDMT_SSD,
 DDMT_YOLOV1V2,
-DDMT_YOLOV3
+DDMT_YOLOV3,
+DDMT_YOLOV4
 } DNNDetectionModelType;
 
 typedef struct DnnDetectContext {
@@ -75,6 +76,7 @@ static const AVOption dnn_detect_options[] = {
 { "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
 { "yolo","output shape [1, N*Cx*Cy*DetectionBox]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" 
},
 { "yolov3",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV3 },  0, 0, FLAGS, "model_type" 
},
+{ "yolov4",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV4 },0, 0, FLAGS, "model_type" },
 { "cell_w",  "cell width", OFFSET2(cell_w),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "cell_h",  "cell height",OFFSET2(cell_h),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "nb_classes",  "The number of class",OFFSET2(nb_classes),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
@@ -84,6 +86,14 @@ static const AVOption dnn_detect_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
+static inline float sigmoid(float x) {
+return 1.f / (1.f + exp(-x));
+}
+
+static inline float linear(float x) {
+return x;
+}
+
 static int dnn_detect_get_label_id(int nb_classes, int cell_size, float 
*label_data)
 {
 float max_prob = 0;
@@ -142,6 +152,8 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 float *output_data = output[output_index].data;
 float *anchors = ctx->anchors;
 AVDetectionBBox *bbox;
+float (*post_process_raw_data)(float x);
+int is_NHWC = 0;
 
 if (ctx->model_type == DDMT_YOLOV1V2) {
 cell_w = ctx->cell_w;
@@ -149,13 +161,30 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 scale_w = cell_w;
 scale_h = cell_h;
 } else {
-cell_w = output[output_index].width;
-cell_h = output[output_index].height;
+if (output[output_index].height != output[output_index].width &&
+output[output_index].height == output[output_index].channels) {
+is_NHWC = 1;
+cell_w = output[output_index].height;
+cell_h = output[output_index].channels;
+} else {
+cell_w = output[output_index].width;
+cell_h = output[output_index].height;
+}
 scale_w = ctx->scale_width;
 scale_h = ctx->scale_height;
 }
 box_size = nb_classes + 5;
 
+switch (ctx->model_type) {
+case DDMT_YOLOV1V2:
+case DDMT_YOLOV3:
+post_process_raw_data = linear;
+break;
+case DDMT_YOLOV4:
+post_process_raw_data = sigmoid;
+ break;
+}
+
 if (!cell_h || !cell_w) {
 av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n");
 return AVERROR(EINVAL);
@@ -193,19 +222,36 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 float *detection_boxes_data;
 int label_id;
 
-detection_boxes_data = output_data + box_id * box_size * 
cell_w * cell_h;
-conf = detection_boxes_data[cy * cell_w + cx + 4 * cell_w * 
cell_h];
+if (is_NHWC) {
+detection_boxes_data = output_data +
+((cy * cell_w + cx) * detection_boxes + box_id) * 
box_size;
+conf = post_process_raw_data(detection_boxes_data[4]);
+} else {
+detection_boxes_data = output_data + box_id * box_size * 
cell_w * cell_h;
+conf = post_process_raw_data(
+detection_boxes_data[cy * cell_w + cx + 4 * 
cell_w * cell_h]);
+}
 if (conf < conf_threshold) {
 continue;
 }
 
-x= detection_boxes_data[cy * cell_w + cx];
-y= detection_boxes_data[cy * cell_w + cx + cel

[FFmpeg-devel] [PATCH v2 2/4] libavfilter/vf_dnn_detect: Add input pad

2023-12-11 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add an input pad to get the model input resolution. Detection models
always have a fixed input size, and the output coordinates are based on
the input resolution, so we need the input size to map coordinates onto
our real output frames.
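
A short sketch of the mapping this makes possible (illustrative
arithmetic, not the filter's exact code):

    /* scale_size is the model input size captured in config_input, or
     * the frame size when the model input is resizable (reported as -1) */
    static float map_to_frame(float v_model, int frame_size, int scale_size)
    {
        return v_model * frame_size / (float)scale_size;
    }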

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 24 --
 libavfilter/vf_dnn_detect.c| 28 +-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 089e028818..671a995c70 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -1073,9 +1073,15 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 return AVERROR(ENOSYS);
 }
 
-input->channels = dims[1];
-input->height   = input_resizable ? -1 : dims[2];
-input->width= input_resizable ? -1 : dims[3];
+if (dims[1] <= 3) { // NCHW
+input->channels = dims[1];
+input->height   = input_resizable ? -1 : dims[2];
+input->width= input_resizable ? -1 : dims[3];
+} else { // NHWC
+input->height   = input_resizable ? -1 : dims[1];
+input->width= input_resizable ? -1 : dims[2];
+input->channels = dims[3];
+}
 input->dt   = precision_to_datatype(precision);
 
 return 0;
@@ -1105,9 +,15 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 return DNN_GENERIC_ERROR;
 }
 
-input->channels = dims.dims[1];
-input->height   = input_resizable ? -1 : dims.dims[2];
-input->width= input_resizable ? -1 : dims.dims[3];
+if (dims[1] <= 3) { // NCHW
+input->channels = dims[1];
+input->height   = input_resizable ? -1 : dims[2];
+input->width= input_resizable ? -1 : dims[3];
+} else { // NHWC
+input->height   = input_resizable ? -1 : dims[1];
+input->width= input_resizable ? -1 : dims[2];
+input->channels = dims[3];
+}
 input->dt   = precision_to_datatype(precision);
 return 0;
 }
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 373dda58bf..86f61c9907 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -699,13 +699,39 @@ static av_cold void dnn_detect_uninit(AVFilterContext 
*context)
 free_detect_labels(ctx);
 }
 
+static int config_input(AVFilterLink *inlink)
+{
+AVFilterContext *context = inlink->dst;
+DnnDetectContext *ctx = context->priv;
+DNNData model_input;
+int ret;
+
+ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);
+if (ret != 0) {
+av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
+return ret;
+}
+ctx->scale_width = model_input.width == -1 ? inlink->w : model_input.width;
+ctx->scale_height = model_input.height ==  -1 ? inlink->h : 
model_input.height;
+
+return 0;
+}
+
+static const AVFilterPad dnn_detect_inputs[] = {
+{
+.name = "default",
+.type = AVMEDIA_TYPE_VIDEO,
+.config_props = config_input,
+},
+};
+
 const AVFilter ff_vf_dnn_detect = {
 .name  = "dnn_detect",
 .description   = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the 
input."),
 .priv_size = sizeof(DnnDetectContext),
 .init  = dnn_detect_init,
 .uninit= dnn_detect_uninit,
-FILTER_INPUTS(ff_video_default_filterpad),
+FILTER_INPUTS(dnn_detect_inputs),
 FILTER_OUTPUTS(ff_video_default_filterpad),
 FILTER_PIXFMTS_ARRAY(pix_fmts),
 .priv_class= &dnn_detect_class,
-- 
2.34.1


[FFmpeg-devel] [PATCH 4/4] libavfilter/vf_dnn_detect: Add yolov4 support

2023-12-03 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

The difference of yolov4 is that a sigmoid function needs to be applied
to the x, y coordinates. Also make it compatible with NHWC output, as
the yolov4 model from the openvino model zoo has NHWC output layout.

For the model, refer to: 
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 71 ++---
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 7a32b191c3..1b04a2cb98 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -35,7 +35,8 @@
 typedef enum {
 DDMT_SSD,
 DDMT_YOLOV1V2,
-DDMT_YOLOV3
+DDMT_YOLOV3,
+DDMT_YOLOV4
 } DNNDetectionModelType;
 
 typedef struct DnnDetectContext {
@@ -75,6 +76,7 @@ static const AVOption dnn_detect_options[] = {
 { "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
 { "yolo","output shape [1, N*Cx*Cy*DetectionBox]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" 
},
 { "yolov3",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV3 },  0, 0, FLAGS, "model_type" 
},
+{ "yolov4",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV4 },0, 0, FLAGS, "model_type" },
 { "cell_w",  "cell width", OFFSET2(cell_w),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "cell_h",  "cell height",OFFSET2(cell_h),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "nb_classes",  "The number of class",OFFSET2(nb_classes),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
@@ -84,6 +86,14 @@ static const AVOption dnn_detect_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
+static inline float sigmoid(float x) {
+return 1.f / (1.f + exp(-x));
+}
+
+static inline float linear(float x) {
+return x;
+}
+
 static int dnn_detect_get_label_id(int nb_classes, int cell_size, float 
*label_data)
 {
 float max_prob = 0;
@@ -142,6 +152,8 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 float *output_data = output[output_index].data;
 float *anchors = ctx->anchors;
 AVDetectionBBox *bbox;
+float (*post_process_raw_data)(float x);
+int is_NHWC = 0;
 
 if (ctx->model_type == DDMT_YOLOV1V2) {
 cell_w = ctx->cell_w;
@@ -149,13 +161,30 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 scale_w = cell_w;
 scale_h = cell_h;
 } else {
-cell_w = output[output_index].width;
-cell_h = output[output_index].height;
+if (output[output_index].height != output[output_index].width &&
+output[output_index].height == output[output_index].channels) {
+is_NHWC = 1;
+cell_w = output[output_index].height;
+cell_h = output[output_index].channels;
+} else {
+cell_w = output[output_index].width;
+cell_h = output[output_index].height;
+}
 scale_w = ctx->scale_width;
 scale_h = ctx->scale_height;
 }
 box_size = nb_classes + 5;
 
+switch (ctx->model_type) {
+case DDMT_YOLOV1V2:
+case DDMT_YOLOV3:
+post_process_raw_data = linear;
+break;
+case DDMT_YOLOV4:
+post_process_raw_data = sigmoid;
+ break;
+}
+
 if (!cell_h || !cell_w) {
 av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n");
 return AVERROR(EINVAL);
@@ -193,19 +222,36 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 float *detection_boxes_data;
 int label_id;
 
-detection_boxes_data = output_data + box_id * box_size * 
cell_w * cell_h;
-conf = detection_boxes_data[cy * cell_w + cx + 4 * cell_w * 
cell_h];
+if (is_NHWC) {
+detection_boxes_data = output_data +
+((cy * cell_w + cx) * detection_boxes + box_id) * 
box_size;
+conf = post_process_raw_data(detection_boxes_data[4]);
+} else {
+detection_boxes_data = output_data + box_id * box_size * 
cell_w * cell_h;
+conf = post_process_raw_data(
+detection_boxes_data[cy * cell_w + cx + 4 * 
cell_w * cell_h]);
+}
 if (conf < conf_threshold) {
 continue;
 }
 
-x= detection_boxes_data[cy * cell_w + cx];
-y= detection_boxes_data[cy * cell_w + cx + cel

[FFmpeg-devel] [PATCH 3/4] libavfilter/vf_dnn_detect: Add yolov3 support

2023-12-03 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add yolov3 support. The difference of yolov3 is that it has multiple
outputs at different scales, to perform better on both large and small
objects.

For model details, refer to: 
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 86f61c9907..7a32b191c3 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -35,6 +35,7 @@
 typedef enum {
 DDMT_SSD,
 DDMT_YOLOV1V2,
+DDMT_YOLOV3
 } DNNDetectionModelType;
 
 typedef struct DnnDetectContext {
@@ -73,6 +74,7 @@ static const AVOption dnn_detect_options[] = {
 { "model_type",  "DNN detection model type",   OFFSET2(model_type),  
AV_OPT_TYPE_INT,   { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, 
"model_type" },
 { "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
 { "yolo","output shape [1, N*Cx*Cy*DetectionBox]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" 
},
+{ "yolov3",  "outputs shape [1, N*D, Cx, Cy]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV3 },  0, 0, FLAGS, "model_type" 
},
 { "cell_w",  "cell width", OFFSET2(cell_w),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "cell_h",  "cell height",OFFSET2(cell_h),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
 { "nb_classes",  "The number of class",OFFSET2(nb_classes),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
@@ -146,6 +148,11 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
 cell_h = ctx->cell_h;
 scale_w = cell_w;
 scale_h = cell_h;
+} else {
+cell_w = output[output_index].width;
+cell_h = output[output_index].height;
+scale_w = ctx->scale_width;
+scale_h = ctx->scale_height;
 }
 box_size = nb_classes + 5;
 
@@ -173,6 +180,7 @@ static int dnn_detect_parse_yolo_output(AVFrame *frame, 
DNNData *output, int out
   output[output_index].height *
   output[output_index].width / box_size / cell_w / cell_h;
 
+anchors = anchors + (detection_boxes * output_index * 2);
 /**
  * find all candidate bbox
  * yolo output can be reshaped to [B, N*D, Cx, Cy]
@@ -284,6 +292,21 @@ static int dnn_detect_post_proc_yolo(AVFrame *frame, 
DNNData *output, AVFilterCo
 return 0;
 }
 
+static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output,
+   AVFilterContext *filter_ctx, int 
nb_outputs)
+{
+int ret = 0;
+for (int i = 0; i < nb_outputs; i++) {
+ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx);
+if (ret < 0)
+return ret;
+}
+ret = dnn_detect_fill_side_data(frame, filter_ctx);
+if (ret < 0)
+return ret;
+return 0;
+}
+
 static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
@@ -380,8 +403,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData 
*output, int nb_outpu
 ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx);
 if (ret < 0)
 return ret;
+case DDMT_YOLOV3:
+ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, 
nb_outputs);
+if (ret < 0)
+return ret;
 }
-
 return 0;
 }
 
-- 
2.34.1


[FFmpeg-devel] [PATCH 2/4] libavfilter/vf_dnn_detect: Add input pad

2023-12-03 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add an input pad to get the model input resolution. Detection models
always have a fixed input size, and the output coordinates are based on
the input resolution, so we need the input size to map coordinates onto
our real output frames.

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 24 --
 libavfilter/vf_dnn_detect.c| 28 +-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 089e028818..671a995c70 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -1073,9 +1073,15 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 return AVERROR(ENOSYS);
 }
 
-input->channels = dims[1];
-input->height   = input_resizable ? -1 : dims[2];
-input->width= input_resizable ? -1 : dims[3];
+if (dims[1] <= 3) { // NCHW
+input->channels = dims[1];
+input->height   = input_resizable ? -1 : dims[2];
+input->width= input_resizable ? -1 : dims[3];
+} else { // NHWC
+input->height   = input_resizable ? -1 : dims[1];
+input->width= input_resizable ? -1 : dims[2];
+input->channels = dims[3];
+}
 input->dt   = precision_to_datatype(precision);
 
 return 0;
@@ -1105,9 +,15 @@ static int get_input_ov(void *model, DNNData *input, 
const char *input_name)
 return DNN_GENERIC_ERROR;
 }
 
-input->channels = dims.dims[1];
-input->height   = input_resizable ? -1 : dims.dims[2];
-input->width= input_resizable ? -1 : dims.dims[3];
+if (dims[1] <= 3) { // NCHW
+input->channels = dims[1];
+input->height   = input_resizable ? -1 : dims[2];
+input->width= input_resizable ? -1 : dims[3];
+} else { // NHWC
+input->height   = input_resizable ? -1 : dims[1];
+input->width= input_resizable ? -1 : dims[2];
+input->channels = dims[3];
+}
 input->dt   = precision_to_datatype(precision);
 return 0;
 }
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 373dda58bf..86f61c9907 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -699,13 +699,39 @@ static av_cold void dnn_detect_uninit(AVFilterContext 
*context)
 free_detect_labels(ctx);
 }
 
+static int config_input(AVFilterLink *inlink)
+{
+AVFilterContext *context = inlink->dst;
+DnnDetectContext *ctx = context->priv;
+DNNData model_input;
+int ret;
+
+ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);
+if (ret != 0) {
+av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
+return ret;
+}
+ctx->scale_width = model_input.width == -1 ? inlink->w : model_input.width;
+ctx->scale_height = model_input.height ==  -1 ? inlink->h : 
model_input.height;
+
+return 0;
+}
+
+static const AVFilterPad dnn_detect_inputs[] = {
+{
+.name = "default",
+.type = AVMEDIA_TYPE_VIDEO,
+.config_props = config_input,
+},
+};
+
 const AVFilter ff_vf_dnn_detect = {
 .name  = "dnn_detect",
 .description   = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the 
input."),
 .priv_size = sizeof(DnnDetectContext),
 .init  = dnn_detect_init,
 .uninit= dnn_detect_uninit,
-FILTER_INPUTS(ff_video_default_filterpad),
+FILTER_INPUTS(dnn_detect_inputs),
 FILTER_OUTPUTS(ff_video_default_filterpad),
 FILTER_PIXFMTS_ARRAY(pix_fmts),
 .priv_class= &dnn_detect_class,
-- 
2.34.1


[FFmpeg-devel] [PATCH 1/4] libavfiter/dnn/dnn_backend_openvino: add multiple output support

2023-12-03 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add multiple output support to openvino backend. You can use '&' to
separate different outputs when you set the output names on the
command line.

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_common.c   |   7 -
 libavfilter/dnn/dnn_backend_openvino.c | 216 +
 libavfilter/vf_dnn_detect.c|  11 +-
 3 files changed, 150 insertions(+), 84 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_common.c 
b/libavfilter/dnn/dnn_backend_common.c
index 91a4a3c4bf..632832ec36 100644
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@@ -43,13 +43,6 @@ int ff_check_exec_params(void *ctx, DNNBackendType backend, 
DNNFunctionType func
 return AVERROR(EINVAL);
 }
 
-if (exec_params->nb_output != 1 && backend != DNN_TF) {
-// currently, the filter does not need multiple outputs,
-// so we just pending the support until we really need it.
-avpriv_report_missing_feature(ctx, "multiple outputs");
-return AVERROR(ENOSYS);
-}
-
 return 0;
 }
 
diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 6fe8b9c243..089e028818 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -64,7 +64,7 @@ typedef struct OVModel{
 ov_compiled_model_t *compiled_model;
 ov_output_const_port_t* input_port;
 ov_preprocess_input_info_t* input_info;
-ov_output_const_port_t* output_port;
+ov_output_const_port_t** output_ports;
 ov_preprocess_output_info_t* output_info;
 ov_preprocess_prepostprocessor_t* preprocess;
 #else
@@ -77,6 +77,7 @@ typedef struct OVModel{
 SafeQueue *request_queue;   // holds OVRequestItem
 Queue *task_queue;  // holds TaskItem
 Queue *lltask_queue; // holds LastLevelTaskItem
+int nb_outputs;
 } OVModel;
 
 // one request for one call to openvino
@@ -349,7 +350,7 @@ static void infer_completion_callback(void *args)
 TaskItem *task = lltask->task;
 OVModel *ov_model = task->model;
 SafeQueue *requestq = ov_model->request_queue;
-DNNData output;
+DNNData *outputs;
 OVContext *ctx = &ov_model->ctx;
 #if HAVE_OPENVINO2
 size_t* dims;
@@ -358,45 +359,61 @@ static void infer_completion_callback(void *args)
 ov_shape_t output_shape = {0};
 ov_element_type_e precision;
 
-memset(&output, 0, sizeof(output));
-status = 
ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, 
&output_tensor);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR,
-   "Failed to get output tensor.");
+outputs = av_calloc(ov_model->nb_outputs, sizeof(*outputs));
+if (!outputs) {
+av_log(ctx, AV_LOG_ERROR, "Failed to alloc outputs.");
 return;
 }
 
-status = ov_tensor_data(output_tensor, &output.data);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR,
-   "Failed to get output data.");
-return;
-}
+for (int i = 0; i < ov_model->nb_outputs; i++) {
+status = 
ov_infer_request_get_tensor_by_const_port(request->infer_request,
+   
ov_model->output_ports[i],
+   &output_tensor);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR,
+"Failed to get output tensor.");
+goto end;
+}
 
-status = ov_tensor_get_shape(output_tensor, &output_shape);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n");
-return;
-}
-dims = output_shape.dims;
+status = ov_tensor_data(output_tensor, &outputs[i].data);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR,
+"Failed to get output data.");
+goto end;
+}
 
-status = ov_port_get_element_type(ov_model->output_port, &precision);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to get output port data type.\n");
+status = ov_tensor_get_shape(output_tensor, &output_shape);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get output port shape.\n");
+goto end;
+}
+dims = output_shape.dims;
+
+status = ov_port_get_element_type(ov_model->output_ports[i], 
&precision);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get output port data 
type.\n");
+goto end;
+}
+outputs[i].dt   = precision_to_datatype(precision);
+
+outputs[i].channels = output_shape.rank > 2 ? dims[output_shape.rank - 
3] : 1;
+outputs[i].height   = output_shape.rank > 1 ? dims[output_shape.rank - 
2] : 1;
+outputs[i].width= output_shape.rank > 0 ? dims[output_shape.rank - 
1] : 1;
+av_assert0(request->lltask_count <= dims[0]);
+outputs[i].layou

[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add yolo support

2023-11-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add yolo support. A yolo model doesn't output the final result; it
outputs candidate boxes, so we need a post-process that removes
overlapping boxes to get the final result. Also, the box coordinates
relate to cells and anchors, so we need this information to calculate
the boxes as well.

For model details, please refer to: 
https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v2-tf
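
A self-contained sketch of the overlap test that post-process relies on
(the same intersection-over-union idea as the dnn_detect_IOU helper in
this patch):

    #include <math.h>

    typedef struct Box { float x, y, w, h; } Box;

    static float iou(const Box *a, const Box *b)
    {
        float ow = fminf(a->x + a->w, b->x + b->w) - fmaxf(a->x, b->x);
        float oh = fminf(a->y + a->h, b->y + b->h) - fmaxf(a->y, b->y);
        float inter = (ow < 0.f || oh < 0.f) ? 0.f : ow * oh;
        return inter / (a->w * a->h + b->w * b->h - inter);
    }
    /* candidates whose IOU with a higher-confidence box exceeds a
     * threshold are dropped (non-maximum suppression) */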

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c |   6 +-
 libavfilter/vf_dnn_detect.c| 242 -
 2 files changed, 244 insertions(+), 4 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index d3af8c34ce..6fe8b9c243 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -386,9 +386,9 @@ static void infer_completion_callback(void *args)
 ov_shape_free(&output_shape);
 return;
 }
-output.channels = dims[1];
-output.height   = dims[2];
-output.width= dims[3];
+output.channels = output_shape.rank > 2 ? dims[output_shape.rank - 3] : 1;
+output.height   = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1;
+output.width= output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1;
 av_assert0(request->lltask_count <= dims[0]);
 ov_shape_free(&output_shape);
 #else
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 9db90ee4cf..7ac3bb0b58 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -30,9 +30,11 @@
 #include "libavutil/time.h"
 #include "libavutil/avstring.h"
 #include "libavutil/detection_bbox.h"
+#include "libavutil/fifo.h"
 
 typedef enum {
-DDMT_SSD
+DDMT_SSD,
+DDMT_YOLOV1V2,
 } DNNDetectionModelType;
 
 typedef struct DnnDetectContext {
@@ -43,6 +45,15 @@ typedef struct DnnDetectContext {
 char **labels;
 int label_count;
 DNNDetectionModelType model_type;
+int cell_w;
+int cell_h;
+int nb_classes;
+AVFifo *bboxes_fifo;
+int scale_width;
+int scale_height;
+char *anchors_str;
+float *anchors;
+int nb_anchor;
 } DnnDetectContext;
 
 #define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)
@@ -61,11 +72,218 @@ static const AVOption dnn_detect_options[] = {
 { "labels",  "path to labels file",OFFSET2(labels_filename), 
AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS },
 { "model_type",  "DNN detection model type",   OFFSET2(model_type),  
AV_OPT_TYPE_INT,   { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, 
"model_type" },
 { "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
+{ "yolo","output shape [1, N*Cx*Cy*DetectionBox]",  0,   
AV_OPT_TYPE_CONST,   { .i64 = DDMT_YOLOV1V2 },0, 0, FLAGS, "model_type" 
},
+{ "cell_w",  "cell width", OFFSET2(cell_w),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
+{ "cell_h",  "cell height",OFFSET2(cell_h),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
+{ "nb_classes",  "The number of class",OFFSET2(nb_classes),  
AV_OPT_TYPE_INT,   { .i64 = 0 },0, INTMAX_MAX, FLAGS },
+{ "anchors", "anchors, splited by '&'",OFFSET2(anchors_str),   
  AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS },
 { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
+static int dnn_detect_get_label_id(int nb_classes, int cell_size, float 
*label_data)
+{
+float max_prob = 0;
+int label_id = 0;
+for (int i = 0; i < nb_classes; i++) {
+if (label_data[i * cell_size] > max_prob) {
+max_prob = label_data[i * cell_size];
+label_id = i;
+}
+}
+return label_id;
+}
+
+static int dnn_detect_parse_anchors(char *anchors_str, float **anchors)
+{
+char *saveptr = NULL, *token;
+float *anchors_buf;
+int nb_anchor = 0, i = 0;
+while(anchors_str[i] != '\0') {
+if(anchors_str[i] == '&')
+nb_anchor++;
+i++;
+}
+nb_anchor++;
+anchors_buf = av_mallocz(nb_anchor * sizeof(*anchors));
+if (!anchors_buf) {
+return 0;
+}
+for (int i = 0; i < nb_anchor; i++) {
+token = av_strtok(anchors_str, "&", &saveptr);
+anchors_buf[i] = strtof(token, NULL);
+anchors_str = NULL;
+}
+*anchors = anchors_buf;
+return nb_anchor;
+}
+
+/* Calculate Intersection Over Union */
+static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)
+{
+float overlapping_width = FFMIN(bbox1->x + bbox1->w, bbox2->x + bbox2->w) 
- FFMAX(bbox1->x, bbox2->x);
+float overlapping_height = FFMIN(bbox1->y + bbox1->h, bbox2->y + bbox2->h) 
- FFMAX(bbox1->y, bbox2->y);
+float intersection_area =
+(overlapping_width < 0 || overlappi

[FFmpeg-devel] [PATCH 1/2] libavfilter/vf_dnn_detect: Add model_type option.

2023-11-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

There are many kinds of detection DNN models and they have different
preprocess and postprocess methods. To support more models, a
"model_type" option is added to help choose the preprocess and
postprocess functions.
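
As a usage illustration (the file name is made up), a command line
selects the parser with something like
-vf dnn_detect=dnn_backend=openvino:model=ssd.xml:model_type=ssd;
ssd stays the default, so existing invocations keep their behavior.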

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 42 ++---
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index b5dae42c65..9db90ee4cf 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -31,6 +31,10 @@
 #include "libavutil/avstring.h"
 #include "libavutil/detection_bbox.h"
 
+typedef enum {
+DDMT_SSD
+} DNNDetectionModelType;
+
 typedef struct DnnDetectContext {
 const AVClass *class;
 DnnContext dnnctx;
@@ -38,6 +42,7 @@ typedef struct DnnDetectContext {
 char *labels_filename;
 char **labels;
 int label_count;
+DNNDetectionModelType model_type;
 } DnnDetectContext;
 
 #define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)
@@ -54,12 +59,14 @@ static const AVOption dnn_detect_options[] = {
 DNN_COMMON_OPTIONS
 { "confidence",  "threshold of confidence",OFFSET2(confidence),  
AV_OPT_TYPE_FLOAT, { .dbl = 0.5 },  0, 1, FLAGS},
 { "labels",  "path to labels file",OFFSET2(labels_filename), 
AV_OPT_TYPE_STRING,{ .str = NULL }, 0, 0, FLAGS },
+{ "model_type",  "DNN detection model type",   OFFSET2(model_type),  
AV_OPT_TYPE_INT,   { .i64 = DDMT_SSD },INT_MIN, INT_MAX, FLAGS, 
"model_type" },
+{ "ssd", "output shape [1, 1, N, 7]",  0,
AV_OPT_TYPE_CONST,   { .i64 = DDMT_SSD },0, 0, FLAGS, "model_type" },
 { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(dnn_detect);
 
-static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
 float conf_threshold = ctx->confidence;
@@ -67,14 +74,12 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData 
*output, AVFilterCont
 int detect_size = output->width;
 float *detections = output->data;
 int nb_bboxes = 0;
-AVFrameSideData *sd;
-AVDetectionBBox *bbox;
 AVDetectionBBoxHeader *header;
+AVDetectionBBox *bbox;
 
-sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
-if (sd) {
-av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side 
data.\n");
-return -1;
+if (output->width != 7) {
+av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd 
requirement.\n");
+return AVERROR(EINVAL);
 }
 
 for (int i = 0; i < proposal_count; ++i) {
@@ -135,6 +140,29 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData 
*output, AVFilterCont
 return 0;
 }
 
+static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
+{
+AVFrameSideData *sd;
+DnnDetectContext *ctx = filter_ctx->priv;
+int ret = 0;
+
+sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
+if (sd) {
+av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side 
data.\n");
+return -1;
+}
+
+switch (ctx->model_type) {
+case DDMT_SSD:
+ret = dnn_detect_post_proc_ssd(frame, output, filter_ctx);
+if (ret < 0)
+return ret;
+break;
+}
+
+return 0;
+}
+
 static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, 
AVFilterContext *filter_ctx)
 {
 DnnDetectContext *ctx = filter_ctx->priv;
-- 
2.34.1


[FFmpeg-devel] [PATCH] libavfilter/dnn/openvino: Reduce redundant memory allocation

2023-11-09 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

We can directly get the data pointer from the tensor, so the extra
memory allocation can be removed.
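
A condensed sketch of the zero-copy pattern the patch switches to
(error handling omitted; precision, input_shape and infer_request are
assumed to be set up as in fill_model_input_ov):

    ov_tensor_t *tensor = NULL;
    void *data = NULL;
    ov_tensor_create(precision, input_shape, &tensor);
    ov_tensor_data(tensor, &data);  /* buffer is owned by the tensor */
    /* ... write the frame's pixels straight into data ... */
    ov_infer_request_set_input_tensor(infer_request, tensor);
    ov_tensor_free(tensor);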

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 10520cd765..d3af8c34ce 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -204,7 +204,6 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ov_tensor_t* tensor = NULL;
 ov_shape_t input_shape = {0};
 ov_element_type_e precision;
-void *input_data_ptr = NULL;
 #else
 dimensions_t dims;
 precision_e precision;
@@ -249,12 +248,6 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 input.width = dims[2];
 input.channels = dims[3];
 input.dt = precision_to_datatype(precision);
-input.data = av_malloc(input.height * input.width * input.channels * 
get_datatype_size(input.dt));
-if (!input.data) {
-ov_shape_free(&input_shape);
-return AVERROR(ENOMEM);
-}
-input_data_ptr = input.data;
 #else
 status = ie_infer_request_get_blob(request->infer_request, 
task->input_name, &input_blob);
 if (status != OK) {
@@ -297,6 +290,26 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 request->lltasks[i] = lltask;
 request->lltask_count = i + 1;
 task = lltask->task;
+#if HAVE_OPENVINO2
+if (tensor)
+ov_tensor_free(tensor);
+status = ov_tensor_create(precision, input_shape, &tensor);
+ov_shape_free(&input_shape);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to create tensor from host 
prt.\n");
+return ov2_map_error(status, NULL);
+}
+status = ov_tensor_data(tensor, &input.data);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input data.\n");
+return ov2_map_error(status, NULL);
+}
+status = ov_infer_request_set_input_tensor(request->infer_request, 
tensor);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to Set an input tensor for the 
model.\n");
+return ov2_map_error(status, NULL);
+}
+#endif
 switch (ov_model->model->func_type) {
 case DFT_PROCESS_FRAME:
 if (task->do_ioproc) {
@@ -317,24 +330,11 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 av_assert0(!"should not reach here");
 break;
 }
-#if HAVE_OPENVINO2
-status = ov_tensor_create_from_host_ptr(precision, input_shape, 
input.data, &tensor);
-ov_shape_free(&input_shape);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to create tensor from host 
prt.\n");
-return ov2_map_error(status, NULL);
-}
-status = ov_infer_request_set_input_tensor(request->infer_request, 
tensor);
-if (status != OK) {
-av_log(ctx, AV_LOG_ERROR, "Failed to Set an input tensor for the 
model.\n");
-return ov2_map_error(status, NULL);
-}
-#endif
 input.data = (uint8_t *)input.data
  + input.width * input.height * input.channels * 
get_datatype_size(input.dt);
 }
 #if HAVE_OPENVINO2
-av_freep(&input_data_ptr);
+ov_tensor_free(tensor);
 #else
 ie_blob_free(&input_blob);
 #endif
-- 
2.34.1


[FFmpeg-devel] [PATCH v2 3/3] libavfilter/dnn: Initialze DNNData variables

2023-09-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_tf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index b521de7fbe..25046b58d9 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -622,7 +622,7 @@ err:
 }
 
 static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
-DNNData input;
+DNNData input = { 0 };
 LastLevelTaskItem *lltask;
 TaskItem *task;
 TFInferRequest *infer_request = NULL;
@@ -724,7 +724,7 @@ static void infer_completion_callback(void *args) {
 TFModel *tf_model = task->model;
 TFContext *ctx = &tf_model->ctx;
 
-outputs = av_malloc_array(task->nb_output, sizeof(*outputs));
+outputs = av_calloc(task->nb_output, sizeof(*outputs));
 if (!outputs) {
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n");
 goto err;
-- 
2.34.1


[FFmpeg-devel] [PATCH v2 2/3] libavfilter/dnn: Add scale and mean preprocess to openvino backend

2023-09-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

DNN models have different data preprocess requirements. Scale and mean
parameters are added to preprocess the input data.
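
A one-line model of the arithmetic, assuming the option semantics below
(divide by scale, then subtract mean); the backend itself wires this
through openvino preprocess steps rather than touching pixels in C:

    /* scale=255, mean=0 maps 8-bit input onto [0, 1] */
    static float preprocess(unsigned char px, float scale, float mean)
    {
        return px / scale - mean;
    }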

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 43 --
 libavfilter/dnn/dnn_io_proc.c  | 82 +-
 libavfilter/dnn_interface.h|  2 +
 3 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 3ba5f5331a..4224600f94 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -46,6 +46,8 @@ typedef struct OVOptions{
 int batch_size;
 int input_resizable;
 DNNLayout layout;
+float scale;
+float mean;
 } OVOptions;
 
 typedef struct OVContext {
@@ -105,6 +107,8 @@ static const AVOption dnn_openvino_options[] = {
 { "none",  "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, 
FLAGS, "layout"},
 { "nchw",  "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, 
FLAGS, "layout"},
 { "nhwc",  "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, 
FLAGS, "layout"},
+{ "scale", "Add scale preprocess operation. Divide each element of input 
by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 
INT_MIN, INT_MAX, FLAGS},
+{ "mean",  "Add mean preprocess operation. Subtract specified value from 
each element of input.", OFFSET(options.mean),  AV_OPT_TYPE_FLOAT, { .dbl = 0 
}, INT_MIN, INT_MAX, FLAGS},
 { NULL }
 };
 
@@ -209,6 +213,7 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ie_blob_t *input_blob = NULL;
 #endif
 
+memset(&input, 0, sizeof(input));
 lltask = ff_queue_peek_front(ov_model->lltask_queue);
 av_assert0(lltask);
 task = lltask->task;
@@ -274,6 +279,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 // all models in openvino open model zoo use BGR as input,
 // change to be an option when necessary.
 input.order = DCO_BGR;
+// We use preprocess_steps to scale input data, so disable scale and mean 
here.
+input.scale = 1;
+input.mean = 0;
 
 for (int i = 0; i < ctx->options.batch_size; ++i) {
 lltask = ff_queue_pop_front(ov_model->lltask_queue);
@@ -343,6 +351,7 @@ static void infer_completion_callback(void *args)
 ov_shape_t output_shape = {0};
 ov_element_type_e precision;
 
+memset(&output, 0, sizeof(output));
 status = 
ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, 
&output_tensor);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR,
@@ -409,6 +418,8 @@ static void infer_completion_callback(void *args)
 #endif
 output.dt   = precision_to_datatype(precision);
 output.layout   = ctx->options.layout;
+output.scale= ctx->options.scale;
+output.mean = ctx->options.mean;
 
 av_assert0(request->lltask_count >= 1);
 for (int i = 0; i < request->lltask_count; ++i) {
@@ -542,7 +553,9 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 ie_config_t config = {NULL, NULL, NULL};
 char *all_dev_names = NULL;
 #endif
-
+// We scale pixel by default when do frame processing.
+if (fabsf(ctx->options.scale) < 1e-6f)
+ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 
255 : 1;
 // batch size
 if (ctx->options.batch_size <= 0) {
 ctx->options.batch_size = 1;
@@ -609,15 +622,37 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
+status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
 if (ov_model->model->func_type != DFT_PROCESS_FRAME)
-//set precision only for detect and classify
-status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
-status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
F32);
+else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) 
> 1e-6f)
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
F32);
+else
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
U8);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to set input/output element type\n");
 ret = ov2_map_error(status, NULL);
 goto err;
 }
+// set preprocess steps.
+if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 
1e-6f) {
+ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
+status = 
ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, 
&input_process_steps);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get preprocess steps\n");
+ret = ov2_map_error(st

[FFmpeg-devel] [PATCH v2 1/3] libavfilter/dnn: add layout option to openvino backend

2023-09-20 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

DNN models have different input layouts (NCHW or NHWC), so a
"layout" option is added.
Use openvino's API to do layout conversion for the input data. Use
swscale to do layout conversion for the output data, as openvino
doesn't have a similar C API for the output.
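
A small standalone sketch of what the two layouts mean for element
addressing (illustrative only):

    #include <stddef.h>

    /* element (c, y, x) of a tensor with C channels, H rows, W columns */
    static size_t nchw_index(int c, int y, int x, int H, int W)
    {
        return ((size_t)c * H + y) * W + x;
    }

    static size_t nhwc_index(int c, int y, int x, int W, int C)
    {
        return ((size_t)y * W + x) * C + c;
    }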

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c |  47 +++-
 libavfilter/dnn/dnn_io_proc.c  | 151 ++---
 libavfilter/dnn_interface.h|   7 ++
 3 files changed, 185 insertions(+), 20 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 4922833b07..3ba5f5331a 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -45,6 +45,7 @@ typedef struct OVOptions{
 uint8_t async;
 int batch_size;
 int input_resizable;
+DNNLayout layout;
 } OVOptions;
 
 typedef struct OVContext {
@@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = {
 DNN_BACKEND_COMMON_OPTIONS
 { "batch_size",  "batch size per request", OFFSET(options.batch_size),  
AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS},
 { "input_resizable", "can input be resizable or not", 
OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, 
FLAGS },
+{ "layout", "input layout of model", OFFSET(options.layout), 
AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" },
+{ "none",  "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, 
FLAGS, "layout"},
+{ "nchw",  "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, 
FLAGS, "layout"},
+{ "nhwc",  "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, 
FLAGS, "layout"},
 { NULL }
 };
 
@@ -231,9 +236,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
 return AVERROR(ENOSYS);
 }
-input.height = dims[2];
-input.width = dims[3];
-input.channels = dims[1];
+input.height = dims[1];
+input.width = dims[2];
+input.channels = dims[3];
 input.dt = precision_to_datatype(precision);
 input.data = av_malloc(input.height * input.width * input.channels * 
get_datatype_size(input.dt));
 if (!input.data)
@@ -403,6 +408,7 @@ static void infer_completion_callback(void *args)
 av_assert0(request->lltask_count <= dims.dims[0]);
 #endif
 output.dt   = precision_to_datatype(precision);
+output.layout   = ctx->options.layout;
 
 av_assert0(request->lltask_count >= 1);
 for (int i = 0; i < request->lltask_count; ++i) {
@@ -521,11 +527,14 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 OVContext *ctx = &ov_model->ctx;
 #if HAVE_OPENVINO2
 ov_status_e status;
-ov_preprocess_input_tensor_info_t* input_tensor_info;
-ov_preprocess_output_tensor_info_t* output_tensor_info;
+ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
+ov_preprocess_output_tensor_info_t* output_tensor_info = NULL;
+ov_preprocess_input_model_info_t* input_model_info = NULL;
 ov_model_t *tmp_ov_model;
 ov_layout_t* NHWC_layout = NULL;
+ov_layout_t* NCHW_layout = NULL;
 const char* NHWC_desc = "NHWC";
+const char* NCHW_desc = "NCHW";
 const char* device = ctx->options.device_type;
 #else
 IEStatusCode status;
@@ -570,6 +579,7 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 
 //set input layout
 status = ov_layout_create(NHWC_desc, &NHWC_layout);
+status |= ov_layout_create(NCHW_desc, &NCHW_layout);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n");
 ret = ov2_map_error(status, NULL);
@@ -583,6 +593,22 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
+status = ov_preprocess_input_info_get_model_info(ov_model->input_info, 
&input_model_info);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n");
+ret = ov2_map_error(status, NULL);
+goto err;
+}
+if (ctx->options.layout == DL_NCHW)
+status = ov_preprocess_input_model_info_set_layout(input_model_info, 
NCHW_layout);
+else if (ctx->options.layout == DL_NHWC)
+status = ov_preprocess_input_model_info_set_layout(input_model_info, 
NHWC_layout);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
+ret = ov2_map_error(status, NULL);
+goto err;
+}
+
 if (ov_model->model->func_type != DFT_PROCESS_FRAME)
 //set precision only for detect and classify
 status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
@@ -618,6 +644,9 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 ret = ov2_map_error(sta

[FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialze DNNData variables

2023-09-19 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_tf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index b521de7fbe..e1e8cef0d2 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -629,6 +629,7 @@ static int fill_model_input_tf(TFModel *tf_model, 
TFRequestItem *request) {
 TFContext *ctx = &tf_model->ctx;
 int ret = 0;
 
+memset(&input, 0, sizeof(input));
 lltask = ff_queue_pop_front(tf_model->lltask_queue);
 av_assert0(lltask);
 task = lltask->task;
@@ -724,7 +725,7 @@ static void infer_completion_callback(void *args) {
 TFModel *tf_model = task->model;
 TFContext *ctx = &tf_model->ctx;
 
-outputs = av_malloc_array(task->nb_output, sizeof(*outputs));
+outputs = av_calloc(task->nb_output, sizeof(*outputs));
 if (!outputs) {
 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n");
 goto err;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess to openvino backend

2023-09-19 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

DNN models have different data preprocessing requirements. Scale and
mean parameters are added to preprocess the input data.
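
A sketch of the resulting normalization, assuming OpenVINO's usual
order of the mean step before the scale step (both values in input
pixel units):

    normalized = (pixel - mean) / scale

With the frame-processing defaults set below (scale 255, mean 0), 8-bit
input is mapped into [0, 1].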

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c | 43 --
 libavfilter/dnn/dnn_io_proc.c  | 82 +-
 libavfilter/dnn_interface.h|  2 +
 3 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 3ba5f5331a..4224600f94 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -46,6 +46,8 @@ typedef struct OVOptions{
 int batch_size;
 int input_resizable;
 DNNLayout layout;
+float scale;
+float mean;
 } OVOptions;
 
 typedef struct OVContext {
@@ -105,6 +107,8 @@ static const AVOption dnn_openvino_options[] = {
 { "none",  "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, 
FLAGS, "layout"},
 { "nchw",  "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, 
FLAGS, "layout"},
 { "nhwc",  "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, 
FLAGS, "layout"},
+{ "scale", "Add scale preprocess operation. Divide each element of input 
by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 
INT_MIN, INT_MAX, FLAGS},
+{ "mean",  "Add mean preprocess operation. Subtract specified value from 
each element of input.", OFFSET(options.mean),  AV_OPT_TYPE_FLOAT, { .dbl = 0 
}, INT_MIN, INT_MAX, FLAGS},
 { NULL }
 };
 
@@ -209,6 +213,7 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 ie_blob_t *input_blob = NULL;
 #endif
 
+memset(&input, 0, sizeof(input));
 lltask = ff_queue_peek_front(ov_model->lltask_queue);
 av_assert0(lltask);
 task = lltask->task;
@@ -274,6 +279,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 // all models in openvino open model zoo use BGR as input,
 // change to be an option when necessary.
 input.order = DCO_BGR;
+// We use preprocess_steps to scale input data, so disable scale and mean 
here.
+input.scale = 1;
+input.mean = 0;
 
 for (int i = 0; i < ctx->options.batch_size; ++i) {
 lltask = ff_queue_pop_front(ov_model->lltask_queue);
@@ -343,6 +351,7 @@ static void infer_completion_callback(void *args)
 ov_shape_t output_shape = {0};
 ov_element_type_e precision;
 
+memset(&output, 0, sizeof(output));
 status = 
ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, 
&output_tensor);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR,
@@ -409,6 +418,8 @@ static void infer_completion_callback(void *args)
 #endif
 output.dt   = precision_to_datatype(precision);
 output.layout   = ctx->options.layout;
+output.scale= ctx->options.scale;
+output.mean = ctx->options.mean;
 
 av_assert0(request->lltask_count >= 1);
 for (int i = 0; i < request->lltask_count; ++i) {
@@ -542,7 +553,9 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 ie_config_t config = {NULL, NULL, NULL};
 char *all_dev_names = NULL;
 #endif
-
+// We scale pixels by default when doing frame processing.
+if (fabsf(ctx->options.scale) < 1e-6f)
+ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 
255 : 1;
 // batch size
 if (ctx->options.batch_size <= 0) {
 ctx->options.batch_size = 1;
@@ -609,15 +622,37 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
+status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
 if (ov_model->model->func_type != DFT_PROCESS_FRAME)
-//set precision only for detect and classify
-status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
-status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
F32);
+else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) 
> 1e-6f)
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
F32);
+else
+status |= ov_preprocess_output_set_element_type(output_tensor_info, 
U8);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to set input/output element type\n");
 ret = ov2_map_error(status, NULL);
 goto err;
 }
+// set preprocess steps.
+if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 
1e-6f) {
+ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
+status = 
ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, 
&input_process_steps);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get preprocess steps\n");
+ret = ov2_map_error(st

[FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend

2023-09-19 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

DNN models have different input layouts (NCHW or NHWC), so a
"layout" option is added.
Use openvino's API to do layout conversion for input data. Use swscale
to do layout conversion for output data, as openvino doesn't have a
similar C API for output.

Signed-off-by: Wenbin Chen 
---
 libavfilter/dnn/dnn_backend_openvino.c |  47 +++-
 libavfilter/dnn/dnn_io_proc.c  | 151 ++---
 libavfilter/dnn_interface.h|   7 ++
 3 files changed, 185 insertions(+), 20 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 4922833b07..3ba5f5331a 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -45,6 +45,7 @@ typedef struct OVOptions{
 uint8_t async;
 int batch_size;
 int input_resizable;
+DNNLayout layout;
 } OVOptions;
 
 typedef struct OVContext {
@@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = {
 DNN_BACKEND_COMMON_OPTIONS
 { "batch_size",  "batch size per request", OFFSET(options.batch_size),  
AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS},
 { "input_resizable", "can input be resizable or not", 
OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, 
FLAGS },
+{ "layout", "input layout of model", OFFSET(options.layout), 
AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" },
+{ "none",  "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, 
FLAGS, "layout"},
+{ "nchw",  "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, 
FLAGS, "layout"},
+{ "nhwc",  "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, 
FLAGS, "layout"},
 { NULL }
 };
 
@@ -231,9 +236,9 @@ static int fill_model_input_ov(OVModel *ov_model, 
OVRequestItem *request)
 avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
 return AVERROR(ENOSYS);
 }
-input.height = dims[2];
-input.width = dims[3];
-input.channels = dims[1];
+input.height = dims[1];
+input.width = dims[2];
+input.channels = dims[3];
 input.dt = precision_to_datatype(precision);
 input.data = av_malloc(input.height * input.width * input.channels * 
get_datatype_size(input.dt));
 if (!input.data)
@@ -403,6 +408,7 @@ static void infer_completion_callback(void *args)
 av_assert0(request->lltask_count <= dims.dims[0]);
 #endif
 output.dt   = precision_to_datatype(precision);
+output.layout   = ctx->options.layout;
 
 av_assert0(request->lltask_count >= 1);
 for (int i = 0; i < request->lltask_count; ++i) {
@@ -521,11 +527,14 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 OVContext *ctx = &ov_model->ctx;
 #if HAVE_OPENVINO2
 ov_status_e status;
-ov_preprocess_input_tensor_info_t* input_tensor_info;
-ov_preprocess_output_tensor_info_t* output_tensor_info;
+ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
+ov_preprocess_output_tensor_info_t* output_tensor_info = NULL;
+ov_preprocess_input_model_info_t* input_model_info = NULL;
 ov_model_t *tmp_ov_model;
 ov_layout_t* NHWC_layout = NULL;
+ov_layout_t* NCHW_layout = NULL;
 const char* NHWC_desc = "NHWC";
+const char* NCHW_desc = "NCHW";
 const char* device = ctx->options.device_type;
 #else
 IEStatusCode status;
@@ -570,6 +579,7 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 
 //set input layout
 status = ov_layout_create(NHWC_desc, &NHWC_layout);
+status |= ov_layout_create(NCHW_desc, &NCHW_layout);
 if (status != OK) {
 av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n");
 ret = ov2_map_error(status, NULL);
@@ -583,6 +593,22 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 goto err;
 }
 
+status = ov_preprocess_input_info_get_model_info(ov_model->input_info, 
&input_model_info);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n");
+ret = ov2_map_error(status, NULL);
+goto err;
+}
+if (ctx->options.layout == DL_NCHW)
+status = ov_preprocess_input_model_info_set_layout(input_model_info, 
NCHW_layout);
+else if (ctx->options.layout == DL_NHWC)
+status = ov_preprocess_input_model_info_set_layout(input_model_info, 
NHWC_layout);
+if (status != OK) {
+av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
+ret = ov2_map_error(status, NULL);
+goto err;
+}
+
 if (ov_model->model->func_type != DFT_PROCESS_FRAME)
 //set precision only for detect and classify
 status = 
ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
@@ -618,6 +644,9 @@ static int init_model_ov(OVModel *ov_model, const char 
*input_name, const char *
 ret = ov2_map_error(sta

[FFmpeg-devel] [PATCH] libavfilter/vf_vpp_qsv: Fix a timestamp bug when framerate is not set

2023-08-15 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

If the user doesn't set a framerate when creating the filter, the filter
uses the default framerate {0, 1}. This causes an error when the time
base is set to 1/framerate. Now pass inlink->time_base to the outlink
when the framerate is not set.
This patch fixes tickets #10476 and #10468.
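
A quick illustration of the failure mode (av_inv_q just swaps numerator
and denominator, so the unset default yields an invalid time base):

    AVRational fr = { 0, 1 };     /* default when framerate is not set */
    AVRational tb = av_inv_q(fr); /* -> { 1, 0 }, an invalid time base */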

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_vpp_qsv.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 92ef0f1d89..c07b45fedb 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -536,7 +536,10 @@ static int config_output(AVFilterLink *outlink)
 outlink->w  = vpp->out_width;
 outlink->h  = vpp->out_height;
 outlink->frame_rate = vpp->framerate;
-outlink->time_base  = av_inv_q(vpp->framerate);
+if (vpp->framerate.num == 0 || vpp->framerate.den == 0)
+outlink->time_base = inlink->time_base;
+else
+outlink->time_base = av_inv_q(vpp->framerate);
 
 param.filter_frame  = NULL;
 param.set_frame_ext_params = vpp_set_frame_ext_params;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] lavfi/dnn: Add OpenVINO API 2.0 support

2023-08-15 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

OpenVINO API 2.0 was released in March 2022 and introduced new features.
This commit implements the current OpenVINO features with the new 2.0
APIs; the remaining API 2.0 features will be added later.
Please add the installation path that contains openvino.pc to
PKG_CONFIG_PATH manually so pkg-config finds the new OpenVINO libraries.
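
Every OpenVINO 2.0 status is routed through a small mapping table (part
of it is visible in the diff below); a plausible shape for the
ov2_map_error() helper used throughout the new code paths, assuming it
returns the mapped AVERROR and optionally a description:

    static int ov2_map_error(ov_status_e status, const char **desc)
    {
        for (size_t i = 0; i < FF_ARRAY_ELEMS(ov2_errors); i++) {
            if (ov2_errors[i].status == status) {
                if (desc)
                    *desc = ov2_errors[i].desc;
                return ov2_errors[i].av_err;
            }
        }
        if (desc)
            *desc = "unknown error";
        return AVERROR_UNKNOWN;
    }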

Signed-off-by: Ting Fu 
Signed-off-by: Wenbin Chen 
---
 configure  |   6 +-
 libavfilter/dnn/dnn_backend_openvino.c | 515 +++--
 2 files changed, 487 insertions(+), 34 deletions(-)

diff --git a/configure b/configure
index 99388e7664..90caa26107 100755
--- a/configure
+++ b/configure
@@ -2459,6 +2459,7 @@ HAVE_LIST="
 texi2html
 xmllint
 zlib_gzip
+openvino2
 "
 
 # options emitted with CONFIG_ prefix but not available on the command line
@@ -6767,8 +6768,9 @@ enabled libopenh264   && require_pkg_config 
libopenh264 openh264 wels/codec_
 enabled libopenjpeg   && { check_pkg_config libopenjpeg "libopenjp2 >= 
2.1.0" openjpeg.h opj_version ||
{ require_pkg_config libopenjpeg "libopenjp2 >= 
2.1.0" openjpeg.h opj_version -DOPJ_STATIC && add_cppflags -DOPJ_STATIC; } }
 enabled libopenmpt&& require_pkg_config libopenmpt "libopenmpt >= 
0.2.6557" libopenmpt/libopenmpt.h openmpt_module_create -lstdc++ && append 
libopenmpt_extralibs "-lstdc++"
-enabled libopenvino   && { check_pkg_config libopenvino openvino 
c_api/ie_c_api.h ie_c_api_version ||
-   require libopenvino c_api/ie_c_api.h 
ie_c_api_version -linference_engine_c_api; }
+enabled libopenvino   && { { check_pkg_config libopenvino openvino 
openvino/c/openvino.h ov_core_create && enable openvino2; } ||
+{ check_pkg_config libopenvino openvino 
c_api/ie_c_api.h ie_c_api_version ||
+  require libopenvino c_api/ie_c_api.h 
ie_c_api_version -linference_engine_c_api; } }
 enabled libopus   && {
 enabled libopus_decoder && {
 require_pkg_config libopus opus opus_multistream.h 
opus_multistream_decoder_create
diff --git a/libavfilter/dnn/dnn_backend_openvino.c 
b/libavfilter/dnn/dnn_backend_openvino.c
index 46cbe8270e..4922833b07 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -32,7 +32,11 @@
 #include "libavutil/detection_bbox.h"
 #include "../internal.h"
 #include "safe_queue.h"
+#if HAVE_OPENVINO2
+#include 
+#else
 #include 
+#endif
 #include "dnn_backend_common.h"
 
 typedef struct OVOptions{
@@ -51,9 +55,20 @@ typedef struct OVContext {
 typedef struct OVModel{
 OVContext ctx;
 DNNModel *model;
+#if HAVE_OPENVINO2
+ov_core_t *core;
+ov_model_t *ov_model;
+ov_compiled_model_t *compiled_model;
+ov_output_const_port_t* input_port;
+ov_preprocess_input_info_t* input_info;
+ov_output_const_port_t* output_port;
+ov_preprocess_output_info_t* output_info;
+ov_preprocess_prepostprocessor_t* preprocess;
+#else
 ie_core_t *core;
 ie_network_t *network;
 ie_executable_network_t *exe_network;
+#endif
 SafeQueue *request_queue;   // holds OVRequestItem
 Queue *task_queue;  // holds TaskItem
 Queue *lltask_queue; // holds LastLevelTaskItem
@@ -63,10 +78,15 @@ typedef struct OVModel{
 
 // one request for one call to openvino
 typedef struct OVRequestItem {
-ie_infer_request_t *infer_request;
 LastLevelTaskItem **lltasks;
 uint32_t lltask_count;
+#if HAVE_OPENVINO2
+ov_infer_request_t *infer_request;
+ov_callback_t callback;
+#else
 ie_complete_call_back_t callback;
+ie_infer_request_t *infer_request;
+#endif
 } OVRequestItem;
 
 #define APPEND_STRING(generated_string, iterate_string)
\
@@ -85,11 +105,61 @@ static const AVOption dnn_openvino_options[] = {
 
 AVFILTER_DEFINE_CLASS(dnn_openvino);
 
+#if HAVE_OPENVINO2
+static const struct {
+ov_status_e status;
+int av_err;
+const char *desc;
+} ov2_errors[] = {
+{ OK, 0,  "success"},
+{ GENERAL_ERROR,  AVERROR_EXTERNAL,   "general error"  },
+{ NOT_IMPLEMENTED,AVERROR(ENOSYS),"not implemented"},
+{ NETWORK_NOT_LOADED, AVERROR_EXTERNAL,   "network not loaded" },
+{ PARAMETER_MISMATCH, AVERROR(EINVAL),"parameter mismatch" },
+{ NOT_FOUND,  AVERROR_EXTERNAL,   "not found"  },
+{ OUT_OF_BOUNDS,  AVERROR(EOVERFLOW), "out of bounds"  },
+{ UNEXPECTED, AVERROR_EXTERNAL,   "unexpected" },
+{ REQUEST_BUSY,   AVERROR(EBUSY), "request busy"   },
+{ RESULT_NOT_READY,   AVERROR(EBUSY), "result not ready"   },
+{ NOT_ALLOCATED,  AVERROR(ENODATA),   "not allocated"  },
+{ INFER_NOT_STARTED,  AVERROR_EXTERNAL,   

[FFmpeg-devel] [PATCH] libavfilter/vf_dnn_detect: bbox index may be bigger than bbox number

2023-07-16 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Fix a bug where the queried bbox index could be bigger than the total
number of bboxes: detections below the confidence threshold are skipped,
so indexing by the raw loop counter can run past the allocated bboxes.
Query the bbox only after the confidence check.

Signed-off-by: Wenbin Chen 
---
 libavfilter/vf_dnn_detect.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 06efce02a6..6ef04e0958 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -106,12 +106,11 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, 
DNNData *output, AVFilterCont
 float x1 =  detections[i * detect_size + 5];
 float y1 =  detections[i * detect_size + 6];
 
-bbox = av_get_detection_bbox(header, i);
-
 if (conf < conf_threshold) {
 continue;
 }
 
+bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);
 bbox->x = (int)(x0 * frame->width);
 bbox->w = (int)(x1 * frame->width) - bbox->x;
 bbox->y = (int)(y0 * frame->height);
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] doc/encoders: Add av1 to qsv encoder's summary

2023-03-12 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index b02737b9df..d6dddc2bd5 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3188,8 +3188,8 @@ recommended value) and do not set a size constraint.
 
 @section QSV Encoders
 
-The family of Intel QuickSync Video encoders (MPEG-2, H.264, HEVC, JPEG/MJPEG
-and VP9)
+The family of Intel QuickSync Video encoders (MPEG-2, H.264, HEVC, JPEG/MJPEG,
+VP9, AV1)
 
 @subsection Ratecontrol Method
 The ratecontrol method is selected as follows:
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] doc/examples/qsv_transcode: Fix a bug when use more than one parameter set

2023-03-01 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Without advancing current_setting_number, the first matching parameter
set would be applied again and again and the later sets would never
take effect.

Signed-off-by: Wenbin Chen 
---
 doc/examples/qsv_transcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/examples/qsv_transcode.c b/doc/examples/qsv_transcode.c
index 7ea3ef5674..48128b200c 100644
--- a/doc/examples/qsv_transcode.c
+++ b/doc/examples/qsv_transcode.c
@@ -88,7 +88,7 @@ static int dynamic_set_parameter(AVCodecContext *avctx)
 if (current_setting_number < setting_number &&
 frame_number == dynamic_setting[current_setting_number].frame_number) {
 AVDictionaryEntry *e = NULL;
-ret = str_to_dict(dynamic_setting[current_setting_number].optstr, 
&opts);
+ret = str_to_dict(dynamic_setting[current_setting_number++].optstr, 
&opts);
 if (ret < 0) {
 fprintf(stderr, "The dynamic parameter is wrong\n");
 goto fail;
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Add dynamic setting support of low_delay_brc to av1_qsv

2023-03-01 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi   | 2 +-
 libavcodec/qsvenc.c | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index b02737b9df..e9b34010ed 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3344,7 +3344,7 @@ Supported in h264_qsv.
 Change these value to reset qsv codec's max/min qp configuration.
 
 @item @var{low_delay_brc}
-Supported in h264_qsv and hevc_qsv.
+Supported in h264_qsv, hevc_qsv and av1_qsv.
 Change this value to reset qsv codec's low_delay_brc configuration.
 
 @item @var{framerate}
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 3607859cb8..c975302b4f 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1127,6 +1127,7 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 } else if (avctx->codec_id == AV_CODEC_ID_AV1) {
 if (q->low_delay_brc >= 0)
 q->extco3.LowDelayBRC = q->low_delay_brc ? MFX_CODINGOPTION_ON 
: MFX_CODINGOPTION_OFF;
+q->old_low_delay_brc = q->low_delay_brc;
 }
 
 if (avctx->codec_id == AV_CODEC_ID_HEVC) {
@@ -2213,7 +2214,9 @@ static int update_low_delay_brc(AVCodecContext *avctx, 
QSVEncContext *q)
 {
 int updated = 0;
 
-if (avctx->codec_id != AV_CODEC_ID_H264 && avctx->codec_id != 
AV_CODEC_ID_HEVC)
+if (avctx->codec_id != AV_CODEC_ID_H264 &&
+avctx->codec_id != AV_CODEC_ID_HEVC &&
+avctx->codec_id != AV_CODEC_ID_AV1)
 return 0;
 
 UPDATE_PARAM(q->old_low_delay_brc, q->low_delay_brc);
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc: Flush cached frames before reset encoder

2023-02-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

According to 
https://github.com/Intel-Media-SDK/MediaSDK/blob/master/doc/mediasdk-man.md#configuration-change.
Before calling MFXVideoENCODE_Reset, the application needs to retrieve
any cached frames in the SDK encoder.
A loop is added before MFXVideoENCODE_Reset to retrieve cached frames
and add them to async_fifo, so that dynamic configuration works when
async_depth > 1.
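
A simplified sketch of that drain pattern (assuming a valid session and
an allocated bitstream buffer; sync/packet bookkeeping omitted):

    mfxStatus sts;
    do {
        mfxSyncPoint sync = NULL;
        /* A NULL input surface asks the encoder for a cached frame. */
        sts = MFXVideoENCODE_EncodeFrameAsync(q->session, NULL, NULL,
                                              &bs, &sync);
        if (sync)
            MFXVideoCORE_SyncOperation(q->session, sync, 60000);
    } while (sts != MFX_ERR_MORE_DATA); /* MORE_DATA: fully drained */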

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 122 
 1 file changed, 66 insertions(+), 56 deletions(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 2f0e94a914..3951f40e7b 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1600,7 +1600,7 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext 
*q)
 
 q->param.AsyncDepth = q->async_depth;
 
-q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 0);
+q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 
AV_FIFO_FLAG_AUTO_GROW);
 if (!q->async_fifo)
 return AVERROR(ENOMEM);
 
@@ -2296,58 +2296,6 @@ static int update_pic_timing_sei(AVCodecContext *avctx, 
QSVEncContext *q)
 return updated;
 }
 
-static int update_parameters(AVCodecContext *avctx, QSVEncContext *q,
- const AVFrame *frame)
-{
-int needReset = 0, ret = 0;
-
-if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG)
-return 0;
-
-needReset = update_qp(avctx, q);
-needReset |= update_max_frame_size(avctx, q);
-needReset |= update_gop_size(avctx, q);
-needReset |= update_rir(avctx, q);
-needReset |= update_low_delay_brc(avctx, q);
-needReset |= update_frame_rate(avctx, q);
-needReset |= update_bitrate(avctx, q);
-needReset |= update_pic_timing_sei(avctx, q);
-ret = update_min_max_qp(avctx, q);
-if (ret < 0)
-return ret;
-needReset |= ret;
-if (!needReset)
-return 0;
-
-if (avctx->hwaccel_context) {
-AVQSVContext *qsv = avctx->hwaccel_context;
-int i, j;
-q->param.ExtParam = q->extparam;
-for (i = 0; i < qsv->nb_ext_buffers; i++)
-q->param.ExtParam[i] = qsv->ext_buffers[i];
-q->param.NumExtParam = qsv->nb_ext_buffers;
-
-for (i = 0; i < q->nb_extparam_internal; i++) {
-for (j = 0; j < qsv->nb_ext_buffers; j++) {
-if (qsv->ext_buffers[j]->BufferId == 
q->extparam_internal[i]->BufferId)
-break;
-}
-if (j < qsv->nb_ext_buffers)
-continue;
-q->param.ExtParam[q->param.NumExtParam++] = 
q->extparam_internal[i];
-}
-} else {
-q->param.ExtParam= q->extparam_internal;
-q->param.NumExtParam = q->nb_extparam_internal;
-}
-av_log(avctx, AV_LOG_DEBUG, "Parameter change, call msdk reset.\n");
-ret = MFXVideoENCODE_Reset(q->session, &q->param);
-if (ret < 0)
-return ff_qsv_print_error(avctx, ret, "Error during resetting");
-
-return 0;
-}
-
 static int encode_frame(AVCodecContext *avctx, QSVEncContext *q,
 const AVFrame *frame)
 {
@@ -2438,7 +2386,7 @@ static int encode_frame(AVCodecContext *avctx, 
QSVEncContext *q,
 
 if (ret < 0) {
 ret = (ret == MFX_ERR_MORE_DATA) ?
-   0 : ff_qsv_print_error(avctx, ret, "Error during encoding");
+   AVERROR(EAGAIN) : ff_qsv_print_error(avctx, ret, "Error during 
encoding");
 goto free;
 }
 
@@ -2448,7 +2396,9 @@ static int encode_frame(AVCodecContext *avctx, 
QSVEncContext *q,
 ret = 0;
 
 if (*pkt.sync) {
-av_fifo_write(q->async_fifo, &pkt, 1);
+ret = av_fifo_write(q->async_fifo, &pkt, 1);
+if (ret < 0)
+goto free;
 } else {
 free:
 av_freep(&pkt.sync);
@@ -2466,6 +2416,66 @@ nomem:
 goto free;
 }
 
+static int update_parameters(AVCodecContext *avctx, QSVEncContext *q,
+ const AVFrame *frame)
+{
+int needReset = 0, ret = 0;
+
+if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG)
+return 0;
+
+needReset = update_qp(avctx, q);
+needReset |= update_max_frame_size(avctx, q);
+needReset |= update_gop_size(avctx, q);
+needReset |= update_rir(avctx, q);
+needReset |= update_low_delay_brc(avctx, q);
+needReset |= update_frame_rate(avctx, q);
+needReset |= update_bitrate(avctx, q);
+needReset |= update_pic_timing_sei(avctx, q);
+ret = update_min_max_qp(avctx, q);
+if (ret < 0)
+return ret;
+needReset |= ret;
+if (!needReset)
+return 0;
+
+if (avctx->hwaccel_context) {
+AVQSVContext *qsv = avctx->hwaccel_context;
+int i, j;
+q->param.ExtParam = q->extparam;
+for (i = 0; i < qsv->nb_ext_buffers; i++)
+q->param.ExtParam[i] = qsv->ext_buffers[i];
+q->param.NumExtParam = qsv->nb_ext_buffers;
+
+for (i = 0; i < q->nb_extparam_internal; i++) {
+ 

[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc: Do not pass RGB colorspace to VPL/MSDK

2023-02-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

When encoding an RGB frame, the Intel driver converts RGB to YUV, so we
cannot pass the RGB colorspace to VPL/MSDK.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 2f0e94a914..d3f7532fc0 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1185,7 +1185,12 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->extvsi.ColourDescriptionPresent = 1;
 q->extvsi.ColourPrimaries = avctx->color_primaries;
 q->extvsi.TransferCharacteristics = avctx->color_trc;
-q->extvsi.MatrixCoefficients = avctx->colorspace;
+if (avctx->colorspace == AVCOL_SPC_RGB)
+// RGB will be converted to YUV, so RGB colorspace is not supported
+q->extvsi.MatrixCoefficients = AVCOL_SPC_UNSPECIFIED;
+else
+q->extvsi.MatrixCoefficients = avctx->colorspace;
+
 }
 
 if ((avctx->codec_id != AV_CODEC_ID_VP9) && (q->extvsi.VideoFullRange || 
q->extvsi.ColourDescriptionPresent)) {
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Do not pass RGB colorspace to VPL/MSDK

2023-02-12 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

When encoding an RGB frame, the Intel driver converts RGB to YUV, so we
cannot pass the RGB colorspace to VPL/MSDK.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 2f0e94a914..944a76f4f1 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1185,7 +1185,12 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->extvsi.ColourDescriptionPresent = 1;
 q->extvsi.ColourPrimaries = avctx->color_primaries;
 q->extvsi.TransferCharacteristics = avctx->color_trc;
-q->extvsi.MatrixCoefficients = avctx->colorspace;
+if (avctx->colorspace == AVCOL_SPC_RGB)
+// RGB will be converted to YUV, so RGB colorspace is not supported
+q->extvsi.MatrixCoefficients = AVCOL_SPC_UNSPECIFIED;
+else
+q->extvsi.MatrixCoefficients = avctx->colorspace;
+
 }
 
 if ((avctx->codec_id != AV_CODEC_ID_VP9) && (q->extvsi.VideoFullRange || 
q->extvsi.ColourDescriptionPresent)) {
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Flush cached frames before reset encoder

2023-02-12 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

According to 
https://github.com/Intel-Media-SDK/MediaSDK/blob/master/doc/mediasdk-man.md#configuration-change.
Before calling MFXVideoENCODE_Reset, the application needs to retrieve
any cached frames in the SDK encoder.
A loop is added before MFXVideoENCODE_Reset to retrieve cached frames
and add them to async_fifo, so that dynamic configuration works when
async_depth > 1.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 118 +++-
 1 file changed, 63 insertions(+), 55 deletions(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 2f0e94a914..f3b488dec8 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1600,7 +1600,7 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext 
*q)
 
 q->param.AsyncDepth = q->async_depth;
 
-q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 0);
+q->async_fifo = av_fifo_alloc2(q->async_depth, sizeof(QSVPacket), 
AV_FIFO_FLAG_AUTO_GROW);
 if (!q->async_fifo)
 return AVERROR(ENOMEM);
 
@@ -2296,58 +2296,6 @@ static int update_pic_timing_sei(AVCodecContext *avctx, 
QSVEncContext *q)
 return updated;
 }
 
-static int update_parameters(AVCodecContext *avctx, QSVEncContext *q,
- const AVFrame *frame)
-{
-int needReset = 0, ret = 0;
-
-if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG)
-return 0;
-
-needReset = update_qp(avctx, q);
-needReset |= update_max_frame_size(avctx, q);
-needReset |= update_gop_size(avctx, q);
-needReset |= update_rir(avctx, q);
-needReset |= update_low_delay_brc(avctx, q);
-needReset |= update_frame_rate(avctx, q);
-needReset |= update_bitrate(avctx, q);
-needReset |= update_pic_timing_sei(avctx, q);
-ret = update_min_max_qp(avctx, q);
-if (ret < 0)
-return ret;
-needReset |= ret;
-if (!needReset)
-return 0;
-
-if (avctx->hwaccel_context) {
-AVQSVContext *qsv = avctx->hwaccel_context;
-int i, j;
-q->param.ExtParam = q->extparam;
-for (i = 0; i < qsv->nb_ext_buffers; i++)
-q->param.ExtParam[i] = qsv->ext_buffers[i];
-q->param.NumExtParam = qsv->nb_ext_buffers;
-
-for (i = 0; i < q->nb_extparam_internal; i++) {
-for (j = 0; j < qsv->nb_ext_buffers; j++) {
-if (qsv->ext_buffers[j]->BufferId == 
q->extparam_internal[i]->BufferId)
-break;
-}
-if (j < qsv->nb_ext_buffers)
-continue;
-q->param.ExtParam[q->param.NumExtParam++] = 
q->extparam_internal[i];
-}
-} else {
-q->param.ExtParam= q->extparam_internal;
-q->param.NumExtParam = q->nb_extparam_internal;
-}
-av_log(avctx, AV_LOG_DEBUG, "Parameter change, call msdk reset.\n");
-ret = MFXVideoENCODE_Reset(q->session, &q->param);
-if (ret < 0)
-return ff_qsv_print_error(avctx, ret, "Error during resetting");
-
-return 0;
-}
-
 static int encode_frame(AVCodecContext *avctx, QSVEncContext *q,
 const AVFrame *frame)
 {
@@ -2438,7 +2386,7 @@ static int encode_frame(AVCodecContext *avctx, 
QSVEncContext *q,
 
 if (ret < 0) {
 ret = (ret == MFX_ERR_MORE_DATA) ?
-   0 : ff_qsv_print_error(avctx, ret, "Error during encoding");
+   AVERROR(EAGAIN) : ff_qsv_print_error(avctx, ret, "Error during 
encoding");
 goto free;
 }
 
@@ -2466,6 +2414,66 @@ nomem:
 goto free;
 }
 
+static int update_parameters(AVCodecContext *avctx, QSVEncContext *q,
+ const AVFrame *frame)
+{
+int needReset = 0, ret = 0;
+
+if (!frame || avctx->codec_id == AV_CODEC_ID_MJPEG)
+return 0;
+
+needReset = update_qp(avctx, q);
+needReset |= update_max_frame_size(avctx, q);
+needReset |= update_gop_size(avctx, q);
+needReset |= update_rir(avctx, q);
+needReset |= update_low_delay_brc(avctx, q);
+needReset |= update_frame_rate(avctx, q);
+needReset |= update_bitrate(avctx, q);
+needReset |= update_pic_timing_sei(avctx, q);
+ret = update_min_max_qp(avctx, q);
+if (ret < 0)
+return ret;
+needReset |= ret;
+if (!needReset)
+return 0;
+
+if (avctx->hwaccel_context) {
+AVQSVContext *qsv = avctx->hwaccel_context;
+int i, j;
+q->param.ExtParam = q->extparam;
+for (i = 0; i < qsv->nb_ext_buffers; i++)
+q->param.ExtParam[i] = qsv->ext_buffers[i];
+q->param.NumExtParam = qsv->nb_ext_buffers;
+
+for (i = 0; i < q->nb_extparam_internal; i++) {
+for (j = 0; j < qsv->nb_ext_buffers; j++) {
+if (qsv->ext_buffers[j]->BufferId == 
q->extparam_internal[i]->BufferId)
+break;
+}
+if (j < qsv->nb_ext_buffers)
+continue;
+q->param.ExtParam[q->param.NumExtParam++] = 
q->extparam_in

[FFmpeg-devel] [PATCH] libavfilter/qsvvpp: check the return value

2023-02-08 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 libavfilter/qsvvpp.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index e181e7b584..54e7284234 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -441,7 +441,10 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 return NULL;
 }
 
-av_frame_copy_props(qsv_frame->frame, picref);
+if (av_frame_copy_props(qsv_frame->frame, picref) < 0) {
+av_frame_free(&qsv_frame->frame);
+return NULL;
+}
 } else
 qsv_frame->frame = av_frame_clone(picref);
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Enable 444 encoding for RGB input

2023-01-02 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

MSDK/VPL uses the 420 chroma format by default to encode RGB, which is
not proper usage. Now 444 encoding is enabled for RGB input by default:
RGB is encoded using the 444 chroma format when the user doesn't specify
a profile or uses the rext profile; otherwise, 420 is used.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 514a1e8148..150fc9c729 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1088,6 +1088,10 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->extco3.MaxFrameSizeI = q->max_frame_size_i;
 if (q->max_frame_size_p >= 0)
 q->extco3.MaxFrameSizeP = q->max_frame_size_p;
+if (sw_format == AV_PIX_FMT_BGRA &&
+(q->profile == MFX_PROFILE_HEVC_REXT ||
+q->profile == MFX_PROFILE_UNKNOWN))
+q->extco3.TargetChromaFormatPlus1 = MFX_CHROMAFORMAT_YUV444 + 
1;
 
 q->extco3.ScenarioInfo = q->scenario;
 }
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] libavcodec/qsvenc_av1: Add max_frame_size support to av1_qsv encoder

2022-12-25 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi   | 5 +
 libavcodec/qsvenc.c | 3 +++
 libavcodec/qsvenc_av1.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 543b5e26a9..727f12a59d 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3855,6 +3855,11 @@ Depth of look ahead in number frames, available when 
extbrc option is enabled.
+Setting this flag turns on or off the LowDelayBRC feature in the qsv
+plugin, which provides more accurate bitrate control to minimize the
+variance of bitstream size frame by frame. Value: -1-default 0-off 1-on
+
+@item max_frame_size
+Set the allowed max size in bytes for each frame. If the frame size exceeds
+the limitation, the encoder will adjust the QP value to control the frame size.
+Invalid in CQP rate control mode.
 @end table
 
 @section snow
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index f5c6a164bb..93f1862a4b 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -538,6 +538,7 @@ static void dump_video_av1_param(AVCodecContext *avctx, 
QSVEncContext *q,
 av_log(avctx, AV_LOG_VERBOSE, "WriteIVFHeaders: %s \n",
print_threestate(av1_bs_param->WriteIVFHeaders));
 av_log(avctx, AV_LOG_VERBOSE, "LowDelayBRC: %s\n", 
print_threestate(co3->LowDelayBRC));
+av_log(avctx, AV_LOG_VERBOSE, "MaxFrameSize: %d;\n", co2->MaxFrameSize);
 }
 #endif
 
@@ -1034,6 +1035,8 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->extco2.AdaptiveI = q->adaptive_i ? MFX_CODINGOPTION_ON : 
MFX_CODINGOPTION_OFF;
 if (q->adaptive_b >= 0)
 q->extco2.AdaptiveB = q->adaptive_b ? MFX_CODINGOPTION_ON : 
MFX_CODINGOPTION_OFF;
+if (q->max_frame_size >= 0)
+q->extco2.MaxFrameSize = q->max_frame_size;
 
 q->extco2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2;
 q->extco2.Header.BufferSz = sizeof(q->extco2);
diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c
index 1e7801fefe..c697845d7b 100644
--- a/libavcodec/qsvenc_av1.c
+++ b/libavcodec/qsvenc_av1.c
@@ -111,6 +111,7 @@ static const AVOption options[] = {
 QSV_OPTION_ADAPTIVE_B
 QSV_OPTION_EXTBRC
 QSV_OPTION_LOW_DELAY_BRC
+QSV_OPTION_MAX_FRAME_SIZE
 { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = 
MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
 { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN 
 }, INT_MIN, INT_MAX, VE, "profile" },
 { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
MFX_PROFILE_AV1_MAIN }, INT_MIN, INT_MAX, VE, "profile" },
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/2] libavcodec/qsvenc_av1: Add low_delay_brc support to av1_qsv encoder

2022-12-25 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi   | 5 +
 libavcodec/qsvenc.c | 4 
 libavcodec/qsvenc_av1.c | 1 +
 3 files changed, 10 insertions(+)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index b8051cda3f..543b5e26a9 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3850,6 +3850,11 @@ Extended bitrate control.
 
 @item @var{look_ahead_depth}
 Depth of look ahead in number frames, available when extbrc option is enabled.
+
+@item @var{low_delay_brc}
+Setting this flag turns on or off the LowDelayBRC feature in the qsv
+plugin, which provides more accurate bitrate control to minimize the
+variance of bitstream size frame by frame. Value: -1-default 0-off 1-on
 @end table
 
 @section snow
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 514a1e8148..f5c6a164bb 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -537,6 +537,7 @@ static void dump_video_av1_param(AVCodecContext *avctx, 
QSVEncContext *q,
 
 av_log(avctx, AV_LOG_VERBOSE, "WriteIVFHeaders: %s \n",
print_threestate(av1_bs_param->WriteIVFHeaders));
+av_log(avctx, AV_LOG_VERBOSE, "LowDelayBRC: %s\n", 
print_threestate(co3->LowDelayBRC));
 }
 #endif
 
@@ -1090,6 +1091,9 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->extco3.MaxFrameSizeP = q->max_frame_size_p;
 
 q->extco3.ScenarioInfo = q->scenario;
+} else if (avctx->codec_id == AV_CODEC_ID_AV1) {
+if (q->low_delay_brc >= 0)
+q->extco3.LowDelayBRC = q->low_delay_brc ? MFX_CODINGOPTION_ON 
: MFX_CODINGOPTION_OFF;
 }
 
 if (avctx->codec_id == AV_CODEC_ID_HEVC) {
diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c
index bb9ad16927..1e7801fefe 100644
--- a/libavcodec/qsvenc_av1.c
+++ b/libavcodec/qsvenc_av1.c
@@ -110,6 +110,7 @@ static const AVOption options[] = {
 QSV_OPTION_ADAPTIVE_I
 QSV_OPTION_ADAPTIVE_B
 QSV_OPTION_EXTBRC
+QSV_OPTION_LOW_DELAY_BRC
 { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = 
MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
 { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN 
 }, INT_MIN, INT_MAX, VE, "profile" },
 { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 
MFX_PROFILE_AV1_MAIN }, INT_MIN, INT_MAX, VE, "profile" },
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v3] libavfilter/qsvvpp: Change the alignment to meet the requirement of YUV420P format

2022-12-04 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

When processing yuv420 frames, FFmpeg uses the same alignment on the
Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
planes' pitch, which makes the U/V planes 16-byte aligned. We need to
set a separate alignment to meet the runtime's behaviour.

Now the alignment is changed to 16 so that the linesizes of the U/V
planes meet the requirement of VPL/MSDK. A get_buffer.video callback is
added to the qsv filters to change the default get_buffer behaviour.

Now the commandline works fine:
ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \
-i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \
-pix_fmt yuv420p 2466_1508.yuv
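
A worked example with the width above (a sketch of the arithmetic,
assuming 1 byte per luma sample):

    Y pitch:            FFALIGN(3082, 32) = 3104
    runtime U/V pitch:  3104 / 2 = 1552   (16-byte aligned, not 32)
    default alignment:  FFALIGN(1552, 32) = 1568  -> pitch mismatch

Lowering the per-plane alignment to 16 makes FFmpeg allocate the U/V
planes with linesize 1552, exactly what VPL/MSDK assumes.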

Signed-off-by: Wenbin Chen 
---
 libavfilter/qsvvpp.c | 13 +
 libavfilter/qsvvpp.h |  1 +
 libavfilter/vf_deinterlace_qsv.c |  1 +
 libavfilter/vf_overlay_qsv.c |  2 ++
 libavfilter/vf_scale_qsv.c   |  1 +
 libavfilter/vf_vpp_qsv.c |  1 +
 6 files changed, 19 insertions(+)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index 8428ee89ab..d5cfeab402 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -1003,3 +1003,16 @@ int ff_qsvvpp_create_mfx_session(void *ctx,
 }
 
 #endif
+
+AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+/* When processing YUV420 frames, FFmpeg uses the same alignment on the
+ * Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
+ * planes' pitch, which makes the U/V planes 16-byte aligned. We need to
+ * set a separate alignment to meet the runtime's behaviour.
+ */
+return ff_default_get_video_buffer2(inlink,
+FFALIGN(inlink->w, 32),
+FFALIGN(inlink->h, 32),
+16);
+}
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index a8cfcc565a..6f7c9bfc15 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -127,4 +127,5 @@ int ff_qsvvpp_print_warning(void *log_ctx, mfxStatus err,
 int ff_qsvvpp_create_mfx_session(void *ctx, void *loader, mfxIMPL 
implementation,
  mfxVersion *pver, mfxSession *psession);
 
+AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h);
 #endif /* AVFILTER_QSVVPP_H */
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 98ed7283ad..6c94923f02 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -581,6 +581,7 @@ static const AVFilterPad qsvdeint_inputs[] = {
 .name = "default",
 .type = AVMEDIA_TYPE_VIDEO,
 .filter_frame = qsvdeint_filter_frame,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index d947a1faa1..1a2c1b1e96 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -399,11 +399,13 @@ static const AVFilterPad overlay_qsv_inputs[] = {
 .name  = "main",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_main_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 {
 .name  = "overlay",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_overlay_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_scale_qsv.c b/libavfilter/vf_scale_qsv.c
index 758e730f78..36d5f3a6ec 100644
--- a/libavfilter/vf_scale_qsv.c
+++ b/libavfilter/vf_scale_qsv.c
@@ -641,6 +641,7 @@ static const AVFilterPad qsvscale_inputs[] = {
 .name = "default",
 .type = AVMEDIA_TYPE_VIDEO,
 .filter_frame = qsvscale_filter_frame,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 4a053f9145..b26d19c3bc 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -634,6 +634,7 @@ static const AVFilterPad vpp_inputs[] = {
 .name  = "default",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] libavfilter/qsvvpp: Use different alignment for YUV420P format

2022-11-30 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

When processing yuv420 frames, FFmpeg uses the same alignment on the
Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
planes' pitch, which makes the U/V planes 16-byte aligned. We need to
set a separate alignment to meet the runtime's behaviour.

Now the alignment is changed to 16 so that the linesizes of the U/V
planes meet the requirement of VPL/MSDK. A get_buffer.video callback is
added to the qsv filters to change the default get_buffer behaviour.

Now the commandline works fine:
ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \
-i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \
-pix_fmt yuv420p 2466_1508.yuv

Signed-off-by: Wenbin Chen 
---
 libavfilter/qsvvpp.c | 13 +
 libavfilter/qsvvpp.h |  1 +
 libavfilter/vf_deinterlace_qsv.c |  1 +
 libavfilter/vf_overlay_qsv.c |  2 ++
 libavfilter/vf_scale_qsv.c   |  1 +
 libavfilter/vf_vpp_qsv.c |  1 +
 6 files changed, 19 insertions(+)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index 8428ee89ab..d5cfeab402 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -1003,3 +1003,16 @@ int ff_qsvvpp_create_mfx_session(void *ctx,
 }
 
 #endif
+
+AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+/* When processing YUV420 frames, FFmpeg uses the same alignment on the
+ * Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
+ * planes' pitch, which makes the U/V planes 16-byte aligned. We need to
+ * set a separate alignment to meet the runtime's behaviour.
+ */
+return ff_default_get_video_buffer2(inlink,
+FFALIGN(inlink->w, 32),
+FFALIGN(inlink->h, 32),
+16);
+}
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index a8cfcc565a..6f7c9bfc15 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -127,4 +127,5 @@ int ff_qsvvpp_print_warning(void *log_ctx, mfxStatus err,
 int ff_qsvvpp_create_mfx_session(void *ctx, void *loader, mfxIMPL 
implementation,
  mfxVersion *pver, mfxSession *psession);
 
+AVFrame *ff_qsvvpp_get_video_buffer(AVFilterLink *inlink, int w, int h);
 #endif /* AVFILTER_QSVVPP_H */
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index 98ed7283ad..6c94923f02 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -581,6 +581,7 @@ static const AVFilterPad qsvdeint_inputs[] = {
 .name = "default",
 .type = AVMEDIA_TYPE_VIDEO,
 .filter_frame = qsvdeint_filter_frame,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index d947a1faa1..1a2c1b1e96 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -399,11 +399,13 @@ static const AVFilterPad overlay_qsv_inputs[] = {
 .name  = "main",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_main_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 {
 .name  = "overlay",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_overlay_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_scale_qsv.c b/libavfilter/vf_scale_qsv.c
index 758e730f78..36d5f3a6ec 100644
--- a/libavfilter/vf_scale_qsv.c
+++ b/libavfilter/vf_scale_qsv.c
@@ -641,6 +641,7 @@ static const AVFilterPad qsvscale_inputs[] = {
 .name = "default",
 .type = AVMEDIA_TYPE_VIDEO,
 .filter_frame = qsvscale_filter_frame,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 4a053f9145..b26d19c3bc 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -634,6 +634,7 @@ static const AVFilterPad vpp_inputs[] = {
 .name  = "default",
 .type  = AVMEDIA_TYPE_VIDEO,
 .config_props  = config_input,
+.get_buffer.video = ff_qsvvpp_get_video_buffer,
 },
 };
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavfilter/qsvvpp: Use different alignment for YUV420P format

2022-11-29 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

When processing yuv420 frames, FFmpeg uses the same alignment on the
Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
planes' pitch, which makes the U/V planes 16-byte aligned. We need to
set a separate alignment to meet the runtime's behaviour.

Now the commandline works fine:
ffmpeg -f rawvideo -pix_fmt yuv420p -s:v 3082x1884 \
-i ./3082x1884.yuv -vf 'vpp_qsv=w=2466:h=1508' -f rawvideo \
-pix_fmt yuv420p 2466_1508.yuv

Signed-off-by: Wenbin Chen 
---
 libavfilter/qsvvpp.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index 8428ee89ab..ad09114cb7 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -408,9 +408,15 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 } else {
 /* make a copy if the input is not padded as libmfx requires */
 if (picref->height & 31 || picref->linesize[0] & 31) {
-qsv_frame->frame = ff_get_video_buffer(inlink,
-   FFALIGN(inlink->w, 32),
-   FFALIGN(inlink->h, 32));
+/* When processing YUV420 frames, FFmpeg uses the same alignment on the
+ * Y/U/V planes. VPL and MSDK use the Y plane's pitch / 2 as the U/V
+ * planes' pitch, which makes the U/V planes 16-byte aligned. We need to
+ * set a separate alignment to meet the runtime's behaviour.
+ */
+qsv_frame->frame = ff_default_get_video_buffer2(inlink,
+FFALIGN(inlink->w, 32),
+FFALIGN(inlink->h, 32),
+16);
 if (!qsv_frame->frame)
 return NULL;
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] libavcodec/qsvenc.c: Enable MFX_GOP_STRICT when adaptive gop is disabled

2022-11-23 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

adaptive_i and adaptive_b cannot work with MFX_GOP_STRICT,
so only enable MFX_GOP_STRICT when these features are disabled.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index d5e9f2d420..514a1e8148 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -644,6 +644,12 @@ static int check_enc_param(AVCodecContext *avctx, 
QSVEncContext *q)
 return 1;
 }
 
+static int is_strict_gop(QSVEncContext *q) {
+if (q->adaptive_b == 0 && q->adaptive_i == 0)
+return 1;
+return 0;
+}
+
 static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
 {
 enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
@@ -755,7 +761,8 @@ static int init_video_param(AVCodecContext *avctx, 
QSVEncContext *q)
 q->old_gop_size = avctx->gop_size;
 q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1;
 q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ?
-  MFX_GOP_CLOSED : MFX_GOP_STRICT;
+  MFX_GOP_CLOSED : is_strict_gop(q) ?
+  MFX_GOP_STRICT : 0;
 q->param.mfx.IdrInterval= q->idr_interval;
 q->param.mfx.NumSlice   = avctx->slices;
 q->param.mfx.NumRefFrame= FFMAX(0, avctx->refs);
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] libavcodec/qsvenc.c: Disable MFX_GOP_STRICT when encoding adaptive gop

2022-11-15 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

b_strategy, p_strategy, adaptive_i and adaptive_b cannot work with
MFX_GOP_STRICT, so disable MFX_GOP_STRICT when these features are used.

Signed-off-by: Wenbin Chen 
---
 libavcodec/qsvenc.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index d5e9f2d420..6777a6fb5f 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -644,6 +644,13 @@ static int check_enc_param(AVCodecContext *avctx, QSVEncContext *q)
     return 1;
 }
 
+static int is_adaptive_gop(QSVEncContext *q) {
+    if (q->adaptive_b > 0 || q->adaptive_i > 0 ||
+        q->b_strategy > 0 || q->p_strategy > 0)
+        return 1;
+    return 0;
+}
+
 static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
 {
 enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
@@ -755,7 +762,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     q->old_gop_size = avctx->gop_size;
     q->param.mfx.GopRefDist = FFMAX(-1, avctx->max_b_frames) + 1;
     q->param.mfx.GopOptFlag = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ?
-                              MFX_GOP_CLOSED : MFX_GOP_STRICT;
+                              MFX_GOP_CLOSED : is_adaptive_gop(q) ?
+                              0 : MFX_GOP_STRICT;
     q->param.mfx.IdrInterval= q->idr_interval;
     q->param.mfx.NumSlice   = avctx->slices;
     q->param.mfx.NumRefFrame= FFMAX(0, avctx->refs);
-- 
2.34.1



[FFmpeg-devel] [PATCH] libavcodec/qsvenc: Add skip_frame support to qsvenc

2022-11-02 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add skip_frame support to qsvenc. Use the per-frame metadata
"qsv_skip_frame" to control it. The skip_frame option defines the
behavior of qsv_skip_frame.
no_skip: Frame skipping is disabled.
insert_dummy: The encoder inserts into the bitstream a frame where all
macroblocks are encoded as skipped.
insert_nothing: Similar to insert_dummy, but the encoder inserts nothing.
The skipped frames are still used by bitrate control. For example, the
GOP still includes the skipped frames, and the frames after the skipped
frames will be larger in size.
brc_only: The skip_frame metadata indicates the number of missed frames
before the current frame.
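
For callers driving the encoder through the C API, attaching the metadata is
a one-liner; a minimal sketch (enc_ctx and frame are assumed names for an
opened qsv encoder context and the frame about to be sent, not identifiers
from this patch):

    /* Request that this frame be skipped; how the value is interpreted
     * depends on the skip_frame mode chosen at init time. */
    av_dict_set(&frame->metadata, "qsv_skip_frame", "1", 0);
    int ret = avcodec_send_frame(enc_ctx, frame);

On the command line the mode itself is selected with the new option, e.g.
"-skip_frame insert_dummy" on h264_qsv or hevc_qsv.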

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi| 36 
 libavcodec/qsvenc.c  | 36 
 libavcodec/qsvenc.h  | 13 +
 libavcodec/qsvenc_h264.c |  1 +
 libavcodec/qsvenc_hevc.c |  1 +
 5 files changed, 87 insertions(+)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 53dd02fd28..59f39d18f6 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3564,6 +3564,24 @@ bitrate, @var{target_bitrate}, within the accuracy range @var{avbr_accuracy},
 after a @var{avbr_Convergence} period. This method does not follow HRD and the
 instant bitrate is not capped or padded.
 
+@item @var{skip_frame}
+Use per-frame metadata "qsv_skip_frame" to skip frames when encoding. This
+option defines the usage of this metadata.
+@table @samp
+@item no_skip
+Frame skipping is disabled.
+@item insert_dummy
+The encoder inserts into the bitstream a frame where all macroblocks are
+encoded as skipped.
+@item insert_nothing
+Similar to insert_dummy, but the encoder inserts nothing into the bitstream.
+The skipped frames are still used by bitrate control. For example, the GOP
+still includes the skipped frames, and the frames after the skipped frames
+will be larger in size.
+@item brc_only
+The skip_frame metadata indicates the number of missed frames before the
+current frame.
+@end table
+
 @end table
 
 @subsection HEVC Options
@@ -3742,6 +3760,24 @@ bitrate, @var{target_bitrate}, within the accuracy range @var{avbr_accuracy},
 after a @var{avbr_Convergence} period. This method does not follow HRD and the
 instant bitrate is not capped or padded.
 
+@item @var{skip_frame}
+Use per-frame metadata "qsv_skip_frame" to skip frames when encoding. This
+option defines the usage of this metadata.
+@table @samp
+@item no_skip
+Frame skipping is disabled.
+@item insert_dummy
+The encoder inserts into the bitstream a frame where all macroblocks are
+encoded as skipped.
+@item insert_nothing
+Similar to insert_dummy, but the encoder inserts nothing into the bitstream.
+The skipped frames are still used by bitrate control. For example, the GOP
+still includes the skipped frames, and the frames after the skipped frames
+will be larger in size.
+@item brc_only
+The skip_frame metadata indicates the number of missed frames before the
+current frame.
+@end table
+
 @end table
 
 @subsection MPEG2 Options
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 0db774ea63..4bfa65c575 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -329,6 +329,22 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
                "MinQPI: %"PRIu8"; MaxQPI: %"PRIu8"; MinQPP: %"PRIu8"; MaxQPP: %"PRIu8"; MinQPB: %"PRIu8"; MaxQPB: %"PRIu8"\n",
                co2->MinQPI, co2->MaxQPI, co2->MinQPP, co2->MaxQPP, co2->MinQPB, co2->MaxQPB);
         av_log(avctx, AV_LOG_VERBOSE, "DisableDeblockingIdc: %"PRIu32" \n", co2->DisableDeblockingIdc);
+
+        switch (co2->SkipFrame) {
+        case MFX_SKIPFRAME_NO_SKIP:
+            av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: no_skip\n");
+            break;
+        case MFX_SKIPFRAME_INSERT_DUMMY:
+            av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: insert_dummy\n");
+            break;
+        case MFX_SKIPFRAME_INSERT_NOTHING:
+            av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: insert_nothing\n");
+            break;
+        case MFX_SKIPFRAME_BRC_ONLY:
+            av_log(avctx, AV_LOG_VERBOSE, "SkipFrame: brc_only\n");
+            break;
+        default: break;
+        }
     }
 
     if (co3) {
@@ -991,6 +1007,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     q->old_max_qp_b = q->max_qp_b;
     if (q->mbbrc >= 0)
         q->extco2.MBBRC = q->mbbrc ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+    if (q->skip_frame >= 0)
+        q->extco2.SkipFrame = q->skip_frame;
 
     q->extco2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2;
     q->extco2.Header.BufferSz = sizeof(q->extco2);
@@ -1911,6 +1929,19 @@ static int set_roi_encode_ctrl(AVCodecContext *avctx, const AVFrame *frame,
     return 0;
 }
 
+static void set_skip_frame_encode_ctrl(AVCodecContext *avctx, const AVFrame *frame,
+                                       mfxEncodeCtrl *enc_ctrl)
+{
+    AVDictionaryEntry* skip_frame_dict = NULL;
+    if (!frame->metadata)
+        return;
+    skip_frame_dict = av_dict_get(frame->metadata, "qsv_skip_frame", NULL, 0);
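
The archived message is cut off here. A plausible completion of the helper,
assuming the metadata value is a decimal count written straight into the
mfxEncodeCtrl SkipFrame field (a hypothetical reconstruction, not the
committed code):

+    if (!skip_frame_dict)
+        return;
+    /* hypothetical: forward the parsed per-frame value to libmfx */
+    enc_ctrl->SkipFrame = (mfxU16)strtol(skip_frame_dict->value, NULL, 10);
+}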

[FFmpeg-devel] [PATCH] doc/examples: Add qsv_transcode example

2022-10-31 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add qsv_transcode example which shows how to use qsv to do hardware
accelerated transcoding, also show how to dynamically set encoding
parameters.

examples:
Normal usage:
qsv_transcode input.mp4 h264_qsv output.mp4 "g 60"

Dynamic setting usage:
qsv_transcode input.mp4 hevc_qsv output.mp4 "g 60 async_depth 1"
100 "g 120"
This command initializes the codec with gop_size 60 and changes it to
120 after 100 frames.

Signed-off-by: Wenbin Chen 
---
 configure|   2 +
 doc/examples/.gitignore  |   1 +
 doc/examples/Makefile|   1 +
 doc/examples/qsv_transcode.c | 440 +++
 4 files changed, 444 insertions(+)
 create mode 100644 doc/examples/qsv_transcode.c

diff --git a/configure b/configure
index 70c9e41dcc..a8b4496465 100755
--- a/configure
+++ b/configure
@@ -1748,6 +1748,7 @@ EXAMPLE_LIST="
     transcoding_example
     vaapi_encode_example
     vaapi_transcode_example
+    qsv_transcode_example
 "
 
 EXTERNAL_AUTODETECT_LIBRARY_LIST="
@@ -3811,6 +3812,7 @@ transcode_aac_example_deps="avcodec avformat swresample"
 transcoding_example_deps="avfilter avcodec avformat avutil"
 vaapi_encode_example_deps="avcodec avutil h264_vaapi_encoder"
 vaapi_transcode_example_deps="avcodec avformat avutil h264_vaapi_encoder"
+qsv_transcode_example_deps="avcodec avformat avutil h264_qsv_encoder"
 
 # EXTRALIBS_LIST
 cpu_init_extralibs="pthreads_extralibs"
diff --git a/doc/examples/.gitignore b/doc/examples/.gitignore
index 44960e1de7..d787afdd4c 100644
--- a/doc/examples/.gitignore
+++ b/doc/examples/.gitignore
@@ -22,3 +22,4 @@
 /transcoding
 /vaapi_encode
 /vaapi_transcode
+/qsv_transcode
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 81bfd34d5d..f937fbefda 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -21,6 +21,7 @@ EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE) += transcode_aac
 EXAMPLES-$(CONFIG_TRANSCODING_EXAMPLE)   += transcoding
 EXAMPLES-$(CONFIG_VAAPI_ENCODE_EXAMPLE)  += vaapi_encode
 EXAMPLES-$(CONFIG_VAAPI_TRANSCODE_EXAMPLE)   += vaapi_transcode
+EXAMPLES-$(CONFIG_QSV_TRANSCODE_EXAMPLE) += qsv_transcode
 
 EXAMPLES   := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)$(EXESUF))
 EXAMPLES_G := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)_g$(EXESUF))
diff --git a/doc/examples/qsv_transcode.c b/doc/examples/qsv_transcode.c
new file mode 100644
index 00..9b37bbea9f
--- /dev/null
+++ b/doc/examples/qsv_transcode.c
@@ -0,0 +1,440 @@
+/*
+ * Quick Sync Video (video transcoding) transcode sample
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Intel QSV-accelerated transcoding example.
+ *
+ * @example qsv_transcode.c
+ * This example shows how to do QSV-accelerated transcoding and how to
+ * dynamically change encoder's option.
+ * Usage: qsv_transcode input_stream codec output_stream initial option
+ *  { frame_number new_option }
+ * e.g: - qsv_transcode input.mp4 h264_qsv output_h264.mp4 "g 60"
+ *  - qsv_transcode input.mp4 hevc_qsv output_hevc.mp4 "g 60 async_depth 1"
+ *  100 "g 120"
+ * (initialize codec with gop_size 60 and change it to 120 after 100
+ *  frames)
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/opt.h>
+
+static AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
+static AVBufferRef *hw_device_ctx = NULL;
+static AVCodecContext *decoder_ctx = NULL, *encoder_ctx = NULL;
+static int video_stream = -1;
+
+typedef struct DynamicSetting {
+    int frame_number;
+    char* optstr;
+} DynamicSetting;
+static DynamicSetting *dynamic_setting;
+static int setting_number;
+static int current_setting_number;
+
+static int str_to_dict(char* optstr, AVDictionary **opt)
+{
+    char *key, *value;
+    if (strlen(optstr) == 0)
+        return 0;
+    key = strtok(optstr, " ");
+    if (key == NULL)
+        return AVERROR(ENAVAIL);
+    value = strtok(NULL, " ");
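
The archive truncates the example at this point. Assuming str_to_dict simply
keeps consuming space-separated key/value pairs until the option string runs
out, a plausible continuation (hypothetical reconstruction, not the committed
file) is:

+    if (value == NULL)
+        return AVERROR(ENAVAIL);
+    av_dict_set(opt, key, value, 0);
+    /* keep reading "key value" pairs until the string is exhausted */
+    do {
+        key = strtok(NULL, " ");
+        if (key) {
+            value = strtok(NULL, " ");
+            if (value == NULL)
+                return AVERROR(ENAVAIL);
+            av_dict_set(opt, key, value, 0);
+        }
+    } while (key);
+    return 0;
+}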

[FFmpeg-devel] [PATCH 2/2] doc/encoders: Add doc for av1_qsv

2022-10-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

Add doc for av1_qsv.
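
A usage sketch exercising the documented tile options (assuming a libvpl
build with AV1-capable hardware; file names are placeholders):

    ffmpeg -i input.mp4 -c:v av1_qsv -tile_cols 2 -tile_rows 2 output.ivf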

Signed-off-by: Wenbin Chen 
---
 doc/encoders.texi | 32 
 1 file changed, 32 insertions(+)

diff --git a/doc/encoders.texi b/doc/encoders.texi
index 1a5216f8eb..53dd02fd28 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -3775,6 +3775,38 @@ Number of columns for tiled encoding (requires libmfx >= 1.29).
 Number of rows for tiled encoding (requires libmfx >= 1.29).
 @end table
 
+@subsection AV1 Options
+These options are used by av1_qsv (requires libvpl).
+@table @option
+@item @var{profile}
+@table @samp
+@item unknown
+@item main
+@end table
+
+@item @var{tile_cols}
+Number of columns for tiled encoding.
+
+@item @var{tile_rows}
+Number of rows for tiled encoding.
+
+@item @var{adaptive_i}
+This flag controls insertion of I frames by the QSV encoder. Turn ON this flag
+to allow changing of frame type from P and B to I.
+
+@item @var{adaptive_b}
+This flag controls changing of frame type from B to P.
+
+@item @var{b_strategy}
+This option controls usage of B frames as reference.
+
+@item @var{extbrc}
+Extended bitrate control.
+
+@item @var{look_ahead_depth}
+Depth of look ahead in number of frames, available when the extbrc option is enabled.
+@end table
+
 @section snow
 
 @subsection Options
-- 
2.34.1



[FFmpeg-devel] [PATCH 1/2] libavcodec/qsvenc_av1: add av1_qsv encoder

2022-10-13 Thread wenbin . chen-at-intel . com
From: Wenbin Chen 

It is available only when libvpl is enabled. MSDK doesn't support av1
encoding.

sample command:
ffmpeg -f rawvideo -pix_fmt nv12 -s 1920x1080 -i input.yuv \
-c:v av1_qsv output.ivf

Signed-off-by: Wenbin Chen 
Signed-off-by: Haihao Xiang 
---
 configure   |   2 +
 libavcodec/Makefile |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/qsvenc.c | 196 +++-
 libavcodec/qsvenc.h |   7 +-
 libavcodec/qsvenc_av1.c | 156 
 6 files changed, 358 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/qsvenc_av1.c

diff --git a/configure b/configure
index f3fd91f592..7c4fef6cb0 100755
--- a/configure
+++ b/configure
@@ -3269,6 +3269,8 @@ vp9_qsv_encoder_select="qsvenc"
 vp9_v4l2m2m_decoder_deps="v4l2_m2m vp9_v4l2_m2m"
 wmv3_crystalhd_decoder_select="crystalhd"
 av1_qsv_decoder_select="qsvdec"
+av1_qsv_encoder_select="qsvenc"
+av1_qsv_encoder_deps="libvpl"
 
 # parsers
 aac_parser_select="adts_header mpeg4audio"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 37b63cadc2..77deaafe98 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -244,6 +244,7 @@ OBJS-$(CONFIG_AURA_DECODER)+= cyuv.o
 OBJS-$(CONFIG_AURA2_DECODER)   += aura.o
 OBJS-$(CONFIG_AV1_DECODER) += av1dec.o
 OBJS-$(CONFIG_AV1_CUVID_DECODER)   += cuviddec.o
+OBJS-$(CONFIG_AV1_QSV_ENCODER) += qsvenc_av1.o
 OBJS-$(CONFIG_AVRN_DECODER)+= avrndec.o
 OBJS-$(CONFIG_AVRP_DECODER)+= r210dec.o
 OBJS-$(CONFIG_AVRP_ENCODER)+= r210enc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index cfeb01ac1c..57e53437dc 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -827,6 +827,7 @@ extern const FFCodec ff_libaom_av1_decoder;
 extern const FFCodec ff_av1_decoder;
 extern const FFCodec ff_av1_cuvid_decoder;
 extern const FFCodec ff_av1_qsv_decoder;
+extern const FFCodec ff_av1_qsv_encoder;
 extern const FFCodec ff_libopenh264_encoder;
 extern const FFCodec ff_libopenh264_decoder;
 extern const FFCodec ff_h264_amf_encoder;
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index dc5479d0f3..fd3b9d5cbe 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -82,6 +82,14 @@ static const struct profile_names vp9_profiles[] = {
     { MFX_PROFILE_VP9_3,    "vp9 3" },
 };
 
+static const struct profile_names av1_profiles[] = {
+#if QSV_VERSION_ATLEAST(1, 34)
+    { MFX_PROFILE_AV1_MAIN, "av1 main"  },
+    { MFX_PROFILE_AV1_HIGH, "av1 high"  },
+    { MFX_PROFILE_AV1_PRO,  "av1 professional"  },
+#endif
+};
+
 typedef struct QSVPacket {
     AVPacket        pkt;
     mfxSyncPoint   *sync;
@@ -114,6 +122,11 @@ static const char *print_profile(enum AVCodecID codec_id, mfxU16 profile)
         num_profiles = FF_ARRAY_ELEMS(vp9_profiles);
         break;
 
+    case AV_CODEC_ID_AV1:
+        profiles = av1_profiles;
+        num_profiles = FF_ARRAY_ELEMS(av1_profiles);
+        break;
+
     default:
         return "unknown";
     }
@@ -429,6 +442,88 @@ static void dump_video_mjpeg_param(AVCodecContext *avctx, QSVEncContext *q)
            info->FrameInfo.FrameRateExtD, info->FrameInfo.FrameRateExtN);
 }
 
+#if QSV_HAVE_EXT_AV1_PARAM
+static void dump_video_av1_param(AVCodecContext *avctx, QSVEncContext *q,
+                                 mfxExtBuffer **coding_opts)
+{
+    mfxInfoMFX *info = &q->param.mfx;
+    mfxExtAV1TileParam *av1_tile_param = (mfxExtAV1TileParam *)coding_opts[0];
+    mfxExtAV1BitstreamParam *av1_bs_param = (mfxExtAV1BitstreamParam *)coding_opts[1];
+    mfxExtCodingOption2 *co2 = (mfxExtCodingOption2*)coding_opts[2];
+    mfxExtCodingOption3 *co3 = (mfxExtCodingOption3*)coding_opts[3];
+
+    av_log(avctx, AV_LOG_VERBOSE, "profile: %s; level: %"PRIu16"\n",
+           print_profile(avctx->codec_id, info->CodecProfile), info->CodecLevel);
+
+    av_log(avctx, AV_LOG_VERBOSE,
+           "GopPicSize: %"PRIu16"; GopRefDist: %"PRIu16"; GopOptFlag:%s%s; IdrInterval: %"PRIu16"\n",
+           info->GopPicSize, info->GopRefDist,
+           info->GopOptFlag & MFX_GOP_CLOSED ? " closed" : "",
+           info->GopOptFlag & MFX_GOP_STRICT ? " strict" : "",
+           info->IdrInterval);
+
+    av_log(avctx, AV_LOG_VERBOSE, "TargetUsage: %"PRIu16"; RateControlMethod: %s\n",
+           info->TargetUsage, print_ratecontrol(info->RateControlMethod));
+
+    if (info->RateControlMethod == MFX_RATECONTROL_CBR ||
+        info->RateControlMethod == MFX_RATECONTROL_VBR)
+        av_log(avctx, AV_LOG_VERBOSE,
+               "BufferSizeInKB: %"PRIu16"; InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"; BRCParamMultiplier: %"PRIu16"\n",
+               info->BufferSizeInKB, info->InitialDelayInKB, info->TargetKbps, info->MaxKbps, info->BRCParamMultiplier);