On Tue, Feb 25, 2020 at 5:24 PM Guo, Yejun <yejun....@intel.com> wrote: > > The Y channel is handled by dnn, and also resized by dnn. The UV channels > are resized with swscale. For me, it is a little odd to resize the Y channel with the dnn backend but resize the UV channels with FFmpeg's swscale — does it use the same scaling algorithm?
> The command to use espcn.pb (see vf_sr) looks like: > ./ffmpeg -i 480p.jpg -vf > format=yuv420p,dnn_processing=dnn_backend=tensorflow:model=espcn.pb:input=x:output=y > -y tmp.espcn.jpg > > Signed-off-by: Guo, Yejun <yejun....@intel.com> > --- > doc/filters.texi | 9 +++++++++ > libavfilter/vf_dnn_processing.c | 37 ++++++++++++++++++++++++++++++------- > 2 files changed, 39 insertions(+), 7 deletions(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index 33b7857..e3df8f9 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9155,6 +9155,7 @@ ffmpeg -i INPUT -f lavfi -i > nullsrc=hd720,geq='r=128+80*(sin(sqrt((X-W/2)*(X-W/2 > @end example > @end itemize > > +@anchor{dnn_processing} > @section dnn_processing > > Do image processing with deep neural networks. It works together with > another filter > @@ -9216,6 +9217,12 @@ Handle the Y channel with srcnn.pb (see @ref{sr} > filter) for frame with yuv420p > ./ffmpeg -i 480p.jpg -vf > format=yuv420p,scale=w=iw*2:h=ih*2,dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y > -y srcnn.jpg > @end example > > +@item > +Handle the Y channel with espcn.pb (see @ref{sr} filter), which changes > frame size, for format yuv420p (planar YUV formats supported): > +@example > +./ffmpeg -i 480p.jpg -vf > format=yuv420p,dnn_processing=dnn_backend=tensorflow:model=espcn.pb:input=x:output=y > -y tmp.espcn.jpg > +@end example > + > @end itemize > > @section drawbox > @@ -17369,6 +17376,8 @@ Default value is @code{2}. Scale factor is necessary > for SRCNN model, because it > input upscaled using bicubic upscaling with proper scale factor. > @end table > > +This feature can also be finished with @ref{dnn_processing} filter. > + > @section ssim > > Obtain the SSIM (Structural SImilarity Metric) between two input videos. 
> diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c > index f9458f0..7f40f85 100644 > --- a/libavfilter/vf_dnn_processing.c > +++ b/libavfilter/vf_dnn_processing.c > @@ -51,6 +51,8 @@ typedef struct DnnProcessingContext { > > struct SwsContext *sws_gray8_to_grayf32; > struct SwsContext *sws_grayf32_to_gray8; > + struct SwsContext *sws_uv_scale; > + int sws_uv_height; > } DnnProcessingContext; > > #define OFFSET(x) offsetof(DnnProcessingContext, x) > @@ -274,6 +276,18 @@ static int prepare_sws_context(AVFilterLink *outlink) > outlink->h, > AV_PIX_FMT_GRAY8, > 0, NULL, NULL, NULL); > + > + if (inlink->w != outlink->w || inlink->h != outlink->h) { > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); > + int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); > + int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); > + int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); > + int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); > + ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, > AV_PIX_FMT_GRAY8, > + sws_dst_w, sws_dst_h, > AV_PIX_FMT_GRAY8, > + SWS_BICUBIC, NULL, NULL, > NULL); > + ctx->sws_uv_height = sws_src_h; > + } > return 0; > default: > //do nothing > @@ -404,13 +418,21 @@ static av_always_inline int isPlanarYUV(enum > AVPixelFormat pix_fmt) > > static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const > AVFrame *in) > { > - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format); > - int uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h); > - for (int i = 1; i < 3; ++i) { > - int bytewidth = av_image_get_linesize(in->format, in->width, i); > - av_image_copy_plane(out->data[i], out->linesize[i], > - in->data[i], in->linesize[i], > - bytewidth, uv_height); > + if (!ctx->sws_uv_scale) { > + av_assert0(in->height == out->height && in->width == out->width); > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format); > + int 
uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h); > + for (int i = 1; i < 3; ++i) { > + int bytewidth = av_image_get_linesize(in->format, in->width, i); > + av_image_copy_plane(out->data[i], out->linesize[i], > + in->data[i], in->linesize[i], > + bytewidth, uv_height); > + } > + } else { > + sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), > in->linesize + 1, > + 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1); > + sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), > in->linesize + 2, > + 0, ctx->sws_uv_height, out->data + 2, out->linesize + 2); > } > > return 0; > @@ -455,6 +477,7 @@ static av_cold void uninit(AVFilterContext *ctx) > > sws_freeContext(context->sws_gray8_to_grayf32); > sws_freeContext(context->sws_grayf32_to_gray8); > + sws_freeContext(context->sws_uv_scale); > > if (context->dnn_module) > (context->dnn_module->free_model)(&context->model); > -- > 2.7.4 > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".