[FFmpeg-devel] [PATCH] swscale/swscale: remove useless code
Signed-off-by: Ruiling Song --- libswscale/swscale.c | 16 +--- libswscale/swscale_internal.h | 5 + libswscale/x86/swscale.c | 3 +-- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 8436f056d4..001cfbf15b 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -266,8 +266,6 @@ static int swscale(SwsContext *c, const uint8_t *src[], /* vars which will change and which we need to store back in the context */ int dstY = c->dstY; -int lumBufIndex = c->lumBufIndex; -int chrBufIndex = c->chrBufIndex; int lastInLumBuf = c->lastInLumBuf; int lastInChrBuf = c->lastInChrBuf; @@ -336,8 +334,6 @@ static int swscale(SwsContext *c, const uint8_t *src[], * will not get executed. This is not really intended but works * currently, so people might do it. */ if (srcSliceY == 0) { -lumBufIndex = -1; -chrBufIndex = -1; dstY = 0; lastInLumBuf = -1; lastInChrBuf = -1; @@ -461,7 +457,6 @@ static int swscale(SwsContext *c, const uint8_t *src[], desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 1); } -lumBufIndex += lastLumSrcY - lastInLumBuf; lastInLumBuf = lastLumSrcY; if (cPosY < lastChrSrcY + 1) { @@ -469,20 +464,13 @@ static int swscale(SwsContext *c, const uint8_t *src[], desc[i].process(c, &desc[i], firstCPosY, lastCPosY - firstCPosY + 1); } -chrBufIndex += lastChrSrcY - lastInChrBuf; lastInChrBuf = lastChrSrcY; -// wrap buf index around to stay inside the ring buffer -if (lumBufIndex >= vLumFilterSize) -lumBufIndex -= vLumFilterSize; -if (chrBufIndex >= vChrFilterSize) -chrBufIndex -= vChrFilterSize; if (!enough_lines) break; // we can't output a dstY line so let's try with the next slice #if HAVE_MMX_INLINE -ff_updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, - lastInLumBuf, lastInChrBuf); +ff_updateMMXDitherTables(c, dstY); #endif if (should_dither) { c->chrDither8 = ff_dither_8x8_128[chrDstY & 7]; @@ -524,8 +512,6 @@ static int swscale(SwsContext *c, const uint8_t *src[], /* store changed 
local vars back in the context */ c->dstY = dstY; -c->lumBufIndex = lumBufIndex; -c->chrBufIndex = chrBufIndex; c->lastInLumBuf = lastInLumBuf; c->lastInChrBuf = lastInChrBuf; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index a59d12745a..9dda53eead 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -350,8 +350,6 @@ typedef struct SwsContext { //@{ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. -int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. -int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. //@} uint8_t *formatConvBuffer; @@ -635,8 +633,7 @@ int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation); -void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, - int lastInLumBuf, int lastInChrBuf); +void ff_updateMMXDitherTables(SwsContext *c, int dstY); av_cold void ff_sws_init_range_convert(SwsContext *c); diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index e9d474a1e8..61110839ee 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -79,8 +79,7 @@ DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w)= 0x0001000100010001ULL; #include "swscale_template.c" #endif -void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, - int lastInLumBuf, int lastInChrBuf) +void ff_updateMMXDitherTables(SwsContext *c, int dstY) { const int dstH= c->dstH; const int flags= c->flags; -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with 
subject "unsubscribe".
[FFmpeg-devel] [PATCH V4] avfilter: Add tonemap vaapi filter for H2S
From: Xinpeng Sun It performs HDR(High Dynamic Range) to SDR(Standard Dynamic Range) conversion with tone-mapping. For now, it only supports HDR10 as input. An example command to use this filter with vaapi codecs: FFMPEG -hwaccel vaapi -vaapi_device /dev/dri/renderD128 -hwaccel_output_format vaapi \ -i INPUT -vf 'tonemap_vaapi=format=p010' -c:v hevc_vaapi -profile 2 OUTPUT Signed-off-by: Xinpeng Sun Signed-off-by: Zachary Zhou Signed-off-by: Ruiling Song --- After re-thinking the documentation part, I find it is not necessary to repeat how to set up the vaapi device in this filter's section. There is already a good explanation of it (https://trac.ffmpeg.org/wiki/Hardware/VAAPI), so I added a link to it. I only made the code changes requested by Vittoria and me. So if there are no further comments, I am going to apply the patch next week. Thanks! Ruiling configure | 2 + doc/filters.texi | 59 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_tonemap_vaapi.c | 419 + 5 files changed, 482 insertions(+) create mode 100644 libavfilter/vf_tonemap_vaapi.c diff --git a/configure b/configure index 42e7df3941..74f2d38317 100755 --- a/configure +++ b/configure @@ -3576,6 +3576,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer_output_hdr_metadata" tonemap_opencl_filter_deps="opencl const_nan" transpose_opencl_filter_deps="opencl" transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" @@ -6577,6 +6578,7 @@ if enabled vaapi; then check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC" check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth +check_struct "va/va.h va/va_vpp.h" "VAProcPipelineParameterBuffer" output_hdr_metadata check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" rotation_flags check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC" check_type 
"va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG" diff --git a/doc/filters.texi b/doc/filters.texi index 99da266cec..1d934b84f3 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -21034,6 +21034,65 @@ Apply a strong blur of both luma and chroma parameters: @c man end OPENCL VIDEO FILTERS +@chapter VAAPI Video Filters +@c man begin VAAPI VIDEO FILTERS + +VAAPI Video filters are usually used with VAAPI decoder and VAAPI encoder. Below is a description of VAAPI video filters. + +To enable compilation of these filters you need to configure FFmpeg with +@code{--enable-vaapi}. + +To use vaapi filters, you need to set up the vaapi device correctly. For more information, please read @url{https://trac.ffmpeg.org/wiki/Hardware/VAAPI} + +@section tonemap_vaapi + +Perform HDR(High Dynamic Range) to SDR(Standard Dynamic Range) conversion with tone-mapping. +It maps the dynamic range of HDR10 content to the SDR content. +It currently only accepts HDR10 as input. + +It accepts the following parameters: + +@table @option +@item format +Specify the output pixel format. + +Currently supported formats are: +@table @var +@item p010 +@item nv12 +@end table + +Default is nv12. + +@item primaries, p +Set the output color primaries. + +Default is same as input. + +@item transfer, t +Set the output transfer characteristics. + +Default is bt709. + +@item matrix, m +Set the output colorspace matrix. + +Default is same as input. 
+ +@end table + +@subsection Example + +@itemize +@item +Convert HDR(HDR10) video to bt2020-transfer-characteristic p010 format +@example +tonemap_vaapi=format=p010:t=bt2020-10 +@end example +@end itemize + +@c man end VAAPI VIDEO FILTERS + @chapter Video Sources @c man begin VIDEO SOURCES diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 446c802b98..37d4eee858 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -411,6 +411,7 @@ OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o colorspace.o OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl.o \ opencl/tonemap.o opencl/colorspace_common.o +OBJS-$(CONFIG_TONEMAP_VAAPI_FILTER) += vf_tonemap_vaapi.o vaapi_vpp.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 69953832da..c295f8e403 100644 --- a/libavfilter
[FFmpeg-devel] [PATCH V2] avfilter/vf_convolution: add x86 SIMD for filter_3x3()
Tested using a simple command (apply edge enhance): ./ffmpeg_g -i ~/Downloads/bbb_sunflower_1080p_30fps_normal.mp4 \ -vf convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128" \ -an -vframes 1000 -f null /dev/null The fps increase from 151 to 270 on my local machine. Signed-off-by: Ruiling Song --- v2: fix a bug in scalar code path. Use macro PROCESS_V/S for the first tap to simplify code. libavfilter/convolution.h | 64 +++ libavfilter/vf_convolution.c | 41 +-- libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_convolution.asm| 156 ++ libavfilter/x86/vf_convolution_init.c | 46 5 files changed, 271 insertions(+), 38 deletions(-) create mode 100644 libavfilter/convolution.h create mode 100644 libavfilter/x86/vf_convolution.asm create mode 100644 libavfilter/x86/vf_convolution_init.c diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h new file mode 100644 index 00..fc6aad58fd --- /dev/null +++ b/libavfilter/convolution.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com) + * Copyright (c) 2015 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVFILTER_CONVOLUTION_H +#define AVFILTER_CONVOLUTION_H +#include "avfilter.h" + +enum MatrixMode { +MATRIX_SQUARE, +MATRIX_ROW, +MATRIX_COLUMN, +MATRIX_NBMODES, +}; + +typedef struct ConvolutionContext { +const AVClass *class; + +char *matrix_str[4]; +float rdiv[4]; +float bias[4]; +int mode[4]; +float scale; +float delta; +int planes; + +int size[4]; +int depth; +int max; +int bpc; +int nb_planes; +int nb_threads; +int planewidth[4]; +int planeheight[4]; +int matrix[4][49]; +int matrix_length[4]; +int copy[4]; + +void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int width, int y, int height, int bpc); +void (*filter[4])(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride); +} ConvolutionContext; + +void ff_convolution_init_x86(ConvolutionContext *s); +#endif diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 1305569c88..e3bf1df79f 100644 --- a/libavfilter/vf_convolution.c +++ b/libavfilter/vf_convolution.c @@ -25,48 +25,11 @@ #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" +#include "convolution.h" #include "formats.h" #include "internal.h" #include "video.h" -enum MatrixMode { -MATRIX_SQUARE, -MATRIX_ROW, -MATRIX_COLUMN, -MATRIX_NBMODES, -}; - -typedef struct ConvolutionContext { -const AVClass *class; - -char *matrix_str[4]; -float rdiv[4]; -float bias[4]; -int mode[4]; -float scale; -float delta; -int planes; - -int size[4]; -int depth; -int max; -int bpc; -int nb_planes; -int nb_threads; -int planewidth[4]; -int planeheight[4]; -int matrix[4][49]; -int matrix_length[4]; -int copy[4]; - -void (*setup[4])(int radius, const uint8_t *c[], const 
uint8_t *src, int stride, - int x, int width, int y, int height, int bpc); -void (*filter[4])(uint8_t *dst, int width, - float rdiv, float bias, const int *const matrix, - const uint8_t *c[], int peak, int radius, - int dstride, int stride); -} ConvolutionContext; - #define OFFSET(x) offsetof(ConvolutionContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -625,6 +588,8 @@ static int config_input(AVFilterLink *inlink) s->filter[p] = filter16_7x7; } } +if (ARCH_X86_64) +ff_convolution_init_x86(s); } else if (!strcmp(ctx->filter->name, "prewitt")) { if (s->depth > 8) f
[FFmpeg-devel] [PATCH] avfilter/vf_convolution: add x86 SIMD for filter_3x3()
Tested using a simple command (apply edge enhance): ./ffmpeg_g -i ~/Downloads/bbb_sunflower_1080p_30fps_normal.mp4 \ -vf convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128" \ -an -vframes 1000 -f null /dev/null The fps increase from 151 to 270 on my local machine. Signed-off-by: Ruiling Song --- libavfilter/convolution.h | 64 +++ libavfilter/vf_convolution.c | 41 +-- libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_convolution.asm| 158 ++ libavfilter/x86/vf_convolution_init.c | 46 5 files changed, 273 insertions(+), 38 deletions(-) create mode 100644 libavfilter/convolution.h create mode 100644 libavfilter/x86/vf_convolution.asm create mode 100644 libavfilter/x86/vf_convolution_init.c diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h new file mode 100644 index 00..fc6aad58fd --- /dev/null +++ b/libavfilter/convolution.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com) + * Copyright (c) 2015 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVFILTER_CONVOLUTION_H +#define AVFILTER_CONVOLUTION_H +#include "avfilter.h" + +enum MatrixMode { +MATRIX_SQUARE, +MATRIX_ROW, +MATRIX_COLUMN, +MATRIX_NBMODES, +}; + +typedef struct ConvolutionContext { +const AVClass *class; + +char *matrix_str[4]; +float rdiv[4]; +float bias[4]; +int mode[4]; +float scale; +float delta; +int planes; + +int size[4]; +int depth; +int max; +int bpc; +int nb_planes; +int nb_threads; +int planewidth[4]; +int planeheight[4]; +int matrix[4][49]; +int matrix_length[4]; +int copy[4]; + +void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int width, int y, int height, int bpc); +void (*filter[4])(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride); +} ConvolutionContext; + +void ff_convolution_init_x86(ConvolutionContext *s); +#endif diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 1305569c88..e3bf1df79f 100644 --- a/libavfilter/vf_convolution.c +++ b/libavfilter/vf_convolution.c @@ -25,48 +25,11 @@ #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" +#include "convolution.h" #include "formats.h" #include "internal.h" #include "video.h" -enum MatrixMode { -MATRIX_SQUARE, -MATRIX_ROW, -MATRIX_COLUMN, -MATRIX_NBMODES, -}; - -typedef struct ConvolutionContext { -const AVClass *class; - -char *matrix_str[4]; -float rdiv[4]; -float bias[4]; -int mode[4]; -float scale; -float delta; -int planes; - -int size[4]; -int depth; -int max; -int bpc; -int nb_planes; -int nb_threads; -int planewidth[4]; -int planeheight[4]; -int matrix[4][49]; -int matrix_length[4]; -int copy[4]; - -void (*setup[4])(int radius, const uint8_t *c[], const 
uint8_t *src, int stride, - int x, int width, int y, int height, int bpc); -void (*filter[4])(uint8_t *dst, int width, - float rdiv, float bias, const int *const matrix, - const uint8_t *c[], int peak, int radius, - int dstride, int stride); -} ConvolutionContext; - #define OFFSET(x) offsetof(ConvolutionContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -625,6 +588,8 @@ static int config_input(AVFilterLink *inlink) s->filter[p] = filter16_7x7; } } +if (ARCH_X86_64) +ff_convolution_init_x86(s); } else if (!strcmp(ctx->filter->name, "prewitt")) { if (s->depth > 8) for (p = 0; p < s->nb_planes; p++) diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefi
[FFmpeg-devel] [PATCH V3 1/2] avfilter/vf_gblur: add x86 SIMD optimizations
The horizontal pass gets a ~2x speedup with this patch when running single-threaded. Tested overall performance using these commands (avx2 enabled): ./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null ./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null For single thread, the fps improves from 43 to 60, about 40%. For multi-thread, the fps improves from 110 to 130, about 20%. v2: Fix the bug when steps is not one. v3: Fix the bug where the upper half of a 64-bit register used to pass an 'int' argument may contain garbage. Signed-off-by: Ruiling Song --- libavfilter/gblur.h | 55 ++ libavfilter/vf_gblur.c | 71 ++-- libavfilter/x86/Makefile| 2 + libavfilter/x86/vf_gblur.asm| 185 libavfilter/x86/vf_gblur_init.c | 36 +++ 5 files changed, 310 insertions(+), 39 deletions(-) create mode 100644 libavfilter/gblur.h create mode 100644 libavfilter/x86/vf_gblur.asm create mode 100644 libavfilter/x86/vf_gblur_init.c diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h new file mode 100644 index 00..87129801de --- /dev/null +++ b/libavfilter/gblur.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 Pascal Getreuer + * Copyright (c) 2016 Paul B Mahol + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + *copyright notice, this list of conditions and the following + *disclaimer in the documentation and/or other materials provided + *with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AVFILTER_GBLUR_H +#define AVFILTER_GBLUR_H +#include "avfilter.h" + +typedef struct GBlurContext { +const AVClass *class; + +float sigma; +float sigmaV; +int steps; +int planes; + +int depth; +int planewidth[4]; +int planeheight[4]; +float *buffer; +float boundaryscale; +float boundaryscaleV; +float postscale; +float postscaleV; +float nu; +float nuV; +int nb_planes; +void (*horiz_slice)(float *buffer, int width, int height, int steps, float nu, float bscale); +} GBlurContext; +void ff_gblur_init(GBlurContext *s); +void ff_gblur_init_x86(GBlurContext *s); +#endif diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c index b91a8c074a..e71b33da80 100644 --- a/libavfilter/vf_gblur.c +++ b/libavfilter/vf_gblur.c @@ -30,30 +30,10 @@ #include "libavutil/pixdesc.h" #include "avfilter.h" #include "formats.h" +#include "gblur.h" #include "internal.h" #include "video.h" -typedef struct GBlurContext { -const AVClass *class; - -float sigma; -float sigmaV; -int steps; -int planes; - -int depth; -int planewidth[4]; -int planeheight[4]; -float *buffer; -float boundaryscale; -float boundaryscaleV; -float postscale; -float postscaleV; -float nu; -float nuV; -int nb_planes; -} GBlurContext; - #define OFFSET(x) offsetof(GBlurContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -72,39 +52,44 @@ typedef struct ThreadData { int width; } ThreadData; -static int filter_horizontally(AVFilterContext *ctx, void *arg, int 
jobnr, int nb_jobs) +static void horiz_slice_c(float *buffer, int width, int height, int steps, + float nu, float bscale) { -GBlurContext *s = ctx->priv; -ThreadData *td = arg; -const int height = td->height; -const int width = td->width; -const int slice_start = (height * jobnr ) / nb_jobs; -const int slice_end = (height * (jobnr+1)) / nb_jobs; -const float boundaryscale = s->boundaryscale; -const int steps = s->steps; -const float nu = s->nu; -float *buffer = s->buffer; -int y, x, step; +int step, x, y; float *ptr; - -/* Filter horizontally along each row */ -for (y = slice_start; y < slice_end; y+
[FFmpeg-devel] [PATCH V3 2/2] checkasm/vf_gblur: add test for horiz_slice simd
Signed-off-by: Ruiling Song --- tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_gblur.c | 67 +++ tests/fate/checkasm.mak | 1 + 5 files changed, 73 insertions(+) create mode 100644 tests/checkasm/vf_gblur.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 886ae33167..f5780eedb2 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -35,6 +35,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o +AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index bf51e00eab..3e2ec377be 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -162,6 +162,9 @@ static const struct { #if CONFIG_COLORSPACE_FILTER { "vf_colorspace", checkasm_check_colorspace }, #endif +#if CONFIG_GBLUR_FILTER +{ "vf_gblur", checkasm_check_vf_gblur }, +#endif #if CONFIG_HFLIP_FILTER { "vf_hflip", checkasm_check_vf_hflip }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 9b8d2f5419..aed15b5fa4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -71,6 +71,7 @@ void checkasm_check_sw_rgb(void); void checkasm_check_utvideodsp(void); void checkasm_check_v210dec(void); void checkasm_check_v210enc(void); +void checkasm_check_vf_gblur(void); void checkasm_check_vf_hflip(void); void checkasm_check_vf_threshold(void); void checkasm_check_vp8dsp(void); diff --git a/tests/checkasm/vf_gblur.c b/tests/checkasm/vf_gblur.c new file mode 100644 index 00..582bc7cc0f --- /dev/null +++ b/tests/checkasm/vf_gblur.c @@ -0,0 +1,67 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include "checkasm.h" +#include "libavfilter/gblur.h" + +#define WIDTH 256 +#define HEIGHT 256 +#define PIXELS (WIDTH * HEIGHT) +#define BUF_SIZE (PIXELS * 4) + +#define randomize_buffers(buf, size) \ +do { \ +int j; \ +float *tmp_buf = (float *)buf; \ +for (j = 0; j < size; j++) \ +tmp_buf[j] = (float)(rnd() & 0xFF); \ +} while (0) + +void checkasm_check_vf_gblur(void) +{ +float *dst_ref = av_malloc(BUF_SIZE); +float *dst_new = av_malloc(BUF_SIZE); +int i, j; +int w = WIDTH; +int h = HEIGHT; +int steps = 2; +float nu = 0.101f; +float bscale = 1.112f; +GBlurContext s; + +declare_func(void, float *dst, int w, int h, int steps, float nu, float bscale); + +randomize_buffers(dst_ref, PIXELS); +memcpy(dst_new, dst_ref, BUF_SIZE); + +ff_gblur_init(&s); + +if (check_func(s.horiz_slice, "horiz_slice")) { +call_ref(dst_ref, w, h, steps, nu, bscale); +call_new(dst_new, w, h, steps, nu, bscale); + +if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) { +fail(); +} +bench_new(dst_new, w, h, 1, nu, bscale); +} +report("horiz_slice"); +av_freep(&dst_ref); +av_freep(&dst_new); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index c453273cd0..618bde509f 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -27,6 +27,7 
@@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ fate-checkasm-v210enc \ fate-checkasm-vf_blend \ fate-checkasm-vf_colorspace \ +fate-checkasm-vf_gblur \ fate-checkasm-vf_hflip \ fate-che
[FFmpeg-devel] [PATCH V2 2/2] checkasm/vf_gblur: add test for horiz_slice simd
Signed-off-by: Ruiling Song --- tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_gblur.c | 67 +++ tests/fate/checkasm.mak | 1 + 5 files changed, 73 insertions(+) create mode 100644 tests/checkasm/vf_gblur.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 886ae33167..f5780eedb2 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -35,6 +35,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o +AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index bf51e00eab..3e2ec377be 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -162,6 +162,9 @@ static const struct { #if CONFIG_COLORSPACE_FILTER { "vf_colorspace", checkasm_check_colorspace }, #endif +#if CONFIG_GBLUR_FILTER +{ "vf_gblur", checkasm_check_vf_gblur }, +#endif #if CONFIG_HFLIP_FILTER { "vf_hflip", checkasm_check_vf_hflip }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 9b8d2f5419..aed15b5fa4 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -71,6 +71,7 @@ void checkasm_check_sw_rgb(void); void checkasm_check_utvideodsp(void); void checkasm_check_v210dec(void); void checkasm_check_v210enc(void); +void checkasm_check_vf_gblur(void); void checkasm_check_vf_hflip(void); void checkasm_check_vf_threshold(void); void checkasm_check_vp8dsp(void); diff --git a/tests/checkasm/vf_gblur.c b/tests/checkasm/vf_gblur.c new file mode 100644 index 00..582bc7cc0f --- /dev/null +++ b/tests/checkasm/vf_gblur.c @@ -0,0 +1,67 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include "checkasm.h" +#include "libavfilter/gblur.h" + +#define WIDTH 256 +#define HEIGHT 256 +#define PIXELS (WIDTH * HEIGHT) +#define BUF_SIZE (PIXELS * 4) + +#define randomize_buffers(buf, size) \ +do { \ +int j; \ +float *tmp_buf = (float *)buf; \ +for (j = 0; j < size; j++) \ +tmp_buf[j] = (float)(rnd() & 0xFF); \ +} while (0) + +void checkasm_check_vf_gblur(void) +{ +float *dst_ref = av_malloc(BUF_SIZE); +float *dst_new = av_malloc(BUF_SIZE); +int i, j; +int w = WIDTH; +int h = HEIGHT; +int steps = 2; +float nu = 0.101f; +float bscale = 1.112f; +GBlurContext s; + +declare_func(void, float *dst, int w, int h, int steps, float nu, float bscale); + +randomize_buffers(dst_ref, PIXELS); +memcpy(dst_new, dst_ref, BUF_SIZE); + +ff_gblur_init(&s); + +if (check_func(s.horiz_slice, "horiz_slice")) { +call_ref(dst_ref, w, h, steps, nu, bscale); +call_new(dst_new, w, h, steps, nu, bscale); + +if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) { +fail(); +} +bench_new(dst_new, w, h, 1, nu, bscale); +} +report("horiz_slice"); +av_freep(&dst_ref); +av_freep(&dst_new); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index c453273cd0..618bde509f 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -27,6 +27,7 
@@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ fate-checkasm-v210enc \ fate-checkasm-vf_blend \ fate-checkasm-vf_colorspace \ +fate-checkasm-vf_gblur \ fate-checkasm-vf_hflip \ fate-che
[FFmpeg-devel] [PATCH V2 1/2] avfilter/vf_gblur: add x86 SIMD optimizations
The horizontal pass get ~2x performance with the patch under single thread. Tested overall performance using the command(avx2 enabled): ./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null ./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null For single thread, the fps improves from 43 to 60, about 40%. For multi-thread, the fps improves from 110 to 130, about 20%. v2: Fix the bug when steps is not one. Signed-off-by: Ruiling Song --- libavfilter/gblur.h | 55 ++ libavfilter/vf_gblur.c | 71 ++--- libavfilter/x86/Makefile| 2 + libavfilter/x86/vf_gblur.asm| 183 libavfilter/x86/vf_gblur_init.c | 36 +++ 5 files changed, 308 insertions(+), 39 deletions(-) create mode 100644 libavfilter/gblur.h create mode 100644 libavfilter/x86/vf_gblur.asm create mode 100644 libavfilter/x86/vf_gblur_init.c diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h new file mode 100644 index 00..87129801de --- /dev/null +++ b/libavfilter/gblur.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 Pascal Getreuer + * Copyright (c) 2016 Paul B Mahol + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + *copyright notice, this list of conditions and the following + *disclaimer in the documentation and/or other materials provided + *with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AVFILTER_GBLUR_H +#define AVFILTER_GBLUR_H +#include "avfilter.h" + +typedef struct GBlurContext { +const AVClass *class; + +float sigma; +float sigmaV; +int steps; +int planes; + +int depth; +int planewidth[4]; +int planeheight[4]; +float *buffer; +float boundaryscale; +float boundaryscaleV; +float postscale; +float postscaleV; +float nu; +float nuV; +int nb_planes; +void (*horiz_slice)(float *buffer, int width, int height, int steps, float nu, float bscale); +} GBlurContext; +void ff_gblur_init(GBlurContext *s); +void ff_gblur_init_x86(GBlurContext *s); +#endif diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c index b91a8c074a..e71b33da80 100644 --- a/libavfilter/vf_gblur.c +++ b/libavfilter/vf_gblur.c @@ -30,30 +30,10 @@ #include "libavutil/pixdesc.h" #include "avfilter.h" #include "formats.h" +#include "gblur.h" #include "internal.h" #include "video.h" -typedef struct GBlurContext { -const AVClass *class; - -float sigma; -float sigmaV; -int steps; -int planes; - -int depth; -int planewidth[4]; -int planeheight[4]; -float *buffer; -float boundaryscale; -float boundaryscaleV; -float postscale; -float postscaleV; -float nu; -float nuV; -int nb_planes; -} GBlurContext; - #define OFFSET(x) offsetof(GBlurContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -72,39 +52,44 @@ typedef struct ThreadData { int width; } ThreadData; -static int filter_horizontally(AVFilterContext *ctx, void *arg, int 
jobnr, int nb_jobs) +static void horiz_slice_c(float *buffer, int width, int height, int steps, + float nu, float bscale) { -GBlurContext *s = ctx->priv; -ThreadData *td = arg; -const int height = td->height; -const int width = td->width; -const int slice_start = (height * jobnr ) / nb_jobs; -const int slice_end = (height * (jobnr+1)) / nb_jobs; -const float boundaryscale = s->boundaryscale; -const int steps = s->steps; -const float nu = s->nu; -float *buffer = s->buffer; -int y, x, step; +int step, x, y; float *ptr; - -/* Filter horizontally along each row */ -for (y = slice_start; y < slice_end; y++) { +for (y = 0; y < height; y++) { for (step = 0; step < steps; step++
[FFmpeg-devel] [PATCH] avfilter/vf_gblur: add x86 SIMD optimizations
For details of the implementation, please refer to the comment inlined in the assembly code. It improves the horizontal pass performance about 100% under single thread. Tested overall performance using the command(avx2 enabled): ./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null ./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null For single thread, the fps improves from 43 to 60, about 40%. For multi-thread, the fps improves from 110 to 130, about 20%. Signed-off-by: Ruiling Song --- libavfilter/gblur.h | 54 ++ libavfilter/vf_gblur.c | 66 +--- libavfilter/x86/Makefile| 2 + libavfilter/x86/vf_gblur.asm| 182 libavfilter/x86/vf_gblur_init.c | 36 +++ 5 files changed, 302 insertions(+), 38 deletions(-) create mode 100644 libavfilter/gblur.h create mode 100644 libavfilter/x86/vf_gblur.asm create mode 100644 libavfilter/x86/vf_gblur_init.c diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h new file mode 100644 index 00..97217044d0 --- /dev/null +++ b/libavfilter/gblur.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 Pascal Getreuer + * Copyright (c) 2016 Paul B Mahol + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + *copyright notice, this list of conditions and the following + *disclaimer in the documentation and/or other materials provided + *with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AVFILTER_GBLUR_H +#define AVFILTER_GBLUR_H +#include "avfilter.h" + +typedef struct GBlurContext { +const AVClass *class; + +float sigma; +float sigmaV; +int steps; +int planes; + +int depth; +int planewidth[4]; +int planeheight[4]; +float *buffer; +float boundaryscale; +float boundaryscaleV; +float postscale; +float postscaleV; +float nu; +float nuV; +int nb_planes; +void (*horiz_slice)(float *buffer, int width, int height, int steps, float nu, float bscale); +} GBlurContext; +void ff_gblur_init_x86(GBlurContext *s); +#endif diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c index b91a8c074a..4e876bca05 100644 --- a/libavfilter/vf_gblur.c +++ b/libavfilter/vf_gblur.c @@ -30,29 +30,11 @@ #include "libavutil/pixdesc.h" #include "avfilter.h" #include "formats.h" +#include "gblur.h" #include "internal.h" #include "video.h" +#include -typedef struct GBlurContext { -const AVClass *class; - -float sigma; -float sigmaV; -int steps; -int planes; - -int depth; -int planewidth[4]; -int planeheight[4]; -float *buffer; -float boundaryscale; -float boundaryscaleV; -float postscale; -float postscaleV; -float nu; -float nuV; -int nb_planes; -} GBlurContext; #define OFFSET(x) offsetof(GBlurContext, x) #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM @@ -72,39 +54,44 @@ typedef struct ThreadData { int width; } ThreadData; -static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void 
horiz_slice_c(float *buffer, int width, int height, int steps, + float nu, float bscale) { -GBlurContext *s = ctx->priv; -ThreadData *td = arg; -const int height = td->height; -const int width = td->width; -const int slice_start = (height * jobnr ) / nb_jobs; -const int slice_end = (height * (jobnr+1)) / nb_jobs; -const float boundaryscale = s->boundaryscale; -const int steps = s->steps; -const float nu = s->nu; -float *buffer = s->buffer; -int y, x, step; +int step, x, y; float *ptr; - -/* Filter horizontally along each row */ -for (y = slice_start; y < slice_end; y++) { +for (y = 0; y < height; y++) { for (step = 0; step < st
[FFmpeg-devel] [PATCH V2] avfilter/vf_unsharp: enable slice threading
benchmarking with a simple command: ffmpeg -i 1080p.mp4 -vf unsharp=la=3:ca=3 -an -f null /dev/null with the patch, the fps increase from 50 to 120 on my local machine (i7-6770HQ). v2: make av_image_copy_plane() only copy per-slice content. Signed-off-by: Ruiling Song --- libavfilter/unsharp.h| 4 +- libavfilter/vf_unsharp.c | 102 ++- 2 files changed, 81 insertions(+), 25 deletions(-) diff --git a/libavfilter/unsharp.h b/libavfilter/unsharp.h index caff986fc1..a60b30f31a 100644 --- a/libavfilter/unsharp.h +++ b/libavfilter/unsharp.h @@ -37,7 +37,8 @@ typedef struct UnsharpFilterParam { int steps_y; ///< vertical step count int scalebits; ///< bits to shift pixel int32_t halfscale; ///< amount to add to pixel -uint32_t *sc[MAX_MATRIX_SIZE - 1]; ///< finite state machine storage +uint32_t *sr;///< finite state machine storage within a row +uint32_t **sc; ///< finite state machine storage across rows } UnsharpFilterParam; typedef struct UnsharpContext { @@ -47,6 +48,7 @@ typedef struct UnsharpContext { UnsharpFilterParam luma; ///< luma parameters (width, height, amount) UnsharpFilterParam chroma; ///< chroma parameters (width, height, amount) int hsub, vsub; +int nb_threads; int opencl; int (* apply_unsharp)(AVFilterContext *ctx, AVFrame *in, AVFrame *out); } UnsharpContext; diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c index 41ccc56942..af05833a5d 100644 --- a/libavfilter/vf_unsharp.c +++ b/libavfilter/vf_unsharp.c @@ -47,15 +47,22 @@ #include "libavutil/pixdesc.h" #include "unsharp.h" -static void apply_unsharp( uint8_t *dst, int dst_stride, - const uint8_t *src, int src_stride, - int width, int height, UnsharpFilterParam *fp) +typedef struct TheadData { +UnsharpFilterParam *fp; +uint8_t *dst; +const uint8_t *src; +int dst_stride; +int src_stride; +int width; +int height; +} ThreadData; + +static int unsharp_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { +ThreadData *td = arg; +UnsharpFilterParam *fp = td->fp; uint32_t **sc = 
fp->sc; -uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2; - -int32_t res; -int x, y, z; +uint32_t *sr = fp->sr; const uint8_t *src2 = NULL; //silence a warning const int amount = fp->amount; const int steps_x = fp->steps_x; @@ -63,30 +70,54 @@ static void apply_unsharp( uint8_t *dst, int dst_stride, const int scalebits = fp->scalebits; const int32_t halfscale = fp->halfscale; +uint8_t *dst = td->dst; +const uint8_t *src = td->src; +const int dst_stride = td->dst_stride; +const int src_stride = td->src_stride; +const int width = td->width; +const int height = td->height; +const int sc_offset = jobnr * 2 * steps_y; +const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1); +const int slice_start = (height * jobnr) / nb_jobs; +const int slice_end = (height * (jobnr+1)) / nb_jobs; + +int32_t res; +int x, y, z; +uint32_t tmp1, tmp2; + if (!amount) { -av_image_copy_plane(dst, dst_stride, src, src_stride, width, height); -return; +av_image_copy_plane(dst + slice_start * dst_stride, dst_stride, +src + slice_start * src_stride, src_stride, +width, slice_end - slice_start); +return 0; } for (y = 0; y < 2 * steps_y; y++) -memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); +memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); -for (y = -steps_y; y < height + steps_y; y++) { +// if this is not the first tile, we start from (slice_start - steps_y), +// so we can get smooth result at slice boundary +if (slice_start > steps_y) { +src += (slice_start - steps_y) * src_stride; +dst += (slice_start - steps_y) * dst_stride; +} + +for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) { if (y < height) src2 = src; -memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1)); +memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1)); for (x = -steps_x; x < width + steps_x; x++) { tmp1 = x <= 0 ? src2[0] : x >= width ? 
src2[width-1] : src2[x]; for (z = 0; z < steps_x * 2; z += 2) { -tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1; -tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2; +tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = tmp1; +tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = tmp2; } for (
[FFmpeg-devel] [PATCH V2] avutil/tx: add check against (*ctx)
ctx is a pointer to a pointer here, so both ctx and *ctx need to be checked before dereferencing. Signed-off-by: Ruiling Song --- libavutil/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/tx.c b/libavutil/tx.c index 934ef27c81..1690604040 100644 --- a/libavutil/tx.c +++ b/libavutil/tx.c @@ -697,7 +697,7 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale) av_cold void av_tx_uninit(AVTXContext **ctx) { -if (!ctx) +if (!ctx || !(*ctx)) return; av_free((*ctx)->pfatab); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avutil/tx: should check against (*ctx)
ctx is a pointer to a pointer here. Signed-off-by: Ruiling Song --- libavutil/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/tx.c b/libavutil/tx.c index 934ef27c81..2bf4aa1c28 100644 --- a/libavutil/tx.c +++ b/libavutil/tx.c @@ -697,7 +697,7 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale) av_cold void av_tx_uninit(AVTXContext **ctx) { -if (!ctx) +if (!(*ctx)) return; av_free((*ctx)->pfatab); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] avfilter/vf_unsharp: enable slice threading
Signed-off-by: Ruiling Song --- libavfilter/unsharp.h| 4 +- libavfilter/vf_unsharp.c | 98 ++-- 2 files changed, 78 insertions(+), 24 deletions(-) diff --git a/libavfilter/unsharp.h b/libavfilter/unsharp.h index caff986fc1..a60b30f31a 100644 --- a/libavfilter/unsharp.h +++ b/libavfilter/unsharp.h @@ -37,7 +37,8 @@ typedef struct UnsharpFilterParam { int steps_y; ///< vertical step count int scalebits; ///< bits to shift pixel int32_t halfscale; ///< amount to add to pixel -uint32_t *sc[MAX_MATRIX_SIZE - 1]; ///< finite state machine storage +uint32_t *sr;///< finite state machine storage within a row +uint32_t **sc; ///< finite state machine storage across rows } UnsharpFilterParam; typedef struct UnsharpContext { @@ -47,6 +48,7 @@ typedef struct UnsharpContext { UnsharpFilterParam luma; ///< luma parameters (width, height, amount) UnsharpFilterParam chroma; ///< chroma parameters (width, height, amount) int hsub, vsub; +int nb_threads; int opencl; int (* apply_unsharp)(AVFilterContext *ctx, AVFrame *in, AVFrame *out); } UnsharpContext; diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c index 41ccc56942..41c62d101a 100644 --- a/libavfilter/vf_unsharp.c +++ b/libavfilter/vf_unsharp.c @@ -47,15 +47,22 @@ #include "libavutil/pixdesc.h" #include "unsharp.h" -static void apply_unsharp( uint8_t *dst, int dst_stride, - const uint8_t *src, int src_stride, - int width, int height, UnsharpFilterParam *fp) +typedef struct TheadData { +UnsharpFilterParam *fp; +uint8_t *dst; +const uint8_t *src; +int dst_stride; +int src_stride; +int width; +int height; +} ThreadData; + +static int unsharp_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { +ThreadData *td = arg; +UnsharpFilterParam *fp = td->fp; uint32_t **sc = fp->sc; -uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2; - -int32_t res; -int x, y, z; +uint32_t *sr = fp->sr; const uint8_t *src2 = NULL; //silence a warning const int amount = fp->amount; const int steps_x = fp->steps_x; @@ -63,30 +70,52 @@ 
static void apply_unsharp( uint8_t *dst, int dst_stride, const int scalebits = fp->scalebits; const int32_t halfscale = fp->halfscale; +uint8_t *dst = td->dst; +const uint8_t *src = td->src; +const int dst_stride = td->dst_stride; +const int src_stride = td->src_stride; +const int width = td->width; +const int height = td->height; +const int sc_offset = jobnr * 2 * steps_y; +const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1); +const int slice_start = (height * jobnr) / nb_jobs; +const int slice_end = (height * (jobnr+1)) / nb_jobs; + +int32_t res; +int x, y, z; +uint32_t tmp1, tmp2; + if (!amount) { av_image_copy_plane(dst, dst_stride, src, src_stride, width, height); -return; +return 0; } for (y = 0; y < 2 * steps_y; y++) -memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); +memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); -for (y = -steps_y; y < height + steps_y; y++) { +// if this is not the first tile, we start from (slice_start - steps_y), +// so we can get smooth result at slice boundary +if (slice_start > steps_y) { +src += (slice_start - steps_y) * src_stride; +dst += (slice_start - steps_y) * dst_stride; +} + +for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) { if (y < height) src2 = src; -memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1)); +memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1)); for (x = -steps_x; x < width + steps_x; x++) { tmp1 = x <= 0 ? src2[0] : x >= width ? 
src2[width-1] : src2[x]; for (z = 0; z < steps_x * 2; z += 2) { -tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1; -tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2; +tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = tmp1; +tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = tmp2; } for (z = 0; z < steps_y * 2; z += 2) { -tmp2 = sc[z + 0][x + steps_x] + tmp1; sc[z + 0][x + steps_x] = tmp1; -tmp1 = sc[z + 1][x + steps_x] + tmp2; sc[z + 1][x + steps_x] = tmp2; +tmp2 = sc[sc_offset + z + 0][x + steps_x] + tmp1; sc[sc_offset + z + 0][x + steps_x] = tmp1; +tmp1 = sc[sc_offset + z + 1][x + steps_x] + tmp2; sc[sc_offset + z + 1][x + steps_x] = tmp2;
[FFmpeg-devel] [PATCH V3] lavfi/opencl: add nlmeans_opencl filter
Signed-off-by: Ruiling Song --- configure | 1 + doc/filters.texi| 4 + libavfilter/Makefile| 1 + libavfilter/allfilters.c| 1 + libavfilter/opencl/nlmeans.cl | 115 + libavfilter/opencl_source.h | 1 + libavfilter/vf_nlmeans_opencl.c | 443 7 files changed, 566 insertions(+) create mode 100644 libavfilter/opencl/nlmeans.cl create mode 100644 libavfilter/vf_nlmeans_opencl.c diff --git a/configure b/configure index d644a5b1d4..ee4041e5e0 100755 --- a/configure +++ b/configure @@ -3464,6 +3464,7 @@ mpdecimate_filter_select="pixelutils" minterpolate_filter_select="scene_sad" mptestsrc_filter_deps="gpl" negate_filter_deps="lut_filter" +nlmeans_opencl_filter_deps="opencl" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" diff --git a/doc/filters.texi b/doc/filters.texi index 3c15bb95f4..ee0db1809e 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -19240,6 +19240,10 @@ Make every semi-green pixel in the input transparent with some slight blending: @end example @end itemize +@section nlmeans_opencl + +Non-local Means denoise filter through OpenCL, this filter accepts same options as @ref{nlmeans}. + @section overlay_opencl Overlay one video on top of another. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 59d12ce069..3e409fc62c 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -295,6 +295,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o +OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index ae725cb0e0..fe0f8d7612 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -280,6 +280,7 @@ extern AVFilter ff_vf_mix; extern AVFilter ff_vf_mpdecimate; extern AVFilter ff_vf_negate; extern AVFilter ff_vf_nlmeans; +extern AVFilter ff_vf_nlmeans_opencl; extern AVFilter ff_vf_nnedi; extern AVFilter ff_vf_noformat; extern AVFilter ff_vf_noise; diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl new file mode 100644 index 00..72bd681fd6 --- /dev/null +++ b/libavfilter/opencl/nlmeans.cl @@ -0,0 +1,115 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +kernel void horiz_sum(__global uint4 *integral_img, + __read_only image2d_t src, + int width, + int height, + int4 dx, + int4 dy) +{ + +int y = get_global_id(0); +int work_size = get_global_size(0); + +uint4 sum = (uint4)(0); +float4 s2; +for (int i = 0; i < width; i++) { +float s1 = read_imagef(src, sampler, (int2)(i, y)).x; +s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x; +s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x; +s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x; +s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x; +sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255); +integral_img[y * width + i] = sum; +} +} + +kernel void vert_sum(__global uint4 *integral_img, + __global int *overflow, + int width, + int height) +{ +int x = get_global_id(0); +uint4 sum = 0; +for (int i = 0; i < height; i++) { +if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum)) +atomic_inc(overflow); +integral_img[i * width + x] += sum; +
[FFmpeg-devel] [PATCH] lavfi/gblur: doing several columns at the same time
Instead of doing each column one by one, doing several columns together gives about 30% better performance. Signed-off-by: Ruiling Song --- below is some of performance numbers(fps) on my i7-6770HQ (decode + gblur): resolution:480p | 720p | 1080p | 4k without patch: 393 | 146 | 71| 14 with patch:502 | 184 | 95| 18 libavfilter/vf_gblur.c | 62 -- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c index 5d05cac44c..9f07705ec4 100644 --- a/libavfilter/vf_gblur.c +++ b/libavfilter/vf_gblur.c @@ -108,6 +108,40 @@ static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int n return 0; } +static void do_vertical_columns(float *buffer, int width, int height, +int column_begin, int column_end, int steps, +float nu, float boundaryscale, int column_step) +{ +const int numpixels = width * height; +int i, x, k, step; +float *ptr; +for (x = column_begin; x < column_end;) { +for (step = 0; step < steps; step++) { +ptr = buffer + x; +for (k = 0; k < column_step; k++) { +ptr[k] *= boundaryscale; +} +/* Filter downwards */ +for (i = width; i < numpixels; i += width) { +for (k = 0; k < column_step; k++) { +ptr[i + k] += nu * ptr[i - width + k]; +} +} +i = numpixels - width; + +for (k = 0; k < column_step; k++) +ptr[i + k] *= boundaryscale; + +/* Filter upwards */ +for (; i > 0; i -= width) { +for (k = 0; k < column_step; k++) +ptr[i - width + k] += nu * ptr[i + k]; +} +} +x += column_step; +} +} + static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { GBlurContext *s = ctx->priv; @@ -117,31 +151,19 @@ static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_ const int slice_start = (width * jobnr ) / nb_jobs; const int slice_end = (width * (jobnr+1)) / nb_jobs; const float boundaryscale = s->boundaryscaleV; -const int numpixels = width * height; const int steps = s->steps; const float nu = s->nuV; float *buffer = s->buffer; -int i, x, step; -float 
*ptr; - -/* Filter vertically along each column */ -for (x = slice_start; x < slice_end; x++) { -for (step = 0; step < steps; step++) { -ptr = buffer + x; -ptr[0] *= boundaryscale; - -/* Filter downwards */ -for (i = width; i < numpixels; i += width) -ptr[i] += nu * ptr[i - width]; - -ptr[i = numpixels - width] *= boundaryscale; +int aligned_end; -/* Filter upwards */ -for (; i > 0; i -= width) -ptr[i - width] += nu * ptr[i]; -} -} +aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3); +/* Filter vertically along columns (process 8 columns in each step) */ +do_vertical_columns(buffer, width, height, slice_start, aligned_end, +steps, nu, boundaryscale, 8); +// Filter un-aligned columns one by one +do_vertical_columns(buffer, width, height, aligned_end, slice_end, +steps, nu, boundaryscale, 1); return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V2 2/2] lavfi/opencl: add nlmeans_opencl filter
Signed-off-by: Ruiling Song --- configure | 1 + doc/filters.texi| 4 + libavfilter/Makefile| 1 + libavfilter/allfilters.c| 1 + libavfilter/opencl/nlmeans.cl | 115 + libavfilter/opencl_source.h | 1 + libavfilter/vf_nlmeans_opencl.c | 442 7 files changed, 565 insertions(+) create mode 100644 libavfilter/opencl/nlmeans.cl create mode 100644 libavfilter/vf_nlmeans_opencl.c diff --git a/configure b/configure index 0cdf0ffa8a..93ebfd6784 100755 --- a/configure +++ b/configure @@ -3461,6 +3461,7 @@ mpdecimate_filter_select="pixelutils" minterpolate_filter_select="scene_sad" mptestsrc_filter_deps="gpl" negate_filter_deps="lut_filter" +nlmeans_opencl_filter_deps="opencl" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" diff --git a/doc/filters.texi b/doc/filters.texi index 867607d870..21c2c1a4b5 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -19030,6 +19030,10 @@ Apply erosion filter with threshold0 set to 30, threshold1 set 40, threshold2 se @end example @end itemize +@section nlmeans_opencl + +Non-local Means denoise filter through OpenCL, this filter accepts same options as @ref{nlmeans}. + @section overlay_opencl Overlay one video on top of another. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fef6ec5c55..92039bfdcf 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -291,6 +291,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o +OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f3c7..2a6390c92d 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -277,6 +277,7 @@ extern AVFilter ff_vf_mix; extern AVFilter ff_vf_mpdecimate; extern AVFilter ff_vf_negate; extern AVFilter ff_vf_nlmeans; +extern AVFilter ff_vf_nlmeans_opencl; extern AVFilter ff_vf_nnedi; extern AVFilter ff_vf_noformat; extern AVFilter ff_vf_noise; diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl new file mode 100644 index 00..72bd681fd6 --- /dev/null +++ b/libavfilter/opencl/nlmeans.cl @@ -0,0 +1,115 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +kernel void horiz_sum(__global uint4 *integral_img, + __read_only image2d_t src, + int width, + int height, + int4 dx, + int4 dy) +{ + +int y = get_global_id(0); +int work_size = get_global_size(0); + +uint4 sum = (uint4)(0); +float4 s2; +for (int i = 0; i < width; i++) { +float s1 = read_imagef(src, sampler, (int2)(i, y)).x; +s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x; +s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x; +s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x; +s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x; +sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255); +integral_img[y * width + i] = sum; +} +} + +kernel void vert_sum(__global uint4 *integral_img, + __global int *overflow, + int width, + int height) +{ +int x = get_global_id(0); +uint4 sum = 0; +for (int i = 0; i < height; i++) { +if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum)) +atomic_inc(overflow); +integral_img[i * width + x] += sum; +
[FFmpeg-devel] [PATCH V2 1/2] lavfi/opencl: add more opencl helper macro
Signed-off-by: Ruiling Song --- libavfilter/opencl.h | 38 ++ 1 file changed, 38 insertions(+) diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h index 0b06232ade..0fa5b49d3f 100644 --- a/libavfilter/opencl.h +++ b/libavfilter/opencl.h @@ -73,6 +73,44 @@ typedef struct OpenCLFilterContext { goto fail; \ } \ } while(0) +/** + * release an OpenCL Kernel + */ +#define CL_RELEASE_KERNEL(k) \ +do { \ +if (k) { \ +cle = clReleaseKernel(k); \ +if (cle != CL_SUCCESS)\ +av_log(avctx, AV_LOG_ERROR, "Failed to release " \ + "OpenCL kernel: %d.\n", cle); \ +} \ +} while(0) + +/** + * release an OpenCL Memory Object + */ +#define CL_RELEASE_MEMORY(m) \ +do { \ +if (m) { \ +cle = clReleaseMemObject(m); \ +if (cle != CL_SUCCESS)\ +av_log(avctx, AV_LOG_ERROR, "Failed to release " \ + "OpenCL memory: %d.\n", cle); \ +} \ +} while(0) + +/** + * release an OpenCL Command Queue + */ +#define CL_RELEASE_QUEUE(q) \ +do { \ +if (q) { \ +cle = clReleaseCommandQueue(q); \ +if (cle != CL_SUCCESS)\ +av_log(avctx, AV_LOG_ERROR, "Failed to release " \ + "cl command queue: %d.\n", cle); \ +} \ +} while(0) /** * Return that all inputs and outputs support only AV_PIX_FMT_OPENCL. -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavfi: add nlmeans_opencl filter
Signed-off-by: Ruiling Song --- This filter runs about 2x faster on integrated GPU than nlmeans on my Skylake CPU. Anybody like to give some comments? Ruiling configure | 1 + doc/filters.texi| 4 + libavfilter/Makefile| 1 + libavfilter/allfilters.c| 1 + libavfilter/opencl/nlmeans.cl | 108 + libavfilter/opencl_source.h | 1 + libavfilter/vf_nlmeans_opencl.c | 390 7 files changed, 506 insertions(+) create mode 100644 libavfilter/opencl/nlmeans.cl create mode 100644 libavfilter/vf_nlmeans_opencl.c diff --git a/configure b/configure index f6123f53e5..a233512491 100755 --- a/configure +++ b/configure @@ -3460,6 +3460,7 @@ mpdecimate_filter_select="pixelutils" minterpolate_filter_select="scene_sad" mptestsrc_filter_deps="gpl" negate_filter_deps="lut_filter" +nlmeans_opencl_filter_deps="opencl" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" diff --git a/doc/filters.texi b/doc/filters.texi index 867607d870..21c2c1a4b5 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -19030,6 +19030,10 @@ Apply erosion filter with threshold0 set to 30, threshold1 set 40, threshold2 se @end example @end itemize +@section nlmeans_opencl + +Non-local Means denoise filter through OpenCL, this filter accepts same options as @ref{nlmeans}. + @section overlay_opencl Overlay one video on top of another. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fef6ec5c55..92039bfdcf 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -291,6 +291,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o +OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f3c7..2a6390c92d 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -277,6 +277,7 @@ extern AVFilter ff_vf_mix; extern AVFilter ff_vf_mpdecimate; extern AVFilter ff_vf_negate; extern AVFilter ff_vf_nlmeans; +extern AVFilter ff_vf_nlmeans_opencl; extern AVFilter ff_vf_nnedi; extern AVFilter ff_vf_noformat; extern AVFilter ff_vf_noise; diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl new file mode 100644 index 00..dcb04834ca --- /dev/null +++ b/libavfilter/opencl/nlmeans.cl @@ -0,0 +1,108 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +kernel void horiz_sum(__global uint4 *ii, + __read_only image2d_t src, + int width, + int height, + int4 dx, + int4 dy) +{ + +int y = get_global_id(0); +int work_size = get_global_size(0); + +uint4 sum = (uint4)(0); +float4 s2; +for (int i = 0; i < width; i++) { +float s1 = read_imagef(src, sampler, (int2)(i, y)).x; +s2.x = read_imagef(src, sampler, (int2)(i+dx.x, y+dy.x)).x; +s2.y = read_imagef(src, sampler, (int2)(i+dx.y, y+dy.y)).x; +s2.z = read_imagef(src, sampler, (int2)(i+dx.z, y+dy.z)).x; +s2.w = read_imagef(src, sampler, (int2)(i+dx.w, y+dy.w)).x; +sum += convert_uint4((s1-s2)*(s1-s2) * 255*255); +ii[y * width + i] = sum; +} +} + +kernel void vert_sum(__global uint4 *ii, + int width, + int height) +{ +int x = get_global_id(0); +uint4 sum = 0; +for (int i = 0; i < height; i++) { +ii[i * width + x] += sum; +sum = ii[i * width + x]; +} +} + +kernel void weight_accum(global float *sum, globa
[FFmpeg-devel] [PATCH] MAINTAINERS: add myself for tonemap_opencl
Signed-off-by: Ruiling Song --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7ac2d22..412a739 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -362,6 +362,7 @@ Filters: vf_ssim.c Paul B Mahol vf_stereo3d.c Paul B Mahol vf_telecine.c Paul B Mahol + vf_tonemap_opencl.c Ruiling Song vf_yadif.cMichael Niedermayer vf_zoompan.c Paul B Mahol -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavfi/vf_hwmap: move some code into separate function
This is just code cleanup. No functional change. Signed-off-by: Ruiling Song --- libavfilter/vf_hwmap.c | 83 -- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/libavfilter/vf_hwmap.c b/libavfilter/vf_hwmap.c index 290559a..14276ce 100644 --- a/libavfilter/vf_hwmap.c +++ b/libavfilter/vf_hwmap.c @@ -50,6 +50,37 @@ static int hwmap_query_formats(AVFilterContext *avctx) return 0; } +static int create_hwframe_context(AVFilterContext *avctx, AVBufferRef *device, + int format, int sw_format, + int width, int height) +{ +HWMapContext *ctx = avctx->priv; +int err; +AVHWFramesContext *frames; + +ctx->hwframes_ref = av_hwframe_ctx_alloc(device); +if (!ctx->hwframes_ref) { +return AVERROR(ENOMEM); +} +frames = (AVHWFramesContext*)ctx->hwframes_ref->data; + +frames->format= format; +frames->sw_format = sw_format; +frames->width = width; +frames->height= height; + +if (avctx->extra_hw_frames >= 0) +frames->initial_pool_size = 2 + avctx->extra_hw_frames; + +err = av_hwframe_ctx_init(ctx->hwframes_ref); +if (err < 0) { +av_log(avctx, AV_LOG_ERROR, "Failed to initialise " + "hardware frames context: %d.\n", err); +return err; +} +return 0; +} + static int hwmap_config_output(AVFilterLink *outlink) { AVFilterContext *avctx = outlink->src; @@ -130,29 +161,12 @@ static int hwmap_config_output(AVFilterLink *outlink) // overwrite the input hwframe context with a derived context // mapped from that back to the source type. 
AVBufferRef *source; -AVHWFramesContext *frames; - -ctx->hwframes_ref = av_hwframe_ctx_alloc(device); -if (!ctx->hwframes_ref) { -err = AVERROR(ENOMEM); -goto fail; -} -frames = (AVHWFramesContext*)ctx->hwframes_ref->data; - -frames->format= outlink->format; -frames->sw_format = hwfc->sw_format; -frames->width = hwfc->width; -frames->height= hwfc->height; - -if (avctx->extra_hw_frames >= 0) -frames->initial_pool_size = 2 + avctx->extra_hw_frames; -err = av_hwframe_ctx_init(ctx->hwframes_ref); -if (err < 0) { -av_log(avctx, AV_LOG_ERROR, "Failed to initialise " - "target frames context: %d.\n", err); -goto fail; -} +err = create_hwframe_context(avctx, device, outlink->format, + hwfc->sw_format, hwfc->width, + hwfc->height); +if (err < 0) + goto fail; err = av_hwframe_ctx_create_derived(&source, inlink->format, @@ -212,29 +226,10 @@ static int hwmap_config_output(AVFilterLink *outlink) } ctx->reverse = 1; - -ctx->hwframes_ref = av_hwframe_ctx_alloc(device); -if (!ctx->hwframes_ref) { -err = AVERROR(ENOMEM); -goto fail; -} -hwfc = (AVHWFramesContext*)ctx->hwframes_ref->data; - -hwfc->format= outlink->format; -hwfc->sw_format = inlink->format; -hwfc->width = inlink->w; -hwfc->height= inlink->h; - -if (avctx->extra_hw_frames >= 0) -hwfc->initial_pool_size = 2 + avctx->extra_hw_frames; - -err = av_hwframe_ctx_init(ctx->hwframes_ref); -if (err < 0) { -av_log(avctx, AV_LOG_ERROR, "Failed to create frame " - "context for reverse mapping: %d.\n", err); +err = create_hwframe_context(avctx, device, outlink->format, + inlink->format, inlink->w, inlink->h); +if (err < 0) goto fail; -} - } else { av_log(avctx, AV_LOG_ERROR, "Mapping requires a hardware " "context (a device, or frames on input).\n"); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [RFC] lavfi: add scale_opencl filter.
Signed-off-by: Ruiling Song --- This patch depends on the colorspace patchset I sent before (https://patchwork.ffmpeg.org/patch/11820/) Although I am still working on some minor functionality, hope somebody could give some comments about the overall design. Ruiling configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/opencl/scale.cl | 252 libavfilter/opencl_source.h | 1 + libavfilter/vf_scale_opencl.c | 682 ++ 6 files changed, 939 insertions(+) create mode 100644 libavfilter/opencl/scale.cl create mode 100644 libavfilter/vf_scale_opencl.c diff --git a/configure b/configure index ec8f70d..5640137 100755 --- a/configure +++ b/configure @@ -3450,6 +3450,7 @@ rubberband_filter_deps="librubberband" sab_filter_deps="gpl swscale" scale2ref_filter_deps="swscale" scale_filter_deps="swscale" +scale_opencl_filter_deps="opencl" scale_qsv_filter_deps="libmfx" select_filter_select="scene_sad" sharpness_vaapi_filter_deps="vaapi" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index bc642ac..9de7d44 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -343,6 +343,8 @@ OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o \ cuda_check.o OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o +OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o opencl.o \ +opencl/scale.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER)+= vf_scale_vaapi.o scale.o vaapi_vpp.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f..5708d16 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -325,6 +325,7 @@ extern AVFilter ff_vf_sab; extern AVFilter ff_vf_scale; extern AVFilter ff_vf_scale_cuda; extern AVFilter ff_vf_scale_npp; +extern AVFilter ff_vf_scale_opencl; extern AVFilter ff_vf_scale_qsv; extern AVFilter ff_vf_scale_vaapi; 
extern AVFilter ff_vf_scale2ref; diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl new file mode 100644 index 000..5d3deda --- /dev/null +++ b/libavfilter/opencl/scale.cl @@ -0,0 +1,252 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +extern float3 yuv2rgb(float, float, float); +extern float3 rgb2yuv(float, float, float); + +const sampler_t sampler_nearest = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_NEAREST); + +const sampler_t sampler_linear = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP | + CLK_FILTER_LINEAR); + +float4 neighbor(image2d_t img, float vscale, +float hscale, int x, int y, +__constant float *coff_x, +__constant float *coff_y, +int2 filter_size) +{ +float xi = ((float)x + 0.5f) * hscale; +float yi = ((float)y + 0.5f) * vscale; + +return read_imagef(img, sampler_nearest, (float2)(xi, yi)); +} + +float4 bilinear(image2d_t img, float vscale, +float hscale, int x, int y, +__constant float *coff_x, +__constant float *coff_y, +int2 filter_size) +{ +float xi = ((float)x + 0.5f) * hscale; +float yi = ((float)y + 0.5f) * vscale; + +return read_imagef(img, sampler_linear, (float2)(xi, yi)); +} + +float4 generic_filter(image2d_t img, float vscale, float hscale, int x, int y, + __constant 
float *coff_x, __constant float *coff_y, + int2 filter_size) +{ +int2 dst_pos = (int2)(x, y); +float2 src_coord = (convert_float2(dst_pos) + 0.5f) * + (float2)(hscale, vscale); +int2 src_pos = convert_int2(floor(src_coord -
[FFmpeg-devel] [PATCH 4/5] lavfi/tonemap_opencl: reuse color matrix calculation from colorspace.c
Signed-off-by: Ruiling Song --- libavfilter/opencl/colorspace_common.cl | 25 - libavfilter/vf_tonemap_opencl.c | 64 +++-- 2 files changed, 29 insertions(+), 60 deletions(-) diff --git a/libavfilter/opencl/colorspace_common.cl b/libavfilter/opencl/colorspace_common.cl index 94a4dd0..1d68a54 100644 --- a/libavfilter/opencl/colorspace_common.cl +++ b/libavfilter/opencl/colorspace_common.cl @@ -39,31 +39,6 @@ constant const float ST2084_C1 = 0.8359375f; constant const float ST2084_C2 = 18.8515625f; constant const float ST2084_C3 = 18.6875f; -__constant float yuv2rgb_bt2020[] = { -1.0f, 0.0f, 1.4746f, -1.0f, -0.16455f, -0.57135f, -1.0f, 1.8814f, 0.0f -}; - -__constant float yuv2rgb_bt709[] = { -1.0f, 0.0f, 1.5748f, -1.0f, -0.18732f, -0.46812f, -1.0f, 1.8556f, 0.0f -}; - -__constant float rgb2yuv_bt709[] = { -0.2126f, 0.7152f, 0.0722f, --0.11457f, -0.38543f, 0.5f, -0.5f, -0.45415f, -0.04585f -}; - -__constant float rgb2yuv_bt2020[] ={ -0.2627f, 0.678f, 0.0593f, --0.1396f, -0.36037f, 0.5f, -0.5f, -0.4598f, -0.0402f, -}; - - float get_luma_dst(float3 c) { return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z; } diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c index ae3f98d..315ead4 100644 --- a/libavfilter/vf_tonemap_opencl.c +++ b/libavfilter/vf_tonemap_opencl.c @@ -18,7 +18,6 @@ #include #include "libavutil/avassert.h" -#include "libavutil/bprint.h" #include "libavutil/common.h" #include "libavutil/imgutils.h" #include "libavutil/mem.h" @@ -35,7 +34,6 @@ // TODO: // - separate peak-detection from tone-mapping kernel to solve //one-frame-delay issue. 
-// - import colorspace matrix generation from vf_colorspace.c // - more format support #define DETECTION_FRAMES 63 @@ -73,16 +71,6 @@ typedef struct TonemapOpenCLContext { cl_memutil_mem; } TonemapOpenCLContext; -static const char *yuv_coff[AVCOL_SPC_NB] = { -[AVCOL_SPC_BT709] = "rgb2yuv_bt709", -[AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020", -}; - -static const char *rgb_coff[AVCOL_SPC_NB] = { -[AVCOL_SPC_BT709] = "yuv2rgb_bt709", -[AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020", -}; - static const char *linearize_funcs[AVCOL_TRC_NB] = { [AVCOL_TRC_SMPTE2084] = "eotf_st2084", [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg", @@ -93,11 +81,6 @@ static const char *delinearize_funcs[AVCOL_TRC_NB] = { [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886", }; -static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = { -[AVCOL_SPC_BT709] = { 0.2126, 0.7152, 0.0722 }, -[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 }, -}; - static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = { [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, @@ -137,8 +120,8 @@ static int tonemap_opencl_init(AVFilterContext *avctx) { TonemapOpenCLContext *ctx = avctx->priv; int rgb2rgb_passthrough = 1; -double rgb2rgb[3][3]; -struct LumaCoefficients luma_src, luma_dst; +double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3]; +const struct LumaCoefficients *luma_src, *luma_dst; cl_int cle; int err; AVBPrint header; @@ -215,27 +198,37 @@ static int tonemap_opencl_init(AVFilterContext *avctx) if (rgb2rgb_passthrough) av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n"); -else { -av_bprintf(&header, "__constant float rgb2rgb[9] = {\n"); -av_bprintf(&header, "%.4ff, %.4ff, %.4ff,\n", - rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]); -av_bprintf(&header, "%.4ff, %.4ff, %.4ff,\n", - rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]); -av_bprintf(&header, "%.4ff, %.4ff, %.4ff};\n", - rgb2rgb[2][0], rgb2rgb[2][1], 
rgb2rgb[2][2]); +else +ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb); + + +luma_src = ff_get_luma_coefficients(ctx->colorspace_in); +if (!luma_src) { +err = AVERROR(EINVAL); +av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n", + ctx->colorspace_in, av_color_space_name(ctx->colorspace_in)); +goto fail; } -av_bprintf(&header, "#define rgb_matrix %s\n", - rgb_coff[ctx->colorspace_in]); -av_bprintf(&header, "#define yuv_matrix %s\n", - yuv_coff[ctx->colorspace_out]); +luma_dst = ff_get_luma_coefficients(ctx->colorspace_out); +if (!luma_dst) { +err = AVERROR(EINVAL); +av_log(avctx, AV_LOG_ERROR, &quo
[FFmpeg-devel] [PATCH 5/5] lavfi/colorspace_common: add ifdef check to be more compatible.
Some filters may not need to linearize/delinearize, and thus will not even define them. Add ifdef checks so they can easily re-use the .cl file. Signed-off-by: Ruiling Song --- libavfilter/opencl/colorspace_common.cl | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/libavfilter/opencl/colorspace_common.cl b/libavfilter/opencl/colorspace_common.cl index 1d68a54..ac911f0 100644 --- a/libavfilter/opencl/colorspace_common.cl +++ b/libavfilter/opencl/colorspace_common.cl @@ -124,10 +124,14 @@ float3 yuv2rgb(float y, float u, float v) { float3 yuv2lrgb(float3 yuv) { float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z); +#ifdef linearize float r = linearize(rgb.x); float g = linearize(rgb.y); float b = linearize(rgb.z); return (float3)(r, g, b); +#else +return rgb; +#endif } float3 rgb2yuv(float r, float g, float b) { @@ -151,19 +155,25 @@ float rgb2y(float r, float g, float b) { } float3 lrgb2yuv(float3 c) { +#ifdef delinearize float r = delinearize(c.x); float g = delinearize(c.y); float b = delinearize(c.z); - return rgb2yuv(r, g, b); +#else +return rgb2yuv(c.x, c.y, c.z); +#endif } float lrgb2y(float3 c) { +#ifdef delinearize float r = delinearize(c.x); float g = delinearize(c.y); float b = delinearize(c.z); - return rgb2y(r, g, b); +#else +return rgb2y(c.x, c.y, c.z); +#endif } float3 lrgb2lrgb(float3 c) { -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 3/5] lavfi/opencl: add ff_opencl_print_const_matrix_3x3()
This is used to print a 3x3 matrix into a part of OpenCL source code. Signed-off-by: Ruiling Song --- libavfilter/opencl.c | 13 + libavfilter/opencl.h | 8 2 files changed, 21 insertions(+) diff --git a/libavfilter/opencl.c b/libavfilter/opencl.c index ac5eec6..95f0bfc 100644 --- a/libavfilter/opencl.c +++ b/libavfilter/opencl.c @@ -337,3 +337,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, return 0; } + +void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str, + double mat[3][3]) +{ +int i, j; +av_bprintf(buf, "__constant float %s[9] = {\n", name_str); +for (i = 0; i < 3; i++) { +for (j = 0; j < 3; j++) +av_bprintf(buf, " %.5ff,", mat[i][j]); +av_bprintf(buf, "\n"); +} +av_bprintf(buf, "};\n"); +} diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h index 1b7f117..0b06232 100644 --- a/libavfilter/opencl.h +++ b/libavfilter/opencl.h @@ -25,6 +25,7 @@ // it was introduced in OpenCL 2.0. #define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#include "libavutil/bprint.h" #include "libavutil/buffer.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_opencl.h" @@ -124,5 +125,12 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment); +/** + * Print a 3x3 matrix into a buffer as __constant array, which could + * be included in an OpenCL program. +*/ + +void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str, + double mat[3][3]); #endif /* AVFILTER_OPENCL_H */ -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/5] lavfi/colorspace: move some functions to common file
These functions can be reused by other colorspace filters, so move them to common file. No functional changes. Signed-off-by: Ruiling Song --- libavfilter/colorspace.c| 71 libavfilter/colorspace.h| 4 +++ libavfilter/vf_colorspace.c | 80 +++-- 3 files changed, 79 insertions(+), 76 deletions(-) diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c index c668221..19616e4 100644 --- a/libavfilter/colorspace.c +++ b/libavfilter/colorspace.c @@ -93,6 +93,77 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, rgb2xyz[2][1] *= sg; rgb2xyz[2][2] *= sb; } +static const double ycgco_matrix[3][3] = +{ +{ 0.25, 0.5, 0.25 }, +{ -0.25, 0.5, -0.25 }, +{ 0.5, 0, -0.5 }, +}; + +static const double gbr_matrix[3][3] = +{ +{ 0,1, 0 }, +{ 0, -0.5, 0.5 }, +{ 0.5, -0.5, 0 }, +}; + +/* + * All constants explained in e.g. https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html + * The older ones (bt470bg/m) are also explained in their respective ITU docs + * (e.g. https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf) + * whereas the newer ones can typically be copied directly from wikipedia :) + */ +static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = { +[AVCOL_SPC_FCC]= { 0.30, 0.59, 0.11 }, +[AVCOL_SPC_BT470BG]= { 0.299, 0.587, 0.114 }, +[AVCOL_SPC_SMPTE170M] = { 0.299, 0.587, 0.114 }, +[AVCOL_SPC_BT709] = { 0.2126, 0.7152, 0.0722 }, +[AVCOL_SPC_SMPTE240M] = { 0.212, 0.701, 0.087 }, +[AVCOL_SPC_YCOCG] = { 0.25, 0.5,0.25 }, +[AVCOL_SPC_RGB]= { 1, 1, 1 }, +[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 }, +[AVCOL_SPC_BT2020_CL] = { 0.2627, 0.6780, 0.0593 }, +}; + +const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp) +{ +const struct LumaCoefficients *coeffs; + +if (csp >= AVCOL_SPC_NB) +return NULL; +coeffs = &luma_coefficients[csp]; +if (!coeffs->cr) +return NULL; + +return coeffs; +} + +void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs, + double rgb2yuv[3][3]) +{ +double 
bscale, rscale; + +// special ycgco matrix +if (coeffs->cr == 0.25 && coeffs->cg == 0.5 && coeffs->cb == 0.25) { +memcpy(rgb2yuv, ycgco_matrix, sizeof(double) * 9); +return; +} else if (coeffs->cr == 1 && coeffs->cg == 1 && coeffs->cb == 1) { +memcpy(rgb2yuv, gbr_matrix, sizeof(double) * 9); +return; +} + +rgb2yuv[0][0] = coeffs->cr; +rgb2yuv[0][1] = coeffs->cg; +rgb2yuv[0][2] = coeffs->cb; +bscale = 0.5 / (coeffs->cb - 1.0); +rscale = 0.5 / (coeffs->cr - 1.0); +rgb2yuv[1][0] = bscale * coeffs->cr; +rgb2yuv[1][1] = bscale * coeffs->cg; +rgb2yuv[1][2] = 0.5; +rgb2yuv[2][0] = 0.5; +rgb2yuv[2][1] = rscale * coeffs->cg; +rgb2yuv[2][2] = rscale * coeffs->cb; +} double ff_determine_signal_peak(AVFrame *in) { diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h index 9366818..459a5df 100644 --- a/libavfilter/colorspace.h +++ b/libavfilter/colorspace.h @@ -44,6 +44,10 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, const struct WhitepointCoefficients *wp, double rgb2xyz[3][3]); +const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp); +void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs, + double rgb2yuv[3][3]); + double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c index f8d1ecd..2120199 100644 --- a/libavfilter/vf_colorspace.c +++ b/libavfilter/vf_colorspace.c @@ -170,78 +170,6 @@ typedef struct ColorSpaceContext { // FIXME dithering if bitdepth goes down? // FIXME bitexact for fate integration? -static const double ycgco_matrix[3][3] = -{ -{ 0.25, 0.5, 0.25 }, -{ -0.25, 0.5, -0.25 }, -{ 0.5, 0, -0.5 }, -}; - -static const double gbr_matrix[3][3] = -{ -{ 0,1, 0 }, -{ 0, -0.5, 0.5 }, -{ 0.5, -0.5, 0 }, -}; - -/* - * All constants explained in e.g. 
https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html - * The older ones (bt470bg/m) are also explained in their respective ITU docs - * (e.g. https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf) - * whereas the newer ones can typically be copied directly from wikipedia :) - */ -static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = { -[AVCOL_SPC_FCC]= { 0.30, 0.59, 0.11 }, -[AVCOL_SPC_BT470BG]= { 0.299, 0.587, 0.
[FFmpeg-devel] [PATCH 1/5] lavu/opencl: replace va_ext.h with standard name
Khronos OpenCL header (https://github.com/KhronosGroup/OpenCL-Headers) uses cl_va_api_media_sharing_intel.h. And Intel's official OpenCL driver for Intel GPU (https://github.com/intel/compute-runtime) was compiled against Khronos OpenCL header. So it's better to align with Khronos. Signed-off-by: Ruiling Song --- configure| 2 +- libavutil/hwcontext_opencl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index c2b8fac..48fdc8e 100755 --- a/configure +++ b/configure @@ -6427,7 +6427,7 @@ fi if enabled_all opencl vaapi ; then enabled opencl_drm_beignet && enable opencl_vaapi_beignet -check_type "CL/cl.h CL/va_ext.h" "clCreateFromVA_APIMediaSurfaceINTEL_fn" && +check_type "CL/cl.h CL/cl_va_api_media_sharing_intel.h" "clCreateFromVA_APIMediaSurfaceINTEL_fn" && enable opencl_vaapi_intel_media fi diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c index d3df622..b116c5b 100644 --- a/libavutil/hwcontext_opencl.c +++ b/libavutil/hwcontext_opencl.c @@ -50,7 +50,7 @@ #include #endif #include -#include +#include #include "hwcontext_vaapi.h" #endif -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] lavu: relax the condition to do hwframe unmapping.
This patch aims to fix the failure of hwmap/hwunmap when working with filters in passthrough mode, such as transpose_opencl: [vaapi_frame] "hwmap,transpose_opencl=passthrough=landscape, hwmap=derive_device=vaapi:reverse=1" [vaapi_frame] If the frame meets the pass-through criteria, then the output of the first hwmap will go directly into the input of the second hwmap. What we need to do here is simply unmap the frame. The current issue is that when we try to do the unmap in the frame context of the second hwmap, it fails the check in av_hwframe_map(), which requires the original hw_frames_ctx to be the same as the destination hw_frames_ctx. But I think that if we are trying to map to the same device as the original device_ctx, then we can just do the unmap. Signed-off-by: Ruiling Song --- I am not sure whether there are any concerns or side effects of doing it like this. The first idea that came up to fix the issue was to do the check against internal->source_frames in vf_hwmap.c, but I found that this is not accessible outside libavutil, so I used this fix instead. I hope to have your comments and discussion. 
Ruiling libavutil/hwcontext.c | 9 + libavutil/hwcontext.h | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index f1e404a..a006212 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -739,20 +739,21 @@ fail: int av_hwframe_map(AVFrame *dst, const AVFrame *src, int flags) { -AVHWFramesContext *src_frames, *dst_frames; +AVHWFramesContext *src_frames, *dst_frames, *src_src = NULL; HWMapDescriptor *hwmap; int ret; if (src->hw_frames_ctx && dst->hw_frames_ctx) { src_frames = (AVHWFramesContext*)src->hw_frames_ctx->data; dst_frames = (AVHWFramesContext*)dst->hw_frames_ctx->data; +if (src_frames->internal->source_frames) +src_src = +(AVHWFramesContext*)src_frames->internal->source_frames->data; if ((src_frames == dst_frames && src->format == dst_frames->sw_format && dst->format == dst_frames->format) || -(src_frames->internal->source_frames && - src_frames->internal->source_frames->data == - (uint8_t*)dst_frames)) { +(src_src && src_src->device_ctx == dst_frames->device_ctx)) { // This is an unmap operation. We don't need to directly // do anything here other than fill in the original frame, // because the real unmap will be invoked when the last diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h index f5a4b62..efe3988 100644 --- a/libavutil/hwcontext.h +++ b/libavutil/hwcontext.h @@ -528,9 +528,9 @@ enum { * by av_frame_alloc()). src should have an associated hwframe context, and * dst may optionally have a format and associated hwframe context. * - * If src was created by mapping a frame from the hwframe context of dst, - * then this function undoes the mapping - dst is replaced by a reference to - * the frame that src was originally mapped from. + * If src was created by mapping a frame from a hwframe context which shares the + * same device_ctx with dst, then this function undoes the mapping - dst is + * replaced by a reference to the frame that src was originally mapped from. 
* * If both src and dst have an associated hwframe context, then this function * attempts to map the src frame from its hardware context to that of dst and -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/2] lavfi/vf_hwmap: make hwunmap from software frame work.
This patch was used to fix the second hwmap filter issue: [vaapi_frame] hwmap [software filters] hwmap [vaapi_frame] For such case, we also need to allocate the hardware frame and map it back to software. Signed-off-by: Ruiling Song --- libavfilter/vf_hwmap.c | 125 + 1 file changed, 75 insertions(+), 50 deletions(-) diff --git a/libavfilter/vf_hwmap.c b/libavfilter/vf_hwmap.c index 290559a..03cb325 100644 --- a/libavfilter/vf_hwmap.c +++ b/libavfilter/vf_hwmap.c @@ -50,6 +50,36 @@ static int hwmap_query_formats(AVFilterContext *avctx) return 0; } +static int create_hwframe_context(HWMapContext *ctx, AVFilterContext *avctx, + AVBufferRef *device, int format, + int sw_format, int width, int height) +{ +int err; +AVHWFramesContext *frames; + +ctx->hwframes_ref = av_hwframe_ctx_alloc(device); +if (!ctx->hwframes_ref) { +return AVERROR(ENOMEM); +} +frames = (AVHWFramesContext*)ctx->hwframes_ref->data; + +frames->format= format; +frames->sw_format = sw_format; +frames->width = width; +frames->height= height; + +if (avctx->extra_hw_frames >= 0) +frames->initial_pool_size = 2 + avctx->extra_hw_frames; + +err = av_hwframe_ctx_init(ctx->hwframes_ref); +if (err < 0) { +av_log(avctx, AV_LOG_ERROR, "Failed to initialise " + "target frames context: %d.\n", err); +return err; +} +return 0; +} + static int hwmap_config_output(AVFilterLink *outlink) { AVFilterContext *avctx = outlink->src; @@ -130,29 +160,11 @@ static int hwmap_config_output(AVFilterLink *outlink) // overwrite the input hwframe context with a derived context // mapped from that back to the source type. 
AVBufferRef *source; -AVHWFramesContext *frames; - -ctx->hwframes_ref = av_hwframe_ctx_alloc(device); -if (!ctx->hwframes_ref) { -err = AVERROR(ENOMEM); +err = create_hwframe_context(ctx, avctx, device, outlink->format, + hwfc->sw_format, hwfc->width, + hwfc->height); +if (err < 0) goto fail; -} -frames = (AVHWFramesContext*)ctx->hwframes_ref->data; - -frames->format= outlink->format; -frames->sw_format = hwfc->sw_format; -frames->width = hwfc->width; -frames->height= hwfc->height; - -if (avctx->extra_hw_frames >= 0) -frames->initial_pool_size = 2 + avctx->extra_hw_frames; - -err = av_hwframe_ctx_init(ctx->hwframes_ref); -if (err < 0) { -av_log(avctx, AV_LOG_ERROR, "Failed to initialise " - "target frames context: %d.\n", err); -goto fail; -} err = av_hwframe_ctx_create_derived(&source, inlink->format, @@ -175,10 +187,20 @@ static int hwmap_config_output(AVFilterLink *outlink) inlink->hw_frames_ctx = source; } else if ((outlink->format == hwfc->format && -inlink->format == hwfc->sw_format) || - inlink->format == hwfc->format) { -// Map from a hardware format to a software format, or -// undo an existing such mapping. 
+inlink->format == hwfc->sw_format)) { +// unmap a software frame back to hardware +ctx->reverse = 1; +// incase user does not provide filter device, use the device_ref +// from inlink +if (!device) +device = hwfc->device_ref; + +err = create_hwframe_context(ctx, avctx, device, outlink->format, + inlink->format, inlink->w, inlink->h); +if (err < 0) +goto fail; +} else if (inlink->format == hwfc->format) { +// Map from a hardware format to a software format ctx->hwframes_ref = av_buffer_ref(inlink->hw_frames_ctx); if (!ctx->hwframes_ref) { @@ -212,29 +234,10 @@ static int hwmap_config_output(AVFilterLink *outlink) } ctx->reverse = 1; - -ctx->hwframes_ref = av_hwframe_ctx_alloc(device); -if (!ctx->hwframes_ref) { -err = AVERROR(ENOMEM); -goto fail; -} -hwfc = (AVHWFramesContext*)ctx->hwframes_ref->data; - -hwfc->format= outlink->format; -hwfc->sw_format = inlink->format; -
[FFmpeg-devel] [PATCH] lavfi/tonemap_opencl: reuse matrix calculation from vf_colorspace
As these functions are moved to shared file, other colorspace-related filters could also leverage the code. Signed-off-by: Ruiling Song --- libavfilter/colorspace.c| 71 + libavfilter/colorspace.h| 4 ++ libavfilter/opencl/colorspace_common.cl | 25 --- libavfilter/vf_colorspace.c | 80 ++--- libavfilter/vf_tonemap_opencl.c | 62 +++-- 5 files changed, 106 insertions(+), 136 deletions(-) diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c index c668221..19616e4 100644 --- a/libavfilter/colorspace.c +++ b/libavfilter/colorspace.c @@ -93,6 +93,77 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, rgb2xyz[2][1] *= sg; rgb2xyz[2][2] *= sb; } +static const double ycgco_matrix[3][3] = +{ +{ 0.25, 0.5, 0.25 }, +{ -0.25, 0.5, -0.25 }, +{ 0.5, 0, -0.5 }, +}; + +static const double gbr_matrix[3][3] = +{ +{ 0,1, 0 }, +{ 0, -0.5, 0.5 }, +{ 0.5, -0.5, 0 }, +}; + +/* + * All constants explained in e.g. https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html + * The older ones (bt470bg/m) are also explained in their respective ITU docs + * (e.g. 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf) + * whereas the newer ones can typically be copied directly from wikipedia :) + */ +static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = { +[AVCOL_SPC_FCC]= { 0.30, 0.59, 0.11 }, +[AVCOL_SPC_BT470BG]= { 0.299, 0.587, 0.114 }, +[AVCOL_SPC_SMPTE170M] = { 0.299, 0.587, 0.114 }, +[AVCOL_SPC_BT709] = { 0.2126, 0.7152, 0.0722 }, +[AVCOL_SPC_SMPTE240M] = { 0.212, 0.701, 0.087 }, +[AVCOL_SPC_YCOCG] = { 0.25, 0.5,0.25 }, +[AVCOL_SPC_RGB]= { 1, 1, 1 }, +[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 }, +[AVCOL_SPC_BT2020_CL] = { 0.2627, 0.6780, 0.0593 }, +}; + +const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp) +{ +const struct LumaCoefficients *coeffs; + +if (csp >= AVCOL_SPC_NB) +return NULL; +coeffs = &luma_coefficients[csp]; +if (!coeffs->cr) +return NULL; + +return coeffs; +} + +void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs, + double rgb2yuv[3][3]) +{ +double bscale, rscale; + +// special ycgco matrix +if (coeffs->cr == 0.25 && coeffs->cg == 0.5 && coeffs->cb == 0.25) { +memcpy(rgb2yuv, ycgco_matrix, sizeof(double) * 9); +return; +} else if (coeffs->cr == 1 && coeffs->cg == 1 && coeffs->cb == 1) { +memcpy(rgb2yuv, gbr_matrix, sizeof(double) * 9); +return; +} + +rgb2yuv[0][0] = coeffs->cr; +rgb2yuv[0][1] = coeffs->cg; +rgb2yuv[0][2] = coeffs->cb; +bscale = 0.5 / (coeffs->cb - 1.0); +rscale = 0.5 / (coeffs->cr - 1.0); +rgb2yuv[1][0] = bscale * coeffs->cr; +rgb2yuv[1][1] = bscale * coeffs->cg; +rgb2yuv[1][2] = 0.5; +rgb2yuv[2][0] = 0.5; +rgb2yuv[2][1] = rscale * coeffs->cg; +rgb2yuv[2][2] = rscale * coeffs->cb; +} double ff_determine_signal_peak(AVFrame *in) { diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h index 9366818..459a5df 100644 --- a/libavfilter/colorspace.h +++ b/libavfilter/colorspace.h @@ -44,6 +44,10 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs, const struct 
WhitepointCoefficients *wp, double rgb2xyz[3][3]); +const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp); +void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs, + double rgb2yuv[3][3]); + double ff_determine_signal_peak(AVFrame *in); void ff_update_hdr_metadata(AVFrame *in, double peak); diff --git a/libavfilter/opencl/colorspace_common.cl b/libavfilter/opencl/colorspace_common.cl index 94a4dd0..1d68a54 100644 --- a/libavfilter/opencl/colorspace_common.cl +++ b/libavfilter/opencl/colorspace_common.cl @@ -39,31 +39,6 @@ constant const float ST2084_C1 = 0.8359375f; constant const float ST2084_C2 = 18.8515625f; constant const float ST2084_C3 = 18.6875f; -__constant float yuv2rgb_bt2020[] = { -1.0f, 0.0f, 1.4746f, -1.0f, -0.16455f, -0.57135f, -1.0f, 1.8814f, 0.0f -}; - -__constant float yuv2rgb_bt709[] = { -1.0f, 0.0f, 1.5748f, -1.0f, -0.18732f, -0.46812f, -1.0f, 1.8556f, 0.0f -}; - -__constant float rgb2yuv_bt709[] = { -0.2126f, 0.7152f, 0.0722f, --0.11457f, -0.38543f, 0.5f, -0.5f, -0.45415f, -0.04585f -}; - -__constant float rgb2yuv_bt2020[] ={ -0.2627f, 0.678f, 0.0593f, --0.1396f, -0.36037f, 0.5f, -0.5f, -0.4598f, -0.0402f, -}; - - float get_luma_dst(float3 c) { ret
[FFmpeg-devel] [PATCH V2] lavf: add transpose_opencl filter
Signed-off-by: Ruiling Song --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/opencl/transpose.cl | 35 + libavfilter/opencl_source.h | 1 + libavfilter/transpose.h | 34 + libavfilter/vf_transpose.c| 14 +- libavfilter/vf_transpose_opencl.c | 288 ++ 8 files changed, 362 insertions(+), 13 deletions(-) create mode 100644 libavfilter/opencl/transpose.cl create mode 100644 libavfilter/transpose.h create mode 100644 libavfilter/vf_transpose_opencl.c diff --git a/configure b/configure index b4f944c..dcb3f5f 100755 --- a/configure +++ b/configure @@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" tonemap_opencl_filter_deps="opencl const_nan" +transpose_opencl_filter_deps="opencl" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 1895fa2..6e26581 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o +OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER) += vf_transpose_opencl.o opencl.o opencl/transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNSHARP_FILTER)+= vf_unsharp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 837c99e..a600069 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_tpad; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_transpose_npp; +extern AVFilter ff_vf_transpose_opencl; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; extern AVFilter 
ff_vf_unsharp; diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl new file mode 100644 index 000..e6388ab --- /dev/null +++ b/libavfilter/opencl/transpose.cl @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +kernel void transpose(__write_only image2d_t dst, + __read_only image2d_t src, + int dir) { +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +int2 size = get_image_dim(dst); +int x = get_global_id(0); +int y = get_global_id(1); + +int xin = (dir & 2) ? (size.y - 1 - y) : y; +int yin = (dir & 1) ? 
(size.x - 1 - x) : x; +float4 data = read_imagef(src, sampler, (int2)(xin, yin)); + +if (x < size.x && y < size.y) +write_imagef(dst, (int2)(x, y), data); +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 2f67d89..4118138 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution; extern const char *ff_opencl_source_neighbor; extern const char *ff_opencl_source_overlay; extern const char *ff_opencl_source_tonemap; +extern const char *ff_opencl_source_transpose; extern const char *ff_opencl_source_unsharp; #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h new file mode 100644 index 000..d4bb4da --- /dev/null +++ b/libavfilter/transpose.h @@ -0,0 +1,34 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is
[FFmpeg-devel] [PATCH] lavf: add transpose_opencl filter
Signed-off-by: Ruiling Song --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/opencl/transpose.cl | 35 + libavfilter/opencl_source.h | 1 + libavfilter/transpose.h | 34 + libavfilter/vf_transpose.c| 14 +- libavfilter/vf_transpose_opencl.c | 294 ++ 8 files changed, 368 insertions(+), 13 deletions(-) create mode 100644 libavfilter/opencl/transpose.cl create mode 100644 libavfilter/transpose.h create mode 100644 libavfilter/vf_transpose_opencl.c diff --git a/configure b/configure index b4f944c..dcb3f5f 100755 --- a/configure +++ b/configure @@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" tonemap_opencl_filter_deps="opencl const_nan" +transpose_opencl_filter_deps="opencl" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 1895fa2..6e26581 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o +OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER) += vf_transpose_opencl.o opencl.o opencl/transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNSHARP_FILTER)+= vf_unsharp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 837c99e..a600069 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_tpad; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_transpose_npp; +extern AVFilter ff_vf_transpose_opencl; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; extern AVFilter 
ff_vf_unsharp; diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl new file mode 100644 index 000..e6388ab --- /dev/null +++ b/libavfilter/opencl/transpose.cl @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +kernel void transpose(__write_only image2d_t dst, + __read_only image2d_t src, + int dir) { +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +int2 size = get_image_dim(dst); +int x = get_global_id(0); +int y = get_global_id(1); + +int xin = (dir & 2) ? (size.y - 1 - y) : y; +int yin = (dir & 1) ? 
(size.x - 1 - x) : x; +float4 data = read_imagef(src, sampler, (int2)(xin, yin)); + +if (x < size.x && y < size.y) +write_imagef(dst, (int2)(x, y), data); +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 2f67d89..4118138 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution; extern const char *ff_opencl_source_neighbor; extern const char *ff_opencl_source_overlay; extern const char *ff_opencl_source_tonemap; +extern const char *ff_opencl_source_transpose; extern const char *ff_opencl_source_unsharp; #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h new file mode 100644 index 000..da8b28e --- /dev/null +++ b/libavfilter/transpose.h @@ -0,0 +1,34 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is
[FFmpeg-devel] [PATCH 3/4] doc/filters: add tonemap_opencl document.
Signed-off-by: Ruiling Song --- doc/filters.texi | 96 1 file changed, 96 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index 83df460..f884ba4 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -16387,6 +16387,7 @@ tmix=frames=3:weights="-1 2 -1":scale=1 @end example @end itemize +@anchor{tonemap} @section tonemap Tone map colors from different dynamic ranges. @@ -18440,6 +18441,101 @@ Apply sobel operator with scale set to 2 and delta set to 10 @end example @end itemize +@section tonemap_opencl + +Perform HDR(PQ/HLG) to SDR conversion with tone-mapping. + +It accepts the following parameters: + +@table @option +@item tonemap +Specify the tone-mapping operator to be used. Same as tonemap option in @ref{tonemap}. + +@item param +Tune the tone mapping algorithm. Same as param option in @ref{tonemap}. + +@item desat +Apply desaturation for highlights that exceed this level of brightness. The +higher the parameter, the more color information will be preserved. This +setting helps prevent unnaturally blown-out colors for super-highlights, by +(smoothly) turning into white instead. This makes images feel more natural, +at the cost of reducing information about out-of-range colors. + +The default value is 0.5, and the algorithm here is a little different from +the cpu version tonemap currently. A setting of 0.0 disables this option. + +@item threshold +The tonemapping algorithm parameters are fine-tuned for each scene. And a threshold +is used to detect whether the scene has changed or not. If the distance between +the current frame average brightness and the current running average exceeds +a threshold value, we would re-calculate scene average and peak brightness. +The default value is 0.2. + +@item format +Specify the output pixel format. + +Currently supported formats are: +@table @var +@item p010 +@item nv12 +@end table + +@item range, r +Set the output color range. 
+ +Possible values are: +@table @var +@item tv/mpeg +@item pc/jpeg +@end table + +Default is same as input. + +@item primaries, p +Set the output color primaries. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@item transfer, t +Set the output transfer characteristics. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is bt709. + +@item matrix, m +Set the output colorspace matrix. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@end table + +@subsection Example + +@itemize +@item +Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using linear operator. +@example +-i INPUT -vf "format=p010,hwupload,tonemap_opencl=t=bt2020:tonemap=linear:format=p010,hwdownload,format=p010" OUTPUT +@end example +@end itemize + @section unsharp_opencl Sharpen or blur the input video. -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/4] lavfi/opencl: Handle overlay input formats correctly.
The main input may have alpha channel, we just ignore it. Also add some checks for incompatible input formats. Signed-off-by: Ruiling Song --- libavfilter/vf_overlay_opencl.c | 58 - 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c index e9c8532..320c1a5 100644 --- a/libavfilter/vf_overlay_opencl.c +++ b/libavfilter/vf_overlay_opencl.c @@ -37,7 +37,7 @@ typedef struct OverlayOpenCLContext { FFFrameSync fs; -int nb_planes; +int nb_color_planes; int x_subsample; int y_subsample; int alpha_separate; @@ -46,6 +46,22 @@ typedef struct OverlayOpenCLContext { int y_position; } OverlayOpenCLContext; +static int has_planar_alpha(const AVPixFmtDescriptor *fmt) { +int nb_components; +int has_alpha = !!(fmt->flags & AV_PIX_FMT_FLAG_ALPHA); +if (!has_alpha) return 0; + +nb_components = fmt->nb_components; +// PAL8 +if (nb_components < 2) return 0; + +if (fmt->comp[nb_components - 1].plane > +fmt->comp[nb_components - 2].plane) +return 1; +else +return 0; +} + static int overlay_opencl_load(AVFilterContext *avctx, enum AVPixelFormat main_format, enum AVPixelFormat overlay_format) @@ -55,10 +71,13 @@ static int overlay_opencl_load(AVFilterContext *avctx, const char *source = ff_opencl_source_overlay; const char *kernel; const AVPixFmtDescriptor *main_desc, *overlay_desc; -int err, i, main_planes, overlay_planes; +int err, i, main_planes, overlay_planes, overlay_alpha, +main_planar_alpha, overlay_planar_alpha; main_desc= av_pix_fmt_desc_get(main_format); overlay_desc = av_pix_fmt_desc_get(overlay_format); +overlay_alpha = !!(overlay_desc->flags & AV_PIX_FMT_FLAG_ALPHA); +main_planar_alpha = has_planar_alpha(main_desc); main_planes = overlay_planes = 0; for (i = 0; i < main_desc->nb_components; i++) @@ -68,7 +87,7 @@ static int overlay_opencl_load(AVFilterContext *avctx, overlay_planes = FFMAX(overlay_planes, overlay_desc->comp[i].plane + 1); -ctx->nb_planes = main_planes; +ctx->nb_color_planes = 
main_planar_alpha ? (main_planes - 1) : main_planes; ctx->x_subsample = 1 << main_desc->log2_chroma_w; ctx->y_subsample = 1 << main_desc->log2_chroma_h; @@ -80,15 +99,30 @@ static int overlay_opencl_load(AVFilterContext *avctx, ctx->x_subsample, ctx->y_subsample); } -if (main_planes == overlay_planes) { -if (main_desc->nb_components == overlay_desc->nb_components) -kernel = "overlay_no_alpha"; -else -kernel = "overlay_internal_alpha"; +if ((main_desc->flags & AV_PIX_FMT_FLAG_RGB) != +(overlay_desc->flags & AV_PIX_FMT_FLAG_RGB)) { +av_log(avctx, AV_LOG_ERROR, "mixed YUV/RGB input formats.\n"); +return AVERROR(EINVAL); +} + +if (main_desc->log2_chroma_w != overlay_desc->log2_chroma_w || +main_desc->log2_chroma_h != overlay_desc->log2_chroma_h) { +av_log(avctx, AV_LOG_ERROR, "incompatible chroma sub-sampling.\n"); +return AVERROR(EINVAL); +} + +if (!overlay_alpha) { ctx->alpha_separate = 0; +kernel = "overlay_no_alpha"; } else { -kernel = "overlay_external_alpha"; -ctx->alpha_separate = 1; +overlay_planar_alpha = has_planar_alpha(overlay_desc); +if (overlay_planar_alpha) { +ctx->alpha_separate = 1; +kernel = "overlay_external_alpha"; +} else { +ctx->alpha_separate = 0; +kernel = "overlay_internal_alpha"; +} } av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); @@ -155,7 +189,7 @@ static int overlay_opencl_blend(FFFrameSync *fs) goto fail; } -for (plane = 0; plane < ctx->nb_planes; plane++) { +for (plane = 0; plane < ctx->nb_color_planes; plane++) { kernel_arg = 0; mem = (cl_mem)output->data[plane]; @@ -171,7 +205,7 @@ static int overlay_opencl_blend(FFFrameSync *fs) kernel_arg++; if (ctx->alpha_separate) { -mem = (cl_mem)input_overlay->data[ctx->nb_planes]; +mem = (cl_mem)input_overlay->data[ctx->nb_color_planes]; CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); kernel_arg++; } -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/4] doc/filters: add document for opencl filters
Signed-off-by: Danil Iashchenko Signed-off-by: Ruiling Song --- Seems like Danil is not working on this recently. So I re-submit this patch to address the comment over overlay_opencl. Thanks! Ruiling doc/filters.texi | 486 +++ 1 file changed, 486 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index cadf78c..83df460 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10485,6 +10485,7 @@ A floating point number which specifies chroma temporal strength. It defaults to @var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}. @end table +@anchor{hwdownload} @section hwdownload Download hardware frames to system memory. @@ -10575,6 +10576,7 @@ ways if there are any additional constraints on that filter's output. Do not use it without fully understanding the implications of its use. @end table +@anchor{hwupload} @section hwupload Upload system memory frames to hardware surfaces. @@ -18014,6 +18016,490 @@ pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1. @c man end VIDEO FILTERS +@chapter OpenCL Video Filters +@c man begin OPENCL VIDEO FILTERS + +Below is a description of the currently available OpenCL video filters. + +To enable compilation of these filters you need to configure FFmpeg with +@code{--enable-opencl}. + +Running OpenCL filters requires you to initialize a hardware device and to pass that device to all filters in any filter graph. +@table @option + +@item -init_hw_device opencl[=@var{name}][:@var{device}[,@var{key=value}...]] +Initialise a new hardware device of type @var{opencl} called @var{name}, using the +given device parameters. + +@item -filter_hw_device @var{name} +Pass the hardware device called @var{name} to all filters in any filter graph. + +@end table + +For more detailed information see @url{https://www.ffmpeg.org/ffmpeg.html#Advanced-Video-options} + +@itemize +@item +Example of choosing the first device on the second platform and running avgblur_opencl filter with default parameters on it. 
+@example +-init_hw_device opencl=gpu:1.0 -filter_hw_device gpu -i INPUT -vf "hwupload, avgblur_opencl, hwdownload" OUTPUT +@end example +@end itemize + +Since OpenCL filters are not able to access frame data in normal memory, all frame data needs to be uploaded(@ref{hwupload}) to hardware surfaces connected to the appropriate device before being used and then downloaded(@ref{hwdownload}) back to normal memory. Note that @ref{hwupload} will upload to a surface with the same layout as the software frame, so it may be necessary to add a @ref{format} filter immediately before to get the input into the right format and @ref{hwdownload} does not support all formats on the output - it may be necessary to insert an additional @ref{format} filter immediately following in the graph to get the output in a supported format. + +@section avgblur_opencl + +Apply average blur filter. + +The filter accepts the following options: + +@table @option +@item sizeX +Set horizontal radius size. +Range is @code{[1, 1024]} and default value is @code{1}. + +@item planes +Set which planes to filter. Default value is @code{0xf}, by which all planes are processed. + +@item sizeY +Set vertical radius size. Range is @code{[1, 1024]} and default value is @code{0}. If zero, @code{sizeX} value will be used. +@end table + +@subsection Example + +@itemize +@item +Apply average blur filter with horizontal and vertical size of 3, setting each pixel of the output to the average value of the 7x7 region centered on it in the input. For pixels on the edges of the image, the region does not extend beyond the image boundaries, and so out-of-range coordinates are not used in the calculations. +@example +-i INPUT -vf "hwupload, avgblur_opencl=3, hwdownload" OUTPUT +@end example +@end itemize + +@section boxblur_opencl + +Apply a boxblur algorithm to the input video. 
+ +It accepts the following parameters: + +@table @option + +@item luma_radius, lr +@item luma_power, lp +@item chroma_radius, cr +@item chroma_power, cp +@item alpha_radius, ar +@item alpha_power, ap + +@end table + +A description of the accepted options follows. + +@table @option +@item luma_radius, lr +@item chroma_radius, cr +@item alpha_radius, ar +Set an expression for the box radius in pixels used for blurring the +corresponding input plane. + +The radius value must be a non-negative number, and must not be +greater than the value of the expression @code{min(w,h)/2} for the +luma and alpha planes, and of @code{min(cw,ch)/2} for the chroma +planes. + +Default value for @option{luma_radius} is "2". If not specified, +@option{chroma_radius} and @option{alpha_radius} default to the +corresponding value set for @option{luma_radius}. + +The expressions can contain the following constants: +@table @option +@item w +@item h +The input width and height in pixels. + +@item cw +@item ch +The input chroma image wid
[FFmpeg-devel] [PATCH 4/4] lavfi/opencl: remove peak option of tonemap_opencl
Since the filter auto-calculates the peak value, the option does not work as expected. So, remove it. Signed-off-by: Ruiling Song --- libavfilter/vf_tonemap_opencl.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c index cd293c2..88b3107 100644 --- a/libavfilter/vf_tonemap_opencl.c +++ b/libavfilter/vf_tonemap_opencl.c @@ -62,7 +62,6 @@ typedef struct TonemapOpenCLContext { enum TonemapAlgorithm tonemap; enum AVPixelFormatformat; -doublepeak; doubleparam; doubledesat_param; doubletarget_peak; @@ -349,7 +348,7 @@ static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) AVFrame *output = NULL; cl_int cle; int err; -double peak = ctx->peak; +double peak; AVHWFramesContext *input_frames_ctx = (AVHWFramesContext*)input->hw_frames_ctx->data; @@ -371,8 +370,7 @@ static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) if (err < 0) goto fail; -if (!peak) -peak = ff_determine_signal_peak(input); +peak = ff_determine_signal_peak(input); if (ctx->trc != -1) output->color_trc = ctx->trc; @@ -518,7 +516,6 @@ static const AVOption tonemap_opencl_options[] = { { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" }, { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" }, { "format","output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" }, -{ "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS }, { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS }, { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS }, { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS }, -- 2.7.4 ___ ffmpeg-devel mailing list 
ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/4] lavfi/opencl: Handle overlay input formats correctly.
The main input may have alpha channel, we just ignore it. Also add some checks for incompatible input formats. Signed-off-by: Ruiling Song --- libavfilter/vf_overlay_opencl.c | 58 - 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c index e9c8532..320c1a5 100644 --- a/libavfilter/vf_overlay_opencl.c +++ b/libavfilter/vf_overlay_opencl.c @@ -37,7 +37,7 @@ typedef struct OverlayOpenCLContext { FFFrameSync fs; -int nb_planes; +int nb_color_planes; int x_subsample; int y_subsample; int alpha_separate; @@ -46,6 +46,22 @@ typedef struct OverlayOpenCLContext { int y_position; } OverlayOpenCLContext; +static int has_planar_alpha(const AVPixFmtDescriptor *fmt) { +int nb_components; +int has_alpha = !!(fmt->flags & AV_PIX_FMT_FLAG_ALPHA); +if (!has_alpha) return 0; + +nb_components = fmt->nb_components; +// PAL8 +if (nb_components < 2) return 0; + +if (fmt->comp[nb_components - 1].plane > +fmt->comp[nb_components - 2].plane) +return 1; +else +return 0; +} + static int overlay_opencl_load(AVFilterContext *avctx, enum AVPixelFormat main_format, enum AVPixelFormat overlay_format) @@ -55,10 +71,13 @@ static int overlay_opencl_load(AVFilterContext *avctx, const char *source = ff_opencl_source_overlay; const char *kernel; const AVPixFmtDescriptor *main_desc, *overlay_desc; -int err, i, main_planes, overlay_planes; +int err, i, main_planes, overlay_planes, overlay_alpha, +main_planar_alpha, overlay_planar_alpha; main_desc= av_pix_fmt_desc_get(main_format); overlay_desc = av_pix_fmt_desc_get(overlay_format); +overlay_alpha = !!(overlay_desc->flags & AV_PIX_FMT_FLAG_ALPHA); +main_planar_alpha = has_planar_alpha(main_desc); main_planes = overlay_planes = 0; for (i = 0; i < main_desc->nb_components; i++) @@ -68,7 +87,7 @@ static int overlay_opencl_load(AVFilterContext *avctx, overlay_planes = FFMAX(overlay_planes, overlay_desc->comp[i].plane + 1); -ctx->nb_planes = main_planes; +ctx->nb_color_planes = 
main_planar_alpha ? (main_planes - 1) : main_planes; ctx->x_subsample = 1 << main_desc->log2_chroma_w; ctx->y_subsample = 1 << main_desc->log2_chroma_h; @@ -80,15 +99,30 @@ static int overlay_opencl_load(AVFilterContext *avctx, ctx->x_subsample, ctx->y_subsample); } -if (main_planes == overlay_planes) { -if (main_desc->nb_components == overlay_desc->nb_components) -kernel = "overlay_no_alpha"; -else -kernel = "overlay_internal_alpha"; +if ((main_desc->flags & AV_PIX_FMT_FLAG_RGB) != +(overlay_desc->flags & AV_PIX_FMT_FLAG_RGB)) { +av_log(avctx, AV_LOG_ERROR, "mixed YUV/RGB input formats.\n"); +return AVERROR(EINVAL); +} + +if (main_desc->log2_chroma_w != overlay_desc->log2_chroma_w || +main_desc->log2_chroma_h != overlay_desc->log2_chroma_h) { +av_log(avctx, AV_LOG_ERROR, "incompatible chroma sub-sampling.\n"); +return AVERROR(EINVAL); +} + +if (!overlay_alpha) { ctx->alpha_separate = 0; +kernel = "overlay_no_alpha"; } else { -kernel = "overlay_external_alpha"; -ctx->alpha_separate = 1; +overlay_planar_alpha = has_planar_alpha(overlay_desc); +if (overlay_planar_alpha) { +ctx->alpha_separate = 1; +kernel = "overlay_external_alpha"; +} else { +ctx->alpha_separate = 0; +kernel = "overlay_internal_alpha"; +} } av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); @@ -155,7 +189,7 @@ static int overlay_opencl_blend(FFFrameSync *fs) goto fail; } -for (plane = 0; plane < ctx->nb_planes; plane++) { +for (plane = 0; plane < ctx->nb_color_planes; plane++) { kernel_arg = 0; mem = (cl_mem)output->data[plane]; @@ -171,7 +205,7 @@ static int overlay_opencl_blend(FFFrameSync *fs) kernel_arg++; if (ctx->alpha_separate) { -mem = (cl_mem)input_overlay->data[ctx->nb_planes]; +mem = (cl_mem)input_overlay->data[ctx->nb_color_planes]; CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem); kernel_arg++; } -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 3/4] doc/filters: add tonemap_opencl document.
Signed-off-by: Ruiling Song --- doc/filters.texi | 96 1 file changed, 96 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index 83df460..f884ba4 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -16387,6 +16387,7 @@ tmix=frames=3:weights="-1 2 -1":scale=1 @end example @end itemize +@anchor{tonemap} @section tonemap Tone map colors from different dynamic ranges. @@ -18440,6 +18441,101 @@ Apply sobel operator with scale set to 2 and delta set to 10 @end example @end itemize +@section tonemap_opencl + +Perform HDR(PQ/HLG) to SDR conversion with tone-mapping. + +It accepts the following parameters: + +@table @option +@item tonemap +Specify the tone-mapping operator to be used. Same as tonemap option in @ref{tonemap}. + +@item param +Tune the tone mapping algorithm. Same as param option in @ref{tonemap}. + +@item desat +Apply desaturation for highlights that exceed this level of brightness. The +higher the parameter, the more color information will be preserved. This +setting helps prevent unnaturally blown-out colors for super-highlights, by +(smoothly) turning into white instead. This makes images feel more natural, +at the cost of reducing information about out-of-range colors. + +The default value is 0.5, and the algorithm here is a little different from +the cpu version tonemap currently. A setting of 0.0 disables this option. + +@item threshold +The tonemapping algorithm parameters are fine-tuned for each scene. And a threshold +is used to detect whether the scene has changed or not. If the distance between +the current frame average brightness and the current running average exceeds +a threshold value, we would re-calculate scene average and peak brightness. +The default value is 0.2. + +@item format +Specify the output pixel format. + +Currently supported formats are: +@table @var +@item p010 +@item nv12 +@end table + +@item range, r +Set the output color range.
+ +Possible values are: +@table @var +@item tv/mpeg +@item pc/jpeg +@end table + +Default is same as input. + +@item primaries, p +Set the output color primaries. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@item transfer, t +Set the output transfer characteristics. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is bt709. + +@item matrix, m +Set the output colorspace matrix. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@end table + +@subsection Example + +@itemize +@item +Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using linear operator. +@example +-i INPUT -vf "format=p010,hwupload,tonemap_opencl=t=bt2020:tonemap=linear:format=p010,hwdownload,format=p010" OUTPUT +@end example +@end itemize + @section unsharp_opencl Sharpen or blur the input video. -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/4] doc/filters: add document for opencl filters
Signed-off-by: Danil Iashchenko Signed-off-by: Ruiling Song --- Seems like Danil is not working on this recently. So I re-submit this patch to address the comment over overlay_opencl. Thanks! Ruiling doc/filters.texi | 486 +++ 1 file changed, 486 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index cadf78c..83df460 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10485,6 +10485,7 @@ A floating point number which specifies chroma temporal strength. It defaults to @var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}. @end table +@anchor{hwdownload} @section hwdownload Download hardware frames to system memory. @@ -10575,6 +10576,7 @@ ways if there are any additional constraints on that filter's output. Do not use it without fully understanding the implications of its use. @end table +@anchor{hwupload} @section hwupload Upload system memory frames to hardware surfaces. @@ -18014,6 +18016,490 @@ pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1. @c man end VIDEO FILTERS +@chapter OpenCL Video Filters +@c man begin OPENCL VIDEO FILTERS + +Below is a description of the currently available OpenCL video filters. + +To enable compilation of these filters you need to configure FFmpeg with +@code{--enable-opencl}. + +Running OpenCL filters requires you to initialize a hardware device and to pass that device to all filters in any filter graph. +@table @option + +@item -init_hw_device opencl[=@var{name}][:@var{device}[,@var{key=value}...]] +Initialise a new hardware device of type @var{opencl} called @var{name}, using the +given device parameters. + +@item -filter_hw_device @var{name} +Pass the hardware device called @var{name} to all filters in any filter graph. + +@end table + +For more detailed information see @url{https://www.ffmpeg.org/ffmpeg.html#Advanced-Video-options} + +@itemize +@item +Example of choosing the first device on the second platform and running avgblur_opencl filter with default parameters on it. 
+@example +-init_hw_device opencl=gpu:1.0 -filter_hw_device gpu -i INPUT -vf "hwupload, avgblur_opencl, hwdownload" OUTPUT +@end example +@end itemize + +Since OpenCL filters are not able to access frame data in normal memory, all frame data needs to be uploaded(@ref{hwupload}) to hardware surfaces connected to the appropriate device before being used and then downloaded(@ref{hwdownload}) back to normal memory. Note that @ref{hwupload} will upload to a surface with the same layout as the software frame, so it may be necessary to add a @ref{format} filter immediately before to get the input into the right format and @ref{hwdownload} does not support all formats on the output - it may be necessary to insert an additional @ref{format} filter immediately following in the graph to get the output in a supported format. + +@section avgblur_opencl + +Apply average blur filter. + +The filter accepts the following options: + +@table @option +@item sizeX +Set horizontal radius size. +Range is @code{[1, 1024]} and default value is @code{1}. + +@item planes +Set which planes to filter. Default value is @code{0xf}, by which all planes are processed. + +@item sizeY +Set vertical radius size. Range is @code{[1, 1024]} and default value is @code{0}. If zero, @code{sizeX} value will be used. +@end table + +@subsection Example + +@itemize +@item +Apply average blur filter with horizontal and vertical size of 3, setting each pixel of the output to the average value of the 7x7 region centered on it in the input. For pixels on the edges of the image, the region does not extend beyond the image boundaries, and so out-of-range coordinates are not used in the calculations. +@example +-i INPUT -vf "hwupload, avgblur_opencl=3, hwdownload" OUTPUT +@end example +@end itemize + +@section boxblur_opencl + +Apply a boxblur algorithm to the input video. 
+ +It accepts the following parameters: + +@table @option + +@item luma_radius, lr +@item luma_power, lp +@item chroma_radius, cr +@item chroma_power, cp +@item alpha_radius, ar +@item alpha_power, ap + +@end table + +A description of the accepted options follows. + +@table @option +@item luma_radius, lr +@item chroma_radius, cr +@item alpha_radius, ar +Set an expression for the box radius in pixels used for blurring the +corresponding input plane. + +The radius value must be a non-negative number, and must not be +greater than the value of the expression @code{min(w,h)/2} for the +luma and alpha planes, and of @code{min(cw,ch)/2} for the chroma +planes. + +Default value for @option{luma_radius} is "2". If not specified, +@option{chroma_radius} and @option{alpha_radius} default to the +corresponding value set for @option{luma_radius}. + +The expressions can contain the following constants: +@table @option +@item w +@item h +The input width and height in pixels. + +@item cw +@item ch +The input chroma image wid
[FFmpeg-devel] [PATCH] doc/filters: add tonemap_opencl document.
Signed-off-by: Ruiling Song --- doc/filters.texi | 158 +++ 1 file changed, 158 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index 6695999c84..f622d03226 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -17776,6 +17776,164 @@ Apply sobel operator with scale set to 2 and delta set to 10 @end example @end itemize +@section tonemap_opencl + +Perform HDR(PQ/HLG) to SDR conversion with tone-mapping. + +It accepts the following parameters: + +@table @option +@item tonemap +Specify the tone-mapping operator to be used. + +Possible values are: +@table @var +@item none +Do not apply any tone mapping, only desaturate overbright pixels. + +@item clip +Hard-clip any out-of-range values. Use it for perfect color accuracy for +in-range values, while distorting out-of-range values. + +@item linear +Stretch the entire reference gamut to a linear multiple of the display. + +@item gamma +Fit a logarithmic transfer between the tone curves. + +@item reinhard +Preserve overall image brightness with a simple curve, using nonlinear +contrast, which results in flattening details and degrading color accuracy. + +@item hable +Preserve both dark and bright details better than @var{reinhard}, at the cost +of slightly darkening everything. Use it when detail preservation is more +important than color and brightness accuracy. + +@item mobius +Smoothly map out-of-range values, while retaining contrast and colors for +in-range material as much as possible. Use it when color accuracy is more +important than detail preservation. +@end table + +@item param +Tune the tone mapping algorithm. + +This affects the following algorithms: +@table @var +@item none +Ignored. + +@item linear +Specifies the scale factor to use while stretching. +Default to 1.0. + +@item gamma +Specifies the exponent of the function. +Default to 1.8. + +@item clip +Specify an extra linear coefficient to multiply into the signal before clipping. +Default to 1.0. 
+ +@item reinhard +Specify the local contrast coefficient at the display peak. +Default to 0.5, which means that in-gamut values will be about half as bright +as when clipping. + +@item hable +Ignored. + +@item mobius +Specify the transition point from linear to mobius transform. Every value +below this point is guaranteed to be mapped 1:1. The higher the value, the +more accurate the result will be, at the cost of losing bright details. +Default to 0.3, which due to the steep initial slope still preserves in-range +colors fairly accurately. +@end table + +@item desat +Apply desaturation for highlights that exceed this level of brightness. The +higher the parameter, the more color information will be preserved. This +setting helps prevent unnaturally blown-out colors for super-highlights, by +(smoothly) turning into white instead. This makes images feel more natural, +at the cost of reducing information about out-of-range colors. + +The default value is 0.5, and the algorithm here is a little different from +the cpu version tonemap currently. A setting of 0.0 disables this option. + +@item threshold +The tonemapping algorithm parameters is fine-tuned per each scene. And a threshold +is used to detect whether the scene has changed or not. If the distance beween +the current frame average brightness and the current running average exceeds +a threshold value, we would re-calculate scene average and peak brightness. +The default value is 0.2. + +@item format +Specify the output pixel format. + +Currently supported formats are: +@table @var +@item p010 +@item nv12 +@end table + +@item range, r +Set the output color range. + +Possible values are: +@table @var +@item tv/mpeg +@item pc/jpeg +@end table + +Default is same as input. + +@item primaries, p +Set the output color primaries. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@item transfer, t +Set the output transfer characteristics. 
+ +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is bt709. + +@item matrix, m +Set the output colorspace matrix. + +Possible values are: +@table @var +@item bt709 +@item bt2020 +@end table + +Default is same as input. + +@end table + +@subsection Example + +@itemize +@item +Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using linear operator. +@example +-i INPUT -vf "hwupload, format=p010,tonemap_opencl=t=bt2020:tonemap=linear:format=p010, hwdownload" OUTPUT +@end example +@end itemize + @section unsharp_opencl Sharpen or blur the input video. -- 2.17.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavfi/colorspace: Suppress compile warning on incompatible pointer type.
Signed-off-by: Ruiling Song --- Sorry I have not verified this patch, I don't know how to reproduce the gcc warning. Thanks! Ruiling libavfilter/vf_colorspace.c | 16 libavfilter/vf_tonemap_opencl.c | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c index 56621d15e2..69c7674a7b 100644 --- a/libavfilter/vf_colorspace.c +++ b/libavfilter/vf_colorspace.c @@ -382,8 +382,8 @@ static void fill_whitepoint_conv_table(double out[3][3], enum WhitepointAdaptati fac[1][1] = gd / gs; fac[2][2] = bd / bs; fac[0][1] = fac[0][2] = fac[1][0] = fac[1][2] = fac[2][0] = fac[2][1] = 0.0; -ff_matrix_mul_3x3(tmp, ma, fac); -ff_matrix_mul_3x3(out, tmp, mai); +ff_matrix_mul_3x3(tmp, ma, (void *)fac); +ff_matrix_mul_3x3(out, (void *)tmp, (void *)mai); } static void apply_lut(int16_t *buf[3], ptrdiff_t stride, @@ -589,7 +589,7 @@ static int create_filtergraph(AVFilterContext *ctx, wp_out = &whitepoint_coefficients[s->out_primaries->wp]; wp_in = &whitepoint_coefficients[s->in_primaries->wp]; ff_fill_rgb2xyz_table(&s->out_primaries->coeff, wp_out, rgb2xyz); -ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); +ff_matrix_invert_3x3((void *)rgb2xyz, xyz2rgb); ff_fill_rgb2xyz_table(&s->in_primaries->coeff, wp_in, rgb2xyz); if (s->out_primaries->wp != s->in_primaries->wp && s->wp_adapt != WP_ADAPT_IDENTITY) { @@ -597,10 +597,10 @@ static int create_filtergraph(AVFilterContext *ctx, fill_whitepoint_conv_table(wpconv, s->wp_adapt, s->in_primaries->wp, s->out_primaries->wp); -ff_matrix_mul_3x3(tmp, rgb2xyz, wpconv); -ff_matrix_mul_3x3(rgb2rgb, tmp, xyz2rgb); +ff_matrix_mul_3x3(tmp, (void *)rgb2xyz, (void *)wpconv); +ff_matrix_mul_3x3(rgb2rgb, (void *)tmp, (void *)xyz2rgb); } else { -ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); +ff_matrix_mul_3x3(rgb2rgb, (void *)rgb2xyz, (void *)xyz2rgb); } for (m = 0; m < 3; m++) for (n = 0; n < 3; n++) { @@ -725,7 +725,7 @@ static int create_filtergraph(AVFilterContext *ctx, for (n = 0; n < 8; n++) 
s->yuv_offset[0][n] = off; fill_rgb2yuv_table(s->in_lumacoef, rgb2yuv); -ff_matrix_invert_3x3(rgb2yuv, yuv2rgb); +ff_matrix_invert_3x3((void *)rgb2yuv, yuv2rgb); bits = 1 << (in_desc->comp[0].depth - 1); for (n = 0; n < 3; n++) { for (in_rng = s->in_y_rng, m = 0; m < 3; m++, in_rng = s->in_uv_rng) { @@ -781,7 +781,7 @@ static int create_filtergraph(AVFilterContext *ctx, double yuv2yuv[3][3]; int in_rng, out_rng; -ff_matrix_mul_3x3(yuv2yuv, yuv2rgb, rgb2yuv); +ff_matrix_mul_3x3(yuv2yuv, (void *)yuv2rgb, (void *)rgb2yuv); for (out_rng = s->out_y_rng, m = 0; m < 3; m++, out_rng = s->out_uv_rng) { for (in_rng = s->in_y_rng, n = 0; n < 3; n++, in_rng = s->in_uv_rng) { s->yuv2yuv_coeffs[m][n][0] = diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c index 241f95e6c3..0cb2da0da2 100644 --- a/libavfilter/vf_tonemap_opencl.c +++ b/libavfilter/vf_tonemap_opencl.c @@ -125,9 +125,9 @@ static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries o double rgb2xyz[3][3], xyz2rgb[3][3]; ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz); -ff_matrix_invert_3x3(rgb2xyz, xyz2rgb); +ff_matrix_invert_3x3((void *)rgb2xyz, xyz2rgb); ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz); -ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb); +ff_matrix_mul_3x3(rgb2rgb, (void *)rgb2xyz, (void *)xyz2rgb); } #define OPENCL_SOURCE_NB 3 -- 2.17.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v2 2/2] lavfi/vf_avgblur_opencl: remove useless clFinish().
The very last clFinish() should be ok. Signed-off-by: Ruiling Song --- libavfilter/vf_avgblur_opencl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index bc6bcab..99ed1ca 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -228,7 +228,6 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) 0, NULL, NULL); CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal " "kernel: %d.\n", cle); -cle = clFinish(ctx->command_queue); err = ff_opencl_filter_work_size_from_image(avctx, global_work, i == 0 ? output : intermediate, p, 0); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v2 1/2] lavfi/opencl: add macro for opencl error handling.
Signed-off-by: Ruiling Song --- libavfilter/opencl.h| 11 + libavfilter/vf_avgblur_opencl.c | 45 +-- libavfilter/vf_overlay_opencl.c | 29 +-- libavfilter/vf_program_opencl.c | 14 ++- libavfilter/vf_tonemap_opencl.c | 33 +- libavfilter/vf_unsharp_opencl.c | 52 + 6 files changed, 47 insertions(+), 137 deletions(-) diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h index 7441b11..0ed360b 100644 --- a/libavfilter/opencl.h +++ b/libavfilter/opencl.h @@ -112,5 +112,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment); +/** + * A helper macro to handle OpenCL error. It will assign errcode to + * variable err, log error msg, and jump to fail label on error. + */ +#define CL_FAIL_ON_ERROR(errcode, ...) do {\ +if (cle != CL_SUCCESS) {\ +av_log(avctx, AV_LOG_ERROR, __VA_ARGS__);\ +err = errcode;\ +goto fail;\ +}\ +} while(0) #endif /* AVFILTER_OPENCL_H */ diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index d1d3eb1..bc6bcab 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -64,26 +64,16 @@ static int avgblur_opencl_init(AVFilterContext *avctx) ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, ctx->ocf.hwctx->device_id, 0, &cle); -if (!ctx->command_queue) { -av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " - "command queue: %d.\n", cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle); -if (!ctx->kernel_horiz) { -av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create horizontal " + "kernel %d.\n", cle); ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle); -if (!ctx->kernel_vert) { -av_log(avctx, AV_LOG_ERROR, "Failed to create 
kernel: %d.\n", cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create vertical " + "kernel %d.\n", cle); ctx->initialised = 1; return 0; @@ -236,12 +226,8 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL, global_work, NULL, 0, NULL, NULL); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal " + "kernel: %d.\n", cle); cle = clFinish(ctx->command_queue); err = ff_opencl_filter_work_size_from_image(avctx, global_work, @@ -259,22 +245,13 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL, global_work, NULL, 0, NULL, NULL); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue vertical " + "kernel: %d.\n", cle); } } cle = clFinish(ctx->command_queue); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); err = av_frame_copy_props(output, input); if (err < 0) diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/v
[FFmpeg-devel] [PATCH 2/2] lavfi/vf_avgblur_opencl: remove useless clFinish().
The very last clFinish() should be ok. Signed-off-by: Ruiling Song --- libavfilter/vf_avgblur_opencl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index bc6bcab..99ed1ca 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -228,7 +228,6 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) 0, NULL, NULL); CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal " "kernel: %d.\n", cle); -cle = clFinish(ctx->command_queue); err = ff_opencl_filter_work_size_from_image(avctx, global_work, i == 0 ? output : intermediate, p, 0); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/2] lavfi/opencl: add macro for opencl error handling.
Signed-off-by: Ruiling Song --- libavfilter/opencl.h| 4 ++-- libavfilter/vf_avgblur_opencl.c | 45 +-- libavfilter/vf_overlay_opencl.c | 29 +-- libavfilter/vf_program_opencl.c | 14 ++- libavfilter/vf_tonemap_opencl.c | 33 +- libavfilter/vf_unsharp_opencl.c | 52 + 6 files changed, 38 insertions(+), 139 deletions(-) diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h index fd76f72..0ed360b 100644 --- a/libavfilter/opencl.h +++ b/libavfilter/opencl.h @@ -116,9 +116,9 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, * A helper macro to handle OpenCL error. It will assign errcode to * variable err, log error msg, and jump to fail label on error. */ -#define OCL_FAIL_ON_ERR(logctx, cle, errcode, ...) do {\ +#define CL_FAIL_ON_ERROR(errcode, ...) do {\ if (cle != CL_SUCCESS) {\ -av_log(logctx, AV_LOG_ERROR, __VA_ARGS__);\ +av_log(avctx, AV_LOG_ERROR, __VA_ARGS__);\ err = errcode;\ goto fail;\ }\ diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c index d1d3eb1..bc6bcab 100644 --- a/libavfilter/vf_avgblur_opencl.c +++ b/libavfilter/vf_avgblur_opencl.c @@ -64,26 +64,16 @@ static int avgblur_opencl_init(AVFilterContext *avctx) ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, ctx->ocf.hwctx->device_id, 0, &cle); -if (!ctx->command_queue) { -av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " - "command queue: %d.\n", cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " + "command queue %d.\n", cle); ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle); -if (!ctx->kernel_horiz) { -av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create horizontal " + "kernel %d.\n", cle); ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle); -if (!ctx->kernel_vert) { -av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); 
-err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create vertical " + "kernel %d.\n", cle); ctx->initialised = 1; return 0; @@ -236,12 +226,8 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL, global_work, NULL, 0, NULL, NULL); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal " + "kernel: %d.\n", cle); cle = clFinish(ctx->command_queue); err = ff_opencl_filter_work_size_from_image(avctx, global_work, @@ -259,22 +245,13 @@ static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL, global_work, NULL, 0, NULL, NULL); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue vertical " + "kernel: %d.\n", cle); } } cle = clFinish(ctx->command_queue); -if (cle != CL_SUCCESS) { -av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n", - cle); -err = AVERROR(EIO); -goto fail; -} +CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle); err = av_frame_copy_props(output, input); if (err < 0) diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c index 556ce35..e9c8532 100644 --- a/libavfilter/vf_overlay_opencl.c +++ b/libavfilter/vf_overlay_opencl.c @@ -100,19 +100,11 @@
[FFmpeg-devel] [PATCH v4 1/2] lavfi: add opencl tonemap filter.
This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping. An example command to use this filter with vaapi codecs: FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \ opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \ vaapi -i INPUT -filter_hw_device ocl -filter_complex \ '[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \ [x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT Signed-off-by: Ruiling Song --- As I didn't receive any other comment on v3, this version only fix the comment from Michael. And also include some little change to leverage CL_SET_KERNEL_ARG() macro. Thanks! Ruiling configure | 1 + libavfilter/Makefile| 2 + libavfilter/allfilters.c| 1 + libavfilter/colorspace.c| 90 + libavfilter/colorspace.h| 41 +++ libavfilter/opencl/colorspace_common.cl | 220 +++ libavfilter/opencl/tonemap.cl | 272 ++ libavfilter/opencl_source.h | 2 + libavfilter/vf_tonemap_opencl.c | 624 9 files changed, 1253 insertions(+) create mode 100644 libavfilter/colorspace.c create mode 100644 libavfilter/colorspace.h create mode 100644 libavfilter/opencl/colorspace_common.cl create mode 100644 libavfilter/opencl/tonemap.cl create mode 100644 libavfilter/vf_tonemap_opencl.c diff --git a/configure b/configure index 333e326..d9c5d63 100755 --- a/configure +++ b/configure @@ -3411,6 +3411,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_opencl_filter_deps="opencl const_nan" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 5b4be49..d2c85cf 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -356,6 +356,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o 
OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o +OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl.o \ +opencl/tonemap.o opencl/colorspace_common.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index f2d27d2..fa85c29 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -345,6 +345,7 @@ extern AVFilter ff_vf_tinterlace; extern AVFilter ff_vf_tlut2; extern AVFilter ff_vf_tmix; extern AVFilter ff_vf_tonemap; +extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c new file mode 100644 index 000..7fd7bdf --- /dev/null +++ b/libavfilter/colorspace.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016 Ronald S. Bultje + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "colorspace.h" + + +void invert_matrix3x3(const double in[3][3], double out[3][3]) +{ +double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], + m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], + m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; +int i, j; +double det; + +out[0][0] = (m11 * m22 - m21 * m12); +out[0][1] = -(m01 * m22 - m21 * m02); +out[0][2] = (m01 * m12 - m11 * m02); +out[1][0] = -(m10 * m22 - m20 * m12); +out[1][1] = (m00 * m22 - m20 * m02); +out[1][2] = -(m00 * m12 - m10 * m02); +out[2][0] = (m10 * m21 - m20 * m11); +out[2][1] = -(m00 * m21 - m20 * m01); +out[2][2] = (m00 * m11 - m10 * m01); + +det = m00 * out[0][0] + m10 * o
[FFmpeg-devel] [PATCH v4 2/2] lavfi: make vf_colorspace use functions from colorspace.c
These functions are shared among colorspace related filters. Signed-off-by: Ruiling Song --- libavfilter/Makefile| 2 +- libavfilter/vf_colorspace.c | 118 +--- 2 files changed, 23 insertions(+), 97 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index d2c85cf..c20c270 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -166,7 +166,7 @@ OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER) += vf_colorchannelmixer.o OBJS-$(CONFIG_COLORKEY_FILTER) += vf_colorkey.o OBJS-$(CONFIG_COLORLEVELS_FILTER)+= vf_colorlevels.o OBJS-$(CONFIG_COLORMATRIX_FILTER)+= vf_colormatrix.o -OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o +OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspace.o colorspacedsp.o OBJS-$(CONFIG_CONVOLUTION_FILTER)+= vf_convolution.o OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ opencl/convolution.o diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c index 71ea08a..b593215 100644 --- a/libavfilter/vf_colorspace.c +++ b/libavfilter/vf_colorspace.c @@ -33,6 +33,7 @@ #include "formats.h" #include "internal.h" #include "video.h" +#include "colorspace.h" enum DitherMode { DITHER_NONE, @@ -110,21 +111,13 @@ static const enum AVColorSpace default_csp[CS_NB + 1] = { struct ColorPrimaries { enum Whitepoint wp; -double xr, yr, xg, yg, xb, yb; +struct PrimaryCoefficients coeff; }; struct TransferCharacteristics { double alpha, beta, gamma, delta; }; -struct LumaCoefficients { -double cr, cg, cb; -}; - -struct WhitepointCoefficients { -double xw, yw; -}; - typedef struct ColorSpaceContext { const AVClass *class; @@ -286,57 +279,30 @@ static const struct WhitepointCoefficients whitepoint_coefficients[WP_NB] = { }; static const struct ColorPrimaries color_primaries[AVCOL_PRI_NB] = { -[AVCOL_PRI_BT709] = { WP_D65, 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, -[AVCOL_PRI_BT470M]= { WP_C, 0.670, 0.330, 0.210, 0.710, 0.140, 0.080 }, -[AVCOL_PRI_BT470BG] = { WP_D65, 0.640, 0.330, 
0.290, 0.600, 0.150, 0.060,}, -[AVCOL_PRI_SMPTE170M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 }, -[AVCOL_PRI_SMPTE240M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 }, -[AVCOL_PRI_SMPTE428] = { WP_E, 0.735, 0.265, 0.274, 0.718, 0.167, 0.009 }, -[AVCOL_PRI_SMPTE431] = { WP_DCI, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 }, -[AVCOL_PRI_SMPTE432] = { WP_D65, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 }, -[AVCOL_PRI_FILM] = { WP_C, 0.681, 0.319, 0.243, 0.692, 0.145, 0.049 }, -[AVCOL_PRI_BT2020]= { WP_D65, 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, -[AVCOL_PRI_JEDEC_P22] = { WP_D65, 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 }, +[AVCOL_PRI_BT709] = { WP_D65, { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 } }, +[AVCOL_PRI_BT470M]= { WP_C, { 0.670, 0.330, 0.210, 0.710, 0.140, 0.080 } }, +[AVCOL_PRI_BT470BG] = { WP_D65, { 0.640, 0.330, 0.290, 0.600, 0.150, 0.060 } }, +[AVCOL_PRI_SMPTE170M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } }, +[AVCOL_PRI_SMPTE240M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } }, +[AVCOL_PRI_SMPTE428] = { WP_E, { 0.735, 0.265, 0.274, 0.718, 0.167, 0.009 } }, +[AVCOL_PRI_SMPTE431] = { WP_DCI, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } }, +[AVCOL_PRI_SMPTE432] = { WP_D65, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } }, +[AVCOL_PRI_FILM] = { WP_C, { 0.681, 0.319, 0.243, 0.692, 0.145, 0.049 } }, +[AVCOL_PRI_BT2020]= { WP_D65, { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 } }, +[AVCOL_PRI_JEDEC_P22] = { WP_D65, { 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 } }, }; static const struct ColorPrimaries *get_color_primaries(enum AVColorPrimaries prm) { -const struct ColorPrimaries *coeffs; +const struct ColorPrimaries *p; if (prm >= AVCOL_PRI_NB) return NULL; -coeffs = &color_primaries[prm]; -if (!coeffs->xr) +p = &color_primaries[prm]; +if (!p->coeff.xr) return NULL; -return coeffs; -} - -static void invert_matrix3x3(const double in[3][3], double out[3][3]) -{ -double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], - m10 = 
in[1][0], m11 = in[1][1], m12 = in[1][2], - m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; -int i, j; -double det; - -out[0][0] = (m11 * m22 - m21 * m12); -out[0][1] = -(m01 * m22 - m21 * m02); -out[0][2] = (m01 * m12 - m11 * m02); -out[1][0] = -(m10 * m22 - m20 * m12); -out[1][1] = (m00 * m22 - m20 * m02); -out[1][2] = -(m00 * m12 - m10 * m02); -out[2][0] = (m10 * m21 - m20 * m11); -out[2][1]
[FFmpeg-devel] [PATCH] lavfi: add helper macro for OpenCL error handling.
Signed-off-by: Ruiling Song --- I am not sure whether you think this would be useful. The main purpose is to make the OpenCL error-checking code simpler. If we think this is good, I can update the current OpenCL filters to use this macro. For example: if (cle != CL_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n", cle); err = AVERROR(EIO); goto fail; } can be replaced with: OCL_FAIL_ON_ERR(avctx, cle, AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle); Thanks! Ruiling libavfilter/opencl.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h index c0a4519..c33df1c 100644 --- a/libavfilter/opencl.h +++ b/libavfilter/opencl.h @@ -97,5 +97,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment); +/** + * A helper macro to handle OpenCL error. It will assign errcode to + * variable err, log error msg, and jump to fail label on error. + */ +#define OCL_FAIL_ON_ERR(logctx, cle, errcode, ...) do {\ +if (cle != CL_SUCCESS) {\ +av_log(logctx, AV_LOG_ERROR, __VA_ARGS__);\ +err = errcode;\ +goto fail;\ +}\ +} while(0) #endif /* AVFILTER_OPENCL_H */ -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavu: add calling convention for OpenCL callback.
This fixes a build error on Windows: C2440: cannot convert from 'void (__cdecl *) (...)' to 'void (__stdcall *)(...)'. Signed-off-by: Ruiling Song --- libavutil/hwcontext_opencl.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c index 43b5c5a..e08d7bc 100644 --- a/libavutil/hwcontext_opencl.c +++ b/libavutil/hwcontext_opencl.c @@ -141,9 +141,10 @@ typedef struct OpenCLFramesContext { } OpenCLFramesContext; -static void opencl_error_callback(const char *errinfo, - const void *private_info, size_t cb, - void *user_data) +static void CL_CALLBACK opencl_error_callback(const char *errinfo, + const void *private_info, + size_t cb, + void *user_data) { AVHWDeviceContext *ctx = user_data; av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v3 2/2] lavfi: make vf_colorspace use functions from colorspace.c
These functions are shared among colorspace related filters. Signed-off-by: Ruiling Song --- libavfilter/Makefile| 2 +- libavfilter/vf_colorspace.c | 118 +--- 2 files changed, 23 insertions(+), 97 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index d2c85cf..c20c270 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -166,7 +166,7 @@ OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER) += vf_colorchannelmixer.o OBJS-$(CONFIG_COLORKEY_FILTER) += vf_colorkey.o OBJS-$(CONFIG_COLORLEVELS_FILTER)+= vf_colorlevels.o OBJS-$(CONFIG_COLORMATRIX_FILTER)+= vf_colormatrix.o -OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o +OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspace.o colorspacedsp.o OBJS-$(CONFIG_CONVOLUTION_FILTER)+= vf_convolution.o OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ opencl/convolution.o diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c index 71ea08a..b593215 100644 --- a/libavfilter/vf_colorspace.c +++ b/libavfilter/vf_colorspace.c @@ -33,6 +33,7 @@ #include "formats.h" #include "internal.h" #include "video.h" +#include "colorspace.h" enum DitherMode { DITHER_NONE, @@ -110,21 +111,13 @@ static const enum AVColorSpace default_csp[CS_NB + 1] = { struct ColorPrimaries { enum Whitepoint wp; -double xr, yr, xg, yg, xb, yb; +struct PrimaryCoefficients coeff; }; struct TransferCharacteristics { double alpha, beta, gamma, delta; }; -struct LumaCoefficients { -double cr, cg, cb; -}; - -struct WhitepointCoefficients { -double xw, yw; -}; - typedef struct ColorSpaceContext { const AVClass *class; @@ -286,57 +279,30 @@ static const struct WhitepointCoefficients whitepoint_coefficients[WP_NB] = { }; static const struct ColorPrimaries color_primaries[AVCOL_PRI_NB] = { -[AVCOL_PRI_BT709] = { WP_D65, 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 }, -[AVCOL_PRI_BT470M]= { WP_C, 0.670, 0.330, 0.210, 0.710, 0.140, 0.080 }, -[AVCOL_PRI_BT470BG] = { WP_D65, 0.640, 0.330, 
0.290, 0.600, 0.150, 0.060,}, -[AVCOL_PRI_SMPTE170M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 }, -[AVCOL_PRI_SMPTE240M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 }, -[AVCOL_PRI_SMPTE428] = { WP_E, 0.735, 0.265, 0.274, 0.718, 0.167, 0.009 }, -[AVCOL_PRI_SMPTE431] = { WP_DCI, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 }, -[AVCOL_PRI_SMPTE432] = { WP_D65, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 }, -[AVCOL_PRI_FILM] = { WP_C, 0.681, 0.319, 0.243, 0.692, 0.145, 0.049 }, -[AVCOL_PRI_BT2020]= { WP_D65, 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 }, -[AVCOL_PRI_JEDEC_P22] = { WP_D65, 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 }, +[AVCOL_PRI_BT709] = { WP_D65, { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 } }, +[AVCOL_PRI_BT470M]= { WP_C, { 0.670, 0.330, 0.210, 0.710, 0.140, 0.080 } }, +[AVCOL_PRI_BT470BG] = { WP_D65, { 0.640, 0.330, 0.290, 0.600, 0.150, 0.060 } }, +[AVCOL_PRI_SMPTE170M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } }, +[AVCOL_PRI_SMPTE240M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 } }, +[AVCOL_PRI_SMPTE428] = { WP_E, { 0.735, 0.265, 0.274, 0.718, 0.167, 0.009 } }, +[AVCOL_PRI_SMPTE431] = { WP_DCI, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } }, +[AVCOL_PRI_SMPTE432] = { WP_D65, { 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 } }, +[AVCOL_PRI_FILM] = { WP_C, { 0.681, 0.319, 0.243, 0.692, 0.145, 0.049 } }, +[AVCOL_PRI_BT2020]= { WP_D65, { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 } }, +[AVCOL_PRI_JEDEC_P22] = { WP_D65, { 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 } }, }; static const struct ColorPrimaries *get_color_primaries(enum AVColorPrimaries prm) { -const struct ColorPrimaries *coeffs; +const struct ColorPrimaries *p; if (prm >= AVCOL_PRI_NB) return NULL; -coeffs = &color_primaries[prm]; -if (!coeffs->xr) +p = &color_primaries[prm]; +if (!p->coeff.xr) return NULL; -return coeffs; -} - -static void invert_matrix3x3(const double in[3][3], double out[3][3]) -{ -double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], - m10 = 
in[1][0], m11 = in[1][1], m12 = in[1][2], - m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; -int i, j; -double det; - -out[0][0] = (m11 * m22 - m21 * m12); -out[0][1] = -(m01 * m22 - m21 * m02); -out[0][2] = (m01 * m12 - m11 * m02); -out[1][0] = -(m10 * m22 - m20 * m12); -out[1][1] = (m00 * m22 - m20 * m02); -out[1][2] = -(m00 * m12 - m10 * m02); -out[2][0] = (m10 * m21 - m20 * m11); -out[2][1]
[FFmpeg-devel] [PATCH v3 1/2] lavfi: add opencl tonemap filter.
This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping. An example command to use this filter with vaapi codecs: FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \ opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \ vaapi -i INPUT -filter_hw_device ocl -filter_complex \ '[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \ [x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT Signed-off-by: Ruiling Song --- this version mainly address Mark's comments on v2. Thanks! Ruiling configure | 1 + libavfilter/Makefile| 2 + libavfilter/allfilters.c| 1 + libavfilter/colorspace.c| 90 + libavfilter/colorspace.h| 41 ++ libavfilter/opencl/colorspace_common.cl | 220 +++ libavfilter/opencl/tonemap.cl | 272 + libavfilter/opencl_source.h | 2 + libavfilter/vf_tonemap_opencl.c | 657 9 files changed, 1286 insertions(+) create mode 100644 libavfilter/colorspace.c create mode 100644 libavfilter/colorspace.h create mode 100644 libavfilter/opencl/colorspace_common.cl create mode 100644 libavfilter/opencl/tonemap.cl create mode 100644 libavfilter/vf_tonemap_opencl.c diff --git a/configure b/configure index 53224f0..4ff651f 100755 --- a/configure +++ b/configure @@ -3410,6 +3410,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_opencl_filter_deps="opencl const_nan" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 5b4be49..d2c85cf 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -356,6 +356,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o +OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o 
opencl.o \ +opencl/tonemap.o opencl/colorspace_common.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index f2d27d2..fa85c29 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -345,6 +345,7 @@ extern AVFilter ff_vf_tinterlace; extern AVFilter ff_vf_tlut2; extern AVFilter ff_vf_tmix; extern AVFilter ff_vf_tonemap; +extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c new file mode 100644 index 000..7fd7bdf --- /dev/null +++ b/libavfilter/colorspace.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016 Ronald S. Bultje + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "colorspace.h" + + +void invert_matrix3x3(const double in[3][3], double out[3][3]) +{ +double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], + m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], + m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; +int i, j; +double det; + +out[0][0] = (m11 * m22 - m21 * m12); +out[0][1] = -(m01 * m22 - m21 * m02); +out[0][2] = (m01 * m12 - m11 * m02); +out[1][0] = -(m10 * m22 - m20 * m12); +out[1][1] = (m00 * m22 - m20 * m02); +out[1][2] = -(m00 * m12 - m10 * m02); +out[2][0] = (m10 * m21 - m20 * m11); +out[2][1] = -(m00 * m21 - m20 * m01); +out[2][2] = (m00 * m11 - m10 * m01); + +det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2]; +det = 1.0 / det; + +for (i = 0; i < 3; i++) { +for (j = 0; j < 3
[FFmpeg-devel] [PATCH] lavfi: add opencl tonemap filter.
This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping. An example command to use this filter with vaapi codecs: FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \ opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \ vaapi -i INPUT -filter_hw_device ocl -filter_complex \ '[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \ [x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT v2: add peak detection. Signed-off-by: Ruiling Song --- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/colorspace_basic.c | 89 + libavfilter/colorspace_basic.h | 40 ++ libavfilter/opencl/colorspace_basic.cl | 187 ++ libavfilter/opencl/tonemap.cl | 278 ++ libavfilter/opencl_source.h| 2 + libavfilter/vf_tonemap_opencl.c| 655 + 9 files changed, 1255 insertions(+) create mode 100644 libavfilter/colorspace_basic.c create mode 100644 libavfilter/colorspace_basic.h create mode 100644 libavfilter/opencl/colorspace_basic.cl create mode 100644 libavfilter/opencl/tonemap.cl create mode 100644 libavfilter/vf_tonemap_opencl.c diff --git a/configure b/configure index e52f8f8..ee3586b 100755 --- a/configure +++ b/configure @@ -3401,6 +3401,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_opencl_filter_deps="opencl" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index c68ef05..0915656 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -352,6 +352,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o +OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace_basic.o opencl.o \ 
+opencl/tonemap.o opencl/colorspace_basic.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index b44093d..6873bab 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -343,6 +343,7 @@ extern AVFilter ff_vf_tinterlace; extern AVFilter ff_vf_tlut2; extern AVFilter ff_vf_tmix; extern AVFilter ff_vf_tonemap; +extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c new file mode 100644 index 000..93f9f08 --- /dev/null +++ b/libavfilter/colorspace_basic.c @@ -0,0 +1,89 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "colorspace_basic.h" + + +void invert_matrix3x3(const double in[3][3], double out[3][3]) +{ +double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], + m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], + m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; +int i, j; +double det; + +out[0][0] = (m11 * m22 - m21 * m12); +out[0][1] = -(m01 * m22 - m21 * m02); +out[0][2] = (m01 * m12 - m11 * m02); +out[1][0] = -(m10 * m22 - m20 * m12); +out[1][1] = (m00 * m22 - m20 * m02); +out[1][2] = -(m00 * m12 - m10 * m02); +out[2][0] = (m10 * m21 - m20 * m11); +out[2][1] = -(m00 * m21 - m20 * m01); +out[2][2] = (m00 * m11 - m10 * m01); + +det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2]; +det = 1.0 / det; + +for (i = 0; i < 3; i++) { +for (j = 0; j < 3; j++) +out[i][j] *= det; +} +} + +void mul3x3(double
[FFmpeg-devel] [PATCH] lavfi: a minor fix to tonemap peak detection.
If the transfer is SMPTE2084, use a peak of 10000 even if not tagged. Otherwise, assume it is HLG with a peak of 1200. Based on a suggestion by Niklas Haas. Signed-off-by: Ruiling Song --- libavfilter/vf_tonemap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c index 10308bd..ab45f2e 100644 --- a/libavfilter/vf_tonemap.c +++ b/libavfilter/vf_tonemap.c @@ -131,10 +131,9 @@ static double determine_signal_peak(AVFrame *in) peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE; } -/* smpte2084 needs the side data above to work correctly - * if missing, assume that the original transfer was arib-std-b67 */ +/* if not SMPTE2084, we would assume HLG */ if (!peak) -peak = 12; +peak = in->color_trc == AVCOL_TRC_SMPTE2084 ? 100 : 12; return peak; } -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavfi: add opencl tonemap filter.
This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping. An example command to use this filter with vaapi codecs: FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \ opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \ vaapi -i INPUT -filter_hw_device ocl -filter_complex \ '[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \ [x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT Signed-off-by: Ruiling Song --- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/colorspace_basic.c | 89 ++ libavfilter/colorspace_basic.h | 40 +++ libavfilter/opencl/colorspace_basic.cl | 179 +++ libavfilter/opencl/tonemap.cl | 258 +++ libavfilter/opencl_source.h| 2 + libavfilter/vf_tonemap_opencl.c| 560 + 9 files changed, 1132 insertions(+) create mode 100644 libavfilter/colorspace_basic.c create mode 100644 libavfilter/colorspace_basic.h create mode 100644 libavfilter/opencl/colorspace_basic.cl create mode 100644 libavfilter/opencl/tonemap.cl create mode 100644 libavfilter/vf_tonemap_opencl.c diff --git a/configure b/configure index e52f8f8..ee3586b 100755 --- a/configure +++ b/configure @@ -3401,6 +3401,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_opencl_filter_deps="opencl" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index c68ef05..0915656 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -352,6 +352,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o +OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace_basic.o opencl.o \ +opencl/tonemap.o 
opencl/colorspace_basic.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index b44093d..6873bab 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -343,6 +343,7 @@ extern AVFilter ff_vf_tinterlace; extern AVFilter ff_vf_tlut2; extern AVFilter ff_vf_tmix; extern AVFilter ff_vf_tonemap; +extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c new file mode 100644 index 000..93f9f08 --- /dev/null +++ b/libavfilter/colorspace_basic.c @@ -0,0 +1,89 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "colorspace_basic.h" + + +void invert_matrix3x3(const double in[3][3], double out[3][3]) +{ +double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], + m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], + m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; +int i, j; +double det; + +out[0][0] = (m11 * m22 - m21 * m12); +out[0][1] = -(m01 * m22 - m21 * m02); +out[0][2] = (m01 * m12 - m11 * m02); +out[1][0] = -(m10 * m22 - m20 * m12); +out[1][1] = (m00 * m22 - m20 * m02); +out[1][2] = -(m00 * m12 - m10 * m02); +out[2][0] = (m10 * m21 - m20 * m11); +out[2][1] = -(m00 * m21 - m20 * m01); +out[2][2] = (m00 * m11 - m10 * m01); + +det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2]; +det = 1.0 / det; + +for (i = 0; i < 3; i++) { +for (j = 0; j < 3; j++) +out[i][j] *= det; +} +} + +void mul3x3(double dst[3][3], const double
[FFmpeg-devel] [RFC] lavfi: add opencl tonemap filter.
It basically does HDR to SDR conversion with tonemapping. Signed-off-by: Ruiling Song --- This patch tries to add a filter to do HDR to SDR conversion with tonemapping. The filter does the whole job of tonemapping in one pass, which is quite different from vf_tonemap.c. I chose this way because I think it introduces less memory access. I also find that tonemapping shares lots of code with colorspace conversion, so I moved the colorspace-related code into separate files (both OpenCL kernel and host code). I am not sure whether the design seems OK. Would anybody like to give some comments on the overall design or implementation details? Thanks! Ruiling configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/colorspace_basic.c | 89 +++ libavfilter/colorspace_basic.h | 40 +++ libavfilter/opencl/colorspace_basic.cl | 137 ++ libavfilter/opencl/tonemap.cl | 136 ++ libavfilter/opencl_source.h| 2 + libavfilter/vf_tonemap_opencl.c| 472 + 9 files changed, 880 insertions(+) create mode 100644 libavfilter/colorspace_basic.c create mode 100644 libavfilter/colorspace_basic.h create mode 100644 libavfilter/opencl/colorspace_basic.cl create mode 100644 libavfilter/opencl/tonemap.cl create mode 100644 libavfilter/vf_tonemap_opencl.c diff --git a/configure b/configure index 7f199c6..b9e464d 100755 --- a/configure +++ b/configure @@ -3395,6 +3395,7 @@ tinterlace_filter_deps="gpl" tinterlace_merge_test_deps="tinterlace_filter" tinterlace_pad_test_deps="tinterlace_filter" tonemap_filter_deps="const_nan" +tonemap_opencl_filter_deps="opencl" unsharp_opencl_filter_deps="opencl" uspp_filter_deps="gpl avcodec" vaguedenoiser_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 3454f25..7a1b0e8 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -348,6 +348,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o 
framesync.o OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o +OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace_basic.o opencl.o \ +opencl/tonemap.o opencl/colorspace_basic.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index d958f9b..759097a 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -339,6 +339,7 @@ extern AVFilter ff_vf_tinterlace; extern AVFilter ff_vf_tlut2; extern AVFilter ff_vf_tmix; extern AVFilter ff_vf_tonemap; +extern AVFilter ff_vf_tonemap_opencl; extern AVFilter ff_vf_transpose; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c new file mode 100644 index 000..93f9f08 --- /dev/null +++ b/libavfilter/colorspace_basic.c @@ -0,0 +1,89 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "colorspace_basic.h" + + +void invert_matrix3x3(const double in[3][3], double out[3][3]) +{ +double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2], + m10 = in[1][0], m11 = in[1][1], m12 = in[1][2], + m20 = in[2][0], m21 = in[2][1], m22 = in[2][2]; +int i, j; +double det; + +out[0][0] = (m11 * m22 - m21 * m12); +out[0][1] = -(m01 * m22 - m21 * m02); +out[0][2] = (m01 * m12 - m11 * m02); +out[1][0] = -(m10 * m22 - m20 * m12); +out[1][1] = (m00 * m22 - m20 * m02); +out[1][2] = -(m00 * m12 - m10 * m02); +out[2][0] = (m10 * m21 - m20 * m11); +out[2][1] = -(m00 * m21 - m20 * m01); +out[2][2] = (m00 * m11 - m10 * m01); + +det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2]; +det = 1.0 / de
[FFmpeg-devel] [PATCH v2 2/2] lavf/qsv: clone the frame which may be managed by framework
For filters based on framesync, the input frame is managed by framesync, so we should not keep and destroy it directly; instead we make a clone of it here, or else a double free will occur. But for other filters not based on framesync, we still need to free the input frame inside filter_frame. That's why I made this v2, to fix the side effect on normal filters. v2: add one av_frame_free() in vf_vpp_qsv.c Signed-off-by: Ruiling Song --- libavfilter/qsvvpp.c | 4 ++-- libavfilter/vf_vpp_qsv.c | 5 - 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index f32b46d..86787c5 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -296,7 +296,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p av_log(ctx, AV_LOG_ERROR, "QSVVPP gets a wrong frame.\n"); return NULL; } -qsv_frame->frame = picref; +qsv_frame->frame = av_frame_clone(picref); qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; } else { /* make a copy if the input is not padded as libmfx requires */ @@ -318,7 +318,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p av_frame_copy_props(qsv_frame->frame, picref); av_frame_free(&picref); } else -qsv_frame->frame = picref; +qsv_frame->frame = av_frame_clone(picref); if (map_frame_to_surface(qsv_frame->frame, &qsv_frame->surface_internal) < 0) { diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c index bd5fc32..4ef5bfb 100644 --- a/libavfilter/vf_vpp_qsv.c +++ b/libavfilter/vf_vpp_qsv.c @@ -326,8 +326,11 @@ static int config_output(AVFilterLink *outlink) static int filter_frame(AVFilterLink *inlink, AVFrame *picref) { VPPContext *vpp = inlink->dst->priv; +int ret = 0; -return ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); +ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref); +av_frame_free(&picref); +return ret; } static int query_formats(AVFilterContext *ctx) -- 2.7.4 ___ ffmpeg-devel mailing list 
ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v2 1/2] lavf: make overlay_qsv work based on framesync
The existing version, which was cherry-picked from Libav, does not work with the FFmpeg framework, because ff_request_frame() is totally different between Libav (recursive) and FFmpeg (non-recursive). The existing overlay_qsv implementation depends on the recursive version of ff_request_frame() to trigger an immediate call to request_frame() on the input pad. But this has been removed in FFmpeg since "lavfi: make request_frame() non-recursive." Now that we have handy framesync support in FFmpeg, I make it work based on framesync. Some other fixes which are also needed to make overlay_qsv work are put in a separate patch. v2: add .preinit field to initialize framesync options. Export more options like vf_overlay.c Signed-off-by: Ruiling Song --- libavfilter/Makefile | 2 +- libavfilter/vf_overlay_qsv.c | 213 --- 2 files changed, 78 insertions(+), 137 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index a90ca30..7f2ad1f 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -267,7 +267,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_OVERLAY_FILTER)+= vf_overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o -OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o +OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o framesync.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER)+= vf_pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c index 6c3efdb..2087178 100644 --- a/libavfilter/vf_overlay_qsv.c +++ b/libavfilter/vf_overlay_qsv.c @@ -36,6 +36,7 @@ #include "formats.h" #include "video.h" +#include "framesync.h" #include "qsvvpp.h" #define MAIN0 @@ -56,14 +57,10 @@ enum var_name { VAR_VARS_NB }; -enum EOFAction { -EOF_ACTION_REPEAT, -EOF_ACTION_ENDALL -}; - typedef struct QSVOverlayContext { const AVClass *class; +FFFrameSync fs; QSVVPPContext *qsv; 
QSVVPPParamqsv_param; mfxExtVPPComposite comp_conf; @@ -72,10 +69,6 @@ typedef struct QSVOverlayContext { char *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh; uint16_t overlay_alpha, overlay_pixel_alpha; -enum EOFAction eof_action; /* action to take on EOF from source */ - -AVFrame *main; -AVFrame *over_prev, *over_next; } QSVOverlayContext; static const char *const var_names[] = { @@ -90,20 +83,25 @@ static const char *const var_names[] = { NULL }; -static const AVOption options[] = { +static const AVOption overlay_qsv_options[] = { { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS}, { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS}, { "w", "Overlay width", OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { .str="overlay_iw"}, 0, 255, .flags = FLAGS}, { "h", "Overlay height", OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { .str="overlay_ih*w/overlay_iw"}, 0, 255, .flags = FLAGS}, { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, { .i64 = 255}, 0, 255, .flags = FLAGS}, { "eof_action", "Action to take when encountering EOF from secondary input ", -OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, -EOF_ACTION_REPEAT, EOF_ACTION_ENDALL, .flags = FLAGS, "eof_action" }, -{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" }, -{ "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" }, +OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, +EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" }, +{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" }, +{ "endall", "End both streams.",0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" }, +{ "pass", "Pass through the main 
input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" }, +{ "shortest", "force termination when the
[FFmpeg-devel] [PATCH 1/2] lavf: make overlay_qsv work based on framesync.
The previous version, which was cherry-picked from Libav, does not work with the FFmpeg framework, because ff_request_frame() is totally different between Libav and FFmpeg. So, I make it work by using framesync. Some other fixes that are needed to run overlay_qsv are put in a separate patch. Signed-off-by: Ruiling Song --- libavfilter/Makefile | 2 +- libavfilter/vf_overlay_qsv.c | 212 +++ 2 files changed, 75 insertions(+), 139 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fc16512..e642b8d 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -263,7 +263,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_OVERLAY_FILTER)+= vf_overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o -OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o +OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o framesync.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER)+= vf_pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c index 6c3efdb..c4c694f 100644 --- a/libavfilter/vf_overlay_qsv.c +++ b/libavfilter/vf_overlay_qsv.c @@ -36,6 +36,7 @@ #include "formats.h" #include "video.h" +#include "framesync.h" #include "qsvvpp.h" #define MAIN0 @@ -56,14 +57,10 @@ enum var_name { VAR_VARS_NB }; -enum EOFAction { -EOF_ACTION_REPEAT, -EOF_ACTION_ENDALL -}; - typedef struct QSVOverlayContext { const AVClass *class; +FFFrameSync fs; QSVVPPContext *qsv; QSVVPPParamqsv_param; mfxExtVPPComposite comp_conf; @@ -72,10 +69,7 @@ typedef struct QSVOverlayContext { char *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh; uint16_t overlay_alpha, overlay_pixel_alpha; -enum EOFAction eof_action; /* action to take on EOF from source */ - -AVFrame *main; -AVFrame *over_prev, *over_next; +enum FFFrameSyncExtMode eof_action; } QSVOverlayContext; static const char *const var_names[]
= { @@ -90,20 +84,22 @@ static const char *const var_names[] = { NULL }; -static const AVOption options[] = { +static const AVOption overlay_qsv_options[] = { { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS}, { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { .str="0"}, 0, 255, .flags = FLAGS}, { "w", "Overlay width", OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { .str="overlay_iw"}, 0, 255, .flags = FLAGS}, { "h", "Overlay height", OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { .str="overlay_ih*w/overlay_iw"}, 0, 255, .flags = FLAGS}, { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, { .i64 = 255}, 0, 255, .flags = FLAGS}, { "eof_action", "Action to take when encountering EOF from secondary input ", -OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT }, -EOF_ACTION_REPEAT, EOF_ACTION_ENDALL, .flags = FLAGS, "eof_action" }, -{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" }, -{ "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" }, +OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EXT_INFINITY }, +EXT_STOP, EXT_INFINITY, .flags = FLAGS, "eof_action" }, +{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EXT_INFINITY }, .flags = FLAGS, "eof_action" }, +{ "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EXT_STOP }, .flags = FLAGS, "eof_action" }, { NULL } }; +FRAMESYNC_DEFINE_CLASS(overlay_qsv, QSVOverlayContext, fs); + static int eval_expr(AVFilterContext *ctx) { QSVOverlayContext *vpp = ctx->priv; @@ -230,12 +226,53 @@ static int config_overlay_input(AVFilterLink *inlink) return 0; } +static int process_frame(FFFrameSync *fs) +{ +AVFilterContext *ctx = fs->parent; +QSVOverlayContext *s = fs->opaque; +AVFrame*frame = NULL; +int ret = 0, i; + +for (i = 0; i < ctx->nb_inputs; i++) { +ret = 
ff_framesync_get_frame(fs, i, &frame, 0); +if (ret == 0) +ret = ff_qsvvpp_filter
[FFmpeg-devel] [PATCH 2/2] lavf: clone the frame managed by framework.
We should clone the frame, which is managed by the framework. Directly assigning it will cause a double-free issue when qsv tries to free it. In fact, the frames are managed by the framework! Right now, I am still not quite sure why we receive a 'more data' error from libmfx, but some simple debugging suggests that it is meaningless, so just skip it entirely rather than returning an EAGAIN error to the caller. Signed-off-by: Ruiling Song --- libavfilter/qsvvpp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c index f32b46d..980dd62 100644 --- a/libavfilter/qsvvpp.c +++ b/libavfilter/qsvvpp.c @@ -296,7 +296,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p av_log(ctx, AV_LOG_ERROR, "QSVVPP gets a wrong frame.\n"); return NULL; } -qsv_frame->frame = picref; +qsv_frame->frame = av_frame_clone(picref); qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3]; } else { /* make a copy if the input is not padded as libmfx requires */ @@ -318,7 +318,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p av_frame_copy_props(qsv_frame->frame, picref); av_frame_free(&picref); } else -qsv_frame->frame = picref; +qsv_frame->frame = av_frame_clone(picref); if (map_frame_to_surface(qsv_frame->frame, &qsv_frame->surface_internal) < 0) { @@ -707,7 +707,7 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr if (ret < 0 && ret != MFX_ERR_MORE_SURFACE) { /* Ignore more_data error */ if (ret == MFX_ERR_MORE_DATA) -ret = AVERROR(EAGAIN); +ret = 0; break; } -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavc/vaapi: release buffer before destroy context.
The common way to use libVA is to first destroy the buffers, then the context. I am not sure whether libVA has a clear statement on this. This patch just makes things simple. This fixes a segmentation fault with the iHD open source driver. Signed-off-by: Ruiling Song --- libavcodec/vaapi_encode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c index 550ea47..607e3ab 100644 --- a/libavcodec/vaapi_encode.c +++ b/libavcodec/vaapi_encode.c @@ -1562,6 +1562,8 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx) vaapi_encode_free(avctx, pic); } +av_buffer_pool_uninit(&ctx->output_buffer_pool); + if (ctx->va_context != VA_INVALID_ID) { vaDestroyContext(ctx->hwctx->display, ctx->va_context); ctx->va_context = VA_INVALID_ID; @@ -1572,7 +1574,6 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx) ctx->va_config = VA_INVALID_ID; } -av_buffer_pool_uninit(&ctx->output_buffer_pool); av_freep(&ctx->codec_sequence_params); av_freep(&ctx->codec_picture_params); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavc/qsv: skip the packet if decoding failure.
From: "Ruiling, Song" MediaSDK may fail to decode some frames; just skip them. Otherwise, it will keep decoding the failing packet repeatedly without processing any packets afterwards. v2: switch to using av_packet_unref(). Signed-off-by: Ruiling Song --- libavcodec/qsvdec_h2645.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index 5e00673..d92a150 100644 --- a/libavcodec/qsvdec_h2645.c +++ b/libavcodec/qsvdec_h2645.c @@ -153,8 +153,12 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data, } ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->buffer_pkt); -if (ret < 0) +if (ret < 0) { +/* Drop buffer_pkt when failed to decode the packet. Otherwise, + the decoder will keep decoding the failure packet. */ +av_packet_unref(&s->buffer_pkt); return ret; +} s->buffer_pkt.size -= ret; s->buffer_pkt.data += ret; -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] lavc/qsv: skip the packet if decoding failure.
MediaSDK may fail to decode some frames; just skip them. Otherwise, it will keep decoding the failing packet repeatedly without processing any packets afterwards. Signed-off-by: Ruiling, Song --- libavcodec/qsvdec_h2645.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c index 5e00673..0d06b21 100644 --- a/libavcodec/qsvdec_h2645.c +++ b/libavcodec/qsvdec_h2645.c @@ -153,8 +153,12 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data, } ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->buffer_pkt); -if (ret < 0) -return ret; +if (ret < 0){ + /* force the buffer_pkt's size to 0 when failed to decode the packet, +otherwise, the decoder will keep decoding the failure packet. */ + s->buffer_pkt.size = 0; + return ret; +} s->buffer_pkt.size -= ret; s->buffer_pkt.data += ret; -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] vaapi_h265: general_level_idc should be multiplied by 3.
Signed-off-by: Ruiling Song --- libavcodec/vaapi_encode_h265.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c index 3ae92a7..32b8bc6 100644 --- a/libavcodec/vaapi_encode_h265.c +++ b/libavcodec/vaapi_encode_h265.c @@ -219,7 +219,7 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx) .general_non_packed_constraint_flag = 1, .general_frame_only_constraint_flag = 1, -.general_level_idc = avctx->level, +.general_level_idc = avctx->level * 3, }; vps->profile_tier_level.general_profile_compatibility_flag[avctx->profile & 31] = 1; -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel