Input and output formats must be the same, the overlay format must be the same as the input except possibly with an additional alpha component. --- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/opencl/overlay.cl | 104 ++++++++++++ libavfilter/opencl_source.h | 2 + libavfilter/vf_overlay_opencl.c | 347 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 457 insertions(+) create mode 100644 libavfilter/opencl/overlay.cl create mode 100644 libavfilter/vf_overlay_opencl.c
diff --git a/configure b/configure index 895ae2ec38..c036a53a69 100755 --- a/configure +++ b/configure @@ -3178,6 +3178,7 @@ negate_filter_deps="lut_filter" nnedi_filter_deps="gpl" ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" +overlay_opencl_filter_deps="opencl" owdenoise_filter_deps="gpl" pan_filter_deps="swresample" perspective_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index cb3a1424d9..cc9d4021b8 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -249,6 +249,8 @@ OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o OBJS-$(CONFIG_OPENCL) += deshake_opencl.o unsharp_opencl.o OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync2.o +OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ + opencl/overlay.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 154ddf706d..261dd3a8e1 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -260,6 +260,7 @@ static void register_all(void) REGISTER_FILTER(OCV, ocv, vf); REGISTER_FILTER(OSCILLOSCOPE, oscilloscope, vf); REGISTER_FILTER(OVERLAY, overlay, vf); + REGISTER_FILTER(OVERLAY_OPENCL, overlay_opencl, vf); REGISTER_FILTER(OWDENOISE, owdenoise, vf); REGISTER_FILTER(PAD, pad, vf); REGISTER_FILTER(PALETTEGEN, palettegen, vf); diff --git a/libavfilter/opencl/overlay.cl b/libavfilter/opencl/overlay.cl new file mode 100644 index 0000000000..6cac5e28c4 --- /dev/null +++ b/libavfilter/opencl/overlay.cl @@ -0,0 +1,104 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +__kernel void overlay_no_alpha(__write_only image2d_t dst, + __read_only image2d_t main, + __read_only image2d_t overlay, + int x_position, + int y_position) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); + + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || + loc.y < y_position || + loc.x >= overlay_size.x + x_position || + loc.y >= overlay_size.y + y_position) { + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { + int2 loc_overlay = (int2)(x_position, y_position); + float4 val = read_imagef(overlay, sampler, loc - loc_overlay); + write_imagef(dst, loc, val); + } +} + +__kernel void overlay_internal_alpha(__write_only image2d_t dst, + __read_only image2d_t main, + __read_only image2d_t overlay, + int x_position, + int y_position) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); + + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || + loc.y < y_position || + loc.x >= overlay_size.x + x_position || + loc.y >= overlay_size.y + y_position) { + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_main = read_imagef(main, sampler, loc); + float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); + float4 val = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w); + write_imagef(dst, loc, val); + } +} + +__kernel void overlay_external_alpha(__write_only image2d_t dst, + __read_only image2d_t main, + __read_only image2d_t overlay, + __read_only image2d_t alpha, + int x_position, + int y_position, + int alpha_subsample_x, + int alpha_subsample_y) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); + + int2 overlay_size = get_image_dim(overlay); + int2 loc = (int2)(get_global_id(0), get_global_id(1)); + + if (loc.x < x_position || + loc.y < y_position || + loc.x >= overlay_size.x + x_position || + loc.y >= overlay_size.y + y_position) { + float4 val = read_imagef(main, sampler, loc); + write_imagef(dst, loc, val); + } else { + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_main = read_imagef(main, sampler, loc); + float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay); + + int2 loc_alpha = (int2)(loc.x * alpha_subsample_x, + loc.y * alpha_subsample_y) - loc_overlay; + float4 in_alpha = read_imagef(alpha, sampler, loc_alpha); + + float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x); + write_imagef(dst, loc, val); + } +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 8674a03a94..e7af58bcfa 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -19,4 +19,6 @@ #ifndef AVFILTER_OPENCL_SOURCE_H #define AVFILTER_OPENCL_SOURCE_H +extern const char *ff_opencl_source_overlay; + #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c new file mode 100644 index 0000000000..4da244dd43 --- /dev/null +++ b/libavfilter/vf_overlay_opencl.c @@ -0,0 +1,347 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/buffer.h" +#include "libavutil/common.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_opencl.h" +#include "libavutil/log.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" + +#include "avfilter.h" +#include "framesync2.h" +#include "internal.h" +#include "opencl.h" +#include "opencl_source.h" +#include "video.h" + +typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + + int initialised; + cl_kernel kernel; + cl_command_queue command_queue; + + FFFrameSync fs; + + int nb_planes; + int alpha_separate; + int alpha_subsample_x; + int alpha_subsample_y; + + int x_position; + int y_position; +} OverlayOpenCLContext; + +static int overlay_opencl_load(AVFilterContext *avctx, + enum AVPixelFormat main_format, + enum AVPixelFormat overlay_format) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + const char *source = ff_opencl_source_overlay; + const char *kernel; + const AVPixFmtDescriptor *main_desc, *overlay_desc; + int err, i, main_planes, overlay_planes; + + main_desc = av_pix_fmt_desc_get(main_format); + overlay_desc = av_pix_fmt_desc_get(overlay_format); + + main_planes = overlay_planes = 0; + for (i = 0; i < main_desc->nb_components; i++) + main_planes = FFMAX(main_planes, main_desc->comp[i].plane + 1); + for (i = 0; i < overlay_desc->nb_components; i++) + overlay_planes = FFMAX(overlay_planes, overlay_desc->comp[i].plane + 1); + + ctx->nb_planes = main_planes; + if (main_planes == overlay_planes) { + if (main_desc->nb_components == overlay_desc->nb_components) + kernel = "overlay_no_alpha"; + else + kernel = "overlay_internal_alpha"; + ctx->alpha_separate = 0; + } else { + kernel = "overlay_external_alpha"; + ctx->alpha_separate = 1; + ctx->alpha_subsample_x = 1 << main_desc->log2_chroma_w; + ctx->alpha_subsample_y = 1 << main_desc->log2_chroma_h; + } + av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel); + + err = ff_opencl_filter_load_program(avctx, &source, 1); + if (err < 0) + goto fail; + + ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, + ctx->ocf.hwctx->device_id, + 0, &cle); + if (!ctx->command_queue) { + av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle); + if (!ctx->kernel) { + av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->initialised = 1; + return 0; + +fail: + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); + return err; +} + +static int overlay_opencl_blend(FFFrameSync *fs) +{ + AVFilterContext *avctx = fs->parent; + AVFilterLink *outlink = avctx->outputs[0]; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *input_main, *input_overlay; + AVFrame *output; + cl_mem mem; + cl_int cle, x, y; + size_t global_work[2]; + int kernel_arg = 0; + int err, plane; + + err = ff_framesync2_get_frame(fs, 0, &input_main, 0); + if (err < 0) + return err; + err = ff_framesync2_get_frame(fs, 1, &input_overlay, 0); + if (err < 0) + return err; + + if (!ctx->initialised) { + AVHWFramesContext *main_fc = + (AVHWFramesContext*)input_main->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = + (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; + + err = overlay_opencl_load(avctx, main_fc->sw_format, + overlay_fc->sw_format); + if (err < 0) + return err; + } + + output = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!output) { + err = AVERROR(ENOMEM); + goto fail; + } + + for (plane = 0; plane < ctx->nb_planes; plane++) { + kernel_arg = 0; + + mem = (cl_mem)output->data[plane]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + + mem = (cl_mem)input_main->data[plane]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + + mem = (cl_mem)input_overlay->data[plane]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + + if (ctx->alpha_separate) { + mem = (cl_mem)input_overlay->data[ctx->nb_planes]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + } + + x = ctx->x_position; + y = ctx->y_position; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &x); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &y); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + + if (ctx->alpha_separate) { + cl_int alpha_subsample_x = plane > 0 ? ctx->alpha_subsample_x : 1; + cl_int alpha_subsample_y = plane > 0 ? ctx->alpha_subsample_y : 1; + + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_subsample_x); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_subsample_y); + if (cle != CL_SUCCESS) + goto fail_kernel_arg; + } + + global_work[0] = output->width; + global_work[1] = output->height; + + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, + global_work, NULL, 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to enqueue " + "overlay kernel for plane %d: %d.\n", cle, plane); + err = AVERROR(EIO); + goto fail; + } + } + + cle = clFinish(ctx->command_queue); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to finish " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + err = av_frame_copy_props(output, input_main); + + av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); + + return ff_filter_frame(outlink, output); + +fail_kernel_arg: + av_log(avctx, AV_LOG_ERROR, "Failed to set kernel arg %d: %d.\n", + kernel_arg, cle); + err = AVERROR(EIO); +fail: + return err; +} + +static int overlay_opencl_config_output(AVFilterLink *outlink) +{ + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; + int err; + + err = ff_opencl_filter_config_output(outlink); + if (err < 0) + return err; + + err = ff_framesync2_init_dualinput(&ctx->fs, avctx); + if (err < 0) + return err; + + return ff_framesync2_configure(&ctx->fs); +} + +static av_cold int overlay_opencl_init(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + + ctx->fs.on_event = &overlay_opencl_blend; + + return ff_opencl_filter_init(avctx); +} + +static int overlay_opencl_activate(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + + return ff_framesync2_activate(&ctx->fs); +} + +static av_cold void overlay_opencl_uninit(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + + if (ctx->kernel) { + cle = clReleaseKernel(ctx->kernel); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "kernel: %d.\n", cle); + } + + if (ctx->command_queue) { + cle = clReleaseCommandQueue(ctx->command_queue); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "command queue: %d.\n", cle); + } + + ff_opencl_filter_uninit(avctx); + + ff_framesync2_uninit(&ctx->fs); +} + +#define OFFSET(x) offsetof(OverlayOpenCLContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_opencl_options[] = { + { "x", "Overlay x position", + OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { "y", "Overlay y position", + OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(overlay_opencl); + +static const AVFilterPad overlay_opencl_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad overlay_opencl_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &overlay_opencl_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_overlay_opencl = { + .name = "overlay_opencl", + .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), + .priv_size = sizeof(OverlayOpenCLContext), + .priv_class = &overlay_opencl_class, + .init = &overlay_opencl_init, + .uninit = &overlay_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .activate = &overlay_opencl_activate, + .inputs = overlay_opencl_inputs, + .outputs = overlay_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.11.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel