Currently works only with RGBA on top of NV12 in BT.601. --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/opencl/overlay.cl | 125 ++++++++++++++++ libavfilter/vf_overlay_opencl.c | 314 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 442 insertions(+) create mode 100644 libavfilter/opencl/overlay.cl create mode 100644 libavfilter/vf_overlay_opencl.c
diff --git a/configure b/configure index 93c576a38..7151ab30a 100755 --- a/configure +++ b/configure @@ -2494,6 +2494,7 @@ interlace_filter_deps="gpl" movie_filter_deps="avcodec avformat" ocv_filter_deps="libopencv" opencl_program_filter_deps="opencl" +overlay_opencl_filter_deps="opencl" resample_filter_deps="avresample" scale_filter_deps="swscale" scale_qsv_filter_deps="libmfx" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 86cfd20f3..994872418 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -77,6 +77,7 @@ OBJS-$(CONFIG_NULL_FILTER) += vf_null.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o OBJS-$(CONFIG_OPENCL_PROGRAM_FILTER) += vf_opencl_program.o opencl.o OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o +OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index db41c498f..6ecd3b117 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -99,6 +99,7 @@ void avfilter_register_all(void) REGISTER_FILTER(OCV, ocv, vf); REGISTER_FILTER(OPENCL_PROGRAM, opencl_program, vf); REGISTER_FILTER(OVERLAY, overlay, vf); + REGISTER_FILTER(OVERLAY_OPENCL, overlay_opencl, vf); REGISTER_FILTER(PAD, pad, vf); REGISTER_FILTER(PIXDESCTEST, pixdesctest, vf); REGISTER_FILTER(SCALE, scale, vf); diff --git a/libavfilter/opencl/overlay.cl b/libavfilter/opencl/overlay.cl new file mode 100644 index 000000000..bd576315a --- /dev/null +++ b/libavfilter/opencl/overlay.cl @@ -0,0 +1,125 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +OPENCL_SOURCE(overlay, +static inline float4 yuv_to_rgb_bt601(float4 yuv) +{ + float4 yuv2r = (float4)(1.0, 0.0, +1.13983, 0.0); + float4 yuv2g = (float4)(1.0, -0.39465, -0.58060, 0.0); + float4 yuv2b = (float4)(1.0, +2.03211, 0.0, 0.0); + float4 rgb; + yuv -= (float4)(0.0, 0.5, 0.5, 0.0); + rgb.x = dot(yuv, yuv2r); + rgb.y = dot(yuv, yuv2g); + rgb.z = dot(yuv, yuv2b); + return rgb; +} + +static inline float4 rgb_to_yuv_bt601(float4 rgb) +{ + float4 rgb2y = (float4)(+0.299, +0.587, +0.114, 0.0); + float4 rgb2u = (float4)(-0.14713, -0.28886, +0.436, 0.0); + float4 rgb2v = (float4)(+0.615, -0.51499, -0.10001, 0.0); + float4 yuv; + yuv.x = dot(rgb, rgb2y); + yuv.y = 0.5f + dot(rgb, rgb2u); + yuv.z = 0.5f + dot(rgb, rgb2v); + return yuv; +} + +static inline float4 yuv_to_rgb_bt709(float4 yuv) +{ + float4 yuv2r = (float4)(1.0, 0.0, +1.28033, 0.0); + float4 yuv2g = (float4)(1.0, -0.21482, -0.38059, 0.0); + float4 yuv2b = (float4)(1.0, +2.12798, 0.0, 0.0); + float4 rgb; + yuv -= (float4)(0.0, 0.5, 0.5, 0.0); + rgb.x = dot(yuv, yuv2r); + rgb.y = dot(yuv, yuv2g); + rgb.z = dot(yuv, yuv2b); + return rgb; +} + +static inline float4 rgb_to_yuv_bt709(float4 rgb) +{ + float4 rgb2y = (float4)(+0.2126, +0.7152, +0.0722, 0.0); + float4 rgb2u = (float4)(-0.09991, -0.33609, +0.436, 0.0); + float4 rgb2v = (float4)(+0.615, -0.55861, -0.05639, 0.0); + float4 yuv; + yuv.x = dot(rgb, rgb2y); + yuv.y = 0.5f + dot(rgb, rgb2u); + yuv.z = 0.5f + dot(rgb, rgb2v); + return yuv; +} + +__kernel void overlay_nv12_rgba(__write_only image2d_t dst_y, + __write_only image2d_t dst_uv, + __read_only image2d_t src_y, + __read_only image2d_t src_uv, + __read_only image2d_t overlay_rgba, + int x_position, + int y_position) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); + int i; + + int2 overlay_size = get_image_dim(overlay_rgba); + + int2 loc_y[4]; + int2 loc_uv = (int2)(get_global_id(0), get_global_id(1)); + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_y[4]; + float4 in_uv; + for (i = 0; i < 4; i++) { + loc_y[i] = 2 * loc_uv + (int2)(i & 1, !!(i & 2)); + in_y[i] = read_imagef(src_y, sampler, loc_y[i]); + } + in_uv = read_imagef(src_uv, sampler, loc_uv); + + if (loc_y[0].x < x_position || + loc_y[0].y < y_position || + loc_y[3].x >= overlay_size.x + x_position || + loc_y[3].y >= overlay_size.y + y_position) { + for (i = 0; i < 4; i++) + write_imagef(dst_y, loc_y[i], in_y[i]); + write_imagef(dst_uv, loc_uv, in_uv); + return; + } + + float4 in_yuv[4]; + float4 uval_rgb[4], oval_rgb[4]; + float4 out_rgb[4], out_yuv[4]; + float4 out_uv = 0.0f; + for (i = 0; i < 4; i++) { + in_yuv[i].x = in_y[i].x; + in_yuv[i].yz = in_uv.xy; + + uval_rgb[i] = yuv_to_rgb_bt709(in_yuv[i]); + oval_rgb[i] = read_imagef(overlay_rgba, sampler, loc_y[i] - loc_overlay); + + out_rgb[i] = uval_rgb[i] * (1.0f - oval_rgb[i].w) + + oval_rgb[i] * oval_rgb[i].w; + out_yuv[i] = rgb_to_yuv_bt709(out_rgb[i]); + + write_imagef(dst_y, loc_y[i], out_yuv[i].x); + out_uv.xy += out_yuv[i].yz; + } + write_imagef(dst_uv, loc_uv, 0.25f * out_uv); +} +); diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c new file mode 100644 index 000000000..7d88cc162 --- /dev/null +++ b/libavfilter/vf_overlay_opencl.c @@ -0,0 +1,314 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/buffer.h" +#include "libavutil/common.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_opencl.h" +#include "libavutil/log.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" + +#include "avfilter.h" +#include "internal.h" +#include "opencl.h" +#include "video.h" + +#include "opencl/overlay.cl" + +typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + + int initialised; + cl_kernel kernel; + cl_command_queue command_queue; + + AVFrame *main; + AVFrame *overlay; + AVFrame *overlay_next; + + int x_position; + int y_position; +} OverlayOpenCLContext; + +static int overlay_opencl_load(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + int err; + + err = ff_opencl_filter_load_program(avctx, OPENCL_SOURCE_STRING(overlay)); + if (err < 0) + goto fail; + + ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, + ctx->ocf.hwctx->device_id, + 0, &cle); + if (!ctx->command_queue) { + av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->kernel = clCreateKernel(ctx->ocf.program, "overlay_nv12_rgba", &cle); + if (!ctx->kernel) { + av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->initialised = 1; + return 0; + +fail: + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); + return err; +} + +static int overlay_opencl_filter_main(AVFilterLink *inlink, AVFrame *input) +{ + AVFilterContext *avctx = inlink->dst; + OverlayOpenCLContext *ctx = avctx->priv; + + av_log(avctx, AV_LOG_DEBUG, "Filter main: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), + input->width, input->height, input->pts); + + av_assert0(!ctx->main); + ctx->main = input; + + return 0; +} + +static int overlay_opencl_filter_overlay(AVFilterLink *inlink, AVFrame *input) +{ + AVFilterContext *avctx = inlink->dst; + OverlayOpenCLContext *ctx = avctx->priv; + + av_log(avctx, AV_LOG_DEBUG, "Filter overlay: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), + input->width, input->height, input->pts); + + av_assert0(!ctx->overlay_next); + ctx->overlay_next = input; + + return 0; +} + +static int overlay_opencl_request_frame(AVFilterLink *outlink) +{ + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *output; + cl_mem mem; + cl_int cle, x, y; + size_t global_work[2]; + int kernel_arg = 0; + int err; + + av_log(avctx, AV_LOG_DEBUG, "Filter request frame.\n"); + + if (!ctx->initialised) { + err = overlay_opencl_load(avctx); + if (err < 0) + return err; + } + + if (!ctx->main) { + err = ff_request_frame(avctx->inputs[0]); + if (err < 0) + return err; + } + if (!ctx->main) + return AVERROR(EAGAIN); + + if (!ctx->overlay_next) { + err = ff_request_frame(avctx->inputs[1]); + if (err < 0) + return err; + } + + while (!ctx->overlay || + av_compare_ts(ctx->main->pts, + avctx->inputs[0]->time_base, + ctx->overlay_next->pts, + avctx->inputs[1]->time_base) > 0) { + av_frame_free(&ctx->overlay); + ctx->overlay = ctx->overlay_next; + ctx->overlay_next = NULL; + + err = ff_request_frame(avctx->inputs[1]); + if (err < 0) + return err; + } + + output = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!output) { + err = AVERROR(ENOMEM); + goto fail; + } + + mem = (cl_mem)output->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)output->data[1]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->main->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->main->data[1]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->overlay->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + + x = ctx->x_position; + y = ctx->y_position; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &x); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &y); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + + global_work[0] = output->width / 2; + global_work[1] = output->height / 2; + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, + global_work, NULL, 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to enqueue " + "overlay kernel: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + cle = clFinish(ctx->command_queue); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to finish " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + err = av_frame_copy_props(output, ctx->main); + + av_frame_free(&ctx->main); + //av_frame_free(&ctx->overlay); + + av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); + + return ff_filter_frame(outlink, output); +fail_kernel_arg: + av_log(avctx, AV_LOG_ERROR, "Failed to set kernel arg %d: %d.\n", + kernel_arg, cle); + err = AVERROR(EIO); +fail: + return err; +} + +static av_cold void overlay_opencl_uninit(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + + av_frame_free(&ctx->main); + av_frame_free(&ctx->overlay); + av_frame_free(&ctx->overlay_next); + + if (ctx->kernel) { + cle = clReleaseKernel(ctx->kernel); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "kernel: %d.\n", cle); + } + + if (ctx->command_queue) { + cle = clReleaseCommandQueue(ctx->command_queue); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "command queue: %d.\n", cle); + } + + ff_opencl_filter_uninit(avctx); +} + +#define OFFSET(x) offsetof(OverlayOpenCLContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_opencl_options[] = { + { "x", "Overlay x position", + OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { "y", "Overlay y position", + OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +static const AVClass overlay_opencl_class = { + .class_name = "overlay_opencl", + .item_name = av_default_item_name, + .option = overlay_opencl_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVFilterPad overlay_opencl_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + .filter_frame = &overlay_opencl_filter_main, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + .filter_frame = &overlay_opencl_filter_overlay, + .needs_fifo = 1, + }, + { NULL } +}; + +static const AVFilterPad overlay_opencl_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_output, + .request_frame = &overlay_opencl_request_frame, + }, + { NULL } +}; + +AVFilter ff_vf_overlay_opencl = { + .name = "overlay_opencl", + .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), + .priv_size = sizeof(OverlayOpenCLContext), + .priv_class = &overlay_opencl_class, + .init = &ff_opencl_filter_init, + .uninit = &overlay_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .inputs = overlay_opencl_inputs, + .outputs = overlay_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.11.0 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel