On 09/07/18 03:26, Danil Iashchenko wrote:
> lavfi: add vflip_opencl, hflip_opencl.
> Behaves like existing vflip, hflip filters.
> ---
>  configure                     |   2 +
>  libavfilter/Makefile          |   4 +
>  libavfilter/allfilters.c      |   2 +
>  libavfilter/opencl/vflip.cl   |  60 ++++++++++
>  libavfilter/opencl_source.h   |   1 +
>  libavfilter/vf_vflip_opencl.c | 270 ++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 339 insertions(+)
>  create mode 100644 libavfilter/opencl/vflip.cl
>  create mode 100644 libavfilter/vf_vflip_opencl.c

These two filters feel a bit too trivial to make new files for?  Currently they 
can be implemented with program_opencl and a handful of lines of code:

hflip.cl:
"""
/*
 * Horizontal flip: each work-item writes one destination pixel, taken from
 * the horizontally mirrored position in the source image.
 *
 * dst   - output image (write-only).
 * index - not used by this kernel; presumably the frame index that
 *         program_opencl passes as the second kernel argument
 *         (confirm against the filter documentation).
 * src   - input image (read-only).
 */
__kernel void hflip(__write_only image2d_t dst,
                    unsigned int index,
                    __read_only  image2d_t src)
{
    /* Unnormalized (pixel) coordinates; no addressing or filter mode is set. */
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE;
    /* Destination pixel handled by this work-item. */
    int2 dst_loc = (int2)(get_global_id(0), get_global_id(1));
    /*
     * Mirror x about the image width, y is unchanged. The width is read
     * from dst, so this assumes src has the same dimensions (confirm).
     */
    int2 src_loc = (int2)(get_image_dim(dst).x - 1 - dst_loc.x, dst_loc.y);
    write_imagef(dst, dst_loc, read_imagef(src, sampler, src_loc));
}
"""
+
-vf ...,program_opencl=source=hflip.cl:kernel=hflip,...

and equivalently for vflip.

> ...
> diff --git a/libavfilter/opencl/vflip.cl b/libavfilter/opencl/vflip.cl
> new file mode 100644
> index 0000000..4ed2f43
> --- /dev/null
> +++ b/libavfilter/opencl/vflip.cl
> @@ -0,0 +1,60 @@
> ...
> +
> +void swap_pix(__write_only image2d_t dst,
> +              __read_only  image2d_t src,
> +              int2 loc,
> +              int2 loc1) {
> +
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    float4 px  = read_imagef(src, sampler, loc );
> +    float4 px1 = read_imagef(src, sampler, loc1);
> +
> +    write_imagef(dst, loc,  px1);
> +    write_imagef(dst, loc1, px );
> +}

Is this swap approach better than just writing the one pixel as above?  
Intuitively it feels slightly worse to me - every workitem ends up touching two 
completely different places in the input and output, which feels bad for 
optimisation/caching.

> +__kernel void vflip_global(__write_only image2d_t dst,
> +                           __read_only  image2d_t src)
> +{
> +
> +    int2 imgSize = get_image_dim(src);
> +    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
> +    int2 loc1 = (int2)(loc.x, imgSize.y - loc.y - 1);
> +
> +    swap_pix(dst, src, loc, loc1);
> +}
> +
> +
> +__kernel void hflip_global(__write_only image2d_t dst,
> +                           __read_only  image2d_t src)
> +{
> +
> +    int2 imgSize = get_image_dim(src);
> +    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
> +    int2 loc1 = (int2)(imgSize.x - loc.x - 1, loc.y);
> +
> +    swap_pix(dst, src, loc, loc1);
> +}
> ...

- Mark
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to