from:"Ruiling Song"

[FFmpeg-devel] [PATCH] swscale/swscale: remove useless code

2020-04-01 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libswscale/swscale.c  | 16 +---
 libswscale/swscale_internal.h |  5 +
 libswscale/x86/swscale.c  |  3 +--
 3 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 8436f056d4..001cfbf15b 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -266,8 +266,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 
 /* vars which will change and which we need to store back in the context */
 int dstY = c->dstY;
-int lumBufIndex  = c->lumBufIndex;
-int chrBufIndex  = c->chrBufIndex;
 int lastInLumBuf = c->lastInLumBuf;
 int lastInChrBuf = c->lastInChrBuf;
 
@@ -336,8 +334,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
  * will not get executed. This is not really intended but works
  * currently, so people might do it. */
 if (srcSliceY == 0) {
-lumBufIndex  = -1;
-chrBufIndex  = -1;
 dstY = 0;
 lastInLumBuf = -1;
 lastInChrBuf = -1;
@@ -461,7 +457,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 
1);
 }
 
-lumBufIndex += lastLumSrcY - lastInLumBuf;
 lastInLumBuf = lastLumSrcY;
 
 if (cPosY < lastChrSrcY + 1) {
@@ -469,20 +464,13 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 desc[i].process(c, &desc[i], firstCPosY, lastCPosY - 
firstCPosY + 1);
 }
 
-chrBufIndex += lastChrSrcY - lastInChrBuf;
 lastInChrBuf = lastChrSrcY;
 
-// wrap buf index around to stay inside the ring buffer
-if (lumBufIndex >= vLumFilterSize)
-lumBufIndex -= vLumFilterSize;
-if (chrBufIndex >= vChrFilterSize)
-chrBufIndex -= vChrFilterSize;
 if (!enough_lines)
 break;  // we can't output a dstY line so let's try with the next 
slice
 
 #if HAVE_MMX_INLINE
-ff_updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
-  lastInLumBuf, lastInChrBuf);
+ff_updateMMXDitherTables(c, dstY);
 #endif
 if (should_dither) {
 c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
@@ -524,8 +512,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 
 /* store changed local vars back in the context */
 c->dstY = dstY;
-c->lumBufIndex  = lumBufIndex;
-c->chrBufIndex  = chrBufIndex;
 c->lastInLumBuf = lastInLumBuf;
 c->lastInChrBuf = lastInChrBuf;
 
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a59d12745a..9dda53eead 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -350,8 +350,6 @@ typedef struct SwsContext {
 //@{
 int lastInLumBuf; ///< Last scaled horizontal luma/alpha line 
from source in the ring buffer.
 int lastInChrBuf; ///< Last scaled horizontal chroma line 
from source in the ring buffer.
-int lumBufIndex;  ///< Index in ring buffer of the last scaled 
horizontal luma/alpha line from source.
-int chrBufIndex;  ///< Index in ring buffer of the last scaled 
horizontal chroma line from source.
 //@}
 
 uint8_t *formatConvBuffer;
@@ -635,8 +633,7 @@ int ff_yuv2rgb_c_init_tables(SwsContext *c, const int 
inv_table[4],
 void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4],
 int brightness, int contrast, int saturation);
 
-void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int 
chrBufIndex,
-   int lastInLumBuf, int lastInChrBuf);
+void ff_updateMMXDitherTables(SwsContext *c, int dstY);
 
 av_cold void ff_sws_init_range_convert(SwsContext *c);
 
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index e9d474a1e8..61110839ee 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -79,8 +79,7 @@ DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w)= 
0x0001000100010001ULL;
 #include "swscale_template.c"
 #endif
 
-void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int 
chrBufIndex,
-   int lastInLumBuf, int lastInChrBuf)
+void ff_updateMMXDitherTables(SwsContext *c, int dstY)
 {
 const int dstH= c->dstH;
 const int flags= c->flags;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V4] avfilter: Add tonemap vaapi filter for H2S

2019-12-11 Thread Ruiling Song

From: Xinpeng Sun 

It performs HDR (High Dynamic Range) to SDR (Standard Dynamic Range) conversion
with tone-mapping. For now, it only supports HDR10 as input.

An example command to use this filter with vaapi codecs:
FFMPEG -hwaccel vaapi -vaapi_device /dev/dri/renderD128 -hwaccel_output_format 
vaapi \
-i INPUT -vf 'tonemap_vaapi=format=p010' -c:v hevc_vaapi -profile 2 OUTPUT

Signed-off-by: Xinpeng Sun 
Signed-off-by: Zachary Zhou 
Signed-off-by: Ruiling Song 
---
After rethinking the documentation part, I found that it is not necessary to
repeat how to set up the vaapi device in this filter section. There is already
a good explanation of it (https://trac.ffmpeg.org/wiki/Hardware/VAAPI), so I
added a link to it instead.
I only made the code changes requested by Vittoria and me. If there are no
further comments, I am going to apply the patch next week. Thanks!

Ruiling

 configure  |   2 +
 doc/filters.texi   |  59 +
 libavfilter/Makefile   |   1 +
 libavfilter/allfilters.c   |   1 +
 libavfilter/vf_tonemap_vaapi.c | 419 +
 5 files changed, 482 insertions(+)
 create mode 100644 libavfilter/vf_tonemap_vaapi.c

diff --git a/configure b/configure
index 42e7df3941..74f2d38317 100755
--- a/configure
+++ b/configure
@@ -3576,6 +3576,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_vaapi_filter_deps="vaapi 
VAProcPipelineParameterBuffer_output_hdr_metadata"
 tonemap_opencl_filter_deps="opencl const_nan"
 transpose_opencl_filter_deps="opencl"
 transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
@@ -6577,6 +6578,7 @@ if enabled vaapi; then
 
 check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC"
 check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth
+check_struct "va/va.h va/va_vpp.h" "VAProcPipelineParameterBuffer" 
output_hdr_metadata
 check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" rotation_flags
 check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC"
 check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
diff --git a/doc/filters.texi b/doc/filters.texi
index 99da266cec..1d934b84f3 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -21034,6 +21034,65 @@ Apply a strong blur of both luma and chroma parameters:
 
 @c man end OPENCL VIDEO FILTERS
 
+@chapter VAAPI Video Filters
+@c man begin VAAPI VIDEO FILTERS
+
+VAAPI Video filters are usually used with VAAPI decoder and VAAPI encoder. 
Below is a description of VAAPI video filters.
+
+To enable compilation of these filters you need to configure FFmpeg with
+@code{--enable-vaapi}.
+
+To use vaapi filters, you need to set up the vaapi device correctly. For more 
information, please read @url{https://trac.ffmpeg.org/wiki/Hardware/VAAPI}.
+
+@section tonemap_vaapi
+
+Perform HDR(High Dynamic Range) to SDR(Standard Dynamic Range) conversion with 
tone-mapping.
+It maps the dynamic range of HDR10 content to that of SDR content.
+It currently only accepts HDR10 as input.
+
+It accepts the following parameters:
+
+@table @option
+@item format
+Specify the output pixel format.
+
+Currently supported formats are:
+@table @var
+@item p010
+@item nv12
+@end table
+
+Default is nv12.
+
+@item primaries, p
+Set the output color primaries.
+
+Default is same as input.
+
+@item transfer, t
+Set the output transfer characteristics.
+
+Default is bt709.
+
+@item matrix, m
+Set the output colorspace matrix.
+
+Default is same as input.
+
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Convert HDR(HDR10) video to bt2020-transfer-characteristic p010 format
+@example
+tonemap_vaapi=format=p010:t=bt2020-10
+@end example
+@end itemize
+
+@c man end VAAPI VIDEO FILTERS
+
 @chapter Video Sources
 @c man begin VIDEO SOURCES
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 446c802b98..37d4eee858 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -411,6 +411,7 @@ OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o 
framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o colorspace.o
 OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace.o opencl.o \
 opencl/tonemap.o 
opencl/colorspace_common.o
+OBJS-$(CONFIG_TONEMAP_VAAPI_FILTER)  += vf_tonemap_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_TPAD_FILTER)   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)  += vf_transpose_npp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 69953832da..c295f8e403 100644
--- a/libavfilter

[FFmpeg-devel] [PATCH V2] avfilter/vf_convolution: add x86 SIMD for filter_3x3()

2019-07-31 Thread Ruiling Song

Tested using a simple command (apply edge enhance):
./ffmpeg_g -i ~/Downloads/bbb_sunflower_1080p_30fps_normal.mp4 \
 -vf convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 
0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128" \
 -an -vframes 1000 -f null /dev/null

The fps increases from 151 to 270 on my local machine.

Signed-off-by: Ruiling Song 
---
v2:
  fix a bug in scalar code path.
  Use macro PROCESS_V/S for the first tap to simplify code.

 libavfilter/convolution.h |  64 +++
 libavfilter/vf_convolution.c  |  41 +--
 libavfilter/x86/Makefile  |   2 +
 libavfilter/x86/vf_convolution.asm| 156 ++
 libavfilter/x86/vf_convolution_init.c |  46 
 5 files changed, 271 insertions(+), 38 deletions(-)
 create mode 100644 libavfilter/convolution.h
 create mode 100644 libavfilter/x86/vf_convolution.asm
 create mode 100644 libavfilter/x86/vf_convolution_init.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h
new file mode 100644
index 00..fc6aad58fd
--- /dev/null
+++ b/libavfilter/convolution.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com)
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_CONVOLUTION_H
+#define AVFILTER_CONVOLUTION_H
+#include "avfilter.h"
+
+enum MatrixMode {
+MATRIX_SQUARE,
+MATRIX_ROW,
+MATRIX_COLUMN,
+MATRIX_NBMODES,
+};
+
+typedef struct ConvolutionContext {
+const AVClass *class;
+
+char *matrix_str[4];
+float rdiv[4];
+float bias[4];
+int mode[4];
+float scale;
+float delta;
+int planes;
+
+int size[4];
+int depth;
+int max;
+int bpc;
+int nb_planes;
+int nb_threads;
+int planewidth[4];
+int planeheight[4];
+int matrix[4][49];
+int matrix_length[4];
+int copy[4];
+
+void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+ int x, int width, int y, int height, int bpc);
+void (*filter[4])(uint8_t *dst, int width,
+  float rdiv, float bias, const int *const matrix,
+  const uint8_t *c[], int peak, int radius,
+  int dstride, int stride);
+} ConvolutionContext;
+
+void ff_convolution_init_x86(ConvolutionContext *s);
+#endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index 1305569c88..e3bf1df79f 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -25,48 +25,11 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
+#include "convolution.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
 
-enum MatrixMode {
-MATRIX_SQUARE,
-MATRIX_ROW,
-MATRIX_COLUMN,
-MATRIX_NBMODES,
-};
-
-typedef struct ConvolutionContext {
-const AVClass *class;
-
-char *matrix_str[4];
-float rdiv[4];
-float bias[4];
-int mode[4];
-float scale;
-float delta;
-int planes;
-
-int size[4];
-int depth;
-int max;
-int bpc;
-int nb_planes;
-int nb_threads;
-int planewidth[4];
-int planeheight[4];
-int matrix[4][49];
-int matrix_length[4];
-int copy[4];
-
-void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
- int x, int width, int y, int height, int bpc);
-void (*filter[4])(uint8_t *dst, int width,
-  float rdiv, float bias, const int *const matrix,
-  const uint8_t *c[], int peak, int radius,
-  int dstride, int stride);
-} ConvolutionContext;
-
 #define OFFSET(x) offsetof(ConvolutionContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -625,6 +588,8 @@ static int config_input(AVFilterLink *inlink)
 s->filter[p] = filter16_7x7;
 }
 }
+if (ARCH_X86_64)
+ff_convolution_init_x86(s);
 } else if (!strcmp(ctx->filter->name, "prewitt")) {
 if (s->depth > 8)
 f

[FFmpeg-devel] [PATCH] avfilter/vf_convolution: add x86 SIMD for filter_3x3()

2019-07-08 Thread Ruiling Song

Tested using a simple command (apply edge enhance):
./ffmpeg_g -i ~/Downloads/bbb_sunflower_1080p_30fps_normal.mp4 \
 -vf convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 
0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128" \
 -an -vframes 1000 -f null /dev/null

The fps increases from 151 to 270 on my local machine.

Signed-off-by: Ruiling Song 
---
 libavfilter/convolution.h |  64 +++
 libavfilter/vf_convolution.c  |  41 +--
 libavfilter/x86/Makefile  |   2 +
 libavfilter/x86/vf_convolution.asm| 158 ++
 libavfilter/x86/vf_convolution_init.c |  46 
 5 files changed, 273 insertions(+), 38 deletions(-)
 create mode 100644 libavfilter/convolution.h
 create mode 100644 libavfilter/x86/vf_convolution.asm
 create mode 100644 libavfilter/x86/vf_convolution_init.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h
new file mode 100644
index 00..fc6aad58fd
--- /dev/null
+++ b/libavfilter/convolution.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com)
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_CONVOLUTION_H
+#define AVFILTER_CONVOLUTION_H
+#include "avfilter.h"
+
+enum MatrixMode {
+MATRIX_SQUARE,
+MATRIX_ROW,
+MATRIX_COLUMN,
+MATRIX_NBMODES,
+};
+
+typedef struct ConvolutionContext {
+const AVClass *class;
+
+char *matrix_str[4];
+float rdiv[4];
+float bias[4];
+int mode[4];
+float scale;
+float delta;
+int planes;
+
+int size[4];
+int depth;
+int max;
+int bpc;
+int nb_planes;
+int nb_threads;
+int planewidth[4];
+int planeheight[4];
+int matrix[4][49];
+int matrix_length[4];
+int copy[4];
+
+void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+ int x, int width, int y, int height, int bpc);
+void (*filter[4])(uint8_t *dst, int width,
+  float rdiv, float bias, const int *const matrix,
+  const uint8_t *c[], int peak, int radius,
+  int dstride, int stride);
+} ConvolutionContext;
+
+void ff_convolution_init_x86(ConvolutionContext *s);
+#endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index 1305569c88..e3bf1df79f 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -25,48 +25,11 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
+#include "convolution.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
 
-enum MatrixMode {
-MATRIX_SQUARE,
-MATRIX_ROW,
-MATRIX_COLUMN,
-MATRIX_NBMODES,
-};
-
-typedef struct ConvolutionContext {
-const AVClass *class;
-
-char *matrix_str[4];
-float rdiv[4];
-float bias[4];
-int mode[4];
-float scale;
-float delta;
-int planes;
-
-int size[4];
-int depth;
-int max;
-int bpc;
-int nb_planes;
-int nb_threads;
-int planewidth[4];
-int planeheight[4];
-int matrix[4][49];
-int matrix_length[4];
-int copy[4];
-
-void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
- int x, int width, int y, int height, int bpc);
-void (*filter[4])(uint8_t *dst, int width,
-  float rdiv, float bias, const int *const matrix,
-  const uint8_t *c[], int peak, int radius,
-  int dstride, int stride);
-} ConvolutionContext;
-
 #define OFFSET(x) offsetof(ConvolutionContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -625,6 +588,8 @@ static int config_input(AVFilterLink *inlink)
 s->filter[p] = filter16_7x7;
 }
 }
+if (ARCH_X86_64)
+ff_convolution_init_x86(s);
 } else if (!strcmp(ctx->filter->name, "prewitt")) {
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++)
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefi

[FFmpeg-devel] [PATCH V3 1/2] avfilter/vf_gblur: add x86 SIMD optimizations

2019-06-05 Thread Ruiling Song

The horizontal pass gets ~2x performance with the patch
when running single-threaded.

Tested overall performance using the command(avx2 enabled):
./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null
./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null
For single thread, the fps improves from 43 to 60, about 40%.
For multi-thread, the fps improves from 110 to 130, about 20%.

v2:
Fix the bug when steps is not one.
v3:
Fix the bug where the upper half of a 64-bit register used to pass
an 'int' argument may contain garbage.

Signed-off-by: Ruiling Song 
---
 libavfilter/gblur.h |  55 ++
 libavfilter/vf_gblur.c  |  71 ++--
 libavfilter/x86/Makefile|   2 +
 libavfilter/x86/vf_gblur.asm| 185 
 libavfilter/x86/vf_gblur_init.c |  36 +++
 5 files changed, 310 insertions(+), 39 deletions(-)
 create mode 100644 libavfilter/gblur.h
 create mode 100644 libavfilter/x86/vf_gblur.asm
 create mode 100644 libavfilter/x86/vf_gblur_init.c

diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h
new file mode 100644
index 00..87129801de
--- /dev/null
+++ b/libavfilter/gblur.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011 Pascal Getreuer
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * Redistribution and use in source and binary forms, with or without 
modification,
+ * are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials provided
+ *with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AVFILTER_GBLUR_H
+#define AVFILTER_GBLUR_H
+#include "avfilter.h"
+
+typedef struct GBlurContext {
+const AVClass *class;
+
+float sigma;
+float sigmaV;
+int steps;
+int planes;
+
+int depth;
+int planewidth[4];
+int planeheight[4];
+float *buffer;
+float boundaryscale;
+float boundaryscaleV;
+float postscale;
+float postscaleV;
+float nu;
+float nuV;
+int nb_planes;
+void (*horiz_slice)(float *buffer, int width, int height, int steps, float 
nu, float bscale);
+} GBlurContext;
+void ff_gblur_init(GBlurContext *s);
+void ff_gblur_init_x86(GBlurContext *s);
+#endif
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index b91a8c074a..e71b33da80 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -30,30 +30,10 @@
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "gblur.h"
 #include "internal.h"
 #include "video.h"
 
-typedef struct GBlurContext {
-const AVClass *class;
-
-float sigma;
-float sigmaV;
-int steps;
-int planes;
-
-int depth;
-int planewidth[4];
-int planeheight[4];
-float *buffer;
-float boundaryscale;
-float boundaryscaleV;
-float postscale;
-float postscaleV;
-float nu;
-float nuV;
-int nb_planes;
-} GBlurContext;
-
 #define OFFSET(x) offsetof(GBlurContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -72,39 +52,44 @@ typedef struct ThreadData {
 int width;
 } ThreadData;
 
-static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
+static void horiz_slice_c(float *buffer, int width, int height, int steps,
+  float nu, float bscale)
 {
-GBlurContext *s = ctx->priv;
-ThreadData *td = arg;
-const int height = td->height;
-const int width = td->width;
-const int slice_start = (height *  jobnr   ) / nb_jobs;
-const int slice_end   = (height * (jobnr+1)) / nb_jobs;
-const float boundaryscale = s->boundaryscale;
-const int steps = s->steps;
-const float nu = s->nu;
-float *buffer = s->buffer;
-int y, x, step;
+int step, x, y;
 float *ptr;
-
-/* Filter horizontally along each row */
-for (y = slice_start; y < slice_end; y+

[FFmpeg-devel] [PATCH V3 2/2] checkasm/vf_gblur: add test for horiz_slice simd

2019-06-05 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_gblur.c | 67 +++
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 73 insertions(+)
 create mode 100644 tests/checkasm/vf_gblur.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 886ae33167..f5780eedb2 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -35,6 +35,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)  += $(AVCODECOBJS-yes)
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)  += vf_gblur.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)  += vf_hflip.o
 AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
 AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index bf51e00eab..3e2ec377be 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -162,6 +162,9 @@ static const struct {
 #if CONFIG_COLORSPACE_FILTER
 { "vf_colorspace", checkasm_check_colorspace },
 #endif
+#if CONFIG_GBLUR_FILTER
+{ "vf_gblur", checkasm_check_vf_gblur },
+#endif
 #if CONFIG_HFLIP_FILTER
 { "vf_hflip", checkasm_check_vf_hflip },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 9b8d2f5419..aed15b5fa4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -71,6 +71,7 @@ void checkasm_check_sw_rgb(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
+void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
 void checkasm_check_vf_threshold(void);
 void checkasm_check_vp8dsp(void);
diff --git a/tests/checkasm/vf_gblur.c b/tests/checkasm/vf_gblur.c
new file mode 100644
index 00..582bc7cc0f
--- /dev/null
+++ b/tests/checkasm/vf_gblur.c
@@ -0,0 +1,67 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/gblur.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define PIXELS (WIDTH * HEIGHT)
+#define BUF_SIZE (PIXELS * 4)
+
+#define randomize_buffers(buf, size) \
+do { \
+int j;   \
+float *tmp_buf = (float *)buf;   \
+for (j = 0; j < size; j++)   \
+tmp_buf[j] = (float)(rnd() & 0xFF); \
+} while (0)
+
+void checkasm_check_vf_gblur(void)
+{
+float *dst_ref = av_malloc(BUF_SIZE);
+float *dst_new = av_malloc(BUF_SIZE);
+int i, j;
+int w = WIDTH;
+int h = HEIGHT;
+int steps = 2;
+float nu = 0.101f;
+float bscale = 1.112f;
+GBlurContext s;
+
+declare_func(void, float *dst, int w, int h, int steps, float nu, float 
bscale);
+
+randomize_buffers(dst_ref, PIXELS);
+memcpy(dst_new, dst_ref, BUF_SIZE);
+
+ff_gblur_init(&s);
+
+if (check_func(s.horiz_slice, "horiz_slice")) {
+call_ref(dst_ref, w, h, steps, nu, bscale);
+call_new(dst_new, w, h, steps, nu, bscale);
+
+if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) {
+fail();
+}
+bench_new(dst_new, w, h, 1, nu, bscale);
+}
+report("horiz_slice");
+av_freep(&dst_ref);
+av_freep(&dst_new);
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index c453273cd0..618bde509f 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -27,6 +27,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
  \
 fate-checkasm-v210enc   \
 fate-checkasm-vf_blend  \
 fate-checkasm-vf_colorspace \
+fate-checkasm-vf_gblur  \
 fate-checkasm-vf_hflip  \
 fate-che

[FFmpeg-devel] [PATCH V2 2/2] checkasm/vf_gblur: add test for horiz_slice simd

2019-06-04 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_gblur.c | 67 +++
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 73 insertions(+)
 create mode 100644 tests/checkasm/vf_gblur.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 886ae33167..f5780eedb2 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -35,6 +35,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)  += $(AVCODECOBJS-yes)
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
+AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)  += vf_gblur.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)  += vf_hflip.o
 AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
 AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index bf51e00eab..3e2ec377be 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -162,6 +162,9 @@ static const struct {
 #if CONFIG_COLORSPACE_FILTER
 { "vf_colorspace", checkasm_check_colorspace },
 #endif
+#if CONFIG_GBLUR_FILTER
+{ "vf_gblur", checkasm_check_vf_gblur },
+#endif
 #if CONFIG_HFLIP_FILTER
 { "vf_hflip", checkasm_check_vf_hflip },
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 9b8d2f5419..aed15b5fa4 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -71,6 +71,7 @@ void checkasm_check_sw_rgb(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
+void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
 void checkasm_check_vf_threshold(void);
 void checkasm_check_vp8dsp(void);
diff --git a/tests/checkasm/vf_gblur.c b/tests/checkasm/vf_gblur.c
new file mode 100644
index 00..582bc7cc0f
--- /dev/null
+++ b/tests/checkasm/vf_gblur.c
@@ -0,0 +1,67 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/gblur.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define PIXELS (WIDTH * HEIGHT)
+#define BUF_SIZE (PIXELS * 4)
+
+#define randomize_buffers(buf, size) \
+do { \
+int j;   \
+float *tmp_buf = (float *)buf;   \
+for (j = 0; j < size; j++)   \
+tmp_buf[j] = (float)(rnd() & 0xFF); \
+} while (0)
+
+void checkasm_check_vf_gblur(void)
+{
+float *dst_ref = av_malloc(BUF_SIZE);
+float *dst_new = av_malloc(BUF_SIZE);
+int i, j;
+int w = WIDTH;
+int h = HEIGHT;
+int steps = 2;
+float nu = 0.101f;
+float bscale = 1.112f;
+GBlurContext s;
+
+declare_func(void, float *dst, int w, int h, int steps, float nu, float 
bscale);
+
+randomize_buffers(dst_ref, PIXELS);
+memcpy(dst_new, dst_ref, BUF_SIZE);
+
+ff_gblur_init(&s);
+
+if (check_func(s.horiz_slice, "horiz_slice")) {
+call_ref(dst_ref, w, h, steps, nu, bscale);
+call_new(dst_new, w, h, steps, nu, bscale);
+
+if (!float_near_abs_eps_array(dst_ref, dst_new, 0.01f, PIXELS)) {
+fail();
+}
+bench_new(dst_new, w, h, 1, nu, bscale);
+}
+report("horiz_slice");
+av_freep(&dst_ref);
+av_freep(&dst_new);
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index c453273cd0..618bde509f 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -27,6 +27,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
  \
 fate-checkasm-v210enc   \
 fate-checkasm-vf_blend  \
 fate-checkasm-vf_colorspace \
+fate-checkasm-vf_gblur  \
 fate-checkasm-vf_hflip  \
 fate-che

[FFmpeg-devel] [PATCH V2 1/2] avfilter/vf_gblur: add x86 SIMD optimizations

2019-06-04 Thread Ruiling Song

The horizontal pass gets ~2x performance with the patch
when running single-threaded.

Tested overall performance using the command(avx2 enabled):
./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null
./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null
For single thread, the fps improves from 43 to 60, about 40%.
For multi-thread, the fps improves from 110 to 130, about 20%.

v2:
Fix the bug when steps is not one.

Signed-off-by: Ruiling Song 
---
 libavfilter/gblur.h |  55 ++
 libavfilter/vf_gblur.c  |  71 ++---
 libavfilter/x86/Makefile|   2 +
 libavfilter/x86/vf_gblur.asm| 183 
 libavfilter/x86/vf_gblur_init.c |  36 +++
 5 files changed, 308 insertions(+), 39 deletions(-)
 create mode 100644 libavfilter/gblur.h
 create mode 100644 libavfilter/x86/vf_gblur.asm
 create mode 100644 libavfilter/x86/vf_gblur_init.c

diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h
new file mode 100644
index 00..87129801de
--- /dev/null
+++ b/libavfilter/gblur.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011 Pascal Getreuer
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * Redistribution and use in source and binary forms, with or without 
modification,
+ * are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials provided
+ *with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AVFILTER_GBLUR_H
+#define AVFILTER_GBLUR_H
+#include "avfilter.h"
+
+typedef struct GBlurContext {
+const AVClass *class;
+
+float sigma;
+float sigmaV;
+int steps;
+int planes;
+
+int depth;
+int planewidth[4];
+int planeheight[4];
+float *buffer;
+float boundaryscale;
+float boundaryscaleV;
+float postscale;
+float postscaleV;
+float nu;
+float nuV;
+int nb_planes;
+void (*horiz_slice)(float *buffer, int width, int height, int steps, float 
nu, float bscale);
+} GBlurContext;
+void ff_gblur_init(GBlurContext *s);
+void ff_gblur_init_x86(GBlurContext *s);
+#endif
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index b91a8c074a..e71b33da80 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -30,30 +30,10 @@
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "gblur.h"
 #include "internal.h"
 #include "video.h"
 
-typedef struct GBlurContext {
-const AVClass *class;
-
-float sigma;
-float sigmaV;
-int steps;
-int planes;
-
-int depth;
-int planewidth[4];
-int planeheight[4];
-float *buffer;
-float boundaryscale;
-float boundaryscaleV;
-float postscale;
-float postscaleV;
-float nu;
-float nuV;
-int nb_planes;
-} GBlurContext;
-
 #define OFFSET(x) offsetof(GBlurContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -72,39 +52,44 @@ typedef struct ThreadData {
 int width;
 } ThreadData;
 
-static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
+static void horiz_slice_c(float *buffer, int width, int height, int steps,
+  float nu, float bscale)
 {
-GBlurContext *s = ctx->priv;
-ThreadData *td = arg;
-const int height = td->height;
-const int width = td->width;
-const int slice_start = (height *  jobnr   ) / nb_jobs;
-const int slice_end   = (height * (jobnr+1)) / nb_jobs;
-const float boundaryscale = s->boundaryscale;
-const int steps = s->steps;
-const float nu = s->nu;
-float *buffer = s->buffer;
-int y, x, step;
+int step, x, y;
 float *ptr;
-
-/* Filter horizontally along each row */
-for (y = slice_start; y < slice_end; y++) {
+for (y = 0; y < height; y++) {
 for (step = 0; step < steps; step++

[FFmpeg-devel] [PATCH] avfilter/vf_gblur: add x86 SIMD optimizations

2019-05-29 Thread Ruiling Song

For details of the implementation, please refer to the comments
in the assembly code. The patch roughly doubles the performance
of the horizontal pass when run single-threaded.

Tested overall performance using the following commands (avx2 enabled):
./ffmpeg -i 1080p.mp4 -vf gblur -f null /dev/null
./ffmpeg -i 1080p.mp4 -vf gblur=threads=1 -f null /dev/null
For single thread, the fps improves from 43 to 60, about 40%.
For multi-thread, the fps improves from 110 to 130, about 20%.

Signed-off-by: Ruiling Song 
---
 libavfilter/gblur.h |  54 ++
 libavfilter/vf_gblur.c  |  66 +---
 libavfilter/x86/Makefile|   2 +
 libavfilter/x86/vf_gblur.asm| 182 
 libavfilter/x86/vf_gblur_init.c |  36 +++
 5 files changed, 302 insertions(+), 38 deletions(-)
 create mode 100644 libavfilter/gblur.h
 create mode 100644 libavfilter/x86/vf_gblur.asm
 create mode 100644 libavfilter/x86/vf_gblur_init.c

diff --git a/libavfilter/gblur.h b/libavfilter/gblur.h
new file mode 100644
index 00..97217044d0
--- /dev/null
+++ b/libavfilter/gblur.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2011 Pascal Getreuer
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * Redistribution and use in source and binary forms, with or without 
modification,
+ * are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials provided
+ *with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AVFILTER_GBLUR_H
+#define AVFILTER_GBLUR_H
+#include "avfilter.h"
+
+typedef struct GBlurContext {
+const AVClass *class;
+
+float sigma;
+float sigmaV;
+int steps;
+int planes;
+
+int depth;
+int planewidth[4];
+int planeheight[4];
+float *buffer;
+float boundaryscale;
+float boundaryscaleV;
+float postscale;
+float postscaleV;
+float nu;
+float nuV;
+int nb_planes;
+void (*horiz_slice)(float *buffer, int width, int height, int steps, float 
nu, float bscale);
+} GBlurContext;
+void ff_gblur_init_x86(GBlurContext *s);
+#endif
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index b91a8c074a..4e876bca05 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -30,29 +30,11 @@
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "formats.h"
+#include "gblur.h"
 #include "internal.h"
 #include "video.h"
+#include 
 
-typedef struct GBlurContext {
-const AVClass *class;
-
-float sigma;
-float sigmaV;
-int steps;
-int planes;
-
-int depth;
-int planewidth[4];
-int planeheight[4];
-float *buffer;
-float boundaryscale;
-float boundaryscaleV;
-float postscale;
-float postscaleV;
-float nu;
-float nuV;
-int nb_planes;
-} GBlurContext;
 
 #define OFFSET(x) offsetof(GBlurContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@@ -72,39 +54,44 @@ typedef struct ThreadData {
 int width;
 } ThreadData;
 
-static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
+static void horiz_slice_c(float *buffer, int width, int height, int steps,
+  float nu, float bscale)
 {
-GBlurContext *s = ctx->priv;
-ThreadData *td = arg;
-const int height = td->height;
-const int width = td->width;
-const int slice_start = (height *  jobnr   ) / nb_jobs;
-const int slice_end   = (height * (jobnr+1)) / nb_jobs;
-const float boundaryscale = s->boundaryscale;
-const int steps = s->steps;
-const float nu = s->nu;
-float *buffer = s->buffer;
-int y, x, step;
+int step, x, y;
 float *ptr;
-
-/* Filter horizontally along each row */
-for (y = slice_start; y < slice_end; y++) {
+for (y = 0; y < height; y++) {
 for (step = 0; step < st

[FFmpeg-devel] [PATCH V2] avfilter/vf_unsharp: enable slice threading

2019-05-16 Thread Ruiling Song

Benchmarked with a simple command:
ffmpeg -i 1080p.mp4 -vf unsharp=la=3:ca=3 -an -f null /dev/null
With the patch, the fps increases from 50 to 120 on my local machine (i7-6770HQ).

v2:
make av_image_copy_plane() only copy per-slice content.

Signed-off-by: Ruiling Song 
---
 libavfilter/unsharp.h|   4 +-
 libavfilter/vf_unsharp.c | 102 ++-
 2 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/libavfilter/unsharp.h b/libavfilter/unsharp.h
index caff986fc1..a60b30f31a 100644
--- a/libavfilter/unsharp.h
+++ b/libavfilter/unsharp.h
@@ -37,7 +37,8 @@ typedef struct UnsharpFilterParam {
 int steps_y; ///< vertical step count
 int scalebits;   ///< bits to shift pixel
 int32_t halfscale;   ///< amount to add to pixel
-uint32_t *sc[MAX_MATRIX_SIZE - 1];   ///< finite state machine storage
+uint32_t *sr;///< finite state machine storage within a row
+uint32_t **sc;   ///< finite state machine storage across rows
 } UnsharpFilterParam;
 
 typedef struct UnsharpContext {
@@ -47,6 +48,7 @@ typedef struct UnsharpContext {
 UnsharpFilterParam luma;   ///< luma parameters (width, height, amount)
 UnsharpFilterParam chroma; ///< chroma parameters (width, height, amount)
 int hsub, vsub;
+int nb_threads;
 int opencl;
 int (* apply_unsharp)(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
 } UnsharpContext;
diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c
index 41ccc56942..af05833a5d 100644
--- a/libavfilter/vf_unsharp.c
+++ b/libavfilter/vf_unsharp.c
@@ -47,15 +47,22 @@
 #include "libavutil/pixdesc.h"
 #include "unsharp.h"
 
-static void apply_unsharp(  uint8_t *dst, int dst_stride,
-  const uint8_t *src, int src_stride,
-  int width, int height, UnsharpFilterParam *fp)
+typedef struct TheadData {
+UnsharpFilterParam *fp;
+uint8_t   *dst;
+const uint8_t *src;
+int dst_stride;
+int src_stride;
+int width;
+int height;
+} ThreadData;
+
+static int unsharp_slice(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
 {
+ThreadData *td = arg;
+UnsharpFilterParam *fp = td->fp;
 uint32_t **sc = fp->sc;
-uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2;
-
-int32_t res;
-int x, y, z;
+uint32_t *sr = fp->sr;
 const uint8_t *src2 = NULL;  //silence a warning
 const int amount = fp->amount;
 const int steps_x = fp->steps_x;
@@ -63,30 +70,54 @@ static void apply_unsharp(  uint8_t *dst, int 
dst_stride,
 const int scalebits = fp->scalebits;
 const int32_t halfscale = fp->halfscale;
 
+uint8_t *dst = td->dst;
+const uint8_t *src = td->src;
+const int dst_stride = td->dst_stride;
+const int src_stride = td->src_stride;
+const int width = td->width;
+const int height = td->height;
+const int sc_offset = jobnr * 2 * steps_y;
+const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1);
+const int slice_start = (height * jobnr) / nb_jobs;
+const int slice_end = (height * (jobnr+1)) / nb_jobs;
+
+int32_t res;
+int x, y, z;
+uint32_t tmp1, tmp2;
+
 if (!amount) {
-av_image_copy_plane(dst, dst_stride, src, src_stride, width, height);
-return;
+av_image_copy_plane(dst + slice_start * dst_stride, dst_stride,
+src + slice_start * src_stride, src_stride,
+width, slice_end - slice_start);
+return 0;
 }
 
 for (y = 0; y < 2 * steps_y; y++)
-memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));
+memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));
 
-for (y = -steps_y; y < height + steps_y; y++) {
+// if this is not the first tile, we start from (slice_start - steps_y),
+// so we can get smooth result at slice boundary
+if (slice_start > steps_y) {
+src += (slice_start - steps_y) * src_stride;
+dst += (slice_start - steps_y) * dst_stride;
+}
+
+for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) {
 if (y < height)
 src2 = src;
 
-memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1));
+memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1));
 for (x = -steps_x; x < width + steps_x; x++) {
 tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x];
 for (z = 0; z < steps_x * 2; z += 2) {
-tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1;
-tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2;
+tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = 
tmp1;
+tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = 
tmp2;
 }
 for (

[FFmpeg-devel] [PATCH V2] avutil/tx: add check against (*ctx)

2019-05-15 Thread Ruiling Song

ctx is a pointer to pointer here.

Signed-off-by: Ruiling Song 
---
 libavutil/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 934ef27c81..1690604040 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -697,7 +697,7 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double 
scale)
 
 av_cold void av_tx_uninit(AVTXContext **ctx)
 {
-if (!ctx)
+if (!ctx || !(*ctx))
 return;
 
 av_free((*ctx)->pfatab);
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avutil/tx: should check against (*ctx)

2019-05-15 Thread Ruiling Song

ctx is a pointer to pointer here.

Signed-off-by: Ruiling Song 
---
 libavutil/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 934ef27c81..2bf4aa1c28 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -697,7 +697,7 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double 
scale)
 
 av_cold void av_tx_uninit(AVTXContext **ctx)
 {
-if (!ctx)
+if (!(*ctx))
 return;
 
 av_free((*ctx)->pfatab);
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] avfilter/vf_unsharp: enable slice threading

2019-05-08 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavfilter/unsharp.h|  4 +-
 libavfilter/vf_unsharp.c | 98 ++--
 2 files changed, 78 insertions(+), 24 deletions(-)

diff --git a/libavfilter/unsharp.h b/libavfilter/unsharp.h
index caff986fc1..a60b30f31a 100644
--- a/libavfilter/unsharp.h
+++ b/libavfilter/unsharp.h
@@ -37,7 +37,8 @@ typedef struct UnsharpFilterParam {
 int steps_y; ///< vertical step count
 int scalebits;   ///< bits to shift pixel
 int32_t halfscale;   ///< amount to add to pixel
-uint32_t *sc[MAX_MATRIX_SIZE - 1];   ///< finite state machine storage
+uint32_t *sr;///< finite state machine storage within a row
+uint32_t **sc;   ///< finite state machine storage across rows
 } UnsharpFilterParam;
 
 typedef struct UnsharpContext {
@@ -47,6 +48,7 @@ typedef struct UnsharpContext {
 UnsharpFilterParam luma;   ///< luma parameters (width, height, amount)
 UnsharpFilterParam chroma; ///< chroma parameters (width, height, amount)
 int hsub, vsub;
+int nb_threads;
 int opencl;
 int (* apply_unsharp)(AVFilterContext *ctx, AVFrame *in, AVFrame *out);
 } UnsharpContext;
diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c
index 41ccc56942..41c62d101a 100644
--- a/libavfilter/vf_unsharp.c
+++ b/libavfilter/vf_unsharp.c
@@ -47,15 +47,22 @@
 #include "libavutil/pixdesc.h"
 #include "unsharp.h"
 
-static void apply_unsharp(  uint8_t *dst, int dst_stride,
-  const uint8_t *src, int src_stride,
-  int width, int height, UnsharpFilterParam *fp)
+typedef struct TheadData {
+UnsharpFilterParam *fp;
+uint8_t   *dst;
+const uint8_t *src;
+int dst_stride;
+int src_stride;
+int width;
+int height;
+} ThreadData;
+
+static int unsharp_slice(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
 {
+ThreadData *td = arg;
+UnsharpFilterParam *fp = td->fp;
 uint32_t **sc = fp->sc;
-uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2;
-
-int32_t res;
-int x, y, z;
+uint32_t *sr = fp->sr;
 const uint8_t *src2 = NULL;  //silence a warning
 const int amount = fp->amount;
 const int steps_x = fp->steps_x;
@@ -63,30 +70,52 @@ static void apply_unsharp(  uint8_t *dst, int 
dst_stride,
 const int scalebits = fp->scalebits;
 const int32_t halfscale = fp->halfscale;
 
+uint8_t *dst = td->dst;
+const uint8_t *src = td->src;
+const int dst_stride = td->dst_stride;
+const int src_stride = td->src_stride;
+const int width = td->width;
+const int height = td->height;
+const int sc_offset = jobnr * 2 * steps_y;
+const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1);
+const int slice_start = (height * jobnr) / nb_jobs;
+const int slice_end = (height * (jobnr+1)) / nb_jobs;
+
+int32_t res;
+int x, y, z;
+uint32_t tmp1, tmp2;
+
 if (!amount) {
 av_image_copy_plane(dst, dst_stride, src, src_stride, width, height);
-return;
+return 0;
 }
 
 for (y = 0; y < 2 * steps_y; y++)
-memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));
+memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));
 
-for (y = -steps_y; y < height + steps_y; y++) {
+// if this is not the first tile, we start from (slice_start - steps_y),
+// so we can get smooth result at slice boundary
+if (slice_start > steps_y) {
+src += (slice_start - steps_y) * src_stride;
+dst += (slice_start - steps_y) * dst_stride;
+}
+
+for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) {
 if (y < height)
 src2 = src;
 
-memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1));
+memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1));
 for (x = -steps_x; x < width + steps_x; x++) {
 tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x];
 for (z = 0; z < steps_x * 2; z += 2) {
-tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1;
-tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2;
+tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = 
tmp1;
+tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = 
tmp2;
 }
 for (z = 0; z < steps_y * 2; z += 2) {
-tmp2 = sc[z + 0][x + steps_x] + tmp1; sc[z + 0][x + steps_x] = 
tmp1;
-tmp1 = sc[z + 1][x + steps_x] + tmp2; sc[z + 1][x + steps_x] = 
tmp2;
+tmp2 = sc[sc_offset + z + 0][x + steps_x] + tmp1; sc[sc_offset 
+ z + 0][x + steps_x] = tmp1;
+tmp1 = sc[sc_offset + z + 1][x + steps_x] + tmp2; sc[sc_offset 
+ z + 1][x + steps_x] = tmp2;

[FFmpeg-devel] [PATCH V3] lavfi/opencl: add nlmeans_opencl filter

2019-05-06 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 configure   |   1 +
 doc/filters.texi|   4 +
 libavfilter/Makefile|   1 +
 libavfilter/allfilters.c|   1 +
 libavfilter/opencl/nlmeans.cl   | 115 +
 libavfilter/opencl_source.h |   1 +
 libavfilter/vf_nlmeans_opencl.c | 443 
 7 files changed, 566 insertions(+)
 create mode 100644 libavfilter/opencl/nlmeans.cl
 create mode 100644 libavfilter/vf_nlmeans_opencl.c

diff --git a/configure b/configure
index d644a5b1d4..ee4041e5e0 100755
--- a/configure
+++ b/configure
@@ -3464,6 +3464,7 @@ mpdecimate_filter_select="pixelutils"
 minterpolate_filter_select="scene_sad"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
+nlmeans_opencl_filter_deps="opencl"
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
diff --git a/doc/filters.texi b/doc/filters.texi
index 3c15bb95f4..ee0db1809e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19240,6 +19240,10 @@ Make every semi-green pixel in the input transparent 
with some slight blending:
 @end example
 @end itemize
 
+@section nlmeans_opencl
+
+Non-local Means denoise filter through OpenCL, this filter accepts same 
options as @ref{nlmeans}.
+
 @section overlay_opencl
 
 Overlay one video on top of another.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 59d12ce069..3e409fc62c 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -295,6 +295,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o
 OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
+OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o 
opencl/nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)   += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)  += vf_noise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ae725cb0e0..fe0f8d7612 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -280,6 +280,7 @@ extern AVFilter ff_vf_mix;
 extern AVFilter ff_vf_mpdecimate;
 extern AVFilter ff_vf_negate;
 extern AVFilter ff_vf_nlmeans;
+extern AVFilter ff_vf_nlmeans_opencl;
 extern AVFilter ff_vf_nnedi;
 extern AVFilter ff_vf_noformat;
 extern AVFilter ff_vf_noise;
diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl
new file mode 100644
index 00..72bd681fd6
--- /dev/null
+++ b/libavfilter/opencl/nlmeans.cl
@@ -0,0 +1,115 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP_TO_EDGE   |
+   CLK_FILTER_NEAREST);
+
+kernel void horiz_sum(__global uint4 *integral_img,
+  __read_only image2d_t src,
+  int width,
+  int height,
+  int4 dx,
+  int4 dy)
+{
+
+int y = get_global_id(0);
+int work_size = get_global_size(0);
+
+uint4 sum = (uint4)(0);
+float4 s2;
+for (int i = 0; i < width; i++) {
+float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
+s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;
+s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;
+s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;
+s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;
+sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);
+integral_img[y * width + i] = sum;
+}
+}
+
+kernel void vert_sum(__global uint4 *integral_img,
+ __global int *overflow,
+ int width,
+ int height)
+{
+int x = get_global_id(0);
+uint4 sum = 0;
+for (int i = 0; i < height; i++) {
+if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum))
+atomic_inc(overflow);
+integral_img[i * width + x] += sum;
+

[FFmpeg-devel] [PATCH] lavfi/gblur: doing several columns at the same time

2019-05-05 Thread Ruiling Song

Instead of doing each column one by one, doing several columns
together gives about 30% better performance.

Signed-off-by: Ruiling Song 
---
Below are some performance numbers (fps) on my i7-6770HQ (decode + gblur):

resolution:    480p | 720p | 1080p | 4k
without patch: 393  | 146  | 71    | 14
with patch:    502  | 184  | 95    | 18
 libavfilter/vf_gblur.c | 62 --
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 5d05cac44c..9f07705ec4 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -108,6 +108,40 @@ static int filter_horizontally(AVFilterContext *ctx, void 
*arg, int jobnr, int n
 return 0;
 }
 
+static void do_vertical_columns(float *buffer, int width, int height,
+int column_begin, int column_end, int steps,
+float nu, float boundaryscale, int column_step)
+{
+const int numpixels = width * height;
+int i, x, k, step;
+float *ptr;
+for (x = column_begin; x < column_end;) {
+for (step = 0; step < steps; step++) {
+ptr = buffer + x;
+for (k = 0; k < column_step; k++) {
+ptr[k] *= boundaryscale;
+}
+/* Filter downwards */
+for (i = width; i < numpixels; i += width) {
+for (k = 0; k < column_step; k++) {
+ptr[i + k] += nu * ptr[i - width + k];
+}
+}
+i = numpixels - width;
+
+for (k = 0; k < column_step; k++)
+ptr[i + k] *= boundaryscale;
+
+/* Filter upwards */
+for (; i > 0; i -= width) {
+for (k = 0; k < column_step; k++)
+ptr[i - width + k] += nu * ptr[i + k];
+}
+}
+x += column_step;
+}
+}
+
 static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int 
nb_jobs)
 {
 GBlurContext *s = ctx->priv;
@@ -117,31 +151,19 @@ static int filter_vertically(AVFilterContext *ctx, void 
*arg, int jobnr, int nb_
 const int slice_start = (width *  jobnr   ) / nb_jobs;
 const int slice_end   = (width * (jobnr+1)) / nb_jobs;
 const float boundaryscale = s->boundaryscaleV;
-const int numpixels = width * height;
 const int steps = s->steps;
 const float nu = s->nuV;
 float *buffer = s->buffer;
-int i, x, step;
-float *ptr;
-
-/* Filter vertically along each column */
-for (x = slice_start; x < slice_end; x++) {
-for (step = 0; step < steps; step++) {
-ptr = buffer + x;
-ptr[0] *= boundaryscale;
-
-/* Filter downwards */
-for (i = width; i < numpixels; i += width)
-ptr[i] += nu * ptr[i - width];
-
-ptr[i = numpixels - width] *= boundaryscale;
+int aligned_end;
 
-/* Filter upwards */
-for (; i > 0; i -= width)
-ptr[i - width] += nu * ptr[i];
-}
-}
+aligned_end = slice_start + (((slice_end - slice_start) >> 3) << 3);
+/* Filter vertically along columns (process 8 columns in each step) */
+do_vertical_columns(buffer, width, height, slice_start, aligned_end,
+steps, nu, boundaryscale, 8);
 
+// Filter un-aligned columns one by one
+do_vertical_columns(buffer, width, height, aligned_end, slice_end,
+steps, nu, boundaryscale, 1);
 return 0;
 }
 
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 2/2] lavfi/opencl: add nlmeans_opencl filter

2019-04-12 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 configure   |   1 +
 doc/filters.texi|   4 +
 libavfilter/Makefile|   1 +
 libavfilter/allfilters.c|   1 +
 libavfilter/opencl/nlmeans.cl   | 115 +
 libavfilter/opencl_source.h |   1 +
 libavfilter/vf_nlmeans_opencl.c | 442 
 7 files changed, 565 insertions(+)
 create mode 100644 libavfilter/opencl/nlmeans.cl
 create mode 100644 libavfilter/vf_nlmeans_opencl.c

diff --git a/configure b/configure
index 0cdf0ffa8a..93ebfd6784 100755
--- a/configure
+++ b/configure
@@ -3461,6 +3461,7 @@ mpdecimate_filter_select="pixelutils"
 minterpolate_filter_select="scene_sad"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
+nlmeans_opencl_filter_deps="opencl"
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
diff --git a/doc/filters.texi b/doc/filters.texi
index 867607d870..21c2c1a4b5 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19030,6 +19030,10 @@ Apply erosion filter with threshold0 set to 30, 
threshold1 set 40, threshold2 se
 @end example
 @end itemize
 
+@section nlmeans_opencl
+
+Non-local Means denoise filter through OpenCL, this filter accepts same 
options as @ref{nlmeans}.
+
 @section overlay_opencl
 
 Overlay one video on top of another.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fef6ec5c55..92039bfdcf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -291,6 +291,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o
 OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
+OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o 
opencl/nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)   += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)  += vf_noise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index c51ae0f3c7..2a6390c92d 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -277,6 +277,7 @@ extern AVFilter ff_vf_mix;
 extern AVFilter ff_vf_mpdecimate;
 extern AVFilter ff_vf_negate;
 extern AVFilter ff_vf_nlmeans;
+extern AVFilter ff_vf_nlmeans_opencl;
 extern AVFilter ff_vf_nnedi;
 extern AVFilter ff_vf_noformat;
 extern AVFilter ff_vf_noise;
diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl
new file mode 100644
index 00..72bd681fd6
--- /dev/null
+++ b/libavfilter/opencl/nlmeans.cl
@@ -0,0 +1,115 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP_TO_EDGE   |
+   CLK_FILTER_NEAREST);
+
+kernel void horiz_sum(__global uint4 *integral_img,
+  __read_only image2d_t src,
+  int width,
+  int height,
+  int4 dx,
+  int4 dy)
+{
+
+int y = get_global_id(0);
+int work_size = get_global_size(0);
+
+uint4 sum = (uint4)(0);
+float4 s2;
+for (int i = 0; i < width; i++) {
+float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
+s2.x = read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x;
+s2.y = read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x;
+s2.z = read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x;
+s2.w = read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x;
+sum += convert_uint4((s1 - s2) * (s1 - s2) * 255 * 255);
+integral_img[y * width + i] = sum;
+}
+}
+
+kernel void vert_sum(__global uint4 *integral_img,
+ __global int *overflow,
+ int width,
+ int height)
+{
+int x = get_global_id(0);
+uint4 sum = 0;
+for (int i = 0; i < height; i++) {
+if (any((uint4)UINT_MAX - integral_img[i * width + x] < sum))
+atomic_inc(overflow);
+integral_img[i * width + x] += sum;
+

[FFmpeg-devel] [PATCH V2 1/2] lavfi/opencl: add more opencl helper macro

2019-04-12 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl.h | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index 0b06232ade..0fa5b49d3f 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -73,6 +73,44 @@ typedef struct OpenCLFilterContext {
 goto fail; \
 }  \
 } while(0)
+/**
+  * release an OpenCL Kernel
+  */
+#define CL_RELEASE_KERNEL(k)  \
+do {  \
+if (k) {  \
+cle = clReleaseKernel(k); \
+if (cle != CL_SUCCESS)\
+av_log(avctx, AV_LOG_ERROR, "Failed to release "  \
+   "OpenCL kernel: %d.\n", cle);  \
+} \
+} while(0)
+
+/**
+  * release an OpenCL Memory Object
+  */
+#define CL_RELEASE_MEMORY(m)  \
+do {  \
+if (m) {  \
+cle = clReleaseMemObject(m);  \
+if (cle != CL_SUCCESS)\
+av_log(avctx, AV_LOG_ERROR, "Failed to release "  \
+   "OpenCL memory: %d.\n", cle);  \
+} \
+} while(0)
+
+/**
+  * release an OpenCL Command Queue
+  */
+#define CL_RELEASE_QUEUE(q)   \
+do {  \
+if (q) {  \
+cle = clReleaseCommandQueue(q);   \
+if (cle != CL_SUCCESS)\
+av_log(avctx, AV_LOG_ERROR, "Failed to release "  \
+   "cl command queue: %d.\n", cle);   \
+} \
+} while(0)
 
 /**
  * Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] lavfi: add nlmeans_opencl filter

2019-04-01 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
This filter runs about 2x faster on the integrated GPU than nlmeans does on my
Skylake CPU.
Would anybody like to give some comments?

Ruiling

 configure   |   1 +
 doc/filters.texi|   4 +
 libavfilter/Makefile|   1 +
 libavfilter/allfilters.c|   1 +
 libavfilter/opencl/nlmeans.cl   | 108 +
 libavfilter/opencl_source.h |   1 +
 libavfilter/vf_nlmeans_opencl.c | 390 
 7 files changed, 506 insertions(+)
 create mode 100644 libavfilter/opencl/nlmeans.cl
 create mode 100644 libavfilter/vf_nlmeans_opencl.c

diff --git a/configure b/configure
index f6123f53e5..a233512491 100755
--- a/configure
+++ b/configure
@@ -3460,6 +3460,7 @@ mpdecimate_filter_select="pixelutils"
 minterpolate_filter_select="scene_sad"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
+nlmeans_opencl_filter_deps="opencl"
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
diff --git a/doc/filters.texi b/doc/filters.texi
index 867607d870..21c2c1a4b5 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19030,6 +19030,10 @@ Apply erosion filter with threshold0 set to 30, 
threshold1 set 40, threshold2 se
 @end example
 @end itemize
 
+@section nlmeans_opencl
+
+Non-local Means denoise filter through OpenCL, this filter accepts same 
options as @ref{nlmeans}.
+
 @section overlay_opencl
 
 Overlay one video on top of another.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fef6ec5c55..92039bfdcf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -291,6 +291,7 @@ OBJS-$(CONFIG_MIX_FILTER)+= vf_mix.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o
 OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
+OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o 
opencl/nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)   += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)  += vf_noise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index c51ae0f3c7..2a6390c92d 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -277,6 +277,7 @@ extern AVFilter ff_vf_mix;
 extern AVFilter ff_vf_mpdecimate;
 extern AVFilter ff_vf_negate;
 extern AVFilter ff_vf_nlmeans;
+extern AVFilter ff_vf_nlmeans_opencl;
 extern AVFilter ff_vf_nnedi;
 extern AVFilter ff_vf_noformat;
 extern AVFilter ff_vf_noise;
diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl
new file mode 100644
index 00..dcb04834ca
--- /dev/null
+++ b/libavfilter/opencl/nlmeans.cl
@@ -0,0 +1,108 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP_TO_EDGE   |
+   CLK_FILTER_NEAREST);
+
+kernel void horiz_sum(__global uint4 *ii,
+  __read_only image2d_t src,
+  int width,
+  int height,
+  int4 dx,
+  int4 dy)
+{
+
+int y = get_global_id(0);
+int work_size = get_global_size(0);
+
+uint4 sum = (uint4)(0);
+float4 s2;
+for (int i = 0; i < width; i++) {
+float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
+s2.x = read_imagef(src, sampler, (int2)(i+dx.x, y+dy.x)).x;
+s2.y = read_imagef(src, sampler, (int2)(i+dx.y, y+dy.y)).x;
+s2.z = read_imagef(src, sampler, (int2)(i+dx.z, y+dy.z)).x;
+s2.w = read_imagef(src, sampler, (int2)(i+dx.w, y+dy.w)).x;
+sum += convert_uint4((s1-s2)*(s1-s2) * 255*255);
+ii[y * width + i] = sum;
+}
+}
+
+kernel void vert_sum(__global uint4 *ii,
+ int width,
+ int height)
+{
+int x = get_global_id(0);
+uint4 sum = 0;
+for (int i = 0; i < height; i++) {
+ii[i * width + x] += sum;
+sum = ii[i * width + x];
+}
+}
+
+kernel void weight_accum(global float *sum, globa

[FFmpeg-devel] [PATCH] MAINTAINERS: add myself for tonemap_opencl

2019-02-12 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ac2d22..412a739 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -362,6 +362,7 @@ Filters:
   vf_ssim.c Paul B Mahol
   vf_stereo3d.c Paul B Mahol
   vf_telecine.c Paul B Mahol
+  vf_tonemap_opencl.c   Ruiling Song
   vf_yadif.cMichael Niedermayer
   vf_zoompan.c  Paul B Mahol
 
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavfi/vf_hwmap: move some code into separate function

2019-02-01 Thread Ruiling Song

This is just a code refinement. No functional change.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_hwmap.c | 83 --
 1 file changed, 39 insertions(+), 44 deletions(-)

diff --git a/libavfilter/vf_hwmap.c b/libavfilter/vf_hwmap.c
index 290559a..14276ce 100644
--- a/libavfilter/vf_hwmap.c
+++ b/libavfilter/vf_hwmap.c
@@ -50,6 +50,37 @@ static int hwmap_query_formats(AVFilterContext *avctx)
 return 0;
 }
 
+static int create_hwframe_context(AVFilterContext *avctx, AVBufferRef *device,
+  int format, int sw_format,
+  int width, int height)
+{
+HWMapContext  *ctx = avctx->priv;
+int err;
+AVHWFramesContext *frames;
+
+ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
+if (!ctx->hwframes_ref) {
+return AVERROR(ENOMEM);
+}
+frames = (AVHWFramesContext*)ctx->hwframes_ref->data;
+
+frames->format= format;
+frames->sw_format = sw_format;
+frames->width = width;
+frames->height= height;
+
+if (avctx->extra_hw_frames >= 0)
+frames->initial_pool_size = 2 + avctx->extra_hw_frames;
+
+err = av_hwframe_ctx_init(ctx->hwframes_ref);
+if (err < 0) {
+av_log(avctx, AV_LOG_ERROR, "Failed to initialise "
+   "hardware frames context: %d.\n", err);
+return err;
+}
+return 0;
+}
+
 static int hwmap_config_output(AVFilterLink *outlink)
 {
 AVFilterContext *avctx = outlink->src;
@@ -130,29 +161,12 @@ static int hwmap_config_output(AVFilterLink *outlink)
 // overwrite the input hwframe context with a derived context
 // mapped from that back to the source type.
 AVBufferRef *source;
-AVHWFramesContext *frames;
-
-ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
-if (!ctx->hwframes_ref) {
-err = AVERROR(ENOMEM);
-goto fail;
-}
-frames = (AVHWFramesContext*)ctx->hwframes_ref->data;
-
-frames->format= outlink->format;
-frames->sw_format = hwfc->sw_format;
-frames->width = hwfc->width;
-frames->height= hwfc->height;
-
-if (avctx->extra_hw_frames >= 0)
-frames->initial_pool_size = 2 + avctx->extra_hw_frames;
 
-err = av_hwframe_ctx_init(ctx->hwframes_ref);
-if (err < 0) {
-av_log(avctx, AV_LOG_ERROR, "Failed to initialise "
-   "target frames context: %d.\n", err);
-goto fail;
-}
+err = create_hwframe_context(avctx, device, outlink->format,
+ hwfc->sw_format, hwfc->width,
+ hwfc->height);
+if (err < 0)
+ goto fail;
 
 err = av_hwframe_ctx_create_derived(&source,
 inlink->format,
@@ -212,29 +226,10 @@ static int hwmap_config_output(AVFilterLink *outlink)
 }
 
 ctx->reverse = 1;
-
-ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
-if (!ctx->hwframes_ref) {
-err = AVERROR(ENOMEM);
-goto fail;
-}
-hwfc = (AVHWFramesContext*)ctx->hwframes_ref->data;
-
-hwfc->format= outlink->format;
-hwfc->sw_format = inlink->format;
-hwfc->width = inlink->w;
-hwfc->height= inlink->h;
-
-if (avctx->extra_hw_frames >= 0)
-hwfc->initial_pool_size = 2 + avctx->extra_hw_frames;
-
-err = av_hwframe_ctx_init(ctx->hwframes_ref);
-if (err < 0) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create frame "
-   "context for reverse mapping: %d.\n", err);
+err = create_hwframe_context(avctx, device, outlink->format,
+ inlink->format, inlink->w, inlink->h);
+if (err < 0)
 goto fail;
-}
-
 } else {
 av_log(avctx, AV_LOG_ERROR, "Mapping requires a hardware "
"context (a device, or frames on input).\n");
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [RFC] lavfi: add scale_opencl filter.

2019-01-30 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
This patch depends on the colorspace patchset I sent before
(https://patchwork.ffmpeg.org/patch/11820/).
Although I am still working on some minor functionality,
I hope somebody can give some comments on the overall design.

Ruiling

 configure |   1 +
 libavfilter/Makefile  |   2 +
 libavfilter/allfilters.c  |   1 +
 libavfilter/opencl/scale.cl   | 252 
 libavfilter/opencl_source.h   |   1 +
 libavfilter/vf_scale_opencl.c | 682 ++
 6 files changed, 939 insertions(+)
 create mode 100644 libavfilter/opencl/scale.cl
 create mode 100644 libavfilter/vf_scale_opencl.c

diff --git a/configure b/configure
index ec8f70d..5640137 100755
--- a/configure
+++ b/configure
@@ -3450,6 +3450,7 @@ rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
+scale_opencl_filter_deps="opencl"
 scale_qsv_filter_deps="libmfx"
 select_filter_select="scene_sad"
 sharpness_vaapi_filter_deps="vaapi"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index bc642ac..9de7d44 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -343,6 +343,8 @@ OBJS-$(CONFIG_SCALE_FILTER)  += vf_scale.o 
scale.o
 OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o 
vf_scale_cuda.ptx.o \
 cuda_check.o
 OBJS-$(CONFIG_SCALE_NPP_FILTER)  += vf_scale_npp.o scale.o 
cuda_check.o
+OBJS-$(CONFIG_SCALE_OPENCL_FILTER)   += vf_scale_opencl.o opencl.o \
+opencl/scale.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)  += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)+= vf_scale_vaapi.o scale.o 
vaapi_vpp.o
 OBJS-$(CONFIG_SCALE2REF_FILTER)  += vf_scale.o scale.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index c51ae0f..5708d16 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -325,6 +325,7 @@ extern AVFilter ff_vf_sab;
 extern AVFilter ff_vf_scale;
 extern AVFilter ff_vf_scale_cuda;
 extern AVFilter ff_vf_scale_npp;
+extern AVFilter ff_vf_scale_opencl;
 extern AVFilter ff_vf_scale_qsv;
 extern AVFilter ff_vf_scale_vaapi;
 extern AVFilter ff_vf_scale2ref;
diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl
new file mode 100644
index 000..5d3deda
--- /dev/null
+++ b/libavfilter/opencl/scale.cl
@@ -0,0 +1,252 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+extern float3 yuv2rgb(float, float, float);
+extern float3 rgb2yuv(float, float, float);
+
+const sampler_t sampler_nearest = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP |
+   CLK_FILTER_NEAREST);
+
+const sampler_t sampler_linear = (CLK_NORMALIZED_COORDS_FALSE |
+  CLK_ADDRESS_CLAMP |
+  CLK_FILTER_LINEAR);
+
+float4 neighbor(image2d_t img, float vscale,
+float hscale, int x, int y,
+__constant float *coff_x,
+__constant float *coff_y,
+int2 filter_size)
+{
+float xi = ((float)x + 0.5f) * hscale;
+float yi = ((float)y + 0.5f) * vscale;
+
+return read_imagef(img, sampler_nearest, (float2)(xi, yi));
+}
+
+float4 bilinear(image2d_t img, float vscale,
+float hscale, int x, int y,
+__constant float *coff_x,
+__constant float *coff_y,
+int2 filter_size)
+{
+float xi = ((float)x + 0.5f) * hscale;
+float yi = ((float)y + 0.5f) * vscale;
+
+return read_imagef(img, sampler_linear, (float2)(xi, yi));
+}
+
+float4 generic_filter(image2d_t img, float vscale, float hscale, int x, int y,
+  __constant float *coff_x, __constant float *coff_y,
+  int2 filter_size)
+{
+int2 dst_pos = (int2)(x, y);
+float2 src_coord = (convert_float2(dst_pos) + 0.5f) *
+   (float2)(hscale, vscale);
+int2 src_pos = convert_int2(floor(src_coord -

[FFmpeg-devel] [PATCH 4/5] lavfi/tonemap_opencl: reuse color matrix calculation from colorspace.c

2019-01-21 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl/colorspace_common.cl | 25 -
 libavfilter/vf_tonemap_opencl.c | 64 +++--
 2 files changed, 29 insertions(+), 60 deletions(-)

diff --git a/libavfilter/opencl/colorspace_common.cl 
b/libavfilter/opencl/colorspace_common.cl
index 94a4dd0..1d68a54 100644
--- a/libavfilter/opencl/colorspace_common.cl
+++ b/libavfilter/opencl/colorspace_common.cl
@@ -39,31 +39,6 @@ constant const float ST2084_C1 = 0.8359375f;
 constant const float ST2084_C2 = 18.8515625f;
 constant const float ST2084_C3 = 18.6875f;
 
-__constant float yuv2rgb_bt2020[] = {
-1.0f, 0.0f, 1.4746f,
-1.0f, -0.16455f, -0.57135f,
-1.0f, 1.8814f, 0.0f
-};
-
-__constant float yuv2rgb_bt709[] = {
-1.0f, 0.0f, 1.5748f,
-1.0f, -0.18732f, -0.46812f,
-1.0f, 1.8556f, 0.0f
-};
-
-__constant float rgb2yuv_bt709[] = {
-0.2126f, 0.7152f, 0.0722f,
--0.11457f, -0.38543f, 0.5f,
-0.5f, -0.45415f, -0.04585f
-};
-
-__constant float rgb2yuv_bt2020[] ={
-0.2627f, 0.678f, 0.0593f,
--0.1396f, -0.36037f, 0.5f,
-0.5f, -0.4598f, -0.0402f,
-};
-
-
 float get_luma_dst(float3 c) {
 return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z;
 }
diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
index ae3f98d..315ead4 100644
--- a/libavfilter/vf_tonemap_opencl.c
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -18,7 +18,6 @@
 #include 
 
 #include "libavutil/avassert.h"
-#include "libavutil/bprint.h"
 #include "libavutil/common.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/mem.h"
@@ -35,7 +34,6 @@
 // TODO:
 // - separate peak-detection from tone-mapping kernel to solve
 //one-frame-delay issue.
-// - import colorspace matrix generation from vf_colorspace.c
 // - more format support
 
 #define DETECTION_FRAMES 63
@@ -73,16 +71,6 @@ typedef struct TonemapOpenCLContext {
 cl_memutil_mem;
 } TonemapOpenCLContext;
 
-static const char *yuv_coff[AVCOL_SPC_NB] = {
-[AVCOL_SPC_BT709] = "rgb2yuv_bt709",
-[AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020",
-};
-
-static const char *rgb_coff[AVCOL_SPC_NB] = {
-[AVCOL_SPC_BT709] = "yuv2rgb_bt709",
-[AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020",
-};
-
 static const char *linearize_funcs[AVCOL_TRC_NB] = {
 [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
 [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg",
@@ -93,11 +81,6 @@ static const char *delinearize_funcs[AVCOL_TRC_NB] = {
 [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886",
 };
 
-static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
-[AVCOL_SPC_BT709]  = { 0.2126, 0.7152, 0.0722 },
-[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
-};
-
 static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = {
 [AVCOL_PRI_BT709]  = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
 [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
@@ -137,8 +120,8 @@ static int tonemap_opencl_init(AVFilterContext *avctx)
 {
 TonemapOpenCLContext *ctx = avctx->priv;
 int rgb2rgb_passthrough = 1;
-double rgb2rgb[3][3];
-struct LumaCoefficients luma_src, luma_dst;
+double rgb2rgb[3][3], rgb2yuv[3][3], yuv2rgb[3][3];
+const struct LumaCoefficients *luma_src, *luma_dst;
 cl_int cle;
 int err;
 AVBPrint header;
@@ -215,27 +198,37 @@ static int tonemap_opencl_init(AVFilterContext *avctx)
 
 if (rgb2rgb_passthrough)
 av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
-else {
-av_bprintf(&header, "__constant float rgb2rgb[9] = {\n");
-av_bprintf(&header, "%.4ff, %.4ff, %.4ff,\n",
-   rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]);
-av_bprintf(&header, "%.4ff, %.4ff, %.4ff,\n",
-   rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]);
-av_bprintf(&header, "%.4ff, %.4ff, %.4ff};\n",
-   rgb2rgb[2][0], rgb2rgb[2][1], rgb2rgb[2][2]);
+else
+ff_opencl_print_const_matrix_3x3(&header, "rgb2rgb", rgb2rgb);
+
+
+luma_src = ff_get_luma_coefficients(ctx->colorspace_in);
+if (!luma_src) {
+err = AVERROR(EINVAL);
+av_log(avctx, AV_LOG_ERROR, "unsupported input colorspace %d (%s)\n",
+   ctx->colorspace_in, av_color_space_name(ctx->colorspace_in));
+goto fail;
 }
 
-av_bprintf(&header, "#define rgb_matrix %s\n",
-   rgb_coff[ctx->colorspace_in]);
-av_bprintf(&header, "#define yuv_matrix %s\n",
-   yuv_coff[ctx->colorspace_out]);
+luma_dst = ff_get_luma_coefficients(ctx->colorspace_out);
+if (!luma_dst) {
+err = AVERROR(EINVAL);
+av_log(avctx, AV_LOG_ERROR, &quo

[FFmpeg-devel] [PATCH 5/5] lavfi/colorspace_common: add ifdef check to be more compatible.

2019-01-21 Thread Ruiling Song

Some filters may not need to linearize/delinearize, and thus will
not even define these functions. Add an ifdef check so that they can
easily reuse the .cl file.

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl/colorspace_common.cl | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/libavfilter/opencl/colorspace_common.cl 
b/libavfilter/opencl/colorspace_common.cl
index 1d68a54..ac911f0 100644
--- a/libavfilter/opencl/colorspace_common.cl
+++ b/libavfilter/opencl/colorspace_common.cl
@@ -124,10 +124,14 @@ float3 yuv2rgb(float y, float u, float v) {
 
 float3 yuv2lrgb(float3 yuv) {
 float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z);
+#ifdef linearize
 float r = linearize(rgb.x);
 float g = linearize(rgb.y);
 float b = linearize(rgb.z);
 return (float3)(r, g, b);
+#else
+return rgb;
+#endif
 }
 
 float3 rgb2yuv(float r, float g, float b) {
@@ -151,19 +155,25 @@ float rgb2y(float r, float g, float b) {
 }
 
 float3 lrgb2yuv(float3 c) {
+#ifdef delinearize
 float r = delinearize(c.x);
 float g = delinearize(c.y);
 float b = delinearize(c.z);
-
 return rgb2yuv(r, g, b);
+#else
+return rgb2yuv(c.x, c.y, c.z);
+#endif
 }
 
 float lrgb2y(float3 c) {
+#ifdef delinearize
 float r = delinearize(c.x);
 float g = delinearize(c.y);
 float b = delinearize(c.z);
-
 return rgb2y(r, g, b);
+#else
+return rgb2y(c.x, c.y, c.z);
+#endif
 }
 
 float3 lrgb2lrgb(float3 c) {
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 3/5] lavfi/opencl: add ff_opencl_print_const_matrix_3x3()

2019-01-21 Thread Ruiling Song

This is used to print a 3x3 matrix into a part of OpenCL
source code.

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl.c | 13 +
 libavfilter/opencl.h |  8 
 2 files changed, 21 insertions(+)

diff --git a/libavfilter/opencl.c b/libavfilter/opencl.c
index ac5eec6..95f0bfc 100644
--- a/libavfilter/opencl.c
+++ b/libavfilter/opencl.c
@@ -337,3 +337,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext 
*avctx,
 
 return 0;
 }
+
+void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str,
+  double mat[3][3])
+{
+int i, j;
+av_bprintf(buf, "__constant float %s[9] = {\n", name_str);
+for (i = 0; i < 3; i++) {
+for (j = 0; j < 3; j++)
+av_bprintf(buf, " %.5ff,", mat[i][j]);
+av_bprintf(buf, "\n");
+}
+av_bprintf(buf, "};\n");
+}
diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index 1b7f117..0b06232 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -25,6 +25,7 @@
 // it was introduced in OpenCL 2.0.
 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 
+#include "libavutil/bprint.h"
 #include "libavutil/buffer.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_opencl.h"
@@ -124,5 +125,12 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext 
*avctx,
   size_t *work_size,
   AVFrame *frame, int plane,
   int block_alignment);
+/**
+ * Print a 3x3 matrix into a buffer as __constant array, which could
+ * be included in an OpenCL program.
+*/
+
+void ff_opencl_print_const_matrix_3x3(AVBPrint *buf, const char *name_str,
+  double mat[3][3]);
 
 #endif /* AVFILTER_OPENCL_H */
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 2/5] lavfi/colorspace: move some functions to common file

2019-01-21 Thread Ruiling Song

These functions can be reused by other colorspace filters,
so move them to a common file. No functional changes.

Signed-off-by: Ruiling Song 
---
 libavfilter/colorspace.c| 71 
 libavfilter/colorspace.h|  4 +++
 libavfilter/vf_colorspace.c | 80 +++--
 3 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
index c668221..19616e4 100644
--- a/libavfilter/colorspace.c
+++ b/libavfilter/colorspace.c
@@ -93,6 +93,77 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients 
*coeffs,
 rgb2xyz[2][1] *= sg;
 rgb2xyz[2][2] *= sb;
 }
+static const double ycgco_matrix[3][3] =
+{
+{  0.25, 0.5,  0.25 },
+{ -0.25, 0.5, -0.25 },
+{  0.5,  0,   -0.5  },
+};
+
+static const double gbr_matrix[3][3] =
+{
+{ 0,1,   0   },
+{ 0,   -0.5, 0.5 },
+{ 0.5, -0.5, 0   },
+};
+
+/*
+ * All constants explained in e.g. 
https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html
+ * The older ones (bt470bg/m) are also explained in their respective ITU docs
+ * (e.g. 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf)
+ * whereas the newer ones can typically be copied directly from wikipedia :)
+ */
+static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
+[AVCOL_SPC_FCC]= { 0.30,   0.59,   0.11   },
+[AVCOL_SPC_BT470BG]= { 0.299,  0.587,  0.114  },
+[AVCOL_SPC_SMPTE170M]  = { 0.299,  0.587,  0.114  },
+[AVCOL_SPC_BT709]  = { 0.2126, 0.7152, 0.0722 },
+[AVCOL_SPC_SMPTE240M]  = { 0.212,  0.701,  0.087  },
+[AVCOL_SPC_YCOCG]  = { 0.25,   0.5,0.25   },
+[AVCOL_SPC_RGB]= { 1,  1,  1  },
+[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
+[AVCOL_SPC_BT2020_CL]  = { 0.2627, 0.6780, 0.0593 },
+};
+
+const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp)
+{
+const struct LumaCoefficients *coeffs;
+
+if (csp >= AVCOL_SPC_NB)
+return NULL;
+coeffs = &luma_coefficients[csp];
+if (!coeffs->cr)
+return NULL;
+
+return coeffs;
+}
+
+void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs,
+   double rgb2yuv[3][3])
+{
+double bscale, rscale;
+
+// special ycgco matrix
+if (coeffs->cr == 0.25 && coeffs->cg == 0.5 && coeffs->cb == 0.25) {
+memcpy(rgb2yuv, ycgco_matrix, sizeof(double) * 9);
+return;
+} else if (coeffs->cr == 1 && coeffs->cg == 1 && coeffs->cb == 1) {
+memcpy(rgb2yuv, gbr_matrix, sizeof(double) * 9);
+return;
+}
+
+rgb2yuv[0][0] = coeffs->cr;
+rgb2yuv[0][1] = coeffs->cg;
+rgb2yuv[0][2] = coeffs->cb;
+bscale = 0.5 / (coeffs->cb - 1.0);
+rscale = 0.5 / (coeffs->cr - 1.0);
+rgb2yuv[1][0] = bscale * coeffs->cr;
+rgb2yuv[1][1] = bscale * coeffs->cg;
+rgb2yuv[1][2] = 0.5;
+rgb2yuv[2][0] = 0.5;
+rgb2yuv[2][1] = rscale * coeffs->cg;
+rgb2yuv[2][2] = rscale * coeffs->cb;
+}
 
 double ff_determine_signal_peak(AVFrame *in)
 {
diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h
index 9366818..459a5df 100644
--- a/libavfilter/colorspace.h
+++ b/libavfilter/colorspace.h
@@ -44,6 +44,10 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients 
*coeffs,
const struct WhitepointCoefficients *wp,
double rgb2xyz[3][3]);
 
+const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp);
+void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs,
+   double rgb2yuv[3][3]);
+
 double ff_determine_signal_peak(AVFrame *in);
 void ff_update_hdr_metadata(AVFrame *in, double peak);
 
diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index f8d1ecd..2120199 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -170,78 +170,6 @@ typedef struct ColorSpaceContext {
 // FIXME dithering if bitdepth goes down?
 // FIXME bitexact for fate integration?
 
-static const double ycgco_matrix[3][3] =
-{
-{  0.25, 0.5,  0.25 },
-{ -0.25, 0.5, -0.25 },
-{  0.5,  0,   -0.5  },
-};
-
-static const double gbr_matrix[3][3] =
-{
-{ 0,1,   0   },
-{ 0,   -0.5, 0.5 },
-{ 0.5, -0.5, 0   },
-};
-
-/*
- * All constants explained in e.g. 
https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html
- * The older ones (bt470bg/m) are also explained in their respective ITU docs
- * (e.g. 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf)
- * whereas the newer ones can typically be copied directly from wikipedia :)
- */
-static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
-[AVCOL_SPC_FCC]= { 0.30,   0.59,   0.11   },
-[AVCOL_SPC_BT470BG]= { 0.299,  0.587,  0.

[FFmpeg-devel] [PATCH 1/5] lavu/opencl: replace va_ext.h with standard name

2019-01-21 Thread Ruiling Song

Khronos OpenCL header (https://github.com/KhronosGroup/OpenCL-Headers)
uses cl_va_api_media_sharing_intel.h. And Intel's official OpenCL driver
for Intel GPU (https://github.com/intel/compute-runtime) was compiled
against Khronos OpenCL header. So it's better to align with Khronos.

Signed-off-by: Ruiling Song 
---
 configure| 2 +-
 libavutil/hwcontext_opencl.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index c2b8fac..48fdc8e 100755
--- a/configure
+++ b/configure
@@ -6427,7 +6427,7 @@ fi
 
 if enabled_all opencl vaapi ; then
 enabled opencl_drm_beignet && enable opencl_vaapi_beignet
-check_type "CL/cl.h CL/va_ext.h" "clCreateFromVA_APIMediaSurfaceINTEL_fn" 
&&
+check_type "CL/cl.h CL/cl_va_api_media_sharing_intel.h" 
"clCreateFromVA_APIMediaSurfaceINTEL_fn" &&
 enable opencl_vaapi_intel_media
 fi
 
diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index d3df622..b116c5b 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -50,7 +50,7 @@
 #include 
 #endif
 #include 
-#include 
+#include 
 #include "hwcontext_vaapi.h"
 #endif
 
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 2/2] lavu: relax the condition to do hwframe unmapping.

2018-12-12 Thread Ruiling Song

This patch aims to fix the failure of hwmap/hwunmap when used with
passthrough-mode filters such as transpose_opencl:
[vaapi_frame] "hwmap,transpose_opencl=passthrough=landscape,
hwmap=derive_device=vaapi:reverse=1" [vaapi_frame]

If the frame meets the pass-through criteria, then the output of the
first hwmap goes directly into the input of the second hwmap.
What we need to do here is simply unmap the frame. The current issue
is that when we try to unmap in the frames context of the second hwmap,
it fails the check in av_hwframe_map(), which requires the original
hw_frames_ctx to be the same as the destination hw_frames_ctx. But I
think that if we are trying to map to the same device as the original
device_ctx, then we can just do the unmap.

Signed-off-by: Ruiling Song 
---
I am not sure whether there are any concerns or side effects with doing it
this way. The first idea that came up to fix the issue was to do the check
against internal->source_frames in vf_hwmap.c, but I found that this is not
accessible outside libavutil, so I used this fix instead. I hope to have your
comments and discussion.

Ruiling

 libavutil/hwcontext.c | 9 +
 libavutil/hwcontext.h | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index f1e404a..a006212 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -739,20 +739,21 @@ fail:
 
 int av_hwframe_map(AVFrame *dst, const AVFrame *src, int flags)
 {
-AVHWFramesContext *src_frames, *dst_frames;
+AVHWFramesContext *src_frames, *dst_frames, *src_src = NULL;
 HWMapDescriptor *hwmap;
 int ret;
 
 if (src->hw_frames_ctx && dst->hw_frames_ctx) {
 src_frames = (AVHWFramesContext*)src->hw_frames_ctx->data;
 dst_frames = (AVHWFramesContext*)dst->hw_frames_ctx->data;
+if (src_frames->internal->source_frames)
+src_src =
+(AVHWFramesContext*)src_frames->internal->source_frames->data;
 
 if ((src_frames == dst_frames &&
  src->format == dst_frames->sw_format &&
  dst->format == dst_frames->format) ||
-(src_frames->internal->source_frames &&
- src_frames->internal->source_frames->data ==
- (uint8_t*)dst_frames)) {
+(src_src && src_src->device_ctx == dst_frames->device_ctx)) {
 // This is an unmap operation.  We don't need to directly
 // do anything here other than fill in the original frame,
 // because the real unmap will be invoked when the last
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index f5a4b62..efe3988 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -528,9 +528,9 @@ enum {
  * by av_frame_alloc()).  src should have an associated hwframe context, and
  * dst may optionally have a format and associated hwframe context.
  *
- * If src was created by mapping a frame from the hwframe context of dst,
- * then this function undoes the mapping - dst is replaced by a reference to
- * the frame that src was originally mapped from.
+ * If src was created by mapping a frame from a hwframe context which shares 
the
+ * same device_ctx with dst, then this function undoes the mapping - dst is
+ * replaced by a reference to the frame that src was originally mapped from.
  *
  * If both src and dst have an associated hwframe context, then this function
  * attempts to map the src frame from its hardware context to that of dst and
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 1/2] lavfi/vf_hwmap: make hwunmap from software frame work.

2018-12-12 Thread Ruiling Song

This patch fixes an issue with the second hwmap filter in a chain like:
[vaapi_frame] hwmap [software filters] hwmap [vaapi_frame]
In such a case, we also need to allocate the hardware frame
and map it back to software.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_hwmap.c | 125 +
 1 file changed, 75 insertions(+), 50 deletions(-)

diff --git a/libavfilter/vf_hwmap.c b/libavfilter/vf_hwmap.c
index 290559a..03cb325 100644
--- a/libavfilter/vf_hwmap.c
+++ b/libavfilter/vf_hwmap.c
@@ -50,6 +50,36 @@ static int hwmap_query_formats(AVFilterContext *avctx)
 return 0;
 }
 
+static int create_hwframe_context(HWMapContext *ctx, AVFilterContext *avctx,
+  AVBufferRef *device, int format,
+  int sw_format, int width, int height)
+{
+int err;
+AVHWFramesContext *frames;
+
+ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
+if (!ctx->hwframes_ref) {
+return AVERROR(ENOMEM);
+}
+frames = (AVHWFramesContext*)ctx->hwframes_ref->data;
+
+frames->format= format;
+frames->sw_format = sw_format;
+frames->width = width;
+frames->height= height;
+
+if (avctx->extra_hw_frames >= 0)
+frames->initial_pool_size = 2 + avctx->extra_hw_frames;
+
+err = av_hwframe_ctx_init(ctx->hwframes_ref);
+if (err < 0) {
+av_log(avctx, AV_LOG_ERROR, "Failed to initialise "
+   "target frames context: %d.\n", err);
+return err;
+}
+return 0;
+}
+
 static int hwmap_config_output(AVFilterLink *outlink)
 {
 AVFilterContext *avctx = outlink->src;
@@ -130,29 +160,11 @@ static int hwmap_config_output(AVFilterLink *outlink)
 // overwrite the input hwframe context with a derived context
 // mapped from that back to the source type.
 AVBufferRef *source;
-AVHWFramesContext *frames;
-
-ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
-if (!ctx->hwframes_ref) {
-err = AVERROR(ENOMEM);
+err = create_hwframe_context(ctx, avctx, device, outlink->format,
+ hwfc->sw_format, hwfc->width,
+ hwfc->height);
+if (err < 0)
 goto fail;
-}
-frames = (AVHWFramesContext*)ctx->hwframes_ref->data;
-
-frames->format= outlink->format;
-frames->sw_format = hwfc->sw_format;
-frames->width = hwfc->width;
-frames->height= hwfc->height;
-
-if (avctx->extra_hw_frames >= 0)
-frames->initial_pool_size = 2 + avctx->extra_hw_frames;
-
-err = av_hwframe_ctx_init(ctx->hwframes_ref);
-if (err < 0) {
-av_log(avctx, AV_LOG_ERROR, "Failed to initialise "
-   "target frames context: %d.\n", err);
-goto fail;
-}
 
 err = av_hwframe_ctx_create_derived(&source,
 inlink->format,
@@ -175,10 +187,20 @@ static int hwmap_config_output(AVFilterLink *outlink)
 inlink->hw_frames_ctx = source;
 
 } else if ((outlink->format == hwfc->format &&
-inlink->format  == hwfc->sw_format) ||
-   inlink->format == hwfc->format) {
-// Map from a hardware format to a software format, or
-// undo an existing such mapping.
+inlink->format  == hwfc->sw_format)) {
+// unmap a software frame back to hardware
+ctx->reverse = 1;
+// in case the user does not provide a filter device, use the device_ref
+// from inlink
+if (!device)
+device = hwfc->device_ref;
+
+err = create_hwframe_context(ctx, avctx, device, outlink->format,
+ inlink->format, inlink->w, inlink->h);
+if (err < 0)
+goto fail;
+} else if (inlink->format == hwfc->format) {
+// Map from a hardware format to a software format
 
 ctx->hwframes_ref = av_buffer_ref(inlink->hw_frames_ctx);
 if (!ctx->hwframes_ref) {
@@ -212,29 +234,10 @@ static int hwmap_config_output(AVFilterLink *outlink)
 }
 
 ctx->reverse = 1;
-
-ctx->hwframes_ref = av_hwframe_ctx_alloc(device);
-if (!ctx->hwframes_ref) {
-err = AVERROR(ENOMEM);
-goto fail;
-}
-hwfc = (AVHWFramesContext*)ctx->hwframes_ref->data;
-
-hwfc->format= outlink->format;
-hwfc->sw_format = inlink->format;
-

[FFmpeg-devel] [PATCH] lavfi/tonemap_opencl: reuse matrix calculation from vf_colorspace

2018-11-27 Thread Ruiling Song

As these functions are moved to a shared file, other colorspace-related
filters can also leverage the code.

Signed-off-by: Ruiling Song 
---
 libavfilter/colorspace.c| 71 +
 libavfilter/colorspace.h|  4 ++
 libavfilter/opencl/colorspace_common.cl | 25 ---
 libavfilter/vf_colorspace.c | 80 ++---
 libavfilter/vf_tonemap_opencl.c | 62 +++--
 5 files changed, 106 insertions(+), 136 deletions(-)

diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
index c668221..19616e4 100644
--- a/libavfilter/colorspace.c
+++ b/libavfilter/colorspace.c
@@ -93,6 +93,77 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients 
*coeffs,
 rgb2xyz[2][1] *= sg;
 rgb2xyz[2][2] *= sb;
 }
+static const double ycgco_matrix[3][3] =
+{
+{  0.25, 0.5,  0.25 },
+{ -0.25, 0.5, -0.25 },
+{  0.5,  0,   -0.5  },
+};
+
+static const double gbr_matrix[3][3] =
+{
+{ 0,1,   0   },
+{ 0,   -0.5, 0.5 },
+{ 0.5, -0.5, 0   },
+};
+
+/*
+ * All constants explained in e.g. 
https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html
+ * The older ones (bt470bg/m) are also explained in their respective ITU docs
+ * (e.g. 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-5-199802-S!!PDF-E.pdf)
+ * whereas the newer ones can typically be copied directly from wikipedia :)
+ */
+static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
+[AVCOL_SPC_FCC]= { 0.30,   0.59,   0.11   },
+[AVCOL_SPC_BT470BG]= { 0.299,  0.587,  0.114  },
+[AVCOL_SPC_SMPTE170M]  = { 0.299,  0.587,  0.114  },
+[AVCOL_SPC_BT709]  = { 0.2126, 0.7152, 0.0722 },
+[AVCOL_SPC_SMPTE240M]  = { 0.212,  0.701,  0.087  },
+[AVCOL_SPC_YCOCG]  = { 0.25,   0.5,0.25   },
+[AVCOL_SPC_RGB]= { 1,  1,  1  },
+[AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
+[AVCOL_SPC_BT2020_CL]  = { 0.2627, 0.6780, 0.0593 },
+};
+
+const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp)
+{
+const struct LumaCoefficients *coeffs;
+
+if (csp >= AVCOL_SPC_NB)
+return NULL;
+coeffs = &luma_coefficients[csp];
+if (!coeffs->cr)
+return NULL;
+
+return coeffs;
+}
+
+void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs,
+   double rgb2yuv[3][3])
+{
+double bscale, rscale;
+
+// special ycgco matrix
+if (coeffs->cr == 0.25 && coeffs->cg == 0.5 && coeffs->cb == 0.25) {
+memcpy(rgb2yuv, ycgco_matrix, sizeof(double) * 9);
+return;
+} else if (coeffs->cr == 1 && coeffs->cg == 1 && coeffs->cb == 1) {
+memcpy(rgb2yuv, gbr_matrix, sizeof(double) * 9);
+return;
+}
+
+rgb2yuv[0][0] = coeffs->cr;
+rgb2yuv[0][1] = coeffs->cg;
+rgb2yuv[0][2] = coeffs->cb;
+bscale = 0.5 / (coeffs->cb - 1.0);
+rscale = 0.5 / (coeffs->cr - 1.0);
+rgb2yuv[1][0] = bscale * coeffs->cr;
+rgb2yuv[1][1] = bscale * coeffs->cg;
+rgb2yuv[1][2] = 0.5;
+rgb2yuv[2][0] = 0.5;
+rgb2yuv[2][1] = rscale * coeffs->cg;
+rgb2yuv[2][2] = rscale * coeffs->cb;
+}
 
 double ff_determine_signal_peak(AVFrame *in)
 {
diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h
index 9366818..459a5df 100644
--- a/libavfilter/colorspace.h
+++ b/libavfilter/colorspace.h
@@ -44,6 +44,10 @@ void ff_fill_rgb2xyz_table(const struct PrimaryCoefficients 
*coeffs,
const struct WhitepointCoefficients *wp,
double rgb2xyz[3][3]);
 
+const struct LumaCoefficients *ff_get_luma_coefficients(enum AVColorSpace csp);
+void ff_fill_rgb2yuv_table(const struct LumaCoefficients *coeffs,
+   double rgb2yuv[3][3]);
+
 double ff_determine_signal_peak(AVFrame *in);
 void ff_update_hdr_metadata(AVFrame *in, double peak);
 
diff --git a/libavfilter/opencl/colorspace_common.cl 
b/libavfilter/opencl/colorspace_common.cl
index 94a4dd0..1d68a54 100644
--- a/libavfilter/opencl/colorspace_common.cl
+++ b/libavfilter/opencl/colorspace_common.cl
@@ -39,31 +39,6 @@ constant const float ST2084_C1 = 0.8359375f;
 constant const float ST2084_C2 = 18.8515625f;
 constant const float ST2084_C3 = 18.6875f;
 
-__constant float yuv2rgb_bt2020[] = {
-1.0f, 0.0f, 1.4746f,
-1.0f, -0.16455f, -0.57135f,
-1.0f, 1.8814f, 0.0f
-};
-
-__constant float yuv2rgb_bt709[] = {
-1.0f, 0.0f, 1.5748f,
-1.0f, -0.18732f, -0.46812f,
-1.0f, 1.8556f, 0.0f
-};
-
-__constant float rgb2yuv_bt709[] = {
-0.2126f, 0.7152f, 0.0722f,
--0.11457f, -0.38543f, 0.5f,
-0.5f, -0.45415f, -0.04585f
-};
-
-__constant float rgb2yuv_bt2020[] ={
-0.2627f, 0.678f, 0.0593f,
--0.1396f, -0.36037f, 0.5f,
-0.5f, -0.4598f, -0.0402f,
-};
-
-
 float get_luma_dst(float3 c) {
 ret

[FFmpeg-devel] [PATCH V2] lavf: add transpose_opencl filter

2018-11-27 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 configure |   1 +
 libavfilter/Makefile  |   1 +
 libavfilter/allfilters.c  |   1 +
 libavfilter/opencl/transpose.cl   |  35 +
 libavfilter/opencl_source.h   |   1 +
 libavfilter/transpose.h   |  34 +
 libavfilter/vf_transpose.c|  14 +-
 libavfilter/vf_transpose_opencl.c | 288 ++
 8 files changed, 362 insertions(+), 13 deletions(-)
 create mode 100644 libavfilter/opencl/transpose.cl
 create mode 100644 libavfilter/transpose.h
 create mode 100644 libavfilter/vf_transpose_opencl.c

diff --git a/configure b/configure
index b4f944c..dcb3f5f 100755
--- a/configure
+++ b/configure
@@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
 tonemap_opencl_filter_deps="opencl const_nan"
+transpose_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1895fa2..6e26581 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += 
vf_tonemap_opencl.o colorspace.o
 OBJS-$(CONFIG_TPAD_FILTER)   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)  += vf_transpose_npp.o cuda_check.o
+OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)   += vf_transpose_opencl.o opencl.o 
opencl/transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)+= vf_unsharp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 837c99e..a600069 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_tpad;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_transpose_opencl;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
 extern AVFilter ff_vf_unsharp;
diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
new file mode 100644
index 000..e6388ab
--- /dev/null
+++ b/libavfilter/opencl/transpose.cl
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+kernel void transpose(__write_only image2d_t dst,
+  __read_only image2d_t src,
+  int dir) {
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP_TO_EDGE   |
+   CLK_FILTER_NEAREST);
+
+int2 size = get_image_dim(dst);
+int x = get_global_id(0);
+int y = get_global_id(1);
+
+int xin = (dir & 2) ? (size.y - 1 - y) : y;
+int yin = (dir & 1) ? (size.x - 1 - x) : x;
+float4 data = read_imagef(src, sampler, (int2)(xin, yin));
+
+if (x < size.x && y < size.y)
+write_imagef(dst, (int2)(x, y), data);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 2f67d89..4118138 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_neighbor;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_transpose;
 extern const char *ff_opencl_source_unsharp;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
new file mode 100644
index 000..d4bb4da
--- /dev/null
+++ b/libavfilter/transpose.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is

[FFmpeg-devel] [PATCH] lavf: add transpose_opencl filter

2018-11-25 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 configure |   1 +
 libavfilter/Makefile  |   1 +
 libavfilter/allfilters.c  |   1 +
 libavfilter/opencl/transpose.cl   |  35 +
 libavfilter/opencl_source.h   |   1 +
 libavfilter/transpose.h   |  34 +
 libavfilter/vf_transpose.c|  14 +-
 libavfilter/vf_transpose_opencl.c | 294 ++
 8 files changed, 368 insertions(+), 13 deletions(-)
 create mode 100644 libavfilter/opencl/transpose.cl
 create mode 100644 libavfilter/transpose.h
 create mode 100644 libavfilter/vf_transpose_opencl.c

diff --git a/configure b/configure
index b4f944c..dcb3f5f 100755
--- a/configure
+++ b/configure
@@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
 tonemap_opencl_filter_deps="opencl const_nan"
+transpose_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1895fa2..6e26581 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += 
vf_tonemap_opencl.o colorspace.o
 OBJS-$(CONFIG_TPAD_FILTER)   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)  += vf_transpose_npp.o cuda_check.o
+OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)   += vf_transpose_opencl.o opencl.o 
opencl/transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)+= vf_unsharp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 837c99e..a600069 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_tpad;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_transpose_opencl;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
 extern AVFilter ff_vf_unsharp;
diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
new file mode 100644
index 000..e6388ab
--- /dev/null
+++ b/libavfilter/opencl/transpose.cl
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+kernel void transpose(__write_only image2d_t dst,
+  __read_only image2d_t src,
+  int dir) {
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+   CLK_ADDRESS_CLAMP_TO_EDGE   |
+   CLK_FILTER_NEAREST);
+
+int2 size = get_image_dim(dst);
+int x = get_global_id(0);
+int y = get_global_id(1);
+
+int xin = (dir & 2) ? (size.y - 1 - y) : y;
+int yin = (dir & 1) ? (size.x - 1 - x) : x;
+float4 data = read_imagef(src, sampler, (int2)(xin, yin));
+
+if (x < size.x && y < size.y)
+write_imagef(dst, (int2)(x, y), data);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 2f67d89..4118138 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_neighbor;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_transpose;
 extern const char *ff_opencl_source_unsharp;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
new file mode 100644
index 000..da8b28e
--- /dev/null
+++ b/libavfilter/transpose.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is

[FFmpeg-devel] [PATCH 3/4] doc/filters: add tonemap_opencl document.

2018-10-28 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 doc/filters.texi | 96 
 1 file changed, 96 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 83df460..f884ba4 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -16387,6 +16387,7 @@ tmix=frames=3:weights="-1 2 -1":scale=1
 @end example
 @end itemize
 
+@anchor{tonemap}
 @section tonemap
 Tone map colors from different dynamic ranges.
 
@@ -18440,6 +18441,101 @@ Apply sobel operator with scale set to 2 and delta 
set to 10
 @end example
 @end itemize
 
+@section tonemap_opencl
+
+Perform HDR(PQ/HLG) to SDR conversion with tone-mapping.
+
+It accepts the following parameters:
+
+@table @option
+@item tonemap
+Specify the tone-mapping operator to be used. Same as tonemap option in 
@ref{tonemap}.
+
+@item param
+Tune the tone mapping algorithm. Same as param option in @ref{tonemap}.
+
+@item desat
+Apply desaturation for highlights that exceed this level of brightness. The
+higher the parameter, the more color information will be preserved. This
+setting helps prevent unnaturally blown-out colors for super-highlights, by
+(smoothly) turning into white instead. This makes images feel more natural,
+at the cost of reducing information about out-of-range colors.
+
+The default value is 0.5, and the algorithm here is a little different from
+the cpu version tonemap currently. A setting of 0.0 disables this option.
+
+@item threshold
+The tonemapping algorithm parameters are fine-tuned for each scene, and a threshold
+is used to detect whether the scene has changed. If the distance between
+the current frame average brightness and the current running average exceeds
+the threshold value, the scene average and peak brightness are recalculated.
+The default value is 0.2.
+
+@item format
+Specify the output pixel format.
+
+Currently supported formats are:
+@table @var
+@item p010
+@item nv12
+@end table
+
+@item range, r
+Set the output color range.
+
+Possible values are:
+@table @var
+@item tv/mpeg
+@item pc/jpeg
+@end table
+
+Default is same as input.
+
+@item primaries, p
+Set the output color primaries.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@item transfer, t
+Set the output transfer characteristics.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is bt709.
+
+@item matrix, m
+Set the output colorspace matrix.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using 
linear operator.
+@example
+-i INPUT -vf 
"format=p010,hwupload,tonemap_opencl=t=bt2020:tonemap=linear:format=p010,hwdownload,format=p010"
 OUTPUT
+@end example
+@end itemize
+
 @section unsharp_opencl
 
 Sharpen or blur the input video.
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 2/4] lavfi/opencl: Handle overlay input formats correctly.

2018-10-28 Thread Ruiling Song

The main input may have an alpha channel; we just ignore it.
Also add some checks for incompatible input formats.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_overlay_opencl.c | 58 -
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
index e9c8532..320c1a5 100644
--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c
@@ -37,7 +37,7 @@ typedef struct OverlayOpenCLContext {
 
 FFFrameSync  fs;
 
-int  nb_planes;
+int  nb_color_planes;
 int  x_subsample;
 int  y_subsample;
 int  alpha_separate;
@@ -46,6 +46,22 @@ typedef struct OverlayOpenCLContext {
 int  y_position;
 } OverlayOpenCLContext;
 
+static int has_planar_alpha(const AVPixFmtDescriptor *fmt) {
+int nb_components;
+int has_alpha = !!(fmt->flags & AV_PIX_FMT_FLAG_ALPHA);
+if (!has_alpha) return 0;
+
+nb_components = fmt->nb_components;
+// PAL8
+if (nb_components < 2) return 0;
+
+if (fmt->comp[nb_components - 1].plane >
+fmt->comp[nb_components - 2].plane)
+return 1;
+else
+return 0;
+}
+
 static int overlay_opencl_load(AVFilterContext *avctx,
enum AVPixelFormat main_format,
enum AVPixelFormat overlay_format)
@@ -55,10 +71,13 @@ static int overlay_opencl_load(AVFilterContext *avctx,
 const char *source = ff_opencl_source_overlay;
 const char *kernel;
 const AVPixFmtDescriptor *main_desc, *overlay_desc;
-int err, i, main_planes, overlay_planes;
+int err, i, main_planes, overlay_planes, overlay_alpha,
+main_planar_alpha, overlay_planar_alpha;
 
 main_desc= av_pix_fmt_desc_get(main_format);
 overlay_desc = av_pix_fmt_desc_get(overlay_format);
+overlay_alpha = !!(overlay_desc->flags & AV_PIX_FMT_FLAG_ALPHA);
+main_planar_alpha = has_planar_alpha(main_desc);
 
 main_planes = overlay_planes = 0;
 for (i = 0; i < main_desc->nb_components; i++)
@@ -68,7 +87,7 @@ static int overlay_opencl_load(AVFilterContext *avctx,
 overlay_planes = FFMAX(overlay_planes,
overlay_desc->comp[i].plane + 1);
 
-ctx->nb_planes = main_planes;
+ctx->nb_color_planes = main_planar_alpha ? (main_planes - 1) : main_planes;
 ctx->x_subsample = 1 << main_desc->log2_chroma_w;
 ctx->y_subsample = 1 << main_desc->log2_chroma_h;
 
@@ -80,15 +99,30 @@ static int overlay_opencl_load(AVFilterContext *avctx,
ctx->x_subsample, ctx->y_subsample);
 }
 
-if (main_planes == overlay_planes) {
-if (main_desc->nb_components == overlay_desc->nb_components)
-kernel = "overlay_no_alpha";
-else
-kernel = "overlay_internal_alpha";
+if ((main_desc->flags & AV_PIX_FMT_FLAG_RGB) !=
+(overlay_desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+av_log(avctx, AV_LOG_ERROR, "mixed YUV/RGB input formats.\n");
+return AVERROR(EINVAL);
+}
+
+if (main_desc->log2_chroma_w != overlay_desc->log2_chroma_w ||
+main_desc->log2_chroma_h != overlay_desc->log2_chroma_h) {
+av_log(avctx, AV_LOG_ERROR, "incompatible chroma sub-sampling.\n");
+return AVERROR(EINVAL);
+}
+
+if (!overlay_alpha) {
 ctx->alpha_separate = 0;
+kernel = "overlay_no_alpha";
 } else {
-kernel = "overlay_external_alpha";
-ctx->alpha_separate = 1;
+overlay_planar_alpha = has_planar_alpha(overlay_desc);
+if (overlay_planar_alpha) {
+ctx->alpha_separate = 1;
+kernel = "overlay_external_alpha";
+} else {
+ctx->alpha_separate = 0;
+kernel = "overlay_internal_alpha";
+}
 }
 
 av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
@@ -155,7 +189,7 @@ static int overlay_opencl_blend(FFFrameSync *fs)
 goto fail;
 }
 
-for (plane = 0; plane < ctx->nb_planes; plane++) {
+for (plane = 0; plane < ctx->nb_color_planes; plane++) {
 kernel_arg = 0;
 
 mem = (cl_mem)output->data[plane];
@@ -171,7 +205,7 @@ static int overlay_opencl_blend(FFFrameSync *fs)
 kernel_arg++;
 
 if (ctx->alpha_separate) {
-mem = (cl_mem)input_overlay->data[ctx->nb_planes];
+mem = (cl_mem)input_overlay->data[ctx->nb_color_planes];
 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
 kernel_arg++;
 }
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 1/4] doc/filters: add document for opencl filters

2018-10-28 Thread Ruiling Song

Signed-off-by: Danil Iashchenko 
Signed-off-by: Ruiling Song 
---
It seems that Danil has not been working on this recently,
so I am re-submitting this patch to address the comments on overlay_opencl.

Thanks!
Ruiling
 doc/filters.texi | 486 +++
 1 file changed, 486 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index cadf78c..83df460 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10485,6 +10485,7 @@ A floating point number which specifies chroma temporal 
strength. It defaults to
 @var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}.
 @end table
 
+@anchor{hwdownload}
 @section hwdownload
 
 Download hardware frames to system memory.
@@ -10575,6 +10576,7 @@ ways if there are any additional constraints on that 
filter's output.
 Do not use it without fully understanding the implications of its use.
 @end table
 
+@anchor{hwupload}
 @section hwupload
 
 Upload system memory frames to hardware surfaces.
@@ -18014,6 +18016,490 @@ pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} 
is 1.
 
 @c man end VIDEO FILTERS
 
+@chapter OpenCL Video Filters
+@c man begin OPENCL VIDEO FILTERS
+
+Below is a description of the currently available OpenCL video filters.
+
+To enable compilation of these filters you need to configure FFmpeg with
+@code{--enable-opencl}.
+
+Running OpenCL filters requires you to initialize a hardware device and to 
pass that device to all filters in any filter graph.
+@table @option
+
+@item -init_hw_device opencl[=@var{name}][:@var{device}[,@var{key=value}...]]
+Initialise a new hardware device of type @var{opencl} called @var{name}, using 
the
+given device parameters.
+
+@item -filter_hw_device @var{name}
+Pass the hardware device called @var{name} to all filters in any filter graph.
+
+@end table
+
+For more detailed information see 
@url{https://www.ffmpeg.org/ffmpeg.html#Advanced-Video-options}
+
+@itemize
+@item
+Example of choosing the first device on the second platform and running 
avgblur_opencl filter with default parameters on it.
+@example
+-init_hw_device opencl=gpu:1.0 -filter_hw_device gpu -i INPUT -vf "hwupload, 
avgblur_opencl, hwdownload" OUTPUT
+@end example
+@end itemize
+
+Since OpenCL filters are not able to access frame data in normal memory, all 
frame data needs to be uploaded(@ref{hwupload}) to hardware surfaces connected 
to the appropriate device before being used and then 
downloaded(@ref{hwdownload}) back to normal memory. Note that @ref{hwupload} 
will upload to a surface with the same layout as the software frame, so it may 
be necessary to add a @ref{format} filter immediately before to get the input 
into the right format and @ref{hwdownload} does not support all formats on the 
output - it may be necessary to insert an additional @ref{format} filter 
immediately following in the graph to get the output in a supported format.
+
+@section avgblur_opencl
+
+Apply average blur filter.
+
+The filter accepts the following options:
+
+@table @option
+@item sizeX
+Set horizontal radius size.
+Range is @code{[1, 1024]} and default value is @code{1}.
+
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes 
are processed.
+
+@item sizeY
+Set vertical radius size. Range is @code{[1, 1024]} and default value is 
@code{0}. If zero, @code{sizeX} value will be used.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply average blur filter with horizontal and vertical size of 3, setting each 
pixel of the output to the average value of the 7x7 region centered on it in 
the input. For pixels on the edges of the image, the region does not extend 
beyond the image boundaries, and so out-of-range coordinates are not used in 
the calculations.
+@example
+-i INPUT -vf "hwupload, avgblur_opencl=3, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section boxblur_opencl
+
+Apply a boxblur algorithm to the input video.
+
+It accepts the following parameters:
+
+@table @option
+
+@item luma_radius, lr
+@item luma_power, lp
+@item chroma_radius, cr
+@item chroma_power, cp
+@item alpha_radius, ar
+@item alpha_power, ap
+
+@end table
+
+A description of the accepted options follows.
+
+@table @option
+@item luma_radius, lr
+@item chroma_radius, cr
+@item alpha_radius, ar
+Set an expression for the box radius in pixels used for blurring the
+corresponding input plane.
+
+The radius value must be a non-negative number, and must not be
+greater than the value of the expression @code{min(w,h)/2} for the
+luma and alpha planes, and of @code{min(cw,ch)/2} for the chroma
+planes.
+
+Default value for @option{luma_radius} is "2". If not specified,
+@option{chroma_radius} and @option{alpha_radius} default to the
+corresponding value set for @option{luma_radius}.
+
+The expressions can contain the following constants:
+@table @option
+@item w
+@item h
+The input width and height in pixels.
+
+@item cw
+@item ch
+The input chroma image wid

[FFmpeg-devel] [PATCH 4/4] lavfi/opencl: remove peak option of tonemap_opencl

2018-10-28 Thread Ruiling Song

Since the filter auto-calculates the peak value,
the option does not work as expected. So, remove it.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_tonemap_opencl.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
index cd293c2..88b3107 100644
--- a/libavfilter/vf_tonemap_opencl.c
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -62,7 +62,6 @@ typedef struct TonemapOpenCLContext {
 
 enum TonemapAlgorithm tonemap;
 enum AVPixelFormatformat;
-doublepeak;
 doubleparam;
 doubledesat_param;
 doubletarget_peak;
@@ -349,7 +348,7 @@ static int tonemap_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 AVFrame *output = NULL;
 cl_int cle;
 int err;
-double peak = ctx->peak;
+double peak;
 
 AVHWFramesContext *input_frames_ctx =
 (AVHWFramesContext*)input->hw_frames_ctx->data;
@@ -371,8 +370,7 @@ static int tonemap_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 if (err < 0)
 goto fail;
 
-if (!peak)
-peak = ff_determine_signal_peak(input);
+peak = ff_determine_signal_peak(input);
 
 if (ctx->trc != -1)
 output->color_trc = ctx->trc;
@@ -518,7 +516,6 @@ static const AVOption tonemap_opencl_options[] = {
 { "limited",   0,   0, AV_OPT_TYPE_CONST, 
{.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
 { "full",  0,   0, AV_OPT_TYPE_CONST, 
{.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
 { "format","output pixel format", OFFSET(format), 
AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, 
FLAGS, "fmt" },
-{ "peak",  "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, 
{.dbl = 0}, 0, DBL_MAX, FLAGS },
 { "param", "tonemap parameter",   OFFSET(param), AV_OPT_TYPE_DOUBLE, 
{.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
 { "desat", "desaturation parameter",   OFFSET(desat_param), 
AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS },
 { "threshold", "scene detection threshold",   OFFSET(scene_threshold), 
AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS },
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 2/4] lavfi/opencl: Handle overlay input formats correctly.

2018-10-28 Thread Ruiling Song

The main input may have an alpha channel; we just ignore it.
Also add some checks for incompatible input formats.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_overlay_opencl.c | 58 -
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
index e9c8532..320c1a5 100644
--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c
@@ -37,7 +37,7 @@ typedef struct OverlayOpenCLContext {
 
 FFFrameSync  fs;
 
-int  nb_planes;
+int  nb_color_planes;
 int  x_subsample;
 int  y_subsample;
 int  alpha_separate;
@@ -46,6 +46,22 @@ typedef struct OverlayOpenCLContext {
 int  y_position;
 } OverlayOpenCLContext;
 
+static int has_planar_alpha(const AVPixFmtDescriptor *fmt) {
+int nb_components;
+int has_alpha = !!(fmt->flags & AV_PIX_FMT_FLAG_ALPHA);
+if (!has_alpha) return 0;
+
+nb_components = fmt->nb_components;
+// PAL8
+if (nb_components < 2) return 0;
+
+if (fmt->comp[nb_components - 1].plane >
+fmt->comp[nb_components - 2].plane)
+return 1;
+else
+return 0;
+}
+
 static int overlay_opencl_load(AVFilterContext *avctx,
enum AVPixelFormat main_format,
enum AVPixelFormat overlay_format)
@@ -55,10 +71,13 @@ static int overlay_opencl_load(AVFilterContext *avctx,
 const char *source = ff_opencl_source_overlay;
 const char *kernel;
 const AVPixFmtDescriptor *main_desc, *overlay_desc;
-int err, i, main_planes, overlay_planes;
+int err, i, main_planes, overlay_planes, overlay_alpha,
+main_planar_alpha, overlay_planar_alpha;
 
 main_desc= av_pix_fmt_desc_get(main_format);
 overlay_desc = av_pix_fmt_desc_get(overlay_format);
+overlay_alpha = !!(overlay_desc->flags & AV_PIX_FMT_FLAG_ALPHA);
+main_planar_alpha = has_planar_alpha(main_desc);
 
 main_planes = overlay_planes = 0;
 for (i = 0; i < main_desc->nb_components; i++)
@@ -68,7 +87,7 @@ static int overlay_opencl_load(AVFilterContext *avctx,
 overlay_planes = FFMAX(overlay_planes,
overlay_desc->comp[i].plane + 1);
 
-ctx->nb_planes = main_planes;
+ctx->nb_color_planes = main_planar_alpha ? (main_planes - 1) : main_planes;
 ctx->x_subsample = 1 << main_desc->log2_chroma_w;
 ctx->y_subsample = 1 << main_desc->log2_chroma_h;
 
@@ -80,15 +99,30 @@ static int overlay_opencl_load(AVFilterContext *avctx,
ctx->x_subsample, ctx->y_subsample);
 }
 
-if (main_planes == overlay_planes) {
-if (main_desc->nb_components == overlay_desc->nb_components)
-kernel = "overlay_no_alpha";
-else
-kernel = "overlay_internal_alpha";
+if ((main_desc->flags & AV_PIX_FMT_FLAG_RGB) !=
+(overlay_desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+av_log(avctx, AV_LOG_ERROR, "mixed YUV/RGB input formats.\n");
+return AVERROR(EINVAL);
+}
+
+if (main_desc->log2_chroma_w != overlay_desc->log2_chroma_w ||
+main_desc->log2_chroma_h != overlay_desc->log2_chroma_h) {
+av_log(avctx, AV_LOG_ERROR, "incompatible chroma sub-sampling.\n");
+return AVERROR(EINVAL);
+}
+
+if (!overlay_alpha) {
 ctx->alpha_separate = 0;
+kernel = "overlay_no_alpha";
 } else {
-kernel = "overlay_external_alpha";
-ctx->alpha_separate = 1;
+overlay_planar_alpha = has_planar_alpha(overlay_desc);
+if (overlay_planar_alpha) {
+ctx->alpha_separate = 1;
+kernel = "overlay_external_alpha";
+} else {
+ctx->alpha_separate = 0;
+kernel = "overlay_internal_alpha";
+}
 }
 
 av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
@@ -155,7 +189,7 @@ static int overlay_opencl_blend(FFFrameSync *fs)
 goto fail;
 }
 
-for (plane = 0; plane < ctx->nb_planes; plane++) {
+for (plane = 0; plane < ctx->nb_color_planes; plane++) {
 kernel_arg = 0;
 
 mem = (cl_mem)output->data[plane];
@@ -171,7 +205,7 @@ static int overlay_opencl_blend(FFFrameSync *fs)
 kernel_arg++;
 
 if (ctx->alpha_separate) {
-mem = (cl_mem)input_overlay->data[ctx->nb_planes];
+mem = (cl_mem)input_overlay->data[ctx->nb_color_planes];
 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
 kernel_arg++;
 }
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 3/4] doc/filters: add tonemap_opencl document.

2018-10-28 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 doc/filters.texi | 96 
 1 file changed, 96 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 83df460..f884ba4 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -16387,6 +16387,7 @@ tmix=frames=3:weights="-1 2 -1":scale=1
 @end example
 @end itemize
 
+@anchor{tonemap}
 @section tonemap
 Tone map colors from different dynamic ranges.
 
@@ -18440,6 +18441,101 @@ Apply sobel operator with scale set to 2 and delta 
set to 10
 @end example
 @end itemize
 
+@section tonemap_opencl
+
+Perform HDR(PQ/HLG) to SDR conversion with tone-mapping.
+
+It accepts the following parameters:
+
+@table @option
+@item tonemap
+Specify the tone-mapping operator to be used. Same as the tonemap option in 
@ref{tonemap}.
+
+@item param
+Tune the tone mapping algorithm. Same as the param option in @ref{tonemap}.
+
+@item desat
+Apply desaturation for highlights that exceed this level of brightness. The
+higher the parameter, the more color information will be preserved. This
+setting helps prevent unnaturally blown-out colors for super-highlights, by
+(smoothly) turning into white instead. This makes images feel more natural,
+at the cost of reducing information about out-of-range colors.
+
+The default value is 0.5, and the algorithm here currently differs slightly
+from the CPU version of tonemap. A setting of 0.0 disables this option.
+
+@item threshold
+The tonemapping algorithm parameters are fine-tuned for each scene, and a 
threshold
+is used to detect whether the scene has changed or not. If the distance between
+the current frame average brightness and the current running average exceeds
+the threshold value, the scene average and peak brightness are recalculated.
+The default value is 0.2.
+
+@item format
+Specify the output pixel format.
+
+Currently supported formats are:
+@table @var
+@item p010
+@item nv12
+@end table
+
+@item range, r
+Set the output color range.
+
+Possible values are:
+@table @var
+@item tv/mpeg
+@item pc/jpeg
+@end table
+
+Default is same as input.
+
+@item primaries, p
+Set the output color primaries.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@item transfer, t
+Set the output transfer characteristics.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is bt709.
+
+@item matrix, m
+Set the output colorspace matrix.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using 
the linear operator.
+@example
+-i INPUT -vf 
"format=p010,hwupload,tonemap_opencl=t=bt2020:tonemap=linear:format=p010,hwdownload,format=p010"
 OUTPUT
+@end example
+@end itemize
+
 @section unsharp_opencl
 
 Sharpen or blur the input video.
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 1/4] doc/filters: add document for opencl filters

2018-10-28 Thread Ruiling Song

Signed-off-by: Danil Iashchenko 
Signed-off-by: Ruiling Song 
---
It seems that Danil has not been working on this recently,
so I am re-submitting this patch to address the comments on overlay_opencl.

Thanks!
Ruiling
 doc/filters.texi | 486 +++
 1 file changed, 486 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index cadf78c..83df460 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10485,6 +10485,7 @@ A floating point number which specifies chroma temporal 
strength. It defaults to
 @var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}.
 @end table
 
+@anchor{hwdownload}
 @section hwdownload
 
 Download hardware frames to system memory.
@@ -10575,6 +10576,7 @@ ways if there are any additional constraints on that 
filter's output.
 Do not use it without fully understanding the implications of its use.
 @end table
 
+@anchor{hwupload}
 @section hwupload
 
 Upload system memory frames to hardware surfaces.
@@ -18014,6 +18016,490 @@ pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} 
is 1.
 
 @c man end VIDEO FILTERS
 
+@chapter OpenCL Video Filters
+@c man begin OPENCL VIDEO FILTERS
+
+Below is a description of the currently available OpenCL video filters.
+
+To enable compilation of these filters you need to configure FFmpeg with
+@code{--enable-opencl}.
+
+Running OpenCL filters requires you to initialize a hardware device and to 
pass that device to all filters in any filter graph.
+@table @option
+
+@item -init_hw_device opencl[=@var{name}][:@var{device}[,@var{key=value}...]]
+Initialise a new hardware device of type @var{opencl} called @var{name}, using 
the
+given device parameters.
+
+@item -filter_hw_device @var{name}
+Pass the hardware device called @var{name} to all filters in any filter graph.
+
+@end table
+
+For more detailed information see 
@url{https://www.ffmpeg.org/ffmpeg.html#Advanced-Video-options}
+
+@itemize
+@item
+Example of choosing the first device on the second platform and running 
avgblur_opencl filter with default parameters on it.
+@example
+-init_hw_device opencl=gpu:1.0 -filter_hw_device gpu -i INPUT -vf "hwupload, 
avgblur_opencl, hwdownload" OUTPUT
+@end example
+@end itemize
+
+Since OpenCL filters are not able to access frame data in normal memory, all 
frame data needs to be uploaded (@ref{hwupload}) to hardware surfaces connected 
to the appropriate device before being used and then 
downloaded (@ref{hwdownload}) back to normal memory. Note that @ref{hwupload} 
will upload to a surface with the same layout as the software frame, so it may 
be necessary to add a @ref{format} filter immediately before to get the input 
into the right format and @ref{hwdownload} does not support all formats on the 
output - it may be necessary to insert an additional @ref{format} filter 
immediately following in the graph to get the output in a supported format.
+
+@section avgblur_opencl
+
+Apply average blur filter.
+
+The filter accepts the following options:
+
+@table @option
+@item sizeX
+Set horizontal radius size.
+Range is @code{[1, 1024]} and default value is @code{1}.
+
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes 
are processed.
+
+@item sizeY
+Set vertical radius size. Range is @code{[1, 1024]} and default value is 
@code{0}. If zero, @code{sizeX} value will be used.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply average blur filter with horizontal and vertical size of 3, setting each 
pixel of the output to the average value of the 7x7 region centered on it in 
the input. For pixels on the edges of the image, the region does not extend 
beyond the image boundaries, and so out-of-range coordinates are not used in 
the calculations.
+@example
+-i INPUT -vf "hwupload, avgblur_opencl=3, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section boxblur_opencl
+
+Apply a boxblur algorithm to the input video.
+
+It accepts the following parameters:
+
+@table @option
+
+@item luma_radius, lr
+@item luma_power, lp
+@item chroma_radius, cr
+@item chroma_power, cp
+@item alpha_radius, ar
+@item alpha_power, ap
+
+@end table
+
+A description of the accepted options follows.
+
+@table @option
+@item luma_radius, lr
+@item chroma_radius, cr
+@item alpha_radius, ar
+Set an expression for the box radius in pixels used for blurring the
+corresponding input plane.
+
+The radius value must be a non-negative number, and must not be
+greater than the value of the expression @code{min(w,h)/2} for the
+luma and alpha planes, and of @code{min(cw,ch)/2} for the chroma
+planes.
+
+Default value for @option{luma_radius} is "2". If not specified,
+@option{chroma_radius} and @option{alpha_radius} default to the
+corresponding value set for @option{luma_radius}.
+
+The expressions can contain the following constants:
+@table @option
+@item w
+@item h
+The input width and height in pixels.
+
+@item cw
+@item ch
+The input chroma image wid

[FFmpeg-devel] [PATCH] doc/filters: add tonemap_opencl document.

2018-08-01 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 doc/filters.texi | 158 +++
 1 file changed, 158 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index 6695999c84..f622d03226 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17776,6 +17776,164 @@ Apply sobel operator with scale set to 2 and delta 
set to 10
 @end example
 @end itemize
 
+@section tonemap_opencl
+
+Perform HDR(PQ/HLG) to SDR conversion with tone-mapping.
+
+It accepts the following parameters:
+
+@table @option
+@item tonemap
+Specify the tone-mapping operator to be used.
+
+Possible values are:
+@table @var
+@item none
+Do not apply any tone mapping, only desaturate overbright pixels.
+
+@item clip
+Hard-clip any out-of-range values. Use it for perfect color accuracy for
+in-range values, while distorting out-of-range values.
+
+@item linear
+Stretch the entire reference gamut to a linear multiple of the display.
+
+@item gamma
+Fit a logarithmic transfer between the tone curves.
+
+@item reinhard
+Preserve overall image brightness with a simple curve, using nonlinear
+contrast, which results in flattening details and degrading color accuracy.
+
+@item hable
+Preserve both dark and bright details better than @var{reinhard}, at the cost
+of slightly darkening everything. Use it when detail preservation is more
+important than color and brightness accuracy.
+
+@item mobius
+Smoothly map out-of-range values, while retaining contrast and colors for
+in-range material as much as possible. Use it when color accuracy is more
+important than detail preservation.
+@end table
+
+@item param
+Tune the tone mapping algorithm.
+
+This affects the following algorithms:
+@table @var
+@item none
+Ignored.
+
+@item linear
+Specifies the scale factor to use while stretching.
+Default to 1.0.
+
+@item gamma
+Specifies the exponent of the function.
+Default to 1.8.
+
+@item clip
+Specify an extra linear coefficient to multiply into the signal before 
clipping.
+Default to 1.0.
+
+@item reinhard
+Specify the local contrast coefficient at the display peak.
+Default to 0.5, which means that in-gamut values will be about half as bright
+as when clipping.
+
+@item hable
+Ignored.
+
+@item mobius
+Specify the transition point from linear to mobius transform. Every value
+below this point is guaranteed to be mapped 1:1. The higher the value, the
+more accurate the result will be, at the cost of losing bright details.
+Default to 0.3, which due to the steep initial slope still preserves in-range
+colors fairly accurately.
+@end table
+
+@item desat
+Apply desaturation for highlights that exceed this level of brightness. The
+higher the parameter, the more color information will be preserved. This
+setting helps prevent unnaturally blown-out colors for super-highlights, by
+(smoothly) turning into white instead. This makes images feel more natural,
+at the cost of reducing information about out-of-range colors.
+
+The default value is 0.5, and the algorithm here currently differs slightly
+from the CPU version of tonemap. A setting of 0.0 disables this option.
+
+@item threshold
+The tonemapping algorithm parameters are fine-tuned for each scene, and a 
threshold
+is used to detect whether the scene has changed or not. If the distance between
+the current frame average brightness and the current running average exceeds
+the threshold value, the scene average and peak brightness are recalculated.
+The default value is 0.2.
+
+@item format
+Specify the output pixel format.
+
+Currently supported formats are:
+@table @var
+@item p010
+@item nv12
+@end table
+
+@item range, r
+Set the output color range.
+
+Possible values are:
+@table @var
+@item tv/mpeg
+@item pc/jpeg
+@end table
+
+Default is same as input.
+
+@item primaries, p
+Set the output color primaries.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@item transfer, t
+Set the output transfer characteristics.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is bt709.
+
+@item matrix, m
+Set the output colorspace matrix.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using 
the linear operator.
+@example
+-i INPUT -vf "hwupload, 
format=p010,tonemap_opencl=t=bt2020:tonemap=linear:format=p010, hwdownload" 
OUTPUT
+@end example
+@end itemize
+
 @section unsharp_opencl
 
 Sharpen or blur the input video.
-- 
2.17.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavfi/colorspace: Suppress compile warning on incompatible pointer type.

2018-07-13 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
Sorry, I have not verified this patch, as I don't know how to reproduce the gcc 
warning.

Thanks!
Ruiling

 libavfilter/vf_colorspace.c | 16 
 libavfilter/vf_tonemap_opencl.c |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index 56621d15e2..69c7674a7b 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -382,8 +382,8 @@ static void fill_whitepoint_conv_table(double out[3][3], 
enum WhitepointAdaptati
 fac[1][1] = gd / gs;
 fac[2][2] = bd / bs;
 fac[0][1] = fac[0][2] = fac[1][0] = fac[1][2] = fac[2][0] = fac[2][1] = 
0.0;
-ff_matrix_mul_3x3(tmp, ma, fac);
-ff_matrix_mul_3x3(out, tmp, mai);
+ff_matrix_mul_3x3(tmp, ma, (void *)fac);
+ff_matrix_mul_3x3(out, (void *)tmp, (void *)mai);
 }
 
 static void apply_lut(int16_t *buf[3], ptrdiff_t stride,
@@ -589,7 +589,7 @@ static int create_filtergraph(AVFilterContext *ctx,
 wp_out = &whitepoint_coefficients[s->out_primaries->wp];
 wp_in = &whitepoint_coefficients[s->in_primaries->wp];
 ff_fill_rgb2xyz_table(&s->out_primaries->coeff, wp_out, rgb2xyz);
-ff_matrix_invert_3x3(rgb2xyz, xyz2rgb);
+ff_matrix_invert_3x3((void *)rgb2xyz, xyz2rgb);
 ff_fill_rgb2xyz_table(&s->in_primaries->coeff, wp_in, rgb2xyz);
 if (s->out_primaries->wp != s->in_primaries->wp &&
 s->wp_adapt != WP_ADAPT_IDENTITY) {
@@ -597,10 +597,10 @@ static int create_filtergraph(AVFilterContext *ctx,
 
 fill_whitepoint_conv_table(wpconv, s->wp_adapt, 
s->in_primaries->wp,
s->out_primaries->wp);
-ff_matrix_mul_3x3(tmp, rgb2xyz, wpconv);
-ff_matrix_mul_3x3(rgb2rgb, tmp, xyz2rgb);
+ff_matrix_mul_3x3(tmp, (void *)rgb2xyz, (void *)wpconv);
+ff_matrix_mul_3x3(rgb2rgb, (void *)tmp, (void *)xyz2rgb);
 } else {
-ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+ff_matrix_mul_3x3(rgb2rgb, (void *)rgb2xyz, (void *)xyz2rgb);
 }
 for (m = 0; m < 3; m++)
 for (n = 0; n < 3; n++) {
@@ -725,7 +725,7 @@ static int create_filtergraph(AVFilterContext *ctx,
 for (n = 0; n < 8; n++)
 s->yuv_offset[0][n] = off;
 fill_rgb2yuv_table(s->in_lumacoef, rgb2yuv);
-ff_matrix_invert_3x3(rgb2yuv, yuv2rgb);
+ff_matrix_invert_3x3((void *)rgb2yuv, yuv2rgb);
 bits = 1 << (in_desc->comp[0].depth - 1);
 for (n = 0; n < 3; n++) {
 for (in_rng = s->in_y_rng, m = 0; m < 3; m++, in_rng = 
s->in_uv_rng) {
@@ -781,7 +781,7 @@ static int create_filtergraph(AVFilterContext *ctx,
 double yuv2yuv[3][3];
 int in_rng, out_rng;
 
-ff_matrix_mul_3x3(yuv2yuv, yuv2rgb, rgb2yuv);
+ff_matrix_mul_3x3(yuv2yuv, (void *)yuv2rgb, (void *)rgb2yuv);
 for (out_rng = s->out_y_rng, m = 0; m < 3; m++, out_rng = 
s->out_uv_rng) {
 for (in_rng = s->in_y_rng, n = 0; n < 3; n++, in_rng = 
s->in_uv_rng) {
 s->yuv2yuv_coeffs[m][n][0] =
diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
index 241f95e6c3..0cb2da0da2 100644
--- a/libavfilter/vf_tonemap_opencl.c
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -125,9 +125,9 @@ static void get_rgb2rgb_matrix(enum AVColorPrimaries in, 
enum AVColorPrimaries o
 double rgb2xyz[3][3], xyz2rgb[3][3];
 
 ff_fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], 
rgb2xyz);
-ff_matrix_invert_3x3(rgb2xyz, xyz2rgb);
+ff_matrix_invert_3x3((void *)rgb2xyz, xyz2rgb);
 ff_fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], 
rgb2xyz);
-ff_matrix_mul_3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+ff_matrix_mul_3x3(rgb2rgb, (void *)rgb2xyz, (void *)xyz2rgb);
 }
 
 #define OPENCL_SOURCE_NB 3
-- 
2.17.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH v2 2/2] lavfi/vf_avgblur_opencl: remove useless clFinish().

2018-07-03 Thread Ruiling Song

The very last clFinish() should be ok.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_avgblur_opencl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
index bc6bcab..99ed1ca 100644
--- a/libavfilter/vf_avgblur_opencl.c
+++ b/libavfilter/vf_avgblur_opencl.c
@@ -228,7 +228,6 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
  0, NULL, NULL);
 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal "
  "kernel: %d.\n", cle);
-cle = clFinish(ctx->command_queue);
 
 err = ff_opencl_filter_work_size_from_image(avctx, global_work,
 i == 0 ? output : 
intermediate, p, 0);
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH v2 1/2] lavfi/opencl: add macro for opencl error handling.

2018-07-03 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl.h| 11 +
 libavfilter/vf_avgblur_opencl.c | 45 +--
 libavfilter/vf_overlay_opencl.c | 29 +--
 libavfilter/vf_program_opencl.c | 14 ++-
 libavfilter/vf_tonemap_opencl.c | 33 +-
 libavfilter/vf_unsharp_opencl.c | 52 +
 6 files changed, 47 insertions(+), 137 deletions(-)

diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index 7441b11..0ed360b 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -112,5 +112,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext 
*avctx,
   size_t *work_size,
   AVFrame *frame, int plane,
   int block_alignment);
+/**
+ * A helper macro to handle OpenCL error. It will assign errcode to
+ * variable err, log error msg, and jump to fail label on error.
+ */
+#define CL_FAIL_ON_ERROR(errcode, ...) do {\
+if (cle != CL_SUCCESS) {\
+av_log(avctx, AV_LOG_ERROR, __VA_ARGS__);\
+err = errcode;\
+goto fail;\
+}\
+} while(0)
 
 #endif /* AVFILTER_OPENCL_H */
diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
index d1d3eb1..bc6bcab 100644
--- a/libavfilter/vf_avgblur_opencl.c
+++ b/libavfilter/vf_avgblur_opencl.c
@@ -64,26 +64,16 @@ static int avgblur_opencl_init(AVFilterContext *avctx)
 ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
   ctx->ocf.hwctx->device_id,
   0, &cle);
-if (!ctx->command_queue) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
-   "command queue: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+ "command queue %d.\n", cle);
 
 ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
-if (!ctx->kernel_horiz) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create horizontal "
+ "kernel %d.\n", cle);
 
 ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
-if (!ctx->kernel_vert) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create vertical "
+ "kernel %d.\n", cle);
 
 ctx->initialised = 1;
 return 0;
@@ -236,12 +226,8 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 cle = clEnqueueNDRangeKernel(ctx->command_queue, 
ctx->kernel_horiz, 2, NULL,
  global_work, NULL,
  0, NULL, NULL);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal "
+ "kernel: %d.\n", cle);
 cle = clFinish(ctx->command_queue);
 
 err = ff_opencl_filter_work_size_from_image(avctx, global_work,
@@ -259,22 +245,13 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 
2, NULL,
  global_work, NULL,
  0, NULL, NULL);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue vertical "
+ "kernel: %d.\n", cle);
 }
 }
 
 cle = clFinish(ctx->command_queue);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", 
cle);
 
 err = av_frame_copy_props(output, input);
 if (err < 0)
diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/v

[FFmpeg-devel] [PATCH 2/2] lavfi/vf_avgblur_opencl: remove useless clFinish().

2018-07-02 Thread Ruiling Song

The very last clFinish() should be ok.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_avgblur_opencl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
index bc6bcab..99ed1ca 100644
--- a/libavfilter/vf_avgblur_opencl.c
+++ b/libavfilter/vf_avgblur_opencl.c
@@ -228,7 +228,6 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
  0, NULL, NULL);
 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal "
  "kernel: %d.\n", cle);
-cle = clFinish(ctx->command_queue);
 
 err = ff_opencl_filter_work_size_from_image(avctx, global_work,
 i == 0 ? output : 
intermediate, p, 0);
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH 1/2] lavfi/opencl: add macro for opencl error handling.

2018-07-02 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavfilter/opencl.h|  4 ++--
 libavfilter/vf_avgblur_opencl.c | 45 +--
 libavfilter/vf_overlay_opencl.c | 29 +--
 libavfilter/vf_program_opencl.c | 14 ++-
 libavfilter/vf_tonemap_opencl.c | 33 +-
 libavfilter/vf_unsharp_opencl.c | 52 +
 6 files changed, 38 insertions(+), 139 deletions(-)

diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index fd76f72..0ed360b 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -116,9 +116,9 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext 
*avctx,
  * A helper macro to handle OpenCL error. It will assign errcode to
  * variable err, log error msg, and jump to fail label on error.
  */
-#define OCL_FAIL_ON_ERR(logctx, cle, errcode, ...) do {\
+#define CL_FAIL_ON_ERROR(errcode, ...) do {\
 if (cle != CL_SUCCESS) {\
-av_log(logctx, AV_LOG_ERROR, __VA_ARGS__);\
+av_log(avctx, AV_LOG_ERROR, __VA_ARGS__);\
 err = errcode;\
 goto fail;\
 }\
diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
index d1d3eb1..bc6bcab 100644
--- a/libavfilter/vf_avgblur_opencl.c
+++ b/libavfilter/vf_avgblur_opencl.c
@@ -64,26 +64,16 @@ static int avgblur_opencl_init(AVFilterContext *avctx)
 ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
   ctx->ocf.hwctx->device_id,
   0, &cle);
-if (!ctx->command_queue) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
-   "command queue: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+ "command queue %d.\n", cle);
 
 ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
-if (!ctx->kernel_horiz) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create horizontal "
+ "kernel %d.\n", cle);
 
 ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
-if (!ctx->kernel_vert) {
-av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create vertical "
+ "kernel %d.\n", cle);
 
 ctx->initialised = 1;
 return 0;
@@ -236,12 +226,8 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 cle = clEnqueueNDRangeKernel(ctx->command_queue, 
ctx->kernel_horiz, 2, NULL,
  global_work, NULL,
  0, NULL, NULL);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue horizontal "
+ "kernel: %d.\n", cle);
 cle = clFinish(ctx->command_queue);
 
 err = ff_opencl_filter_work_size_from_image(avctx, global_work,
@@ -259,22 +245,13 @@ static int avgblur_opencl_filter_frame(AVFilterLink 
*inlink, AVFrame *input)
 cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 
2, NULL,
  global_work, NULL,
  0, NULL, NULL);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue vertical "
+ "kernel: %d.\n", cle);
 }
 }
 
 cle = clFinish(ctx->command_queue);
-if (cle != CL_SUCCESS) {
-av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
-   cle);
-err = AVERROR(EIO);
-goto fail;
-}
+CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", 
cle);
 
 err = av_frame_copy_props(output, input);
 if (err < 0)
diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
index 556ce35..e9c8532 100644
--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c
@@ -100,19 +100,11 @@

[FFmpeg-devel] [PATCH v4 1/2] lavfi: add opencl tonemap filter.

2018-06-18 Thread Ruiling Song

This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping.

An example command to use this filter with vaapi codecs:
FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \
opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \
vaapi -i INPUT -filter_hw_device ocl -filter_complex \
'[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \
[x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT

Signed-off-by: Ruiling Song 
---
As I didn't receive any other comments on v3, this version only addresses the comment 
from Michael.
It also includes a small change to leverage the CL_SET_KERNEL_ARG() macro.

Thanks!
Ruiling

 configure   |   1 +
 libavfilter/Makefile|   2 +
 libavfilter/allfilters.c|   1 +
 libavfilter/colorspace.c|  90 +
 libavfilter/colorspace.h|  41 +++
 libavfilter/opencl/colorspace_common.cl | 220 +++
 libavfilter/opencl/tonemap.cl   | 272 ++
 libavfilter/opencl_source.h |   2 +
 libavfilter/vf_tonemap_opencl.c | 624 
 9 files changed, 1253 insertions(+)
 create mode 100644 libavfilter/colorspace.c
 create mode 100644 libavfilter/colorspace.h
 create mode 100644 libavfilter/opencl/colorspace_common.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index 333e326..d9c5d63 100755
--- a/configure
+++ b/configure
@@ -3411,6 +3411,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl const_nan"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5b4be49..d2c85cf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -356,6 +356,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += 
vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace.o opencl.o \
+opencl/tonemap.o 
opencl/colorspace_common.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f2d27d2..fa85c29 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -345,6 +345,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
new file mode 100644
index 000..7fd7bdf
--- /dev/null
+++ b/libavfilter/colorspace.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje 
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+int i, j;
+double det;
+
+out[0][0] =  (m11 * m22 - m21 * m12);
+out[0][1] = -(m01 * m22 - m21 * m02);
+out[0][2] =  (m01 * m12 - m11 * m02);
+out[1][0] = -(m10 * m22 - m20 * m12);
+out[1][1] =  (m00 * m22 - m20 * m02);
+out[1][2] = -(m00 * m12 - m10 * m02);
+out[2][0] =  (m10 * m21 - m20 * m11);
+out[2][1] = -(m00 * m21 - m20 * m01);
+out[2][2] =  (m00 * m11 - m10 * m01);
+
+det = m00 * out[0][0] + m10 * o

[FFmpeg-devel] [PATCH v4 2/2] lavfi: make vf_colorspace use functions from colorspace.c

2018-06-18 Thread Ruiling Song

These functions are shared among colorspace related filters.

Signed-off-by: Ruiling Song 
---
 libavfilter/Makefile|   2 +-
 libavfilter/vf_colorspace.c | 118 +---
 2 files changed, 23 insertions(+), 97 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d2c85cf..c20c270 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -166,7 +166,7 @@ OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER)  += 
vf_colorchannelmixer.o
 OBJS-$(CONFIG_COLORKEY_FILTER)   += vf_colorkey.o
 OBJS-$(CONFIG_COLORLEVELS_FILTER)+= vf_colorlevels.o
 OBJS-$(CONFIG_COLORMATRIX_FILTER)+= vf_colormatrix.o
-OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o
+OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspace.o 
colorspacedsp.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)+= vf_convolution.o
 OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o 
opencl.o \
opencl/convolution.o
diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index 71ea08a..b593215 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -33,6 +33,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "colorspace.h"
 
 enum DitherMode {
 DITHER_NONE,
@@ -110,21 +111,13 @@ static const enum AVColorSpace default_csp[CS_NB + 1] = {
 
 struct ColorPrimaries {
 enum Whitepoint wp;
-double xr, yr, xg, yg, xb, yb;
+struct PrimaryCoefficients coeff;
 };
 
 struct TransferCharacteristics {
 double alpha, beta, gamma, delta;
 };
 
-struct LumaCoefficients {
-double cr, cg, cb;
-};
-
-struct WhitepointCoefficients {
-double xw, yw;
-};
-
 typedef struct ColorSpaceContext {
 const AVClass *class;
 
@@ -286,57 +279,30 @@ static const struct WhitepointCoefficients 
whitepoint_coefficients[WP_NB] = {
 };
 
 static const struct ColorPrimaries color_primaries[AVCOL_PRI_NB] = {
-[AVCOL_PRI_BT709] = { WP_D65, 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 
},
-[AVCOL_PRI_BT470M]= { WP_C,   0.670, 0.330, 0.210, 0.710, 0.140, 0.080 
},
-[AVCOL_PRI_BT470BG]   = { WP_D65, 0.640, 0.330, 0.290, 0.600, 0.150, 
0.060,},
-[AVCOL_PRI_SMPTE170M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 
},
-[AVCOL_PRI_SMPTE240M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 
},
-[AVCOL_PRI_SMPTE428]  = { WP_E,   0.735, 0.265, 0.274, 0.718, 0.167, 0.009 
},
-[AVCOL_PRI_SMPTE431]  = { WP_DCI, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 
},
-[AVCOL_PRI_SMPTE432]  = { WP_D65, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 
},
-[AVCOL_PRI_FILM]  = { WP_C,   0.681, 0.319, 0.243, 0.692, 0.145, 0.049 
},
-[AVCOL_PRI_BT2020]= { WP_D65, 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 
},
-[AVCOL_PRI_JEDEC_P22] = { WP_D65, 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 
},
+[AVCOL_PRI_BT709] = { WP_D65, { 0.640, 0.330, 0.300, 0.600, 0.150, 
0.060 } },
+[AVCOL_PRI_BT470M]= { WP_C,   { 0.670, 0.330, 0.210, 0.710, 0.140, 
0.080 } },
+[AVCOL_PRI_BT470BG]   = { WP_D65, { 0.640, 0.330, 0.290, 0.600, 0.150, 
0.060 } },
+[AVCOL_PRI_SMPTE170M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 
0.070 } },
+[AVCOL_PRI_SMPTE240M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 
0.070 } },
+[AVCOL_PRI_SMPTE428]  = { WP_E,   { 0.735, 0.265, 0.274, 0.718, 0.167, 
0.009 } },
+[AVCOL_PRI_SMPTE431]  = { WP_DCI, { 0.680, 0.320, 0.265, 0.690, 0.150, 
0.060 } },
+[AVCOL_PRI_SMPTE432]  = { WP_D65, { 0.680, 0.320, 0.265, 0.690, 0.150, 
0.060 } },
+[AVCOL_PRI_FILM]  = { WP_C,   { 0.681, 0.319, 0.243, 0.692, 0.145, 
0.049 } },
+[AVCOL_PRI_BT2020]= { WP_D65, { 0.708, 0.292, 0.170, 0.797, 0.131, 
0.046 } },
+[AVCOL_PRI_JEDEC_P22] = { WP_D65, { 0.630, 0.340, 0.295, 0.605, 0.155, 
0.077 } },
 };
 
 static const struct ColorPrimaries *get_color_primaries(enum AVColorPrimaries 
prm)
 {
-const struct ColorPrimaries *coeffs;
+const struct ColorPrimaries *p;
 
 if (prm >= AVCOL_PRI_NB)
 return NULL;
-coeffs = &color_primaries[prm];
-if (!coeffs->xr)
+p = &color_primaries[prm];
+if (!p->coeff.xr)
 return NULL;
 
-return coeffs;
-}
-
-static void invert_matrix3x3(const double in[3][3], double out[3][3])
-{
-double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
-   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
-   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
-int i, j;
-double det;
-
-out[0][0] =  (m11 * m22 - m21 * m12);
-out[0][1] = -(m01 * m22 - m21 * m02);
-out[0][2] =  (m01 * m12 - m11 * m02);
-out[1][0] = -(m10 * m22 - m20 * m12);
-out[1][1] =  (m00 * m22 - m20 * m02);
-out[1][2] = -(m00 * m12 - m10 * m02);
-out[2][0] =  (m10 * m21 - m20 * m11);
-out[2][1]

[FFmpeg-devel] [PATCH] lavfi: add helper macro for OpenCL error handling.

2018-06-12 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
I am not sure whether you think this would be useful.
The main purpose is to make the OpenCL error-checking code simpler.
If we agree this is good, I can go on to convert the current
OpenCL filters to use this macro.

for example:
if (cle != CL_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
   cle);
err = AVERROR(EIO);
goto fail;
}
can be replaced with:
OCL_FAIL_ON_ERR(avctx, cle, AVERROR(EIO), "Failed to enqueue kernel: %d.\n", 
cle);

Thanks!
Ruiling
 libavfilter/opencl.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index c0a4519..c33df1c 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -97,5 +97,16 @@ int ff_opencl_filter_work_size_from_image(AVFilterContext 
*avctx,
   size_t *work_size,
   AVFrame *frame, int plane,
   int block_alignment);
+/**
+ * A helper macro to handle OpenCL error. It will assign errcode to
+ * variable err, log error msg, and jump to fail label on error.
+ */
+#define OCL_FAIL_ON_ERR(logctx, cle, errcode, ...) do {\
+if (cle != CL_SUCCESS) {\
+av_log(logctx, AV_LOG_ERROR, __VA_ARGS__);\
+err = errcode;\
+goto fail;\
+}\
+} while(0)
 
 #endif /* AVFILTER_OPENCL_H */
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavu: add calling convention for OpenCL callback.

2018-06-06 Thread Ruiling Song

This fixes a build error on Windows:
C2440: cannot convert from 'void (__cdecl *) (...)' to 'void (__stdcall 
*)(...)'.

Signed-off-by: Ruiling Song 
---
 libavutil/hwcontext_opencl.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index 43b5c5a..e08d7bc 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -141,9 +141,10 @@ typedef struct OpenCLFramesContext {
 } OpenCLFramesContext;
 
 
-static void opencl_error_callback(const char *errinfo,
-  const void *private_info, size_t cb,
-  void *user_data)
+static void CL_CALLBACK opencl_error_callback(const char *errinfo,
+  const void *private_info,
+  size_t cb,
+  void *user_data)
 {
 AVHWDeviceContext *ctx = user_data;
 av_log(ctx, AV_LOG_ERROR, "OpenCL error: %s\n", errinfo);
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH v3 2/2] lavfi: make vf_colorspace use functions from colorspace.c

2018-06-06 Thread Ruiling Song

These functions are shared among colorspace related filters.

Signed-off-by: Ruiling Song 
---
 libavfilter/Makefile|   2 +-
 libavfilter/vf_colorspace.c | 118 +---
 2 files changed, 23 insertions(+), 97 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d2c85cf..c20c270 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -166,7 +166,7 @@ OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER)  += 
vf_colorchannelmixer.o
 OBJS-$(CONFIG_COLORKEY_FILTER)   += vf_colorkey.o
 OBJS-$(CONFIG_COLORLEVELS_FILTER)+= vf_colorlevels.o
 OBJS-$(CONFIG_COLORMATRIX_FILTER)+= vf_colormatrix.o
-OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o
+OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspace.o 
colorspacedsp.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)+= vf_convolution.o
 OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER) += vf_convolution_opencl.o 
opencl.o \
opencl/convolution.o
diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index 71ea08a..b593215 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -33,6 +33,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "colorspace.h"
 
 enum DitherMode {
 DITHER_NONE,
@@ -110,21 +111,13 @@ static const enum AVColorSpace default_csp[CS_NB + 1] = {
 
 struct ColorPrimaries {
 enum Whitepoint wp;
-double xr, yr, xg, yg, xb, yb;
+struct PrimaryCoefficients coeff;
 };
 
 struct TransferCharacteristics {
 double alpha, beta, gamma, delta;
 };
 
-struct LumaCoefficients {
-double cr, cg, cb;
-};
-
-struct WhitepointCoefficients {
-double xw, yw;
-};
-
 typedef struct ColorSpaceContext {
 const AVClass *class;
 
@@ -286,57 +279,30 @@ static const struct WhitepointCoefficients 
whitepoint_coefficients[WP_NB] = {
 };
 
 static const struct ColorPrimaries color_primaries[AVCOL_PRI_NB] = {
-[AVCOL_PRI_BT709] = { WP_D65, 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 
},
-[AVCOL_PRI_BT470M]= { WP_C,   0.670, 0.330, 0.210, 0.710, 0.140, 0.080 
},
-[AVCOL_PRI_BT470BG]   = { WP_D65, 0.640, 0.330, 0.290, 0.600, 0.150, 
0.060,},
-[AVCOL_PRI_SMPTE170M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 
},
-[AVCOL_PRI_SMPTE240M] = { WP_D65, 0.630, 0.340, 0.310, 0.595, 0.155, 0.070 
},
-[AVCOL_PRI_SMPTE428]  = { WP_E,   0.735, 0.265, 0.274, 0.718, 0.167, 0.009 
},
-[AVCOL_PRI_SMPTE431]  = { WP_DCI, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 
},
-[AVCOL_PRI_SMPTE432]  = { WP_D65, 0.680, 0.320, 0.265, 0.690, 0.150, 0.060 
},
-[AVCOL_PRI_FILM]  = { WP_C,   0.681, 0.319, 0.243, 0.692, 0.145, 0.049 
},
-[AVCOL_PRI_BT2020]= { WP_D65, 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 
},
-[AVCOL_PRI_JEDEC_P22] = { WP_D65, 0.630, 0.340, 0.295, 0.605, 0.155, 0.077 
},
+[AVCOL_PRI_BT709] = { WP_D65, { 0.640, 0.330, 0.300, 0.600, 0.150, 
0.060 } },
+[AVCOL_PRI_BT470M]= { WP_C,   { 0.670, 0.330, 0.210, 0.710, 0.140, 
0.080 } },
+[AVCOL_PRI_BT470BG]   = { WP_D65, { 0.640, 0.330, 0.290, 0.600, 0.150, 
0.060 } },
+[AVCOL_PRI_SMPTE170M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 
0.070 } },
+[AVCOL_PRI_SMPTE240M] = { WP_D65, { 0.630, 0.340, 0.310, 0.595, 0.155, 
0.070 } },
+[AVCOL_PRI_SMPTE428]  = { WP_E,   { 0.735, 0.265, 0.274, 0.718, 0.167, 
0.009 } },
+[AVCOL_PRI_SMPTE431]  = { WP_DCI, { 0.680, 0.320, 0.265, 0.690, 0.150, 
0.060 } },
+[AVCOL_PRI_SMPTE432]  = { WP_D65, { 0.680, 0.320, 0.265, 0.690, 0.150, 
0.060 } },
+[AVCOL_PRI_FILM]  = { WP_C,   { 0.681, 0.319, 0.243, 0.692, 0.145, 
0.049 } },
+[AVCOL_PRI_BT2020]= { WP_D65, { 0.708, 0.292, 0.170, 0.797, 0.131, 
0.046 } },
+[AVCOL_PRI_JEDEC_P22] = { WP_D65, { 0.630, 0.340, 0.295, 0.605, 0.155, 
0.077 } },
 };
 
 static const struct ColorPrimaries *get_color_primaries(enum AVColorPrimaries 
prm)
 {
-const struct ColorPrimaries *coeffs;
+const struct ColorPrimaries *p;
 
 if (prm >= AVCOL_PRI_NB)
 return NULL;
-coeffs = &color_primaries[prm];
-if (!coeffs->xr)
+p = &color_primaries[prm];
+if (!p->coeff.xr)
 return NULL;
 
-return coeffs;
-}
-
-static void invert_matrix3x3(const double in[3][3], double out[3][3])
-{
-double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
-   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
-   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
-int i, j;
-double det;
-
-out[0][0] =  (m11 * m22 - m21 * m12);
-out[0][1] = -(m01 * m22 - m21 * m02);
-out[0][2] =  (m01 * m12 - m11 * m02);
-out[1][0] = -(m10 * m22 - m20 * m12);
-out[1][1] =  (m00 * m22 - m20 * m02);
-out[1][2] = -(m00 * m12 - m10 * m02);
-out[2][0] =  (m10 * m21 - m20 * m11);
-out[2][1]

[FFmpeg-devel] [PATCH v3 1/2] lavfi: add opencl tonemap filter.

2018-06-06 Thread Ruiling Song

This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping.

An example command to use this filter with vaapi codecs:
FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \
opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \
vaapi -i INPUT -filter_hw_device ocl -filter_complex \
'[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \
[x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT

Signed-off-by: Ruiling Song 
---
This version mainly addresses Mark's comments on v2.

Thanks!
Ruiling

 configure   |   1 +
 libavfilter/Makefile|   2 +
 libavfilter/allfilters.c|   1 +
 libavfilter/colorspace.c|  90 +
 libavfilter/colorspace.h|  41 ++
 libavfilter/opencl/colorspace_common.cl | 220 +++
 libavfilter/opencl/tonemap.cl   | 272 +
 libavfilter/opencl_source.h |   2 +
 libavfilter/vf_tonemap_opencl.c | 657 
 9 files changed, 1286 insertions(+)
 create mode 100644 libavfilter/colorspace.c
 create mode 100644 libavfilter/colorspace.h
 create mode 100644 libavfilter/opencl/colorspace_common.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index 53224f0..4ff651f 100755
--- a/configure
+++ b/configure
@@ -3410,6 +3410,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl const_nan"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5b4be49..d2c85cf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -356,6 +356,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += 
vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace.o opencl.o \
+opencl/tonemap.o 
opencl/colorspace_common.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f2d27d2..fa85c29 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -345,6 +345,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
new file mode 100644
index 000..7fd7bdf
--- /dev/null
+++ b/libavfilter/colorspace.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje 
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+int i, j;
+double det;
+
+out[0][0] =  (m11 * m22 - m21 * m12);
+out[0][1] = -(m01 * m22 - m21 * m02);
+out[0][2] =  (m01 * m12 - m11 * m02);
+out[1][0] = -(m10 * m22 - m20 * m12);
+out[1][1] =  (m00 * m22 - m20 * m02);
+out[1][2] = -(m00 * m12 - m10 * m02);
+out[2][0] =  (m10 * m21 - m20 * m11);
+out[2][1] = -(m00 * m21 - m20 * m01);
+out[2][2] =  (m00 * m11 - m10 * m01);
+
+det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+det = 1.0 / det;
+
+for (i = 0; i < 3; i++) {
+for (j = 0; j < 3

[FFmpeg-devel] [PATCH] lavfi: add opencl tonemap filter.

2018-05-28 Thread Ruiling Song

This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping.

An example command to use this filter with vaapi codecs:
FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \
opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \
vaapi -i INPUT -filter_hw_device ocl -filter_complex \
'[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \
[x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT

v2:
add peak detection.

Signed-off-by: Ruiling Song 
---
 configure  |   1 +
 libavfilter/Makefile   |   2 +
 libavfilter/allfilters.c   |   1 +
 libavfilter/colorspace_basic.c |  89 +
 libavfilter/colorspace_basic.h |  40 ++
 libavfilter/opencl/colorspace_basic.cl | 187 ++
 libavfilter/opencl/tonemap.cl  | 278 ++
 libavfilter/opencl_source.h|   2 +
 libavfilter/vf_tonemap_opencl.c| 655 +
 9 files changed, 1255 insertions(+)
 create mode 100644 libavfilter/colorspace_basic.c
 create mode 100644 libavfilter/colorspace_basic.h
 create mode 100644 libavfilter/opencl/colorspace_basic.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index e52f8f8..ee3586b 100755
--- a/configure
+++ b/configure
@@ -3401,6 +3401,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index c68ef05..0915656 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -352,6 +352,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += 
vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace_basic.o opencl.o \
+opencl/tonemap.o 
opencl/colorspace_basic.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b44093d..6873bab 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -343,6 +343,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c
new file mode 100644
index 000..93f9f08
--- /dev/null
+++ b/libavfilter/colorspace_basic.c
@@ -0,0 +1,89 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace_basic.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+int i, j;
+double det;
+
+out[0][0] =  (m11 * m22 - m21 * m12);
+out[0][1] = -(m01 * m22 - m21 * m02);
+out[0][2] =  (m01 * m12 - m11 * m02);
+out[1][0] = -(m10 * m22 - m20 * m12);
+out[1][1] =  (m00 * m22 - m20 * m02);
+out[1][2] = -(m00 * m12 - m10 * m02);
+out[2][0] =  (m10 * m21 - m20 * m11);
+out[2][1] = -(m00 * m21 - m20 * m01);
+out[2][2] =  (m00 * m11 - m10 * m01);
+
+det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+det = 1.0 / det;
+
+for (i = 0; i < 3; i++) {
+for (j = 0; j < 3; j++)
+out[i][j] *= det;
+}
+}
+
+void mul3x3(double

[FFmpeg-devel] [PATCH] lavfi: a minor fix to tonemap peak detection.

2018-05-20 Thread Ruiling Song

If the transfer was SMPTE2084, use the peak of 10000 even if not tagged.
Otherwise, we would assume it is HLG with a peak of 1200.
Based on suggestion by Niklas Haas.

Signed-off-by: Ruiling Song 
---
 libavfilter/vf_tonemap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index 10308bd..ab45f2e 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -131,10 +131,9 @@ static double determine_signal_peak(AVFrame *in)
 peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE;
 }
 
-/* smpte2084 needs the side data above to work correctly
- * if missing, assume that the original transfer was arib-std-b67 */
+/* if not SMPTE2084, we would assume HLG */
 if (!peak)
-peak = 12;
+peak = in->color_trc == AVCOL_TRC_SMPTE2084 ? 100 : 12;
 
 return peak;
 }
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavfi: add opencl tonemap filter.

2018-05-20 Thread Ruiling Song

This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping.

An example command to use this filter with vaapi codecs:
FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \
opencl=ocl@va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \
vaapi -i INPUT -filter_hw_device ocl -filter_complex \
'[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \
[x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT

Signed-off-by: Ruiling Song 
---
 configure  |   1 +
 libavfilter/Makefile   |   2 +
 libavfilter/allfilters.c   |   1 +
 libavfilter/colorspace_basic.c |  89 ++
 libavfilter/colorspace_basic.h |  40 +++
 libavfilter/opencl/colorspace_basic.cl | 179 +++
 libavfilter/opencl/tonemap.cl  | 258 +++
 libavfilter/opencl_source.h|   2 +
 libavfilter/vf_tonemap_opencl.c| 560 +
 9 files changed, 1132 insertions(+)
 create mode 100644 libavfilter/colorspace_basic.c
 create mode 100644 libavfilter/colorspace_basic.h
 create mode 100644 libavfilter/opencl/colorspace_basic.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index e52f8f8..ee3586b 100755
--- a/configure
+++ b/configure
@@ -3401,6 +3401,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index c68ef05..0915656 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -352,6 +352,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += 
vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace_basic.o opencl.o \
+opencl/tonemap.o 
opencl/colorspace_basic.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b44093d..6873bab 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -343,6 +343,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c
new file mode 100644
index 000..93f9f08
--- /dev/null
+++ b/libavfilter/colorspace_basic.c
@@ -0,0 +1,89 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace_basic.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+int i, j;
+double det;
+
+out[0][0] =  (m11 * m22 - m21 * m12);
+out[0][1] = -(m01 * m22 - m21 * m02);
+out[0][2] =  (m01 * m12 - m11 * m02);
+out[1][0] = -(m10 * m22 - m20 * m12);
+out[1][1] =  (m00 * m22 - m20 * m02);
+out[1][2] = -(m00 * m12 - m10 * m02);
+out[2][0] =  (m10 * m21 - m20 * m11);
+out[2][1] = -(m00 * m21 - m20 * m01);
+out[2][2] =  (m00 * m11 - m10 * m01);
+
+det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+det = 1.0 / det;
+
+for (i = 0; i < 3; i++) {
+for (j = 0; j < 3; j++)
+out[i][j] *= det;
+}
+}
+
+void mul3x3(double dst[3][3], const double

[FFmpeg-devel] [RFC] lavfi: add opencl tonemap filter.

2018-05-04 Thread Ruiling Song

It basically does hdr to sdr conversion with tonemapping.

Signed-off-by: Ruiling Song 
---
This patch adds a filter that does HDR to SDR conversion with tonemapping.
The filter does the whole tonemapping job in one pass, which is quite
different from vf_tonemap.c.
I chose this approach because I think it results in fewer memory accesses.

I also found that tonemapping shares a lot of code with colorspace conversion,
so I moved the colorspace-related code into separate files (both OpenCL kernel
and host code).

I am not sure whether the design is OK.
Would anybody like to comment on the overall design or the
implementation details?


Thanks!
Ruiling

 configure  |   1 +
 libavfilter/Makefile   |   2 +
 libavfilter/allfilters.c   |   1 +
 libavfilter/colorspace_basic.c |  89 +++
 libavfilter/colorspace_basic.h |  40 +++
 libavfilter/opencl/colorspace_basic.cl | 137 ++
 libavfilter/opencl/tonemap.cl  | 136 ++
 libavfilter/opencl_source.h|   2 +
 libavfilter/vf_tonemap_opencl.c| 472 +
 9 files changed, 880 insertions(+)
 create mode 100644 libavfilter/colorspace_basic.c
 create mode 100644 libavfilter/colorspace_basic.h
 create mode 100644 libavfilter/opencl/colorspace_basic.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index 7f199c6..b9e464d 100755
--- a/configure
+++ b/configure
@@ -3395,6 +3395,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 3454f25..7a1b0e8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -348,6 +348,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += 
vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)+= vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o 
colorspace_basic.o opencl.o \
+opencl/tonemap.o 
opencl/colorspace_basic.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)  += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)  += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index d958f9b..759097a 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -339,6 +339,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c
new file mode 100644
index 000..93f9f08
--- /dev/null
+++ b/libavfilter/colorspace_basic.c
@@ -0,0 +1,89 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace_basic.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+   m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+   m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+int i, j;
+double det;
+
+out[0][0] =  (m11 * m22 - m21 * m12);
+out[0][1] = -(m01 * m22 - m21 * m02);
+out[0][2] =  (m01 * m12 - m11 * m02);
+out[1][0] = -(m10 * m22 - m20 * m12);
+out[1][1] =  (m00 * m22 - m20 * m02);
+out[1][2] = -(m00 * m12 - m10 * m02);
+out[2][0] =  (m10 * m21 - m20 * m11);
+out[2][1] = -(m00 * m21 - m20 * m01);
+out[2][2] =  (m00 * m11 - m10 * m01);
+
+det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+det = 1.0 / de

[FFmpeg-devel] [PATCH v2 2/2] lavf/qsv: clone the frame which may be managed by framework

2018-04-02 Thread Ruiling Song

For filters based on framesync, the input frame is managed
by framesync, so we should not keep and destroy it directly;
instead we make a clone of it here, or else a double-free will occur.
But for other filters not based on framesync, we still need to
free the input frame inside filter_frame. That's why I made
this v2 to fix the side-effect on normal filters.

v2:
add one av_frame_free() in vf_vpp_qsv.c

Signed-off-by: Ruiling Song 
---
 libavfilter/qsvvpp.c | 4 ++--
 libavfilter/vf_vpp_qsv.c | 5 -
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index f32b46d..86787c5 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -296,7 +296,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 av_log(ctx, AV_LOG_ERROR, "QSVVPP gets a wrong frame.\n");
 return NULL;
 }
-qsv_frame->frame   = picref;
+qsv_frame->frame   = av_frame_clone(picref);
 qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
 } else {
 /* make a copy if the input is not padded as libmfx requires */
@@ -318,7 +318,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 av_frame_copy_props(qsv_frame->frame, picref);
 av_frame_free(&picref);
 } else
-qsv_frame->frame = picref;
+qsv_frame->frame = av_frame_clone(picref);
 
 if (map_frame_to_surface(qsv_frame->frame,
 &qsv_frame->surface_internal) < 0) {
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index bd5fc32..4ef5bfb 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -326,8 +326,11 @@ static int config_output(AVFilterLink *outlink)
 static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
 {
 VPPContext *vpp = inlink->dst->priv;
+int ret = 0;
 
-return ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
+ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
+av_frame_free(&picref);
+return ret;
 }
 
 static int query_formats(AVFilterContext *ctx)
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH v2 1/2] lavf: make overlay_qsv work based on framesync

2018-04-02 Thread Ruiling Song

The existing version, which was cherry-picked from Libav, does not work
with the FFmpeg framework, because ff_request_frame() is totally
different between Libav (recursive) and FFmpeg (non-recursive).
The existing overlay_qsv implementation depends on the recursive version
of ff_request_frame() to trigger an immediate call to request_frame() on
the input pad. But this has been removed in FFmpeg since
"lavfi: make request_frame() non-recursive."
Now that we have handy framesync support in FFmpeg, I make it work
based on framesync. Some other fixes that are also needed to make
overlay_qsv work are put in a separate patch.

v2:
add .preinit field to initialize framesync options.
export more options like vf_overlay.c

Signed-off-by: Ruiling Song 
---
 libavfilter/Makefile |   2 +-
 libavfilter/vf_overlay_qsv.c | 213 ---
 2 files changed, 78 insertions(+), 137 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index a90ca30..7f2ad1f 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -267,7 +267,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)   += 
vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)+= vf_overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
 opencl/overlay.o framesync.o
-OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o
+OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o framesync.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)  += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)+= vf_pad.o
 OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index 6c3efdb..2087178 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -36,6 +36,7 @@
 #include "formats.h"
 #include "video.h"
 
+#include "framesync.h"
 #include "qsvvpp.h"
 
 #define MAIN0
@@ -56,14 +57,10 @@ enum var_name {
 VAR_VARS_NB
 };
 
-enum EOFAction {
-EOF_ACTION_REPEAT,
-EOF_ACTION_ENDALL
-};
-
 typedef struct QSVOverlayContext {
 const AVClass  *class;
 
+FFFrameSync fs;
 QSVVPPContext  *qsv;
 QSVVPPParamqsv_param;
 mfxExtVPPComposite comp_conf;
@@ -72,10 +69,6 @@ typedef struct QSVOverlayContext {
 char *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh;
 uint16_t  overlay_alpha, overlay_pixel_alpha;
 
-enum EOFAction eof_action;  /* action to take on EOF from source */
-
-AVFrame *main;
-AVFrame *over_prev, *over_next;
 } QSVOverlayContext;
 
 static const char *const var_names[] = {
@@ -90,20 +83,25 @@ static const char *const var_names[] = {
 NULL
 };
 
-static const AVOption options[] = {
+static const AVOption overlay_qsv_options[] = {
 { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { 
.str="0"}, 0, 255, .flags = FLAGS},
 { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { 
.str="0"}, 0, 255, .flags = FLAGS},
 { "w", "Overlay width",  OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { 
.str="overlay_iw"}, 0, 255, .flags = FLAGS},
 { "h", "Overlay height", OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { 
.str="overlay_ih*w/overlay_iw"}, 0, 255, .flags = FLAGS},
 { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, 
{ .i64 = 255}, 0, 255, .flags = FLAGS},
 { "eof_action", "Action to take when encountering EOF from secondary input 
",
-OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
-EOF_ACTION_REPEAT, EOF_ACTION_ENDALL, .flags = FLAGS, "eof_action" },
-{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 
= EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
-{ "endall", "End both streams.",  0, AV_OPT_TYPE_CONST, { .i64 
= EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
+OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT 
},
+EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
+{ "repeat", "Repeat the previous frame.",   0, AV_OPT_TYPE_CONST, { 
.i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
+{ "endall", "End both streams.",0, AV_OPT_TYPE_CONST, { 
.i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
+{ "pass",   "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { 
.i64 = EOF_ACTION_PASS },   .flags = FLAGS, "eof_action" },
+{ "shortest", "force termination when the

[FFmpeg-devel] [PATCH 1/2] lavf: make overlay_qsv work based on framesync.

2018-03-21 Thread Ruiling Song

The previous version, which was cherry-picked from Libav, does not work
with the FFmpeg framework, because ff_request_frame() is totally
different between Libav and FFmpeg. So I make it work by
using framesync. Some other fixes that are needed to run overlay_qsv
are put in a separate patch.

Signed-off-by: Ruiling Song 
---
 libavfilter/Makefile |   2 +-
 libavfilter/vf_overlay_qsv.c | 212 +++
 2 files changed, 75 insertions(+), 139 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fc16512..e642b8d 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -263,7 +263,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)   += 
vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)+= vf_overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
 opencl/overlay.o framesync.o
-OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o
+OBJS-$(CONFIG_OVERLAY_QSV_FILTER)+= vf_overlay_qsv.o framesync.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)  += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)+= vf_pad.o
 OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index 6c3efdb..c4c694f 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -36,6 +36,7 @@
 #include "formats.h"
 #include "video.h"
 
+#include "framesync.h"
 #include "qsvvpp.h"
 
 #define MAIN0
@@ -56,14 +57,10 @@ enum var_name {
 VAR_VARS_NB
 };
 
-enum EOFAction {
-EOF_ACTION_REPEAT,
-EOF_ACTION_ENDALL
-};
-
 typedef struct QSVOverlayContext {
 const AVClass  *class;
 
+FFFrameSync fs;
 QSVVPPContext  *qsv;
 QSVVPPParamqsv_param;
 mfxExtVPPComposite comp_conf;
@@ -72,10 +69,7 @@ typedef struct QSVOverlayContext {
 char *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh;
 uint16_t  overlay_alpha, overlay_pixel_alpha;
 
-enum EOFAction eof_action;  /* action to take on EOF from source */
-
-AVFrame *main;
-AVFrame *over_prev, *over_next;
+enum FFFrameSyncExtMode eof_action;
 } QSVOverlayContext;
 
 static const char *const var_names[] = {
@@ -90,20 +84,22 @@ static const char *const var_names[] = {
 NULL
 };
 
-static const AVOption options[] = {
+static const AVOption overlay_qsv_options[] = {
 { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { 
.str="0"}, 0, 255, .flags = FLAGS},
 { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { 
.str="0"}, 0, 255, .flags = FLAGS},
 { "w", "Overlay width",  OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { 
.str="overlay_iw"}, 0, 255, .flags = FLAGS},
 { "h", "Overlay height", OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { 
.str="overlay_ih*w/overlay_iw"}, 0, 255, .flags = FLAGS},
 { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, 
{ .i64 = 255}, 0, 255, .flags = FLAGS},
 { "eof_action", "Action to take when encountering EOF from secondary input 
",
-OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
-EOF_ACTION_REPEAT, EOF_ACTION_ENDALL, .flags = FLAGS, "eof_action" },
-{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 
= EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
-{ "endall", "End both streams.",  0, AV_OPT_TYPE_CONST, { .i64 
= EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
+OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EXT_INFINITY },
+EXT_STOP, EXT_INFINITY, .flags = FLAGS, "eof_action" },
+{ "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 
= EXT_INFINITY }, .flags = FLAGS, "eof_action" },
+{ "endall", "End both streams.",  0, AV_OPT_TYPE_CONST, { .i64 
= EXT_STOP }, .flags = FLAGS, "eof_action" },
 { NULL }
 };
 
+FRAMESYNC_DEFINE_CLASS(overlay_qsv, QSVOverlayContext, fs);
+
 static int eval_expr(AVFilterContext *ctx)
 {
 QSVOverlayContext *vpp = ctx->priv;
@@ -230,12 +226,53 @@ static int config_overlay_input(AVFilterLink *inlink)
 return 0;
 }
 
+static int process_frame(FFFrameSync *fs)
+{
+AVFilterContext  *ctx = fs->parent;
+QSVOverlayContext  *s = fs->opaque;
+AVFrame*frame = NULL;
+int   ret = 0, i;
+
+for (i = 0; i < ctx->nb_inputs; i++) {
+ret = ff_framesync_get_frame(fs, i, &frame, 0);
+if (ret == 0)
+ret = ff_qsvvpp_filter

[FFmpeg-devel] [PATCH 2/2] lavf: clone the frame managed by framework.

2018-03-21 Thread Ruiling Song

We should clone the frame, which is managed by the framework.
Assigning it directly will cause a double-free issue when qsv tries
to free it, because the frame is in fact managed by the framework.

Right now, I am still not quite sure why we receive a 'more data'
error from libmfx, but some simple debugging suggests that it
is meaningless here. So just skip it entirely, not bothering to
return an EAGAIN error to the caller.

Signed-off-by: Ruiling Song 
---
 libavfilter/qsvvpp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index f32b46d..980dd62 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -296,7 +296,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 av_log(ctx, AV_LOG_ERROR, "QSVVPP gets a wrong frame.\n");
 return NULL;
 }
-qsv_frame->frame   = picref;
+qsv_frame->frame   = av_frame_clone(picref);
 qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
 } else {
 /* make a copy if the input is not padded as libmfx requires */
@@ -318,7 +318,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, 
AVFilterLink *inlink, AVFrame *p
 av_frame_copy_props(qsv_frame->frame, picref);
 av_frame_free(&picref);
 } else
-qsv_frame->frame = picref;
+qsv_frame->frame = av_frame_clone(picref);
 
 if (map_frame_to_surface(qsv_frame->frame,
 &qsv_frame->surface_internal) < 0) {
@@ -707,7 +707,7 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink 
*inlink, AVFrame *picr
 if (ret < 0 && ret != MFX_ERR_MORE_SURFACE) {
 /* Ignore more_data error */
 if (ret == MFX_ERR_MORE_DATA)
-ret = AVERROR(EAGAIN);
+ret = 0;
 break;
 }
 
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavc/vaapi: release buffer before destroy context.

2018-02-07 Thread Ruiling Song

The common way to use libVA is to first destroy the buffer, then the
context. I am not sure whether libVA has a clear statement on this.
This patch just makes things simple. It fixes a segmentation
fault issue with the iHD open source driver.

Signed-off-by: Ruiling Song 
---
 libavcodec/vaapi_encode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 550ea47..607e3ab 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -1562,6 +1562,8 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
 vaapi_encode_free(avctx, pic);
 }
 
+av_buffer_pool_uninit(&ctx->output_buffer_pool);
+
 if (ctx->va_context != VA_INVALID_ID) {
 vaDestroyContext(ctx->hwctx->display, ctx->va_context);
 ctx->va_context = VA_INVALID_ID;
@@ -1572,7 +1574,6 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
 ctx->va_config = VA_INVALID_ID;
 }
 
-av_buffer_pool_uninit(&ctx->output_buffer_pool);
 
 av_freep(&ctx->codec_sequence_params);
 av_freep(&ctx->codec_picture_params);
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavc/qsv: skip the packet if decoding failure.

2018-01-25 Thread Ruiling Song

From: "Ruiling, Song" 

MediaSDK may fail to decode some frames; just skip them.
Otherwise, it will keep decoding the failing packet repeatedly
without processing any packets afterwards.

v2:
switch to using av_packet_unref().

Signed-off-by: Ruiling Song 
---
 libavcodec/qsvdec_h2645.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index 5e00673..d92a150 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -153,8 +153,12 @@ static int qsv_decode_frame(AVCodecContext *avctx, void 
*data,
 }
 
 ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, 
&s->buffer_pkt);
-if (ret < 0)
+if (ret < 0) {
+/* Drop buffer_pkt when failed to decode the packet. Otherwise,
+   the decoder will keep decoding the failure packet. */
+av_packet_unref(&s->buffer_pkt);
 return ret;
+}
 
 s->buffer_pkt.size -= ret;
 s->buffer_pkt.data += ret;
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] lavc/qsv: skip the packet if decoding failure.

2018-01-22 Thread Ruiling, Song

MediaSDK may fail to decode some frames; just skip them.
Otherwise, it will keep decoding the failing packet repeatedly
without processing any packets afterwards.

Signed-off-by: Ruiling, Song 
---
 libavcodec/qsvdec_h2645.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index 5e00673..0d06b21 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -153,8 +153,12 @@ static int qsv_decode_frame(AVCodecContext *avctx, void 
*data,
 }
 
 ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, 
&s->buffer_pkt);
-if (ret < 0)
-return ret;
+if (ret < 0){
+ /* force the buffer_pkt's size to 0 when failed to decode the 
packet,
+otherwise, the decoder will keep decoding the failure packet. 
*/
+ s->buffer_pkt.size = 0;
+ return ret;
+}
 
 s->buffer_pkt.size -= ret;
 s->buffer_pkt.data += ret;
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] vaapi_h265: general_level_idc should times 3.

2017-11-27 Thread Ruiling Song

Signed-off-by: Ruiling Song 
---
 libavcodec/vaapi_encode_h265.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 3ae92a7..32b8bc6 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -219,7 +219,7 @@ static int 
vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 .general_non_packed_constraint_flag = 1,
 .general_frame_only_constraint_flag = 1,
 
-.general_level_idc = avctx->level,
+.general_level_idc = avctx->level * 3,
 };
 vps->profile_tier_level.general_profile_compatibility_flag[avctx->profile 
& 31] = 1;
 
-- 
2.7.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

62 matches

Mail list logo