Re: [FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance

2019-05-05 Thread Paul B Mahol
On 5/5/19, lance.lmw...@gmail.com  wrote:
> From: Limin Wang 
>
> ---
>  libavfilter/vf_tonemap.c | 109 +--
>  1 file changed, 81 insertions(+), 28 deletions(-)
>
> diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
> index efd4af5466..0b26dd5e7f 100644
> --- a/libavfilter/vf_tonemap.c
> +++ b/libavfilter/vf_tonemap.c
> @@ -1,5 +1,6 @@
>  /*
>   * Copyright (c) 2017 Vittorio Giovara 
> + * Copyright (c) 2019 Limin Wang 
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -71,6 +72,13 @@ typedef struct TonemapContext {
>  const struct LumaCoefficients *coeffs;
>  } TonemapContext;
>
> +typedef struct ThreadData {
> +AVFrame *in, *out;
> +double peak;
> +const struct AVPixFmtDescriptor *desc;
> +const struct AVPixFmtDescriptor *odesc;
> +} ThreadData;
> +
>  static const enum AVPixelFormat pix_fmts[] = {
>  AV_PIX_FMT_GBRPF32,
>  AV_PIX_FMT_GBRAPF32,
> @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak)
>  }
>
>  #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
> -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
> -const AVPixFmtDescriptor *desc, int x, int y, double
> peak)
> +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float
> *g_out,
> +const float *r_in, const float *b_in, const float
> *g_in,
> +const AVPixFmtDescriptor *desc, double peak)
>  {
> -const float *r_in = (const float *)(in->data[0] + x *
> desc->comp[0].step + y * in->linesize[0]);
> -const float *b_in = (const float *)(in->data[1] + x *
> desc->comp[1].step + y * in->linesize[1]);
> -const float *g_in = (const float *)(in->data[2] + x *
> desc->comp[2].step + y * in->linesize[2]);
> -float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y *
> out->linesize[0]);
> -float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y *
> out->linesize[1]);
> -float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y *
> out->linesize[2]);
>  float sig, sig_orig;
>
>  /* load values */
> @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out,
> const AVFrame *in,
>  *r_out *= sig / sig_orig;
>  *g_out *= sig / sig_orig;
>  *b_out *= sig / sig_orig;
> +
> +}
> +
> +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int
> nb_jobs)
> +{
> +TonemapContext *s = ctx->priv;
> +const ThreadData *td = arg;
> +const AVFrame *in = td->in;
> +AVFrame *out = td->out;
> +int x, y;
> +const AVPixFmtDescriptor *desc = td->desc;
> +const AVPixFmtDescriptor *odesc = td->odesc;
> +const int slice_start = (out->height *  jobnr   ) / nb_jobs;
> +const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;
> +const int slice_h   = slice_end - slice_start;
> +uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
> +uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
> +uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
> +const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
> +const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
> +const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
> +uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
> +const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3];
> +
> +/* do the tone map */
> +for (y = slice_start; y < slice_end; y++) {
> +for (x = 0; x < out->width; x++) {
> +const float *r_in = (const float *)(srcr + x *
> desc->comp[0].step);
> +const float *b_in = (const float *)(srcb + x *
> desc->comp[1].step);
> +const float *g_in = (const float *)(srcg + x *
> desc->comp[2].step);
> +float *r_out = (float *)(dstr + x * desc->comp[0].step);
> +float *b_out = (float *)(dstb + x * desc->comp[1].step);
> +float *g_out = (float *)(dstg + x * desc->comp[2].step);
> +
> +tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc,
> td->peak);
> +}
> +srcr += in->linesize[0];
> +srcg += in->linesize[1];
> +srcb += in->linesize[2];
> +dstr += out->linesize[0];
> +dstg += out->linesize[1];
> +dstb += out->linesize[2];
> +}
> +
> +/* copy/generate alpha if needed */
> +if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> +av_image_copy_plane(dsta, out->linesize[3],
> +srca, in->linesize[3],
> +out->linesize[3], slice_h);
> +} else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +for (y = slice_start; y < slice_end; y++) {
> +for (x = 0; x < out->width; x++) {
> +AV_WN32(dsta + x * odesc->comp[3].step + y *
> out->linesize[3],
> +av_float2int(1.0f));
> +}
> + 

[FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance

2019-05-05 Thread lance . lmwang
From: Limin Wang 

---
 libavfilter/vf_tonemap.c | 109 +--
 1 file changed, 81 insertions(+), 28 deletions(-)

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index efd4af5466..0b26dd5e7f 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2017 Vittorio Giovara 
+ * Copyright (c) 2019 Limin Wang 
  *
  * This file is part of FFmpeg.
  *
@@ -71,6 +72,13 @@ typedef struct TonemapContext {
 const struct LumaCoefficients *coeffs;
 } TonemapContext;
 
+/* Per-frame arguments handed to do_tonemap_slice() through its arg pointer. */
+typedef struct ThreadData {
+    AVFrame *in;                             /* frame the slice worker reads from */
+    AVFrame *out;                            /* frame the slice worker writes to */
+    double peak;                             /* forwarded unchanged to tonemap() */
+    const struct AVPixFmtDescriptor *desc;   /* supplies component steps and the input alpha flag */
+    const struct AVPixFmtDescriptor *odesc;  /* supplies the output alpha flag and alpha step */
+} ThreadData;
+
 static const enum AVPixelFormat pix_fmts[] = {
 AV_PIX_FMT_GBRPF32,
 AV_PIX_FMT_GBRAPF32,
@@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak)
 }
 
 #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
-static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
-const AVPixFmtDescriptor *desc, int x, int y, double peak)
+static void tonemap(TonemapContext *s, float *r_out, float *b_out, float 
*g_out,
+const float *r_in, const float *b_in, const float *g_in,
+const AVPixFmtDescriptor *desc, double peak)
 {
-const float *r_in = (const float *)(in->data[0] + x * desc->comp[0].step + 
y * in->linesize[0]);
-const float *b_in = (const float *)(in->data[1] + x * desc->comp[1].step + 
y * in->linesize[1]);
-const float *g_in = (const float *)(in->data[2] + x * desc->comp[2].step + 
y * in->linesize[2]);
-float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y * 
out->linesize[0]);
-float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y * 
out->linesize[1]);
-float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y * 
out->linesize[2]);
 float sig, sig_orig;
 
 /* load values */
@@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out, 
const AVFrame *in,
 *r_out *= sig / sig_orig;
 *g_out *= sig / sig_orig;
 *b_out *= sig / sig_orig;
+
+}
+
+/**
+ * Process one horizontal slice of a frame: tone-map every pixel in rows
+ * [slice_start, slice_end) and then copy or generate the alpha plane for
+ * those same rows.
+ *
+ * @param ctx     filter context; ctx->priv is the TonemapContext
+ * @param arg     pointer to the ThreadData describing the frame pair
+ * @param jobnr   index of this job, used to derive the slice bounds
+ * @param nb_jobs number of jobs the frame height is divided between
+ * @return always 0
+ */
+static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    TonemapContext *s = ctx->priv;
+    const ThreadData *td = arg;
+    const AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    const int slice_start = (out->height *  jobnr   ) / nb_jobs;
+    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;
+    const int slice_h     = slice_end - slice_start;
+    uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
+    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
+    uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
+    const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
+    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
+    const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
+    int x, y;
+
+    /* do the tone map */
+    for (y = slice_start; y < slice_end; y++) {
+        for (x = 0; x < out->width; x++) {
+            const float *r_in = (const float *)(srcr + x * desc->comp[0].step);
+            const float *b_in = (const float *)(srcb + x * desc->comp[1].step);
+            const float *g_in = (const float *)(srcg + x * desc->comp[2].step);
+            float *r_out = (float *)(dstr + x * desc->comp[0].step);
+            float *b_out = (float *)(dstb + x * desc->comp[1].step);
+            float *g_out = (float *)(dstg + x * desc->comp[2].step);
+
+            tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc, td->peak);
+        }
+        /* Each row pointer advances by the stride of the plane it was
+         * derived from: index 0 for r, 1 for b, 2 for g. */
+        srcr += in->linesize[0];
+        srcb += in->linesize[1];
+        srcg += in->linesize[2];
+        dstr += out->linesize[0];
+        dstb += out->linesize[1];
+        dstg += out->linesize[2];
+    }
+
+    /* copy/generate alpha if needed; plane 3 is only touched inside these
+     * branches so no pointer is formed from it for formats without alpha */
+    if ((desc->flags & AV_PIX_FMT_FLAG_ALPHA) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA)) {
+        av_image_copy_plane(out->data[3] + slice_start * out->linesize[3], out->linesize[3],
+                            in->data[3]  + slice_start * in->linesize[3],  in->linesize[3],
+                            out->linesize[3], slice_h);
+    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        const uint32_t opaque = av_float2int(1.0f);
+
+        for (y = slice_start; y < slice_end; y++) {
+            /* y is an absolute row index, so the row is addressed from the
+             * plane base; adding it to a slice-relative pointer would apply
+             * the slice offset twice and write past the slice */
+            uint8_t *row = out->data[3] + y * out->linesize[3];
+
+            for (x = 0; x < out->width; x++)
+                AV_WN32(row + x * odesc->comp[3].step, opaque);
+        }
+    }
+
+    return 0;
 }
 
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-TonemapContext *s = link->dst->priv;
+AVFilterContext *ctx = link->dst;
+TonemapContext *s = ctx->priv;
 AVFilterLink *outlink = link->dst->outputs[0];
 AVFrame *