Re: [FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance
On 5/5/19, lance.lmw...@gmail.com wrote: > From: Limin Wang > > --- > libavfilter/vf_tonemap.c | 109 +-- > 1 file changed, 81 insertions(+), 28 deletions(-) > > diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c > index efd4af5466..0b26dd5e7f 100644 > --- a/libavfilter/vf_tonemap.c > +++ b/libavfilter/vf_tonemap.c > @@ -1,5 +1,6 @@ > /* > * Copyright (c) 2017 Vittorio Giovara > + * Copyright (c) 2019 Limin Wang > * > * This file is part of FFmpeg. > * > @@ -71,6 +72,13 @@ typedef struct TonemapContext { > const struct LumaCoefficients *coeffs; > } TonemapContext; > > +typedef struct ThreadData { > +AVFrame *in, *out; > +double peak; > +const struct AVPixFmtDescriptor *desc; > +const struct AVPixFmtDescriptor *odesc; > +} ThreadData; > + > static const enum AVPixelFormat pix_fmts[] = { > AV_PIX_FMT_GBRPF32, > AV_PIX_FMT_GBRAPF32, > @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak) > } > > #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a) > -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in, > -const AVPixFmtDescriptor *desc, int x, int y, double > peak) > +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float > *g_out, > +const float *r_in, const float *b_in, const float > *g_in, > +const AVPixFmtDescriptor *desc, double peak) > { > -const float *r_in = (const float *)(in->data[0] + x * > desc->comp[0].step + y * in->linesize[0]); > -const float *b_in = (const float *)(in->data[1] + x * > desc->comp[1].step + y * in->linesize[1]); > -const float *g_in = (const float *)(in->data[2] + x * > desc->comp[2].step + y * in->linesize[2]); > -float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y * > out->linesize[0]); > -float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y * > out->linesize[1]); > -float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y * > out->linesize[2]); > float sig, sig_orig; > > /* load values */ > @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out, > const AVFrame *in, > *r_out *= sig / sig_orig; > *g_out *= sig / sig_orig; > *b_out *= sig / sig_orig; > + > +} > + > +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) > +{ > +TonemapContext *s = ctx->priv; > +const ThreadData *td = arg; > +const AVFrame *in = td->in; > +AVFrame *out = td->out; > +int x, y; > +const AVPixFmtDescriptor *desc = td->desc; > +const AVPixFmtDescriptor *odesc = td->odesc; > +const int slice_start = (out->height * jobnr ) / nb_jobs; > +const int slice_end = (out->height * (jobnr+1)) / nb_jobs; > +const int slice_h = slice_end - slice_start; > +uint8_t *dstr = out->data[0] + slice_start * out->linesize[0]; > +uint8_t *dstb = out->data[1] + slice_start * out->linesize[1]; > +uint8_t *dstg = out->data[2] + slice_start * out->linesize[2]; > +const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0]; > +const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1]; > +const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2]; > +uint8_t *dsta = out->data[3] + slice_start * out->linesize[3]; > +const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3]; > + > +/* do the tone map */ > +for (y = slice_start; y < slice_end; y++) { > +for (x = 0; x < out->width; x++) { > +const float *r_in = (const float *)(srcr + x * > desc->comp[0].step); > +const float *b_in = (const float *)(srcb + x * > desc->comp[1].step); > +const float *g_in = (const float *)(srcg + x * > desc->comp[2].step); > +float *r_out = (float *)(dstr + x * desc->comp[0].step); > +float *b_out = (float *)(dstb + x * desc->comp[1].step); > +float *g_out = (float *)(dstg + x * desc->comp[2].step); > + > +tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc, > td->peak); > +} > +srcr += in->linesize[0]; > +srcg += in->linesize[1]; > +srcb += in->linesize[2]; > +dstr += out->linesize[0]; > +dstg += out->linesize[1]; > +dstb += out->linesize[2]; > +} > + > +/* copy/generate alpha if needed */ > +if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & > AV_PIX_FMT_FLAG_ALPHA) { > +av_image_copy_plane(dsta, out->linesize[3], > +srca, in->linesize[3], > +out->linesize[3], slice_h); > +} else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) { > +for (y = slice_start; y < slice_end; y++) { > +for (x = 0; x < out->width; x++) { > +AV_WN32(dsta + x * odesc->comp[3].step + y * > out->linesize[3], > +av_float2int(1.0f)); > +} > +
[FFmpeg-devel] [PATCH] vf_tonemap.c: Support for slice thread for performance
From: Limin Wang --- libavfilter/vf_tonemap.c | 109 +-- 1 file changed, 81 insertions(+), 28 deletions(-) diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c index efd4af5466..0b26dd5e7f 100644 --- a/libavfilter/vf_tonemap.c +++ b/libavfilter/vf_tonemap.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2017 Vittorio Giovara + * Copyright (c) 2019 Limin Wang * * This file is part of FFmpeg. * @@ -71,6 +72,13 @@ typedef struct TonemapContext { const struct LumaCoefficients *coeffs; } TonemapContext; +typedef struct ThreadData { +AVFrame *in, *out; +double peak; +const struct AVPixFmtDescriptor *desc; +const struct AVPixFmtDescriptor *odesc; +} ThreadData; + static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak) } #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a) -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in, -const AVPixFmtDescriptor *desc, int x, int y, double peak) +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float *g_out, +const float *r_in, const float *b_in, const float *g_in, +const AVPixFmtDescriptor *desc, double peak) { -const float *r_in = (const float *)(in->data[0] + x * desc->comp[0].step + y * in->linesize[0]); -const float *b_in = (const float *)(in->data[1] + x * desc->comp[1].step + y * in->linesize[1]); -const float *g_in = (const float *)(in->data[2] + x * desc->comp[2].step + y * in->linesize[2]); -float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y * out->linesize[0]); -float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y * out->linesize[1]); -float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y * out->linesize[2]); float sig, sig_orig; /* load values */ @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in, *r_out *= sig / sig_orig; *g_out *= sig / sig_orig; *b_out *= sig / sig_orig; + +} + +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ +TonemapContext *s = ctx->priv; +const ThreadData *td = arg; +const AVFrame *in = td->in; +AVFrame *out = td->out; +int x, y; +const AVPixFmtDescriptor *desc = td->desc; +const AVPixFmtDescriptor *odesc = td->odesc; +const int slice_start = (out->height * jobnr ) / nb_jobs; +const int slice_end = (out->height * (jobnr+1)) / nb_jobs; +const int slice_h = slice_end - slice_start; +uint8_t *dstr = out->data[0] + slice_start * out->linesize[0]; +uint8_t *dstb = out->data[1] + slice_start * out->linesize[1]; +uint8_t *dstg = out->data[2] + slice_start * out->linesize[2]; +const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0]; +const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1]; +const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2]; +uint8_t *dsta = out->data[3] + slice_start * out->linesize[3]; +const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3]; + +/* do the tone map */ +for (y = slice_start; y < slice_end; y++) { +for (x = 0; x < out->width; x++) { +const float *r_in = (const float *)(srcr + x * desc->comp[0].step); +const float *b_in = (const float *)(srcb + x * desc->comp[1].step); +const float *g_in = (const float *)(srcg + x * desc->comp[2].step); +float *r_out = (float *)(dstr + x * desc->comp[0].step); +float *b_out = (float *)(dstb + x * desc->comp[1].step); +float *g_out = (float *)(dstg + x * desc->comp[2].step); + +tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc, td->peak); +} +srcr += in->linesize[0]; +srcg += in->linesize[1]; +srcb += in->linesize[2]; +dstr += out->linesize[0]; +dstg += out->linesize[1]; +dstb += out->linesize[2]; +} + +/* copy/generate alpha if needed */ +if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) { +av_image_copy_plane(dsta, out->linesize[3], +srca, in->linesize[3], +out->linesize[3], slice_h); +} else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) { +for (y = slice_start; y < slice_end; y++) { +for (x = 0; x < out->width; x++) { +AV_WN32(dsta + x * odesc->comp[3].step + y * out->linesize[3], +av_float2int(1.0f)); +} +} +} + +return 0; } static int filter_frame(AVFilterLink *link, AVFrame *in) { -TonemapContext *s = link->dst->priv; +AVFilterContext *ctx = link->dst; +TonemapContext *s = ctx->priv; AVFilterLink *outlink = link->dst->outputs[0]; AVFrame *