On 3/8/2017 1:58 PM, Thomas Mundt wrote: > Hi, > > attached patch adds a complex (-1 2 6 2 -1) vertcal lowpassfilter to > vf_interlace. This will better retain detail and reduce blurring compared to > the existing (1 2 1) filter. > > Please comment. > > > 0001-avfilter-vf_interlace-add-complex-vertcal-lowpassfil.patch > > > From f157bc9b898d1215f8ec10b301720a9d9ff03c63 Mon Sep 17 00:00:00 2001 > From: Thomas Mundt <loud...@yahoo.de> > Date: Wed, 8 Mar 2017 17:33:18 +0100 > Subject: [PATCH] avfilter/vf_interlace: add complex vertcal lowpassfilter Add > a complex (-1 2 6 2 -1) filter to reduce blurring compared to the existing (1 > 2 1) filter. > > Signed-off-by: Thomas Mundt <loud...@yahoo.de> > --- > doc/filters.texi | 13 ++++++-- > libavfilter/interlace.h | 2 ++ > libavfilter/vf_interlace.c | 42 ++++++++++++++++++++++-- > libavfilter/x86/vf_interlace.asm | 65 > +++++++++++++++++++++++++++++++++++++ > libavfilter/x86/vf_interlace_init.c | 17 +++++++--- > 5 files changed, 130 insertions(+), 9 deletions(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index b5265d9..0041d39 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9109,8 +9109,17 @@ This determines whether the interlaced frame is taken > from the even > (tff - default) or odd (bff) lines of the progressive frame. > > @item lowpass > -Enable (default) or disable the vertical lowpass filter to avoid twitter > -interlacing and reduce moire patterns. > +Vertical lowpass filter to avoid twitter interlacing and > +reduce moire patterns. > + > +@table @samp > +@item 0 > +Disable vertical lowpass filter > +@item 1 > +Enable linear filter (default) > +@item 2 > +Enable complex filter > +@end table > @end table > > @section kerndeint > diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h > index da073ae..7ad457e 100644 > --- a/libavfilter/interlace.h > +++ b/libavfilter/interlace.h > @@ -51,6 +51,8 @@ typedef struct InterlaceContext { > AVFrame *cur, *next; // the two frames from which the new one is > obtained > void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t > *srcp, > const uint8_t *srcp_above, const uint8_t > *srcp_below); > + void (*lowpass_line_complex)(uint8_t *dstp, ptrdiff_t linesize, > + const uint8_t *srcp, int mref, int pref);
Why not keep a single prototype, passing mref and pref for both linear and complex? You can calculate srcp_above and srcp_below for linear like you're doing it for complex in both the c and asm versions. In any case, mref and pref should be ptrdiff_t and not int. > } InterlaceContext; > > void ff_interlace_init_x86(InterlaceContext *interlace); > diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c > index efa3128..e8d5de4 100644 > --- a/libavfilter/vf_interlace.c > +++ b/libavfilter/vf_interlace.c > @@ -47,7 +47,7 @@ static const AVOption interlace_options[] = { > { "bff", "bottom field first", 0, > AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = > FLAGS, .unit = "scan" }, > { "lowpass", "set vertical low-pass filter", OFFSET(lowpass), > - AV_OPT_TYPE_BOOL, {.i64 = 1 }, 0, 1, .flags = FLAGS }, > + AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 2, .flags = FLAGS }, Maybe add AV_OPT_TYPE_CONST values "off", "linear" and "complex". > { NULL } > }; > > @@ -67,6 +67,24 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t > linesize, > } > } > > +static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, > + const uint8_t *srcp, int mref, int pref) > +{ > + const uint8_t *srcp_above = srcp + mref; > + const uint8_t *srcp_below = srcp + pref; > + const uint8_t *srcp_above2 = srcp + mref * 2; > + const uint8_t *srcp_below2 = srcp + pref * 2; > + int i; > + for (i = 0; i < linesize; i++) { > + // this calculation is an integer representation of > + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - > 0.125 * below2' > + // '4 +' is for rounding. > + dstp[i] = av_clip_uint8((4 + (srcp[i] << 2) > + + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1) > + - srcp_above2[i] - srcp_below2[i]) >> 3); > + } > +} > + > static const enum AVPixelFormat formats_supported[] = { > AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, > AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P, > @@ -116,7 +134,11 @@ static int config_out_props(AVFilterLink *outlink) > > > if (s->lowpass) { > - s->lowpass_line = lowpass_line_c; > + if (s->lowpass == 1) > + s->lowpass_line = lowpass_line_c; > + else if (s->lowpass == 2) > + s->lowpass_line_complex = lowpass_line_complex_c; > + > if (ARCH_X86) > ff_interlace_init_x86(s); > } > @@ -150,7 +172,7 @@ static void copy_picture_field(InterlaceContext *s, > srcp += src_frame->linesize[plane]; > dstp += dst_frame->linesize[plane]; > } > - if (lowpass) { > + if (lowpass == 1) { > int srcp_linesize = src_frame->linesize[plane] * 2; > int dstp_linesize = dst_frame->linesize[plane] * 2; > for (j = lines; j > 0; j--) { > @@ -164,6 +186,20 @@ static void copy_picture_field(InterlaceContext *s, > dstp += dstp_linesize; > srcp += srcp_linesize; > } > + } else if (lowpass == 2) { > + int srcp_linesize = src_frame->linesize[plane] * 2; > + int dstp_linesize = dst_frame->linesize[plane] * 2; > + for (j = lines; j > 0; j--) { > + int pref = src_frame->linesize[plane]; > + int mref = -pref; > + if (j >= (lines - 1)) > + mref = 0; > + else if (j <= 2) > + pref = 0; > + s->lowpass_line_complex(dstp, cols, srcp, mref, pref); > + dstp += dstp_linesize; > + srcp += srcp_linesize; > + } > } else { > av_image_copy_plane(dstp, dst_frame->linesize[plane] * 2, > srcp, src_frame->linesize[plane] * 2, > diff --git a/libavfilter/x86/vf_interlace.asm > b/libavfilter/x86/vf_interlace.asm > index f70c700..777a95f 100644 > --- a/libavfilter/x86/vf_interlace.asm > +++ b/libavfilter/x86/vf_interlace.asm > @@ -25,6 +25,8 @@ > > SECTION_RODATA > > +pw_4: times 8 dw 4 > + > SECTION .text > > %macro LOWPASS_LINE 0 > @@ -56,6 +58,63 @@ cglobal lowpass_line, 5, 5, 7 > add r1, 2*mmsize > jl .loop > REP_RET > + > +%endmacro > + > +%macro LOWPASS_LINE_COMPLEX 0 > +cglobal lowpass_line_complex, 3, 5, 7, dst, h, src, mref, pref Make this 5, 5, 7 and remove the two movs below. > + mov r3, mrefmp > + mov r4, prefmp > + > + pxor m6, m6 > +.loop: > + mova m0, [srcq+r3] [srcq+mrefq] > + mova m2, [srcq+r4] [srcq+prefq] > + mova m1, m0 > + mova m3, m2 > + punpcklbw m0, m6 > + punpcklbw m2, m6 > + punpckhbw m1, m6 > + punpckhbw m3, m6 > + paddw m0, m2 > + paddw m1, m3 > + mova m2, [srcq+r3*2] [srcq+mrefq*2] > + mova m4, [srcq+r4*2] [srcq+prefq*2] > + mova m3, m2 > + mova m5, m4 > + punpcklbw m2, m6 > + punpcklbw m4, m6 > + punpckhbw m3, m6 > + punpckhbw m5, m6 > + paddw m2, m4 > + paddw m3, m5 > + mova m4, [srcq] > + mova m5, m4 > + punpcklbw m4, m6 > + punpckhbw m5, m6 > + paddw m0, m4 > + paddw m1, m5 > + psllw m0, 1 > + psllw m1, 1 > + psllw m4, 2 > + psllw m5, 2 > + paddw m0, m4 > + paddw m1, m5 > + paddw m0, [pw_4] > + paddw m1, [pw_4] > + psubusw m0, m2 > + psubusw m1, m3 > + psrlw m0, 3 > + psrlw m1, 3 > + packuswb m0, m1 > + mova [dstq], m0 > + > + add dstq, mmsize > + add srcq, mmsize > + sub DWORD hm, mmsize sub hd, mmsize > + jg .loop > +REP_RET > + > %endmacro > > INIT_XMM sse2 > @@ -63,3 +122,9 @@ LOWPASS_LINE > > INIT_XMM avx > LOWPASS_LINE > + > +INIT_XMM sse2 > +LOWPASS_LINE_COMPLEX > + > +INIT_XMM avx > +LOWPASS_LINE_COMPLEX There's no reason to add an AVX version. You're not taking advantage of the 3 operand instruction format, or any of the new instructions, or the 32 byte regs. If you merge some of the movas with punpck* instructions and can notice a difference when benching, then AVX would make sense. > diff --git a/libavfilter/x86/vf_interlace_init.c > b/libavfilter/x86/vf_interlace_init.c > index 52a22f8..947ff9a 100644 > --- a/libavfilter/x86/vf_interlace_init.c > +++ b/libavfilter/x86/vf_interlace_init.c > @@ -35,12 +35,21 @@ void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t > linesize, > const uint8_t *srcp_above, > const uint8_t *srcp_below); > > +void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, > + const uint8_t *srcp, int mref, int pref); > +void ff_lowpass_line_complex_avx (uint8_t *dstp, ptrdiff_t linesize, > + const uint8_t *srcp, int mref, int pref); > + > av_cold void ff_interlace_init_x86(InterlaceContext *s) > { > int cpu_flags = av_get_cpu_flags(); > > - if (EXTERNAL_SSE2(cpu_flags)) > - s->lowpass_line = ff_lowpass_line_sse2; > - if (EXTERNAL_AVX(cpu_flags)) > - s->lowpass_line = ff_lowpass_line_avx; > + if (EXTERNAL_SSE2(cpu_flags)) { > + s->lowpass_line = ff_lowpass_line_sse2; > + s->lowpass_line_complex = ff_lowpass_line_complex_sse2; > + } > + if (EXTERNAL_AVX(cpu_flags)) { > + s->lowpass_line = ff_lowpass_line_avx; > + s->lowpass_line_complex = ff_lowpass_line_complex_avx; > + } > } > -- 2.7.4.windows.1 > > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel