ffmpeg | branch: master | Michael Niedermayer <michae...@gmx.at> | Sat Nov 15 04:07:08 2014 +0100| [ca5c3ff90972a5c97aabda2ace57ba72dcd7d83b] | committer: Vittorio Giovara
vf_interlace: x86: improve asm performance

4775 decicycles -> 3688 decicycles

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca5c3ff90972a5c97aabda2ace57ba72dcd7d83b
---

 libavfilter/x86/vf_interlace.asm | 46 ++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 8c2e9b0..b8d8616 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -2,6 +2,7 @@
 ;* x86-optimized functions for interlace filter
 ;*
 ;* Copyright (C) 2014 Kieran Kunhya <kier...@obe.tv>
+;* Copyright (c) 2014 Michael Niedermayer <michae...@gmx.at>
 ;*
 ;* This file is part of Libav.
 ;*
@@ -34,36 +35,27 @@ cglobal lowpass_line, 5, 5, 7
     add r4, r1
     neg r1
 
-    pxor m6, m6
+    pcmpeqb m6, m6
 
 .loop
-    mova m0, [r2+r1]
-    punpcklbw m1, m0, m6
-    punpckhbw m0, m6
-    paddw m0, m0
-    paddw m1, m1
+    mova m0, [r3+r1]
+    mova m1, [r3+r1+mmsize]
+    pavgb m0, [r4+r1]
+    pavgb m1, [r4+r1+mmsize]
+    mova m2, [r2+r1]
+    mova m3, [r2+r1+mmsize]
+    pxor m0, m6
+    pxor m1, m6
+    pxor m2, m6, [r2+r1]
+    pxor m3, m6, [r2+r1+mmsize]
+    pavgb m0, m2
+    pavgb m1, m3
+    pxor m0, m6
+    pxor m1, m6
+    mova [r0+r1], m0
+    mova [r0+r1+mmsize], m1
 
-    mova m2, [r3+r1]
-    punpcklbw m3, m2, m6
-    punpckhbw m2, m6
-
-    mova m4, [r4+r1]
-    punpcklbw m5, m4, m6
-    punpckhbw m4, m6
-
-    paddw m1, m3
-    pavgw m1, m5
-
-    paddw m0, m2
-    pavgw m0, m4
-
-    psrlw m0, 1
-    psrlw m1, 1
-
-    packuswb m1, m0
-    mova [r0+r1], m1
-
-    add r1, mmsize
+    add r1, 2*mmsize
     jl .loop
     REP_RET
 %endmacro

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog