ffmpeg | branch: master | Clément Bœsch <[email protected]> | Mon Mar 20 18:38:07 2017 +0100| [3898e346b33515897d6ea83369f39e9d10a419bb] | committer: Clément Bœsch
Merge commit '07e1f99a1bb41d1a615676140eefc85cf69fa793'

* commit '07e1f99a1bb41d1a615676140eefc85cf69fa793':
  x86util: Document SBUTTERFLY macro

Merged-by: Clément Bœsch <[email protected]>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3898e346b33515897d6ea83369f39e9d10a419bb
---

 libavutil/x86/x86util.asm | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c50ddc6..de7d2d1 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -34,6 +34,16 @@
     [base], [base + stride], [base + 2*stride], [base3], \
     [base3 + stride], [base3 + 2*stride], [base3 + stride3], [base3 + stride*4]

+; Interleave low src0 with low src1 and store in src0,
+; interleave high src0 with high src1 and store in src1.
+; %1 - types
+; %2 - index of the register with src0
+; %3 - index of the register with src1
+; %4 - index of the register for intermediate results
+; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
 %macro SBUTTERFLY 4
 %ifidn %1, dqqq
     vperm2i128 m%4, m%2, m%3, q0301

======================================================================

diff --cc libavutil/x86/x86util.asm
index c50ddc6,bba958e..de7d2d1
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@@ -29,16 -29,18 +29,26 @@@
  %include "libavutil/x86/x86inc.asm"

 +; expands to [base],...,[base+7*stride]
 +%define PASS8ROWS(base, base3, stride, stride3) \
 +    [base], [base + stride], [base + 2*stride], [base3], \
 +    [base3 + stride], [base3 + 2*stride], [base3 + stride3], [base3 + stride*4]
 +
+ ; Interleave low src0 with low src1 and store in src0,
+ ; interleave high src0 with high src1 and store in src1.
+ ; %1 - types
+ ; %2 - index of the register with src0
+ ; %3 - index of the register with src1
+ ; %4 - index of the register for intermediate results
+ ; example for %1 - wd: input: src0: x0 x1 x2 x3 z0 z1 z2 z3
+ ;                             src1: y0 y1 y2 y3 q0 q1 q2 q3
+ ;                     output: src0: x0 y0 x1 y1 x2 y2 x3 y3
+ ;                             src1: z0 q0 z1 q1 z2 q2 z3 q3
  %macro SBUTTERFLY 4
 -%if avx_enabled == 0
 +%ifidn %1, dqqq
 +    vperm2i128 m%4, m%2, m%3, q0301
 +    vinserti128 m%2, m%2, xm%3, 1
 +%elif avx_enabled == 0
      mova m%4, m%2
      punpckl%1 m%2, m%3
      punpckh%1 m%4, m%3

_______________________________________________
ffmpeg-cvslog mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
