[libav-devel] [PATCH 2/2] fmtconvert: add AVX version of float_interleave2()

Justin Ruggles Wed, 09 Nov 2011 11:23:20 -0800

---
vzeroupper doesn't need to be called until the end.

 libavcodec/fmtconvert.h         |    4 ++--
 libavcodec/x86/fmtconvert.asm   |   16 ++++++++++++++++
 libavcodec/x86/fmtconvert_mmx.c |   15 +++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)


diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index 1b53401..c488180 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -75,9 +75,9 @@ typedef struct FmtConvertContext {
      * @param dst destination array of interleaved float.
      *            constraints: 16-byte aligned
      * @param src source array of float arrays, one for each channel.
-     *            constraints: 16-byte aligned
+     *            constraints: 32-byte aligned
      * @param len number of elements to convert.
-     *            constraints: multiple of 8
+     *            constraints: multiple of 16
      * @param channels number of channels
      */
     void (*float_interleave)(float *dst, const float **src, unsigned int len,
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index f69cd7c..11cd138 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -342,10 +342,21 @@ cglobal float_interleave2, 3,4,5, dst, src, len, src1
     PUNPCKHDQ  m1, m3, m4
     PUNPCKLDQ  m3, m3, m4
 
+%if cpuflag(avx)
+    vextractf128 [dstq    ], m0, 0
+    vextractf128 [dstq+ 16], m2, 0
+    vextractf128 [dstq+ 32], m0, 1
+    vextractf128 [dstq+ 48], m2, 1
+    vextractf128 [dstq+ 64], m3, 0
+    vextractf128 [dstq+ 80], m1, 0
+    vextractf128 [dstq+ 96], m3, 1
+    vextractf128 [dstq+112], m1, 1
+%else
     mova  [dstq         ], m0
     mova  [dstq+1*mmsize], m2
     mova  [dstq+2*mmsize], m3
     mova  [dstq+3*mmsize], m1
+%endif
 
     add      srcq, mmsize*2
     add      dstq, mmsize*4
@@ -354,6 +365,9 @@ cglobal float_interleave2, 3,4,5, dst, src, len, src1
 %if mmsize == 8
     emms
 %endif
+%if mmsize == 32
+    vzeroupper
+%endif
     REP_RET
 %endmacro
 
@@ -365,3 +379,5 @@ INIT_XMM sse
 %define PUNPCKLDQ unpcklps
 %define PUNPCKHDQ unpckhps
 FLOAT_INTERLEAVE2
+INIT_YMM avx
+FLOAT_INTERLEAVE2
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 78cca9c..26c86d1 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -81,6 +81,7 @@ static void float_to_int16_interleave_3dnow2(int16_t *dst, const float **src, lo
 
 void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
 void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
+void ff_float_interleave2_avx(float *dst, const float **src, unsigned int len);
 
 void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
 void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
@@ -106,6 +107,17 @@ static void float_interleave_sse(float *dst, const float **src,
     else
         ff_float_interleave_c(dst, src, len, channels);
 }
+
+static void float_interleave_avx(float *dst, const float **src,
+                                 unsigned int len, int channels)
+{ /* select an interleave routine based on the channel count */
+    if (channels == 2) {
+        ff_float_interleave2_avx(dst, src, len); /* 2 channels: AVX routine */
+    } else if (channels == 6)
+        ff_float_interleave6_sse(dst, src, len); /* 6 channels: the SSE routine is reused here */
+    else
+        ff_float_interleave_c(dst, src, len, channels); /* any other count: generic routine, which also takes the channel count */
+}
 #endif
 
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
@@ -138,6 +150,9 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
             c->float_to_int16 = ff_float_to_int16_sse2;
             c->float_to_int16_interleave = float_to_int16_interleave_sse2;
         }
+        if (HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) {
+            c->float_interleave = float_interleave_avx;
+        }
     }
 #endif
 }
-- 
1.7.1

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/2] fmtconvert: add AVX version of float_interleave2()

Reply via email to