This approach (handling the SSE1 case inside SBUTTERFLY itself) was discussed as an alternative to manipulating instructions directly.
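This version also fixes the case where %2 == %3: the high unpack into m%4 is now emitted before the low unpack into m%2, so m%3 is read before it is overwritten.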
---
libavutil/x86/x86util.asm | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 79a023f..e78352e 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -30,10 +30,18 @@
%include "libavutil/x86/x86inc.asm"
%macro SBUTTERFLY 4
-%if avx_enabled == 0
+%if notcpuflag(sse2) && mmsize == 16
+ %ifidn %1, dq
+ mova m%4, m%2
+ unpckhps m%4, m%3
+ unpcklps m%2, m%3
+ %else
+ %error Only dq unpack is supported by SBUTTERFLY on SSE1
+ %endif
+%elif avx_enabled == 0
mova m%4, m%2
- punpckl%1 m%2, m%3
punpckh%1 m%4, m%3
+ punpckl%1 m%2, m%3
%else
punpckh%1 m%4, m%2, m%3
punpckl%1 m%2, m%3
--
1.8.0.msysgit.0
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel