---
libavcodec/x86/dsputil_yasm.asm | 23 ++++++++++++-----------
1 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 5244362..d42d24b 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -457,25 +457,26 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
-cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
+INIT_XMM sse
+cglobal scalarproduct_float, 3,3,2, v1, v2, offset
neg offsetq
shl offsetq, 2
sub v1q, offsetq
sub v2q, offsetq
- xorps xmm0, xmm0
+ xorps m0, m0 ; m0 = 0: four-lane accumulator of partial sums
.loop:
- movaps xmm1, [v1q+offsetq]
- mulps xmm1, [v2q+offsetq]
- addps xmm0, xmm1
+ movaps m1, [v1q+offsetq] ; aligned 16-byte load from v1; offsetq is negative and counts up to 0
+ mulps m1, [v2q+offsetq] ; lane-wise product with the matching 4 floats of v2
+ addps m0, m1 ; accumulate the 4 products into m0
add offsetq, 16
js .loop
- movhlps xmm1, xmm0
- addps xmm0, xmm1
- movss xmm1, xmm0
- shufps xmm0, xmm0, 1
- addss xmm0, xmm1
+ movhlps m1, m0 ; m1[0..1] = m0[2..3] (upper two partial sums)
+ addps m0, m1 ; m0[0] = lane0+lane2, m0[1] = lane1+lane3
+ movss m1, m0 ; m1[0] = lane0+lane2
+ shufps m0, m0, 1 ; move lane 1 (lane1+lane3) down into m0[0]
+ addss m0, m1 ; m0[0] = sum of all four lanes = final scalar product
%ifndef ARCH_X86_64
- movd r0m, xmm0
+ movd r0m, m0 ; spill the result to r0m so the fld below can load it onto the x87 stack
fld dword r0m
%endif
RET
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel