On Sun, 5 Jun 2011, Justin Ruggles wrote:
On 06/05/2011 03:48 PM, Loren Merritt wrote:

Can you clip floats instead? sse1 has minps where it doesn't have pminsd.
Alternately, can you use minps in the implementation of clip_int32?

Are you also suggesting to convert/clip/convert for the int32 version?
I didn't really consider that. Do you think it would be faster than the
compare/mask method?

Yes.

I was also going to point out that floats have the same ordering properties as sign/magnitude ints, so you don't necessarily even have to convert them. But denormals are slow, so that's a bad idea.

--Loren Merritt
commit f4c891fd9f2e48ea14be6b771e7dbd7417a35b3e
Author: Loren Merritt <[email protected]>
Date:   2011-06-05 21:23:51 +0000

    cosmetics

diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index d00a2b2..b1d4674 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1150,41 +1150,20 @@ VECTOR_CLIP_INT32 sse41
 ; GCC generates similar but slower code. For some reason it wants to use
 ; branching for the max value clipping instead of cmovg.
 
-%macro CLIPD_CMOV 3 ;  src/dst, min, max
-    cmp    %1, %3
-    cmovg  %1, %3
-    cmp    %1, %2
-    cmovl  %1, %2
-%endmacro
-
 cglobal vector_clip_int32_cmov, 5,6,0, dst, src, min, max, len, tmp
 .loop:
-    mov         tmpd, [srcq]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov       [dstq], tmpd
-    mov         tmpd, [srcq+4]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov     [dstq+4], tmpd
-    mov         tmpd, [srcq+8]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov     [dstq+8], tmpd
-    mov         tmpd, [srcq+12]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov    [dstq+12], tmpd
-    mov         tmpd, [srcq+16]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov    [dstq+16], tmpd
-    mov         tmpd, [srcq+20]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov    [dstq+20], tmpd
-    mov         tmpd, [srcq+24]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov    [dstq+24], tmpd
-    mov         tmpd, [srcq+28]
-    CLIPD_CMOV  tmpd, mind, maxd
-    mov    [dstq+28], tmpd
-    add         srcq, 32
-    add         dstq, 32
-    sub         lenq, 8
+%assign i 0
+%rep 8
+    mov     tmpd, [srcq+i]
+    cmp     tmpd, maxd
+    cmovg   tmpd, maxd
+    cmp     tmpd, mind
+    cmovl   tmpd, mind
+    mov [dstq+i], tmpd
+%assign i i+4
+%endrep
+    add     srcq, 32
+    add     dstq, 32
+    sub     lenq, 8
     ja .loop
     REP_RET
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to