On Sun, 5 Jun 2011, Justin Ruggles wrote:
> On 06/05/2011 03:48 PM, Loren Merritt wrote:
>> Can you clip floats instead? sse1 has minps where it doesn't have pminsd.
>> Alternately, can you use minps in the implementation of clip_int32?
> Are you also suggesting to convert/clip/convert for the int32 version?
> I didn't really consider that. Do you think it would be faster than the
> compare/mask method?
Yes.
I was also going to point out that floats have the same ordering
properties as sign/magnitude ints, so you don't necessarily even have to
convert them. But denormals are slow, so that's a bad idea.
--Loren Merritt
commit f4c891fd9f2e48ea14be6b771e7dbd7417a35b3e
Author: Loren Merritt <[email protected]>
Date: 2011-06-05 21:23:51 +0000
cosmetics
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index d00a2b2..b1d4674 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1150,41 +1150,20 @@ VECTOR_CLIP_INT32 sse41
; GCC generates similar but slower code. For some reason it wants to use
; branching for the max value clipping instead of cmovg.
-%macro CLIPD_CMOV 3 ; src/dst, min, max
- cmp %1, %3
- cmovg %1, %3
- cmp %1, %2
- cmovl %1, %2
-%endmacro
-
cglobal vector_clip_int32_cmov, 5,6,0, dst, src, min, max, len, tmp
.loop:
- mov tmpd, [srcq]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq], tmpd
- mov tmpd, [srcq+4]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+4], tmpd
- mov tmpd, [srcq+8]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+8], tmpd
- mov tmpd, [srcq+12]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+12], tmpd
- mov tmpd, [srcq+16]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+16], tmpd
- mov tmpd, [srcq+20]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+20], tmpd
- mov tmpd, [srcq+24]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+24], tmpd
- mov tmpd, [srcq+28]
- CLIPD_CMOV tmpd, mind, maxd
- mov [dstq+28], tmpd
- add srcq, 32
- add dstq, 32
- sub lenq, 8
+%assign i 0
+%rep 8
+ mov tmpd, [srcq+i]
+ cmp tmpd, maxd
+ cmovg tmpd, maxd
+ cmp tmpd, mind
+ cmovl tmpd, mind
+ mov [dstq+i], tmpd
+%assign i i+4
+%endrep
+ add srcq, 32
+ add dstq, 32
+ sub lenq, 8
ja .loop
REP_RET
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel