Hi,

On 06/05/2011 05:29 PM, Loren Merritt wrote:

> On Sun, 5 Jun 2011, Justin Ruggles wrote:
>> On 06/05/2011 03:48 PM, Loren Merritt wrote:
>>
>>> Can you clip floats instead? sse1 has minps where it doesn't have pminsd.
>>> Alternately, can you use minps in the implementation of clip_int32?
>>
>> Are you also suggesting to convert/clip/convert for the int32 version?
>> I didn't really consider that. Do you think it would be faster than the
>> compare/mask method?
> Yes.
> 
> I was also going to suggest the fact that floats have the same ordering 
> properties as sign/magnitude ints, so you don't necessarily have to even 
> convert them. But denormals are slow, so that's a bad idea.

OK, I'll give it a try.

> commit f4c891fd9f2e48ea14be6b771e7dbd7417a35b3e
> Author: Loren Merritt <[email protected]>
> Date:   2011-06-05 21:23:51 +0000
> 
>     cosmetics
> 
> diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
> index d00a2b2..b1d4674 100644
> --- a/libavcodec/x86/dsputil_yasm.asm
> +++ b/libavcodec/x86/dsputil_yasm.asm
> @@ -1150,41 +1150,20 @@ VECTOR_CLIP_INT32 sse41
>  ; GCC generates similar but slower code. For some reason it wants to use
>  ; branching for the max value clipping instead of cmovg.
>  
> -%macro CLIPD_CMOV 3 ;  src/dst, min, max
> -    cmp    %1, %3
> -    cmovg  %1, %3
> -    cmp    %1, %2
> -    cmovl  %1, %2
> -%endmacro
> -
>  cglobal vector_clip_int32_cmov, 5,6,0, dst, src, min, max, len, tmp
>  .loop:
> -    mov         tmpd, [srcq]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov       [dstq], tmpd
> -    mov         tmpd, [srcq+4]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov     [dstq+4], tmpd
> -    mov         tmpd, [srcq+8]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov     [dstq+8], tmpd
> -    mov         tmpd, [srcq+12]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov    [dstq+12], tmpd
> -    mov         tmpd, [srcq+16]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov    [dstq+16], tmpd
> -    mov         tmpd, [srcq+20]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov    [dstq+20], tmpd
> -    mov         tmpd, [srcq+24]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov    [dstq+24], tmpd
> -    mov         tmpd, [srcq+28]
> -    CLIPD_CMOV  tmpd, mind, maxd
> -    mov    [dstq+28], tmpd
> -    add         srcq, 32
> -    add         dstq, 32
> -    sub         lenq, 8
> +%assign i 0
> +%rep 8
> +    mov     tmpd, [srcq+i]
> +    cmp     tmpd, maxd
> +    cmovg   tmpd, maxd
> +    cmp     tmpd, mind
> +    cmovl   tmpd, mind
> +    mov [dstq+i], tmpd
> +%assign i i+4
> +%endrep
> +    add     srcq, 32
> +    add     dstq, 32
> +    sub     lenq, 8
>      ja .loop
>      REP_RET


Ah, thanks. I forgot yasm macros can do cool things like that.

-Justin

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to