Hi,

On Fri, Jul 27, 2012 at 5:04 PM, Diego Biurrun <di...@biurrun.de> wrote:
> On Fri, Jul 27, 2012 at 04:49:18PM -0700, Ronald S. Bultje wrote:
>> On Fri, Jul 27, 2012 at 4:45 PM, Diego Biurrun <di...@biurrun.de> wrote:
>> > On Fri, Jul 27, 2012 at 03:08:26PM -0700, Ronald S. Bultje wrote:
>> >>
>> >> --- a/libavcodec/x86/h264_deblock.asm
>> >> +++ b/libavcodec/x86/h264_deblock.asm
>> >> @@ -282,8 +282,8 @@ cextern pb_A1
>> >>  ;-----------------------------------------------------------------------------
>> >>  ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
>> >>  ;-----------------------------------------------------------------------------
>> >> -%macro DEBLOCK_LUMA 1
>> >> -cglobal deblock_v_luma_8_%1, 5,5,10
>> >> +%macro DEBLOCK_V_LUMA 0
>> >> +cglobal deblock_v_luma_8, 5,5,10
>> >>      movd    m8, [r4] ; tc0
>> >>      lea     r4, [r1*3]
>> >>      dec     r2d        ; alpha-1
>> >> @@ -323,12 +323,13 @@ cglobal deblock_v_luma_8_%1, 5,5,10
>> >>      mova    [r4+2*r1], m1
>> >>      mova    [r0], m2
>> >>      RET
>> >> +%endmacro
>> >>
>> >>  ;-----------------------------------------------------------------------------
>> >>  ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
>> >>  ;-----------------------------------------------------------------------------
>> >> -INIT_MMX
>> >> -cglobal deblock_h_luma_8_%1, 5,9
>> >> +%macro DEBLOCK_H_LUMA 0
>> >> +cglobal deblock_h_luma_8, 5,9
>> >>      movsxd r7,  r1d
>> >>      lea    r8,  [r7+r7*2]
>> >>      lea    r6,  [r0-4]
>> >> @@ -355,7 +356,7 @@ cglobal deblock_h_luma_8_%1, 5,9
>> >>  %if WIN64
>> >>      mov    [rsp+0x20], r4
>> >>  %endif
>> >> -    call   deblock_v_luma_8_%1
>> >> +    call   deblock_v_luma_8
>> >>
>> >>      ; transpose 16x4 -> original space  (only the middle 4 rows were changed by the filter)
>> >>      add    r6, 2
>> >> @@ -384,24 +385,29 @@ cglobal deblock_h_luma_8_%1, 5,9
>> >>      RET
>> >>  %endmacro
>> >>
>> >> -INIT_XMM
>> >> -DEBLOCK_LUMA sse2
>> >> -INIT_AVX
>> >> -DEBLOCK_LUMA avx
>> >> +INIT_XMM sse2
>> >> +DEBLOCK_V_LUMA
>> >> +INIT_MMX sse2
>> >> +DEBLOCK_H_LUMA
>> >> +
>> >> +INIT_XMM avx
>> >> +DEBLOCK_V_LUMA
>> >> +INIT_MMX avx
>> >> +DEBLOCK_H_LUMA
>> >
>> > I would suggest that you move the DEBLOCK_V_LUMA macro invocations
>> > directly below that macro.  This is what we do everywhere.  Not
>> > seeing the invocations directly below the definition is confusing.
>> > Same below for the parameterized variants of the macros.
>>
>> That actually has code cache implications.
>
> OK, patch fine with me then.  One last question: Why did you split the
> macros into H/V variants?

I didn't see Loren's INIT_MMX cpuname suggestion. I can revert that part.

Ronald
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to