--- libavcodec/x86/h264_intrapred_10bit.asm | 307 ++++++++++++++++--------------- 1 files changed, 158 insertions(+), 149 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm index 226583e..b98bf17 100644 --- a/libavcodec/x86/h264_intrapred_10bit.asm +++ b/libavcodec/x86/h264_intrapred_10bit.asm @@ -52,8 +52,8 @@ SECTION .text ;----------------------------------------------------------------------------- ; void pred4x4_down_right(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED4x4_DR 1 -cglobal pred4x4_down_right_10_%1, 3,3 +%macro PRED4x4_DR 0 +cglobal pred4x4_down_right_10, 3, 3 sub r0, r2 lea r1, [r0+r2*2] movhps m1, [r1-8] @@ -78,21 +78,22 @@ cglobal pred4x4_down_right_10_%1, 3,3 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED4x4_DR sse2 +PRED4x4_DR %define PALIGNR PALIGNR_SSSE3 -PRED4x4_DR ssse3 +INIT_XMM ssse3 +PRED4x4_DR %if HAVE_AVX -INIT_AVX -PRED4x4_DR avx +INIT_XMM avx +PRED4x4_DR %endif ;----------------------------------------------------------------------------- ; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED4x4_VR 1 -cglobal pred4x4_vertical_right_10_%1, 3,3,6 +%macro PRED4x4_VR 0 +cglobal pred4x4_vertical_right_10, 3, 3, 6 sub r0, r2 lea r1, [r0+r2*2] movq m5, [r0] ; ........t3t2t1t0 @@ -118,21 +119,22 @@ cglobal pred4x4_vertical_right_10_%1, 3,3,6 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED4x4_VR sse2 +PRED4x4_VR %define PALIGNR PALIGNR_SSSE3 -PRED4x4_VR ssse3 +INIT_XMM ssse3 +PRED4x4_VR %if HAVE_AVX -INIT_AVX -PRED4x4_VR avx +INIT_XMM avx +PRED4x4_VR %endif ;----------------------------------------------------------------------------- ; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED4x4_HD 1 -cglobal pred4x4_horizontal_down_10_%1, 3,3 +%macro 
PRED4x4_HD 0 +cglobal pred4x4_horizontal_down_10, 3, 3 sub r0, r2 lea r1, [r0+r2*2] movq m0, [r0-8] ; lt .. @@ -161,14 +163,15 @@ cglobal pred4x4_horizontal_down_10_%1, 3,3 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED4x4_HD sse2 +PRED4x4_HD %define PALIGNR PALIGNR_SSSE3 -PRED4x4_HD ssse3 +INIT_XMM ssse3 +PRED4x4_HD %if HAVE_AVX -INIT_AVX -PRED4x4_HD avx +INIT_XMM avx +PRED4x4_HD %endif ;----------------------------------------------------------------------------- @@ -191,8 +194,8 @@ PRED4x4_HD avx HADDD %1, %2 %endmacro -INIT_MMX -cglobal pred4x4_dc_10_mmxext, 3,3 +INIT_MMX mmxext +cglobal pred4x4_dc_10, 3, 3 sub r0, r2 lea r1, [r0+r2*2] movq m2, [r0+r2*1-8] @@ -215,8 +218,8 @@ cglobal pred4x4_dc_10_mmxext, 3,3 ;----------------------------------------------------------------------------- ; void pred4x4_down_left(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED4x4_DL 1 -cglobal pred4x4_down_left_10_%1, 3,3 +%macro PRED4x4_DL 0 +cglobal pred4x4_down_left_10, 3, 3 sub r0, r2 movq m0, [r0] movhps m0, [r1] @@ -235,18 +238,18 @@ cglobal pred4x4_down_left_10_%1, 3,3 RET %endmacro -INIT_XMM -PRED4x4_DL sse2 +INIT_XMM sse2 +PRED4x4_DL %if HAVE_AVX -INIT_AVX -PRED4x4_DL avx +INIT_XMM avx +PRED4x4_DL %endif ;----------------------------------------------------------------------------- ; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED4x4_VL 1 -cglobal pred4x4_vertical_left_10_%1, 3,3 +%macro PRED4x4_VL 0 +cglobal pred4x4_vertical_left_10, 3, 3 sub r0, r2 movu m1, [r0] movhps m1, [r1] @@ -264,18 +268,18 @@ cglobal pred4x4_vertical_left_10_%1, 3,3 RET %endmacro -INIT_XMM -PRED4x4_VL sse2 +INIT_XMM sse2 +PRED4x4_VL %if HAVE_AVX -INIT_AVX -PRED4x4_VL avx +INIT_XMM avx +PRED4x4_VL %endif 
;----------------------------------------------------------------------------- ; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -INIT_MMX -cglobal pred4x4_horizontal_up_10_mmxext, 3,3 +INIT_MMX mmxext +cglobal pred4x4_horizontal_up_10, 3, 3 sub r0, r2 lea r1, [r0+r2*2] movq m0, [r0+r2*1-8] @@ -308,8 +312,8 @@ cglobal pred4x4_horizontal_up_10_mmxext, 3,3 ;----------------------------------------------------------------------------- ; void pred8x8_vertical(pixel *src, int stride) ;----------------------------------------------------------------------------- -INIT_XMM -cglobal pred8x8_vertical_10_sse2, 2,2 +INIT_XMM sse2 +cglobal pred8x8_vertical_10, 2, 2 sub r0, r1 mova m0, [r0] %rep 3 @@ -324,8 +328,8 @@ cglobal pred8x8_vertical_10_sse2, 2,2 ;----------------------------------------------------------------------------- ; void pred8x8_horizontal(pixel *src, int stride) ;----------------------------------------------------------------------------- -INIT_XMM -cglobal pred8x8_horizontal_10_sse2, 2,3 +INIT_XMM sse2 +cglobal pred8x8_horizontal_10, 2, 3 mov r2d, 4 .loop: movq m0, [r0+r1*0-8] @@ -354,8 +358,8 @@ cglobal pred8x8_horizontal_10_sse2, 2,3 %endif %endmacro -%macro PRED8x8_DC 2 -cglobal pred8x8_dc_10_%1, 2,6 +%macro PRED8x8_DC 1 +cglobal pred8x8_dc_10, 2, 6 sub r0, r1 pxor m4, m4 movq m0, [r0+0] @@ -371,7 +375,7 @@ cglobal pred8x8_dc_10_%1, 2,6 paddw m1, m3 punpcklwd m0, m1 %endif - %2 m2, m0, 00001110b + %1 m2, m0, 00001110b paddw m0, m2 lea r5, [r1*3] @@ -396,8 +400,8 @@ cglobal pred8x8_dc_10_%1, 2,6 punpcklwd m2, m3 punpckldq m0, m2 ; s0, s1, s2, s3 - %2 m3, m0, 11110110b ; s2, s1, s3, s3 - %2 m0, m0, 01110100b ; s0, s1, s3, s1 + %1 m3, m0, 11110110b ; s2, s1, s3, s3 + %1 m0, m0, 01110100b ; s0, s1, s3, s1 paddw m0, m3 psrlw m0, 2 pavgw m0, m4 ; s0+s2, s1, s3, s1+s3 @@ -423,16 +427,16 @@ cglobal pred8x8_dc_10_%1, 2,6 RET %endmacro -INIT_MMX -PRED8x8_DC 
mmxext, pshufw -INIT_XMM -PRED8x8_DC sse2 , pshuflw +INIT_MMX mmxext +PRED8x8_DC pshufw +INIT_XMM sse2 +PRED8x8_DC pshuflw ;----------------------------------------------------------------------------- ; void pred8x8_top_dc(pixel *src, int stride) ;----------------------------------------------------------------------------- -INIT_XMM -cglobal pred8x8_top_dc_10_sse2, 2,4 +INIT_XMM sse2 +cglobal pred8x8_top_dc_10, 2, 4 sub r0, r1 mova m0, [r0] pshuflw m1, m0, 0x4e @@ -458,8 +462,8 @@ cglobal pred8x8_top_dc_10_sse2, 2,4 ;----------------------------------------------------------------------------- ; void pred8x8_plane(pixel *src, int stride) ;----------------------------------------------------------------------------- -INIT_XMM -cglobal pred8x8_plane_10_sse2, 2,7,7 +INIT_XMM sse2 +cglobal pred8x8_plane_10, 2, 7, 7 sub r0, r1 lea r2, [r1*3] lea r3, [r0+r1*4] @@ -521,8 +525,8 @@ cglobal pred8x8_plane_10_sse2, 2,7,7 ;----------------------------------------------------------------------------- ; void pred8x8l_128_dc(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_128_DC 1 -cglobal pred8x8l_128_dc_10_%1, 4,4 +%macro PRED8x8L_128_DC 0 +cglobal pred8x8l_128_dc_10, 4, 4 mova m0, [pw_512] ; (1<<(BIT_DEPTH-1)) lea r1, [r3*3] lea r2, [r0+r3*4] @@ -537,16 +541,16 @@ cglobal pred8x8l_128_dc_10_%1, 4,4 RET %endmacro -INIT_MMX -PRED8x8L_128_DC mmxext -INIT_XMM -PRED8x8L_128_DC sse2 +INIT_MMX mmxext +PRED8x8L_128_DC +INIT_XMM sse2 +PRED8x8L_128_DC ;----------------------------------------------------------------------------- ; void pred8x8l_top_dc(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_TOP_DC 1 -cglobal pred8x8l_top_dc_10_%1, 4,4,6 +%macro PRED8x8L_TOP_DC 0 +cglobal pred8x8l_top_dc_10, 4, 4, 6 sub r0, r3 mova m0, [r0] shr r1d, 14 @@ -574,19 +578,19 @@ cglobal 
pred8x8l_top_dc_10_%1, 4,4,6 RET %endmacro -INIT_XMM -PRED8x8L_TOP_DC sse2 +INIT_XMM sse2 +PRED8x8L_TOP_DC %if HAVE_AVX -INIT_AVX -PRED8x8L_TOP_DC avx +INIT_XMM avx +PRED8x8L_TOP_DC %endif ;----------------------------------------------------------------------------- ;void pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- ;TODO: see if scalar is faster -%macro PRED8x8L_DC 1 -cglobal pred8x8l_dc_10_%1, 4,6,6 +%macro PRED8x8L_DC 0 +cglobal pred8x8l_dc_10, 4, 6, 6 sub r0, r3 lea r4, [r0+r3*4] lea r5, [r3*3] @@ -633,18 +637,18 @@ cglobal pred8x8l_dc_10_%1, 4,6,6 RET %endmacro -INIT_XMM -PRED8x8L_DC sse2 +INIT_XMM sse2 +PRED8x8L_DC %if HAVE_AVX -INIT_AVX -PRED8x8L_DC avx +INIT_XMM avx +PRED8x8L_DC %endif ;----------------------------------------------------------------------------- ; void pred8x8l_vertical(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_VERTICAL 1 -cglobal pred8x8l_vertical_10_%1, 4,4,6 +%macro PRED8x8L_VERTICAL 0 +cglobal pred8x8l_vertical_10, 4, 4, 6 sub r0, r3 mova m0, [r0] shr r1d, 14 @@ -668,18 +672,18 @@ cglobal pred8x8l_vertical_10_%1, 4,4,6 RET %endmacro -INIT_XMM -PRED8x8L_VERTICAL sse2 +INIT_XMM sse2 +PRED8x8L_VERTICAL %if HAVE_AVX -INIT_AVX -PRED8x8L_VERTICAL avx +INIT_XMM avx +PRED8x8L_VERTICAL %endif ;----------------------------------------------------------------------------- ; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_HORIZONTAL 1 -cglobal pred8x8l_horizontal_10_%1, 4,4,5 +%macro PRED8x8L_HORIZONTAL 0 +cglobal pred8x8l_horizontal_10, 4, 4, 5 mova m0, [r0-16] shr r1d, 14 dec r1 @@ -722,21 +726,22 @@ cglobal pred8x8l_horizontal_10_%1, 4,4,5 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR 
PALIGNR_MMX -PRED8x8L_HORIZONTAL sse2 +PRED8x8L_HORIZONTAL +INIT_XMM ssse3 %define PALIGNR PALIGNR_SSSE3 -PRED8x8L_HORIZONTAL ssse3 +PRED8x8L_HORIZONTAL %if HAVE_AVX -INIT_AVX -PRED8x8L_HORIZONTAL avx +INIT_XMM avx +PRED8x8L_HORIZONTAL %endif ;----------------------------------------------------------------------------- ;void pred8x8l_down_left(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_DOWN_LEFT 1 -cglobal pred8x8l_down_left_10_%1, 4,4,7 +%macro PRED8x8L_DOWN_LEFT 0 +cglobal pred8x8l_down_left_10, 4, 4, 7 sub r0, r3 mova m3, [r0] shr r1d, 14 @@ -791,23 +796,24 @@ cglobal pred8x8l_down_left_10_%1, 4,4,7 jmp .do_topright %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED8x8L_DOWN_LEFT sse2 +PRED8x8L_DOWN_LEFT +INIT_XMM ssse3 %define PALIGNR PALIGNR_SSSE3 -PRED8x8L_DOWN_LEFT ssse3 +PRED8x8L_DOWN_LEFT %if HAVE_AVX -INIT_AVX -PRED8x8L_DOWN_LEFT avx +INIT_XMM avx +PRED8x8L_DOWN_LEFT %endif ;----------------------------------------------------------------------------- ;void pred8x8l_down_right(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_DOWN_RIGHT 1 +%macro PRED8x8L_DOWN_RIGHT 0 ; standard forbids this when has_topleft is false ; no need to check -cglobal pred8x8l_down_right_10_%1, 4,5,8 +cglobal pred8x8l_down_right_10, 4, 5, 8 sub r0, r3 lea r4, [r0+r3*4] lea r1, [r3*3] @@ -866,22 +872,23 @@ cglobal pred8x8l_down_right_10_%1, 4,5,8 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED8x8L_DOWN_RIGHT sse2 +PRED8x8L_DOWN_RIGHT +INIT_XMM ssse3 %define PALIGNR PALIGNR_SSSE3 -PRED8x8L_DOWN_RIGHT ssse3 +PRED8x8L_DOWN_RIGHT %if HAVE_AVX -INIT_AVX -PRED8x8L_DOWN_RIGHT avx +INIT_XMM avx +PRED8x8L_DOWN_RIGHT %endif ;----------------------------------------------------------------------------- ; void 
pred8x8l_vertical_right(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_VERTICAL_RIGHT 1 +%macro PRED8x8L_VERTICAL_RIGHT 0 ; likewise with 8x8l_down_right -cglobal pred8x8l_vertical_right_10_%1, 4,5,7 +cglobal pred8x8l_vertical_right_10, 4, 5, 7 sub r0, r3 lea r4, [r0+r3*4] lea r1, [r3*3] @@ -937,21 +944,22 @@ cglobal pred8x8l_vertical_right_10_%1, 4,5,7 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED8x8L_VERTICAL_RIGHT sse2 +PRED8x8L_VERTICAL_RIGHT +INIT_XMM ssse3 %define PALIGNR PALIGNR_SSSE3 -PRED8x8L_VERTICAL_RIGHT ssse3 +PRED8x8L_VERTICAL_RIGHT %if HAVE_AVX -INIT_AVX -PRED8x8L_VERTICAL_RIGHT avx +INIT_XMM avx +PRED8x8L_VERTICAL_RIGHT %endif ;----------------------------------------------------------------------------- ; void pred8x8l_horizontal_up(pixel *src, int has_topleft, int has_topright, int stride) ;----------------------------------------------------------------------------- -%macro PRED8x8L_HORIZONTAL_UP 1 -cglobal pred8x8l_horizontal_up_10_%1, 4,4,6 +%macro PRED8x8L_HORIZONTAL_UP 0 +cglobal pred8x8l_horizontal_up_10, 4, 4, 6 mova m0, [r0+r3*0-16] punpckhwd m0, [r0+r3*1-16] shr r1d, 14 @@ -999,14 +1007,15 @@ cglobal pred8x8l_horizontal_up_10_%1, 4,4,6 RET %endmacro -INIT_XMM +INIT_XMM sse2 %define PALIGNR PALIGNR_MMX -PRED8x8L_HORIZONTAL_UP sse2 +PRED8x8L_HORIZONTAL_UP +INIT_XMM ssse3 %define PALIGNR PALIGNR_SSSE3 -PRED8x8L_HORIZONTAL_UP ssse3 +PRED8x8L_HORIZONTAL_UP %if HAVE_AVX -INIT_AVX -PRED8x8L_HORIZONTAL_UP avx +INIT_XMM avx +PRED8x8L_HORIZONTAL_UP %endif @@ -1022,8 +1031,8 @@ PRED8x8L_HORIZONTAL_UP avx %endif %endmacro -%macro PRED16x16_VERTICAL 1 -cglobal pred16x16_vertical_10_%1, 2,3 +%macro PRED16x16_VERTICAL 0 +cglobal pred16x16_vertical_10, 2, 3 sub r0, r1 mov r2d, 8 mova m0, [r0+ 0] @@ -1041,16 +1050,16 @@ cglobal pred16x16_vertical_10_%1, 2,3 REP_RET %endmacro -INIT_MMX -PRED16x16_VERTICAL mmxext -INIT_XMM 
-PRED16x16_VERTICAL sse2 +INIT_MMX mmxext +PRED16x16_VERTICAL +INIT_XMM sse2 +PRED16x16_VERTICAL ;----------------------------------------------------------------------------- ; void pred16x16_horizontal(pixel *src, int stride) ;----------------------------------------------------------------------------- -%macro PRED16x16_HORIZONTAL 1 -cglobal pred16x16_horizontal_10_%1, 2,3 +%macro PRED16x16_HORIZONTAL 0 +cglobal pred16x16_horizontal_10, 2, 3 mov r2d, 8 .vloop: movd m0, [r0+r1*0-4] @@ -1065,16 +1074,16 @@ cglobal pred16x16_horizontal_10_%1, 2,3 REP_RET %endmacro -INIT_MMX -PRED16x16_HORIZONTAL mmxext -INIT_XMM -PRED16x16_HORIZONTAL sse2 +INIT_MMX mmxext +PRED16x16_HORIZONTAL +INIT_XMM sse2 +PRED16x16_HORIZONTAL ;----------------------------------------------------------------------------- ; void pred16x16_dc(pixel *src, int stride) ;----------------------------------------------------------------------------- -%macro PRED16x16_DC 1 -cglobal pred16x16_dc_10_%1, 2,6 +%macro PRED16x16_DC 0 +cglobal pred16x16_dc_10, 2, 6 mov r5, r0 sub r0, r1 mova m0, [r0+0] @@ -1111,16 +1120,16 @@ cglobal pred16x16_dc_10_%1, 2,6 REP_RET %endmacro -INIT_MMX -PRED16x16_DC mmxext -INIT_XMM -PRED16x16_DC sse2 +INIT_MMX mmxext +PRED16x16_DC +INIT_XMM sse2 +PRED16x16_DC ;----------------------------------------------------------------------------- ; void pred16x16_top_dc(pixel *src, int stride) ;----------------------------------------------------------------------------- -%macro PRED16x16_TOP_DC 1 -cglobal pred16x16_top_dc_10_%1, 2,3 +%macro PRED16x16_TOP_DC 0 +cglobal pred16x16_top_dc_10, 2, 3 sub r0, r1 mova m0, [r0+0] paddw m0, [r0+mmsize] @@ -1143,16 +1152,16 @@ cglobal pred16x16_top_dc_10_%1, 2,3 REP_RET %endmacro -INIT_MMX -PRED16x16_TOP_DC mmxext -INIT_XMM -PRED16x16_TOP_DC sse2 +INIT_MMX mmxext +PRED16x16_TOP_DC +INIT_XMM sse2 +PRED16x16_TOP_DC ;----------------------------------------------------------------------------- ; void pred16x16_left_dc(pixel *src, int stride) 
;----------------------------------------------------------------------------- -%macro PRED16x16_LEFT_DC 1 -cglobal pred16x16_left_dc_10_%1, 2,6 +%macro PRED16x16_LEFT_DC 0 +cglobal pred16x16_left_dc_10, 2, 6 mov r5, r0 sub r0, 2 @@ -1180,16 +1189,16 @@ cglobal pred16x16_left_dc_10_%1, 2,6 REP_RET %endmacro -INIT_MMX -PRED16x16_LEFT_DC mmxext -INIT_XMM -PRED16x16_LEFT_DC sse2 +INIT_MMX mmxext +PRED16x16_LEFT_DC +INIT_XMM sse2 +PRED16x16_LEFT_DC ;----------------------------------------------------------------------------- ; void pred16x16_128_dc(pixel *src, int stride) ;----------------------------------------------------------------------------- -%macro PRED16x16_128_DC 1 -cglobal pred16x16_128_dc_10_%1, 2,3 +%macro PRED16x16_128_DC 0 +cglobal pred16x16_128_dc_10, 2, 3 mova m0, [pw_512] mov r2d, 8 .loop: @@ -1201,7 +1210,7 @@ cglobal pred16x16_128_dc_10_%1, 2,3 REP_RET %endmacro -INIT_MMX -PRED16x16_128_DC mmxext -INIT_XMM -PRED16x16_128_DC sse2 +INIT_MMX mmxext +PRED16x16_128_DC +INIT_XMM sse2 +PRED16x16_128_DC -- 1.7.2.5 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel