From: Daniel Kang <daniel.d.k...@gmail.com> The only CPUs that support 3dnow but lack mmxext are 12 years old.
Moreover, AMD has deprecated 3dnow. --- libavcodec/x86/dsputil_mmx.c | 142 +---------------------------- libavcodec/x86/dsputil_mmx_avg_template.c | 8 +- libavcodec/x86/h264_qpel_mmx.c | 4 - 3 files changed, 8 insertions(+), 146 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 93f9db8..cfea906 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -198,12 +198,14 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define DEF(x) x ## _3dnow #define PAVGB "pavgusb" #define OP_AVG PAVGB +#define SKIP_FOR_3DNOW #include "dsputil_mmx_avg_template.c" #undef DEF #undef PAVGB #undef OP_AVG +#undef SKIP_FOR_3DNOW /***********************************/ /* MMX2 specific */ @@ -1052,73 +1054,6 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ ); \ } \ \ -static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int srcStride, \ - int h) \ -{ \ - int i; \ - int16_t temp[16]; \ - /* quick HACK, XXX FIXME MUST be optimized */ \ - for (i = 0; i < h; i++) { \ - temp[ 0] = (src[ 0] + src[ 1]) * 20 - (src[ 0] + src[ 2]) * 6 + \ - (src[ 1] + src[ 3]) * 3 - (src[ 2] + src[ 4]); \ - temp[ 1] = (src[ 1] + src[ 2]) * 20 - (src[ 0] + src[ 3]) * 6 + \ - (src[ 0] + src[ 4]) * 3 - (src[ 1] + src[ 5]); \ - temp[ 2] = (src[ 2] + src[ 3]) * 20 - (src[ 1] + src[ 4]) * 6 + \ - (src[ 0] + src[ 5]) * 3 - (src[ 0] + src[ 6]); \ - temp[ 3] = (src[ 3] + src[ 4]) * 20 - (src[ 2] + src[ 5]) * 6 + \ - (src[ 1] + src[ 6]) * 3 - (src[ 0] + src[ 7]); \ - temp[ 4] = (src[ 4] + src[ 5]) * 20 - (src[ 3] + src[ 6]) * 6 + \ - (src[ 2] + src[ 7]) * 3 - (src[ 1] + src[ 8]); \ - temp[ 5] = (src[ 5] + src[ 6]) * 20 - (src[ 4] + src[ 7]) * 6 + \ - (src[ 3] + src[ 8]) * 3 - (src[ 2] + src[ 9]); \ - temp[ 6] = (src[ 6] + src[ 7]) * 20 - (src[ 5] + src[ 8]) * 6 + \ - (src[ 4] + src[ 9]) * 3 - (src[ 3] + src[10]); \ - temp[ 7] = (src[ 7] + src[ 8]) * 20 - (src[ 6] + src[ 9]) * 
6 + \ - (src[ 5] + src[10]) * 3 - (src[ 4] + src[11]); \ - temp[ 8] = (src[ 8] + src[ 9]) * 20 - (src[ 7] + src[10]) * 6 + \ - (src[ 6] + src[11]) * 3 - (src[ 5] + src[12]); \ - temp[ 9] = (src[ 9] + src[10]) * 20 - (src[ 8] + src[11]) * 6 + \ - (src[ 7] + src[12]) * 3 - (src[ 6] + src[13]); \ - temp[10] = (src[10] + src[11]) * 20 - (src[ 9] + src[12]) * 6 + \ - (src[ 8] + src[13]) * 3 - (src[ 7] + src[14]); \ - temp[11] = (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + \ - (src[ 9] + src[14]) * 3 - (src[ 8] + src[15]); \ - temp[12] = (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + \ - (src[10] + src[15]) * 3 - (src[ 9] + src[16]); \ - temp[13] = (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + \ - (src[11] + src[16]) * 3 - (src[10] + src[16]); \ - temp[14] = (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + \ - (src[12] + src[16]) * 3 - (src[11] + src[15]); \ - temp[15] = (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + \ - (src[13] + src[15]) * 3 - (src[12] + src[14]); \ - __asm__ volatile ( \ - "movq (%0), %%mm0 \n\t" \ - "movq 8(%0), %%mm1 \n\t" \ - "paddw %2, %%mm0 \n\t" \ - "paddw %2, %%mm1 \n\t" \ - "psraw $5, %%mm0 \n\t" \ - "psraw $5, %%mm1 \n\t" \ - "packuswb %%mm1, %%mm0 \n\t" \ - OP_3DNOW(%%mm0, (%1), %%mm1, q) \ - "movq 16(%0), %%mm0 \n\t" \ - "movq 24(%0), %%mm1 \n\t" \ - "paddw %2, %%mm0 \n\t" \ - "paddw %2, %%mm1 \n\t" \ - "psraw $5, %%mm0 \n\t" \ - "psraw $5, %%mm1 \n\t" \ - "packuswb %%mm1, %%mm0 \n\t" \ - OP_3DNOW(%%mm0, 8(%1), %%mm1, q) \ - :: "r"(temp), "r"(dst), "m"(ROUNDER) \ - : "memory" \ - ); \ - dst += dstStride; \ - src += srcStride; \ - } \ -} \ - \ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ uint8_t *src, \ int dstStride, \ @@ -1187,49 +1122,6 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ /* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER) \ : "memory" \ ); \ -} \ - \ -static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int 
srcStride, \ - int h) \ -{ \ - int i; \ - int16_t temp[8]; \ - /* quick HACK, XXX FIXME MUST be optimized */ \ - for (i = 0; i < h; i++) { \ - temp[0] = (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + \ - (src[1] + src[3]) * 3 - (src[2] + src[4]); \ - temp[1] = (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + \ - (src[0] + src[4]) * 3 - (src[1] + src[5]); \ - temp[2] = (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + \ - (src[0] + src[5]) * 3 - (src[0] + src[6]); \ - temp[3] = (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + \ - (src[1] + src[6]) * 3 - (src[0] + src[7]); \ - temp[4] = (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + \ - (src[2] + src[7]) * 3 - (src[1] + src[8]); \ - temp[5] = (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + \ - (src[3] + src[8]) * 3 - (src[2] + src[8]); \ - temp[6] = (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + \ - (src[4] + src[8]) * 3 - (src[3] + src[7]); \ - temp[7] = (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + \ - (src[5] + src[7]) * 3 - (src[4] + src[6]); \ - __asm__ volatile ( \ - "movq (%0), %%mm0 \n\t" \ - "movq 8(%0), %%mm1 \n\t" \ - "paddw %2, %%mm0 \n\t" \ - "paddw %2, %%mm1 \n\t" \ - "psraw $5, %%mm0 \n\t" \ - "psraw $5, %%mm1 \n\t" \ - "packuswb %%mm1, %%mm0 \n\t" \ - OP_3DNOW(%%mm0, (%1), %%mm1, q) \ - :: "r"(temp), "r"(dst), "m"(ROUNDER) \ - : "memory" \ - ); \ - dst += dstStride; \ - src += srcStride; \ - } \ } #define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX) \ @@ -1753,9 +1645,6 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP) QPEL_BASE(avg_, ff_pw_16, _, AVG_MMX2_OP, AVG_3DNOW_OP) QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) -QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) -QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) -QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) QPEL_OP(avg_, ff_pw_16, _, AVG_MMX2_OP, mmx2) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) @@ -1816,10 
+1705,6 @@ QPEL_2TAP(put_, 16, mmx2) QPEL_2TAP(avg_, 16, mmx2) QPEL_2TAP(put_, 8, mmx2) QPEL_2TAP(avg_, 8, mmx2) -QPEL_2TAP(put_, 16, 3dnow) -QPEL_2TAP(avg_, 16, 3dnow) -QPEL_2TAP(put_, 8, 3dnow) -QPEL_2TAP(avg_, 8, 3dnow) void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) { @@ -2770,29 +2655,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; } - if (CONFIG_H264QPEL) { - SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, ); - - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, ); - } - - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); - } - c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; #if HAVE_7REGS diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c index 8b116b7..b514746 100644 --- a/libavcodec/x86/dsputil_mmx_avg_template.c +++ b/libavcodec/x86/dsputil_mmx_avg_template.c @@ -55,6 +55,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ :"%"REG_a, "memory"); } +#ifndef SKIP_FOR_3DNOW static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm__ volatile( @@ -104,7 +105,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int :"S"((x86_reg)src1Stride), 
"D"((x86_reg)dstStride) :"memory"); } - +#endif static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { @@ -226,6 +227,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src :"memory");*/ } +#ifndef SKIP_FOR_3DNOW static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm__ volatile( @@ -276,7 +278,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride) :"memory"); } - +#endif static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { @@ -872,6 +874,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line :"%"REG_a, "memory"); } +#ifndef SKIP_FOR_3DNOW static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { do { @@ -896,6 +899,7 @@ static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_siz h -= 4; } while(h > 0); } +#endif //FIXME the following could be optimized too ... 
static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c index fc1635d..e84e5a5 100644 --- a/libavcodec/x86/h264_qpel_mmx.c +++ b/libavcodec/x86/h264_qpel_mmx.c @@ -1163,9 +1163,6 @@ QPEL(put_, 16,XMM, 16)\ QPEL(avg_, 8, XMM, 16)\ QPEL(avg_, 16,XMM, 16)\ -#define PAVGB "pavgusb" -QPEL_H264(put_, PUT_OP, 3dnow) -QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) #undef PAVGB #define PAVGB "pavgb" QPEL_H264(put_, PUT_OP, mmx2) @@ -1184,7 +1181,6 @@ QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3) #endif #undef PAVGB -H264_MC_4816(3dnow) H264_MC_4816(mmx2) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) -- 1.7.9.5 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel