Currently there is a wild mix of 3dn2/3dnow2/3dnowext. Switching to "3dnowext", which is the actual name of the CPU flag, unifies the naming. --- libavcodec/x86/Makefile | 4 +- libavcodec/x86/dsputil_mmx.c | 14 ++++++------ libavcodec/x86/fft.c | 6 ++-- libavcodec/x86/fft.h | 6 ++-- libavcodec/x86/{fft_3dn.c => fft_3dnow.c} | 2 +- libavcodec/x86/{fft_3dn2.c => fft_3dnowext.c} | 26 ++++++++++++------------ libavcodec/x86/fft_mmx.asm | 8 +++--- libavcodec/x86/fmtconvert.asm | 6 ++-- libavcodec/x86/fmtconvert_mmx.c | 10 +++++--- libavutil/x86/x86inc.asm | 2 +- 10 files changed, 43 insertions(+), 41 deletions(-) rename libavcodec/x86/{fft_3dn.c => fft_3dnow.c} (97%) rename libavcodec/x86/{fft_3dn2.c => fft_3dnowext.c} (85%)
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 1d26357..8cd7bb0 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -38,8 +38,8 @@ YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32_sse.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o -YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dn.o -YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dn2.o +YASM-OBJS-FFT-$(HAVE_AMD3DNOW) += x86/fft_3dnow.o +YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dnowext.o YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \ $(YASM-OBJS-FFT-yes) YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index afbb531..2663c39 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2387,9 +2387,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], } #if HAVE_6REGS -static void vector_fmul_window_3dnow2(float *dst, const float *src0, - const float *src1, const float *win, - int len) +static void vector_fmul_window_3dnowext(float *dst, const float *src0, + const float *src1, const float *win, + int len) { x86_reg i = -len * 4; x86_reg j = len * 4 - 8; @@ -2838,11 +2838,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, #endif } -static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, - int mm_flags) +static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx, + int mm_flags) { #if HAVE_6REGS && HAVE_INLINE_ASM - c->vector_fmul_window = vector_fmul_window_3dnow2; + c->vector_fmul_window = vector_fmul_window_3dnowext; #endif } @@ -3093,7 +3093,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) dsputil_init_3dnow(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) - dsputil_init_3dnow2(c, avctx, mm_flags); + dsputil_init_3dnowext(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_SSE && 
HAVE_SSE) dsputil_init_sse(c, avctx, mm_flags); diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c index 6349c23..ae7a08e 100644 --- a/libavcodec/x86/fft.c +++ b/libavcodec/x86/fft.c @@ -33,9 +33,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s) } if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { /* 3DNowEx for K7 */ - s->imdct_calc = ff_imdct_calc_3dnow2; - s->imdct_half = ff_imdct_half_3dnow2; - s->fft_calc = ff_fft_calc_3dnow2; + s->imdct_calc = ff_imdct_calc_3dnowext; + s->imdct_half = ff_imdct_half_3dnowext; + s->fft_calc = ff_fft_calc_3dnowext; } if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { /* SSE for P3/P4/K8 */ diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h index 1cefe7a..6e80b95 100644 --- a/libavcodec/x86/fft.h +++ b/libavcodec/x86/fft.h @@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); -void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z); void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); +void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); diff --git a/libavcodec/x86/fft_3dn.c b/libavcodec/x86/fft_3dnow.c similarity index 97% rename from 
libavcodec/x86/fft_3dn.c rename to libavcodec/x86/fft_3dnow.c index 5a4d3ad..8778260 100644 --- a/libavcodec/x86/fft_3dn.c +++ b/libavcodec/x86/fft_3dnow.c @@ -20,4 +20,4 @@ */ #define EMULATE_3DNOWEXT -#include "fft_3dn2.c" +#include "fft_3dnowext.c" diff --git a/libavcodec/x86/fft_3dn2.c b/libavcodec/x86/fft_3dnowext.c similarity index 85% rename from libavcodec/x86/fft_3dn2.c rename to libavcodec/x86/fft_3dnowext.c index e684cc7..47d912d 100644 --- a/libavcodec/x86/fft_3dn2.c +++ b/libavcodec/x86/fft_3dnowext.c @@ -30,30 +30,30 @@ DECLARE_ALIGNED(8, static const unsigned int, m1m1)[2] = { 1U<<31, 1U<<31 }; "movq "#s","#d"\n"\ "psrlq $32,"#d"\n"\ "punpckldq "#s","#d"\n" -#define ff_fft_calc_3dnow2 ff_fft_calc_3dnow -#define ff_fft_dispatch_3dnow2 ff_fft_dispatch_3dnow -#define ff_fft_dispatch_interleave_3dnow2 ff_fft_dispatch_interleave_3dnow -#define ff_imdct_calc_3dnow2 ff_imdct_calc_3dnow -#define ff_imdct_half_3dnow2 ff_imdct_half_3dnow +#define ff_fft_calc_3dnowext ff_fft_calc_3dnow +#define ff_fft_dispatch_3dnowext ff_fft_dispatch_3dnow +#define ff_fft_dispatch_interleave_3dnowext ff_fft_dispatch_interleave_3dnow +#define ff_imdct_calc_3dnowext ff_imdct_calc_3dnow +#define ff_imdct_half_3dnowext ff_imdct_half_3dnow #else #define PSWAPD(s,d) "pswapd "#s","#d"\n" #endif -void ff_fft_dispatch_3dnow2(FFTComplex *z, int nbits); -void ff_fft_dispatch_interleave_3dnow2(FFTComplex *z, int nbits); +void ff_fft_dispatch_3dnowext(FFTComplex *z, int nbits); +void ff_fft_dispatch_interleave_3dnowext(FFTComplex *z, int nbits); -void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z) +void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z) { int n = 1<<s->nbits; int i; - ff_fft_dispatch_interleave_3dnow2(z, s->nbits); + ff_fft_dispatch_interleave_3dnowext(z, s->nbits); __asm__ volatile("femms"); if(n <= 8) for(i=0; i<n; i+=2) FFSWAP(FFTSample, z[i].im, z[i+1].re); } -void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input) +void 
ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input) { x86_reg j, k; long n = s->mdct_size; @@ -101,7 +101,7 @@ void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *inp ); } - ff_fft_dispatch_3dnow2(z, s->nbits); + ff_fft_dispatch_3dnowext(z, s->nbits); #define CMUL(j,mm0,mm1)\ "movq (%2,"#j",2), %%mm6 \n"\ @@ -144,13 +144,13 @@ void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *inp __asm__ volatile("femms"); } -void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input) +void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input) { x86_reg j, k; long n = s->mdct_size; long n4 = n >> 2; - ff_imdct_half_3dnow2(s, output+n4, input); + ff_imdct_half_3dnowext(s, output+n4, input); j = -n; k = n-8; diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index 5c6583b..d39806d 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -498,7 +498,7 @@ fft8 %+ SUFFIX: ret %endmacro -INIT_MMX 3dnow2 +INIT_MMX 3dnowext FFT48_3DN %macro pswapd 2 @@ -695,8 +695,8 @@ INIT_MMX 3dnow %define unpckhps punpckhdq DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] DECL_PASS pass_interleave_3dnow, PASS_BIG 0 -%define pass_3dnow2 pass_3dnow -%define pass_interleave_3dnow2 pass_interleave_3dnow +%define pass_3dnowext pass_3dnow +%define pass_interleave_3dnowext pass_interleave_3dnow %ifdef PIC %define SECTION_REL - $$ @@ -763,7 +763,7 @@ DECL_FFT 5, _interleave INIT_MMX 3dnow DECL_FFT 4 DECL_FFT 4, _interleave -INIT_MMX 3dnow2 +INIT_MMX 3dnowext DECL_FFT 4 DECL_FFT 4, _interleave diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 4916e7a..0fd14fe 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 %macro PSWAPD_SSE 2 pshufw %1, %2, 0x4e %endmacro -%macro PSWAPD_3DN1 2 +%macro PSWAPD_3DNOW 2 movq %1, %2 psrlq %1, 32 
punpckldq %1, %2 @@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, %define pswapd PSWAPD_SSE FLOAT_TO_INT16_INTERLEAVE6 sse %define cvtps2pi pf2id -%define pswapd PSWAPD_3DN1 +%define pswapd PSWAPD_3DNOW FLOAT_TO_INT16_INTERLEAVE6 3dnow %undef pswapd -FLOAT_TO_INT16_INTERLEAVE6 3dn2 +FLOAT_TO_INT16_INTERLEAVE6 3dnowext %undef cvtps2pi ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c index aaf634d..fbdc526 100644 --- a/libavcodec/x86/fmtconvert_mmx.c +++ b/libavcodec/x86/fmtconvert_mmx.c @@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); -void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); +void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len); #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse @@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow) FLOAT_TO_INT16_INTERLEAVE(sse) FLOAT_TO_INT16_INTERLEAVE(sse2) -static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ +static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src, + long len, int channels) +{ if(channels==6) - ff_float_to_int16_interleave6_3dn2(dst, src, len); + ff_float_to_int16_interleave6_3dnowext(dst, src, len); else float_to_int16_interleave_3dnow(dst, src, len, channels); } @@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) } if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->float_to_int16_interleave = float_to_int16_interleave_3dn2; + c->float_to_int16_interleave = 
float_to_int16_interleave_3dnowext; } } if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 7a75951..03e6c07 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -557,7 +557,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %assign cpuflags_mmx (1<<0) %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx %assign cpuflags_3dnow (1<<2) | cpuflags_mmx -%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow +%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow %assign cpuflags_sse (1<<4) | cpuflags_mmx2 %assign cpuflags_sse2 (1<<5) | cpuflags_sse %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 -- 1.7.2.5 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel