---
libavcodec/x86/fmtconvert.asm | 128 +++++++++++++++++++--------------------
libavcodec/x86/fmtconvert_mmx.c | 8 +-
2 files changed, 67 insertions(+), 69 deletions(-)
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index d621089..f69cd7c 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -27,11 +27,11 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
-%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%macro INT32_TO_FLOAT_FMUL_SCALAR 0
%ifdef UNIX64
-cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+cglobal int32_to_float_fmul_scalar, 3,3,5, dst, src, len
%else
-cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+cglobal int32_to_float_fmul_scalar, 4,4,5, dst, src, mul, len
%endif
%ifdef WIN64
SWAP 0, 2
@@ -44,7 +44,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
add dstq, lenq
neg lenq
.loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16]
%else
@@ -64,27 +64,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
%endmacro
-INIT_XMM
+INIT_XMM sse
%define SPLATD SPLATD_SSE
-%define movdqa movaps
-INT32_TO_FLOAT_FMUL_SCALAR sse, 5
-%undef movdqa
+INT32_TO_FLOAT_FMUL_SCALAR
+INIT_XMM sse2
%define SPLATD SPLATD_SSE2
-INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
+INT32_TO_FLOAT_FMUL_SCALAR
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
-%macro FLOAT_TO_INT16 2
-cglobal float_to_int16_%1, 3,3,%2, dst, src, len
+%macro FLOAT_TO_INT16 0
+cglobal float_to_int16, 3,3,2, dst, src, len
add lenq, lenq
lea srcq, [srcq+2*lenq]
add dstq, lenq
neg lenq
.loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
@@ -101,26 +100,27 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%endif
add lenq, 16
js .loop
-%ifnidn %1, sse2
+%if mmsize == 8
emms
%endif
REP_RET
%endmacro
-INIT_XMM
-FLOAT_TO_INT16 sse2, 2
-INIT_MMX
-FLOAT_TO_INT16 sse, 0
+INIT_XMM sse2
+FLOAT_TO_INT16
+INIT_MMX sse
+FLOAT_TO_INT16
+INIT_MMX 3dnow
%define cvtps2pi pf2id
-FLOAT_TO_INT16 3dnow, 0
+FLOAT_TO_INT16
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
-%macro FLOAT_TO_INT16_INTERLEAVE2 1
-cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
+%macro FLOAT_TO_INT16_INTERLEAVE2 0
+cglobal float_to_int16_interleave2, 3,4,2, dst, src0, src1, len
lea lenq, [4*r2q]
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
@@ -129,7 +129,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
add src1q, lenq
neg lenq
.loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
packssdw m0, m1
@@ -151,21 +151,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%endif
add lenq, 16
js .loop
-%ifnidn %1, sse2
+%if mmsize == 8
emms
%endif
REP_RET
%endmacro
-INIT_MMX
+INIT_MMX 3dnow
%define cvtps2pi pf2id
-FLOAT_TO_INT16_INTERLEAVE2 3dnow
+FLOAT_TO_INT16_INTERLEAVE2
%undef cvtps2pi
-%define movdqa movaps
-FLOAT_TO_INT16_INTERLEAVE2 sse
-%undef movdqa
-INIT_XMM
-FLOAT_TO_INT16_INTERLEAVE2 sse2
+INIT_MMX sse
+FLOAT_TO_INT16_INTERLEAVE2
+INIT_XMM sse2
+FLOAT_TO_INT16_INTERLEAVE2
%macro PSWAPD_SSE 2
@@ -177,9 +176,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
punpckldq %1, %2
%endmacro
-%macro FLOAT_TO_INT16_INTERLEAVE6 1
+%macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
-cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
+cglobal float_to_int16_interleave6, 2,7,0, dst, src, src1, src2, src3, src4, src5
%ifdef ARCH_X86_64
%define lend r10d
mov lend, r2d
@@ -226,21 +225,24 @@ cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4,
RET
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
+INIT_MMX sse
%define pswapd PSWAPD_SSE
-FLOAT_TO_INT16_INTERLEAVE6 sse
+FLOAT_TO_INT16_INTERLEAVE6
+INIT_MMX 3dnow
%define cvtps2pi pf2id
%define pswapd PSWAPD_3DN1
-FLOAT_TO_INT16_INTERLEAVE6 3dnow
+FLOAT_TO_INT16_INTERLEAVE6
%undef pswapd
-FLOAT_TO_INT16_INTERLEAVE6 3dn2
+INIT_MMX 3dnow2
+FLOAT_TO_INT16_INTERLEAVE6
%undef cvtps2pi
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
-%macro FLOAT_INTERLEAVE6 2
-cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
+%macro FLOAT_INTERLEAVE6 0
+cglobal float_interleave6, 2,7,7, dst, src, src1, src2, src3, src4, src5
%ifdef ARCH_X86_64
%define lend r10d
mov lend, r2d
@@ -259,7 +261,7 @@ cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
sub src4q, srcq
sub src5q, srcq
.loop:
-%ifidn %1, sse
+%if cpuflag(sse)
movaps m0, [srcq]
movaps m1, [srcq+src1q]
movaps m2, [srcq+src2q]
@@ -308,62 +310,58 @@ cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5
add dstq, mmsize*6
sub lend, mmsize/4
jg .loop
-%ifidn %1, mmx
+%if mmsize == 8
emms
%endif
REP_RET
%endmacro
-INIT_MMX
-FLOAT_INTERLEAVE6 mmx, 0
-INIT_XMM
-FLOAT_INTERLEAVE6 sse, 7
+INIT_MMX mmx
+FLOAT_INTERLEAVE6
+INIT_XMM sse
+FLOAT_INTERLEAVE6
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;-----------------------------------------------------------------------------
-%macro FLOAT_INTERLEAVE2 2
-cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
+%macro FLOAT_INTERLEAVE2 0
+cglobal float_interleave2, 3,4,5, dst, src, len, src1
mov src1q, [srcq+gprsize]
mov srcq, [srcq ]
sub src1q, srcq
.loop
- MOVPS m0, [srcq ]
- MOVPS m1, [srcq+src1q ]
- MOVPS m3, [srcq +mmsize]
- MOVPS m4, [srcq+src1q+mmsize]
+ mova m0, [srcq ]
+ mova m1, [srcq+src1q ]
+ mova m3, [srcq +mmsize]
+ mova m4, [srcq+src1q+mmsize]
- MOVPS m2, m0
- PUNPCKLDQ m0, m1
- PUNPCKHDQ m2, m1
+ PUNPCKHDQ m2, m0, m1
+ PUNPCKLDQ m0, m0, m1
- MOVPS m1, m3
- PUNPCKLDQ m3, m4
- PUNPCKHDQ m1, m4
+ PUNPCKHDQ m1, m3, m4
+ PUNPCKLDQ m3, m3, m4
- MOVPS [dstq ], m0
- MOVPS [dstq+1*mmsize], m2
- MOVPS [dstq+2*mmsize], m3
- MOVPS [dstq+3*mmsize], m1
+ mova [dstq ], m0
+ mova [dstq+1*mmsize], m2
+ mova [dstq+2*mmsize], m3
+ mova [dstq+3*mmsize], m1
add srcq, mmsize*2
add dstq, mmsize*4
sub lend, mmsize/2
jg .loop
-%ifidn %1, mmx
+%if mmsize == 8
emms
%endif
REP_RET
%endmacro
-INIT_MMX
-%define MOVPS movq
+INIT_MMX mmx
%define PUNPCKLDQ punpckldq
%define PUNPCKHDQ punpckhdq
-FLOAT_INTERLEAVE2 mmx, 0
-INIT_XMM
-%define MOVPS movaps
+FLOAT_INTERLEAVE2
+INIT_XMM sse
%define PUNPCKLDQ unpcklps
%define PUNPCKHDQ unpckhps
-FLOAT_INTERLEAVE2 sse, 5
+FLOAT_INTERLEAVE2
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 17079d3..78cca9c 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -41,7 +41,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnow2(int16_t *dst, const float **src, int len);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
@@ -72,9 +72,9 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2)
-static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
+static void float_to_int16_interleave_3dnow2(int16_t *dst, const float **src, long len, int channels){
if(channels==6)
- ff_float_to_int16_interleave6_3dn2(dst, src, len);
+ ff_float_to_int16_interleave6_3dnow2(dst, src, len);
else
float_to_int16_interleave_3dnow(dst, src, len, channels);
}
@@ -124,7 +124,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
}
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
+ c->float_to_int16_interleave = float_to_int16_interleave_3dnow2;
}
}
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
--
1.7.1