On Sat, 11 Jun 2016, James Almer wrote:

On 6/11/2016 5:32 PM, Martin Storsjö wrote:
---
 libavcodec/x86/h264_idct.asm       | 5 +++++
 libavcodec/x86/h264_idct_10bit.asm | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 313791a..9abed3c 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -82,6 +82,7 @@ SECTION .text
 INIT_MMX mmx
 ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct_add_8, 3, 3, 0
+    movsxd       r2, r2d
     IDCT4_ADD    r0, r1, r2
     RET

@@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
 INIT_MMX mmx
 ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_add_8, 3, 4, 0
+    movsxd      r2, r2d
     %assign pad 128+4-(stack_offset&7)
     SUB         rsp, pad

@@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
 INIT_XMM sse2
 ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_add_8, 3, 4, 10
+    movsxd    r2, r2d
     IDCT8_ADD_SSE r0, r1, r2, r3
     RET

@@ -310,6 +313,7 @@ INIT_MMX mmxext
 ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 %if ARCH_X86_64
 cglobal h264_idct_dc_add_8, 3, 4, 0
+    movsxd       r2, r2d
     movsx        r3, word [r1]
     mov  dword [r1], 0
     DC_ADD_MMXEXT_INIT r3, r2
@@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0

 ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_dc_add_8, 3, 4, 0
+    movsxd       r2, r2d
     movsx        r3, word [r1]
     mov  dword [r1], 0
     DC_ADD_MMXEXT_INIT r3, r2
diff --git a/libavcodec/x86/h264_idct_10bit.asm 
b/libavcodec/x86/h264_idct_10bit.asm
index b7d5105..a5bfb34 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -77,6 +77,7 @@ SECTION .text

 %macro IDCT_ADD_10 0
 cglobal h264_idct_add_10, 3,3
+    movsxd       r2, r2d
     IDCT4_ADD_10 r0, r1, r2
     RET
 %endmacro
@@ -190,6 +191,7 @@ IDCT_ADD16_10

 INIT_MMX mmxext
 cglobal h264_idct_dc_add_10,3,3
+    movsxd    r2, r2d
     movd      m0, [r1]
     mov dword [r1], 0
     paddd     m0, [pd_32]
@@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3
 ;-----------------------------------------------------------------------------
 %macro IDCT8_DC_ADD 0
 cglobal h264_idct8_dc_add_10,3,4,7
+    movsxd    r2, r2d
     movd      m0, [r1]
     mov dword[r1], 0
     paddd     m0, [pd_32]
@@ -438,6 +441,7 @@ IDCT_ADD8

 %macro IDCT8_ADD 0
 cglobal h264_idct8_add_10, 3,4,16
+    movsxd    r2, r2d
 %if UNIX64 == 0
     %assign pad 16-gprsize-(stack_offset&15)
     sub  rsp, pad


Use the movsxdifnidn macro, to avoid emitting a movsxd on x86_32.

Thanks, I can change that.

Some of the codepaths are within an %if ARCH_X86_64 (like ff_h264_idct_dc_add_8_mmxext); is it still preferred to use that macro form there, or an explicit movsxd?

// Martin
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to