Bug#318493: libavcodec-dev: MPEG encoding broken for MMX

Tobias Grimm Fri, 15 Jul 2005 13:18:17 -0700

Package: libavcodec-dev
Version: 0.cvs20050626-2
Severity: important
Tags: patch


In the Debian package of ffmpeg, some parts of the MMX code are patched so that
they compile with -fPIC. Unfortunately, one of these patches introduces a bug
that breaks encoding to MPEG.

This is the buggy patch for mpegvideo_mmx_template.c:

@@ -157,7 +168,14 @@
             "psubw %%mm1, %%mm0                        \n\t" // ABS(block[i])
             "movq (%3, %%"REG_a"), %%mm6       \n\t" // bias[0]
             "paddusw %%mm6, %%mm0              \n\t" // ABS(block[i]) + bias[0]
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_a"                    \n\t"
+            "movl %2, %%"REG_a"                        \n\t"
+            "movq (%%"REG_a", %%"REG_a"), %%mm5        \n\t" // qmat[i]
+            "pop %%"REG_a"                     \n\t"
+#else
             "movq (%2, %%"REG_a"), %%mm5               \n\t" // qmat[i]
+#endif
             "pmulhw %%mm5, %%mm0               \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por %%mm0, %%mm4                  \n\t" 
             "pxor %%mm1, %%mm0                 \n\t" 
@@ -179,7 +197,11 @@


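REG_a can't be reused here: it is already the index register of this address,
so loading %2 into it turns the address into (%2 + %2) instead of (%2 + index).
A different register has to be taken to get the operand from memory instead of
from a register, which is not available with -fPIC.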

I've solved this by using REG_b as the base register instead; REG_b needs to be
defined in mmx.h. The corrected hunk looks like this:

@@ -157,7 +168,14 @@
             "psubw %%mm1, %%mm0                        \n\t" // ABS(block[i])
             "movq (%3, %%"REG_a"), %%mm6       \n\t" // bias[0]
             "paddusw %%mm6, %%mm0              \n\t" // ABS(block[i]) + bias[0]
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_b"                    \n\t"
+            "movl %2, %%"REG_b"                        \n\t"
+            "movq (%%"REG_b", %%"REG_a"), %%mm5        \n\t" // qmat[i]
+            "pop %%"REG_b"                     \n\t"
+#else
             "movq (%2, %%"REG_a"), %%mm5               \n\t" // qmat[i]
+#endif
             "pmulhw %%mm5, %%mm0               \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por %%mm0, %%mm4                  \n\t" 
             "pxor %%mm1, %%mm0                 \n\t" 


See the difference? Here is the complete patch:

--- ffmpeg-0.cvs20050626.orig/libavcodec/i386/mmx.h
+++ ffmpeg-0.cvs20050626/libavcodec/i386/mmx.h
@@ -9,6 +9,9 @@
 #  define REG_a "rax"
 #else
 #  define REG_a "eax"
+#  if defined(PIC)
+#     define REG_b "ebx"
+#  endif
 #endif
 
 /*
--- ffmpeg-0.cvs20050626.orig/libavcodec/i386/mpegvideo_mmx_template.c
+++ ffmpeg-0.cvs20050626/libavcodec/i386/mpegvideo_mmx_template.c
@@ -95,7 +95,14 @@
             SPREADW(%%mm3)
             "pxor %%mm7, %%mm7                 \n\t" // 0
             "pxor %%mm4, %%mm4                 \n\t" // 0
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_a"                    \n\t"
+            "movl %2, %%"REG_a"                        \n\t"
+            "movq (%%"REG_a"), %%mm5           \n\t" // qmat[0]
+            "pop %%"REG_a"                     \n\t"
+#else
             "movq (%2), %%mm5                  \n\t" // qmat[0]
+#endif
             "pxor %%mm6, %%mm6                 \n\t"
             "psubw (%3), %%mm6                 \n\t" // -bias[0]
             "mov $-128, %%"REG_a"              \n\t"
@@ -128,7 +135,11 @@
             "movd %%mm3, %%"REG_a"             \n\t"
             "movzb %%al, %%"REG_a"             \n\t" // last_non_zero_p1
            : "+a" (last_non_zero_p1)
+#if defined(PIC) && !defined(ARCH_X86_64)
+            : "r" (block+64), "m" (qmat), "r" (bias),
+#else
             : "r" (block+64), "r" (qmat), "r" (bias),
+#endif
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...
@@ -157,7 +168,14 @@
             "psubw %%mm1, %%mm0                        \n\t" // ABS(block[i])
             "movq (%3, %%"REG_a"), %%mm6       \n\t" // bias[0]
             "paddusw %%mm6, %%mm0              \n\t" // ABS(block[i]) + bias[0]
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_b"                    \n\t"
+            "movl %2, %%"REG_b"                        \n\t"
+            "movq (%%"REG_b", %%"REG_a"), %%mm5        \n\t" // qmat[i]
+            "pop %%"REG_b"                     \n\t"
+#else
             "movq (%2, %%"REG_a"), %%mm5               \n\t" // qmat[i]
+#endif
             "pmulhw %%mm5, %%mm0               \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por %%mm0, %%mm4                  \n\t" 
             "pxor %%mm1, %%mm0                 \n\t" 
@@ -179,7 +197,11 @@
             "movd %%mm3, %%"REG_a"             \n\t"
             "movzb %%al, %%"REG_a"             \n\t" // last_non_zero_p1
            : "+a" (last_non_zero_p1)
+#if defined(PIC) && !defined(ARCH_X86_64)
+            : "r" (block+64), "m" (qmat+64), "r" (bias+64),
+#else
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
+#endif
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...

bye,

Tobias

-- System Information:
Debian Release: 3.1
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: i386 (i686)
Shell:  /bin/sh linked to /bin/bash
Kernel: Linux 2.6.11-1-tobi
Locale: [EMAIL PROTECTED], [EMAIL PROTECTED] (charmap=ISO-8859-15)

Versions of packages libavcodec-dev depends on:
ii  liba52-0.7.4-dev [liba5 0.7.4-1          Development library and headers fo
ii  libdc1394-11-dev        1.0.0-4          high level programming interface f
ii  libdts-dev              0.0.2-svn-1      development files for libdts
ii  libogg-dev              1.1.2-1          Ogg Bitstream Library Development
ii  libraw1394-dev          0.10.1-1.1       library for direct access to IEEE 
ii  libtheora-dev           0.0.0.alpha4-1.1 The Theora Video Compression Codec
ii  libvorbis-dev           1.1.0-1          The Vorbis General Audio Compressi
ii  zlib1g-dev              1:1.2.2-4        compression library - development

-- no debconf information


-- 
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]

Bug#318493: libavcodec-dev: MPEG encoding broken for MMX

Reply via email to