Exact version of GCC: gcc-4.2-20060304
Problem since: unknown (gcc-4.0.3-20060212 compiles OK both under -O2 and -O3)
Built with: ./configure --prefix=/usr/local; make bootstrap
Hardware: Athlon Thunderbird 1.33 GHz; 256 MB RAM
OS: KANOTIX Linux (Knoppix)
uname -a: Linux zion 2.6.11 #3 SMP Mon May 2 00:19:56 CEST 2005 i686 GNU/Linux
Package: Cinelerra-2.0, jpeg-mmx lib

Description of problem: Breaks with -O2, successful with -O
[EMAIL PROTECTED]:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O2 -I. -c -o jquant_x86simd.o -save-temps jquant_x86simd.c jquant_x86simd.s: Assembler messages: jquant_x86simd.s:59: Error: suffix or operands invalid for `ldmxcsr' [EMAIL PROTECTED]:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O -I. -c -o jquant_x86simd.o -save-temps jquant_x86simd.c [EMAIL PROTECTED]:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ (successful) .i file that triggered this erroneous compilation is: # 1 "jquant_x86simd.c" # 1 "<built-in>" # 1 "<command line>" # 1 "jquant_x86simd.c" # 31 "jquant_x86simd.c" # 1 "jconfig.h" 1 # 32 "jquant_x86simd.c" 2 # 1 "jmorecfg.h" 1 # 58 "jmorecfg.h" typedef unsigned char JSAMPLE; # 98 "jmorecfg.h" typedef short JCOEF; # 124 "jmorecfg.h" typedef unsigned char JOCTET; # 149 "jmorecfg.h" typedef unsigned char UINT8; # 161 "jmorecfg.h" typedef unsigned short UINT16; typedef short INT16; typedef int INT32; # 189 "jmorecfg.h" typedef unsigned int JDIMENSION; # 245 "jmorecfg.h" typedef int boolean; # 33 "jquant_x86simd.c" 2 # 1 "mmx.h" 1 # 22 "mmx.h" # 1 "attributes.h" 1 # 23 "mmx.h" 2 typedef union { long long q; unsigned long long uq; int d[2]; unsigned int ud[2]; short w[4]; unsigned short uw[4]; char b[8]; unsigned char ub[8]; float s[2]; } mmx_t; # 34 "jquant_x86simd.c" 2 void jcquant_3dnow( INT16 *psrc, INT16 *pdst, float *piqf ) { int i; for (i=0; i < 64 ; i+=4) { # 55 "jquant_x86simd.c" __asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (*(mmx_t *)&psrc[0])); __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7"); __asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) ); __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3"); __asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2"); __asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3"); __asm__ __volatile__ ("movq" " %0, %%" "mm4" : : "X" (*(mmx_t*)&piqf[0])); __asm__ __volatile__ ("pi2fd" " %" "mm2" ", %" "mm2"); __asm__ __volatile__ ("movq" " %0, %%" "mm5" : : "X" (*(mmx_t*)&piqf[2])); __asm__ 
__volatile__ ("pi2fd" " %" "mm3" ", %" "mm3"); __asm__ __volatile__ ("pfmul" " %" "mm4" ", %" "mm2"); __asm__ __volatile__ ("pfmul" " %" "mm5" ", %" "mm3"); __asm__ __volatile__ ("pf2id" " %" "mm2" ", %" "mm2"); __asm__ __volatile__ ("pf2id" " %" "mm3" ", %" "mm3"); __asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2"); piqf += 4; psrc += 4; __asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (*(mmx_t*)pdst) : ); pdst += 4; } __asm__ __volatile__ ("femms"); } static int trunc_mxcsr = 0x1f80; void jcquant_sse( INT16 *psrc, INT16 *pdst, float *piqf ) { int i; __asm__ ( "ldmxcsr %0\n" : : "X" (trunc_mxcsr) ); for (i=0; i < 64 ; i+=4) { __asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (psrc[i])); __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7"); __asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) ); __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3"); __asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2"); __asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3"); __asm__ __volatile__ ("cvtpi2ps" " %" "mm2" ", %" "xmm2"); __asm__ __volatile__ ("cvtpi2ps" " %" "mm3" ", %" "xmm3"); __asm__ __volatile__ ("shufps" " %0, %%" "xmm3" ", %%" "xmm2" : : "X" (0*1 + 1*4 + 0 * 16 + 1 * 64) ); __asm__ __volatile__ ("mulps" " %0, %%" "xmm2" : : "X" (piqf[i])); __asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm2"); __asm__ __volatile__ ("shufps" " %0, %%" "xmm2" ", %%" "xmm2" : : "X" (2*1 + 3*4 + 0 * 16 + 1 * 64) ); __asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm3"); __asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2"); __asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (pdst[i]) : ); } __asm__ __volatile__ ("emms"); } -- Summary: Wrong assembly generated with -O2, -O OK for cinelerra source item Product: gcc Version: 4.2.0 Status: UNCONFIRMED Severity: major Priority: P3 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: mtodorov at alu dot hr GCC build triplet: i686-pc-linux-gnu GCC host triplet: i686-pc-linux-gnu GCC 
target triplet: i686-pc-linux-gnu http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26653