bash-3.2$ cat /tmp/x.c
#include <mmintrin.h>
extern __m64 x, y;
unsigned long long foo(__m64 m)
{
  return _mm_cvtm64_si64(_mm_add_pi32(x, y));
}
bash-3.2$ ./xgcc -B./ -march=core2 -S -O2 /tmp/x.c
bash-3.2$ cat x.s
        .file "x.c"
        .text
        .p2align 4,,15
.globl foo
        .type foo, @function
foo:
.LFB129:
        .cfi_startproc
        movq x(%rip), %mm0
        paddd y(%rip), %mm0
        movq %mm0, -8(%rsp)
        movq -8(%rsp), %rax
        ret
        .cfi_endproc
.LFE129:
        .size foo, .-foo
        .ident "GCC: (GNU) 4.4.0 20080903 (experimental) [trunk revision 139952]"
        .section .note.GNU-stack,"",@progbits
bash-3.2$ ./xgcc -B./ -march=core2 -S -O2 /tmp/x.c -fno-ira
bash-3.2$ cat x.s
        .file "x.c"
        .text
        .p2align 4,,15
.globl foo
        .type foo, @function
foo:
.LFB129:
        .cfi_startproc
        movq x(%rip), %mm0
        paddd y(%rip), %mm0
        movd %mm0, %rax
        ret
        .cfi_endproc
.LFE129:
        .size foo, .-foo
        .ident "GCC: (GNU) 4.4.0 20080903 (experimental) [trunk revision 139952]"
        .section .note.GNU-stack,"",@progbits
bash-3.2$
The problem is that IRA turns

(insn 8 7 14 2 ../../include/mmintrin.h:300 (set (reg:V2SI 61) (plus:V2SI (reg:V2SI 63 [ x ]) (mem/c/i:V2SI (symbol_ref:DI ("y") <var_decl 0x7fd36e03cc80 y>) [2 y+0 S8 A64]))) 992 {*mmx_addv2si3} (expr_list:REG_DEAD (reg:V2SI 63 [ x ]) (nil)))
(insn 14 8 20 2 /tmp/x.c:12 (set (reg/i:DI 0 ax) (subreg:DI (reg:V2SI 61) 0)) 89 {*movdi_1_rex64} (expr_list:REG_DEAD (reg:V2SI 61) (nil)))

into

(insn 26 8 14 2 ../../include/mmintrin.h:300 (set (mem/c:V2SI (plus:DI (reg/f:DI 7 sp) (const_int -8 [0xfffffffffffffff8])) [3 S8 A64]) (reg:V2SI 29 mm0 [orig:63 x ] [63])) 946 {*movv2si_internal_rex64} (nil))
(insn:HI 14 26 20 2 /tmp/x.c:12 (set (reg/i:DI 0 ax) (mem/c:DI (plus:DI (reg/f:DI 7 sp) (const_int -8 [0xfffffffffffffff8])) [3 S8 A64])) 89 {*movdi_1_rex64} (nil))

while the old RA generates

(insn:HI 14 8 20 2 /tmp/x.c:12 (set (reg/i:DI 0 ax) (reg:DI 29 mm0 [orig:63 x ] [63])) 89 {*movdi_1_rex64} (nil))

The outputs from the regmove pass are different. With IRA, we got

(insn:HI 8 7 14 2 ../../include/mmintrin.h:300 (set (reg:V2SI 61) (plus:V2SI (reg:V2SI 63 [ x ]) (mem/c/i:V2SI (symbol_ref:DI ("y") <var_decl 0x7f66abfb5c80 y>) [2 y+0 S8 A64]))) 992 {*mmx_addv2si3} (expr_list:REG_DEAD (reg:V2SI 63 [ x ]) (nil)))
(insn:HI 14 8 20 2 /tmp/x.c:12 (set (reg/i:DI 0 ax) (subreg:DI (reg:V2SI 61) 0)) 89 {*movdi_1_rex64} (expr_list:REG_DEAD (reg:V2SI 61) (nil)))

Without IRA, we got

(insn:HI 8 7 14 2 ../../include/mmintrin.h:300 (set (reg:V2SI 63 [ x ]) (plus:V2SI (reg:V2SI 63 [ x ]) (mem/c/i:V2SI (symbol_ref:DI ("y") <var_decl 0x7fd36e03cc80 y>) [2 y+0 S8 A64]))) 992 {*mmx_addv2si3} (nil))
(insn:HI 14 8 20 2 /tmp/x.c:12 (set (reg/i:DI 0 ax) (subreg:DI (reg:V2SI 63 [ x ]) 0)) 89 {*movdi_1_rex64} (expr_list:REG_DEAD (reg:V2SI 63 [ x ]) (nil)))

Does it have any impact on code generation?
-- Summary: [4.4 Regression] IRA generates inefficient code Product: gcc Version: 4.4.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: hjl dot tools at gmail dot com GCC target triplet: x86_64-unknown-linux-gnu http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37364