Given the simple test program:

# define __kernel_cttz(x) __builtin_ctzl(x)

unsigned long __ffs(unsigned long word)
{
	/* Whee. EV67 can calculate it directly. */
	return __kernel_cttz(word);
}
long foo(const unsigned long *b) { unsigned long b0, b1, ofs, tmp; b0 = b[0]; b1 = b[1]; ofs = (b0 ? 0 : 64); tmp = (b0 ? b0 : b1); tmp = __ffs(tmp); return tmp + ofs; } gcc 4.3.4, 4.4.3, and 4.5.0 all produce very bad code at -O1. gcc-4.4.3 produces bad code at other levels as well, but that's for a different bug report. I guess we really only care about 4.5 at this point. # gcc-4.3.4 -Os -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 10 a4 ldq v0,0(a0) c: 08 00 30 a4 ldq t0,8(a0) 10: c1 04 00 44 cmovne v0,v0,t0 14: a0 15 00 40 cmpeq v0,0,v0 18: 20 d7 00 48 sll v0,0x6,v0 1c: 61 06 e1 73 cttz t0,t0 20: 00 04 01 40 addq v0,t0,v0 24: 01 80 fa 6b ret # gcc-4.3.4 -O1 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 bb 27 ldah gp,0(t12) c: 00 00 bd 23 lda gp,0(gp) 10: f0 ff de 23 lda sp,-16(sp) 14: 00 00 5e b7 stq ra,0(sp) 18: 08 00 3e b5 stq s0,8(sp) 1c: 01 04 f0 47 mov a0,t0 20: 00 00 10 a6 ldq a0,0(a0) 24: 08 00 21 a4 ldq t0,8(t0) 28: a9 15 00 42 cmpeq a0,0,s0 2c: 29 d7 20 49 sll s0,0x6,s0 30: 90 04 01 46 cmoveq a0,t0,a0 34: 00 00 40 d3 bsr ra,38 <foo+0x30> 38: 00 04 20 41 addq s0,v0,v0 3c: 00 00 5e a7 ldq ra,0(sp) 40: 08 00 3e a5 ldq s0,8(sp) 44: 10 00 de 23 lda sp,16(sp) 48: 01 80 fa 6b ret # gcc-4.3.4 -O2 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 10 a4 ldq v0,0(a0) 14: 08 00 30 a4 ldq t0,8(a0) 18: c1 04 00 44 cmovne v0,v0,t0 1c: a0 15 00 40 cmpeq v0,0,v0 20: 20 d7 00 48 sll v0,0x6,v0 24: 61 06 e1 73 cttz t0,t0 28: 00 04 01 40 addq v0,t0,v0 2c: 01 80 fa 6b ret # gcc-4.3.4 -O3 
-mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 10 a4 ldq v0,0(a0) 14: 08 00 30 a4 ldq t0,8(a0) 18: c1 04 00 44 cmovne v0,v0,t0 1c: a0 15 00 40 cmpeq v0,0,v0 20: 20 d7 00 48 sll v0,0x6,v0 24: 61 06 e1 73 cttz t0,t0 28: 00 04 01 40 addq v0,t0,v0 2c: 01 80 fa 6b ret # gcc-4.4.3 -Os -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 bb 27 ldah gp,0(t12) c: 00 00 bd 23 lda gp,0(gp) 10: f0 ff de 23 lda sp,-16(sp) 14: 08 00 30 a4 ldq t0,8(a0) 18: 08 00 3e b5 stq s0,8(sp) 1c: 00 00 30 a5 ldq s0,0(a0) 20: 00 00 5e b7 stq ra,0(sp) 24: 10 04 e1 47 mov t0,a0 28: d0 04 29 45 cmovne s0,s0,a0 2c: a9 15 20 41 cmpeq s0,0,s0 30: 29 d7 20 49 sll s0,0x6,s0 34: 00 00 40 d3 bsr ra,38 <foo+0x30> 38: 00 04 20 41 addq s0,v0,v0 3c: 00 00 5e a7 ldq ra,0(sp) 40: 08 00 3e a5 ldq s0,8(sp) 44: 10 00 de 23 lda sp,16(sp) 48: 01 80 fa 6b ret # gcc-4.4.3 -O1 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 bb 27 ldah gp,0(t12) c: 00 00 bd 23 lda gp,0(gp) 10: f0 ff de 23 lda sp,-16(sp) 14: 00 00 5e b7 stq ra,0(sp) 18: 08 00 3e b5 stq s0,8(sp) 1c: 00 00 30 a4 ldq t0,0(a0) 20: 08 00 10 a6 ldq a0,8(a0) 24: a9 15 20 40 cmpeq t0,0,s0 28: 29 d7 20 49 sll s0,0x6,s0 2c: d0 04 21 44 cmovne t0,t0,a0 30: 00 00 40 d3 bsr ra,34 <foo+0x2c> 34: 00 04 20 41 addq s0,v0,v0 38: 00 00 5e a7 ldq ra,0(sp) 3c: 08 00 3e a5 ldq s0,8(sp) 40: 10 00 de 23 lda sp,16(sp) 44: 01 80 fa 6b ret # gcc-4.4.3 -O2 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz 
a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 bb 27 ldah gp,0(t12) 14: 00 00 bd 23 lda gp,0(gp) 18: f0 ff de 23 lda sp,-16(sp) 1c: 08 00 30 a4 ldq t0,8(a0) 20: 08 00 3e b5 stq s0,8(sp) 24: 00 00 30 a5 ldq s0,0(a0) 28: 00 00 5e b7 stq ra,0(sp) 2c: 10 04 e1 47 mov t0,a0 30: d0 04 29 45 cmovne s0,s0,a0 34: a9 15 20 41 cmpeq s0,0,s0 38: 29 d7 20 49 sll s0,0x6,s0 3c: 00 00 40 d3 bsr ra,40 <foo+0x30> 40: 00 04 20 41 addq s0,v0,v0 44: 00 00 5e a7 ldq ra,0(sp) 48: 08 00 3e a5 ldq s0,8(sp) 4c: 10 00 de 23 lda sp,16(sp) 50: 01 80 fa 6b ret 54: 00 00 fe 2f unop 58: 1f 04 ff 47 nop 5c: 00 00 fe 2f unop # gcc-4.4.3 -O3 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 30 a4 ldq t0,0(a0) 14: 08 00 10 a4 ldq v0,8(a0) 18: c0 04 21 44 cmovne t0,t0,v0 1c: a1 15 20 40 cmpeq t0,0,t0 20: 21 d7 20 48 sll t0,0x6,t0 24: 60 06 e0 73 cttz v0,v0 28: 00 04 01 40 addq v0,t0,v0 2c: 01 80 fa 6b ret # gcc-4.5.0 -Os -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 30 a4 ldq t0,0(a0) c: 08 00 10 a4 ldq v0,8(a0) 10: c0 04 21 44 cmovne t0,t0,v0 14: a1 15 20 40 cmpeq t0,0,t0 18: 21 d7 20 48 sll t0,0x6,t0 1c: 60 06 e0 73 cttz v0,v0 20: 00 04 01 40 addq v0,t0,v0 24: 01 80 fa 6b ret # gcc-4.5.0 -O1 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 0000000000000008 <foo>: 8: 00 00 bb 27 ldah gp,0(t12) c: 00 00 bd 23 lda gp,0(gp) 10: f0 ff de 23 lda sp,-16(sp) 14: 00 00 5e b7 stq ra,0(sp) 18: 08 00 3e b5 stq s0,8(sp) 1c: 00 00 30 a4 ldq t0,0(a0) 20: 08 00 10 a6 ldq a0,8(a0) 24: a9 15 20 40 cmpeq t0,0,s0 28: 
29 d7 20 49 sll s0,0x6,s0 2c: d0 04 21 44 cmovne t0,t0,a0 30: 00 00 40 d3 bsr ra,34 <foo+0x2c> 34: 00 04 20 41 addq s0,v0,v0 38: 00 00 5e a7 ldq ra,0(sp) 3c: 08 00 3e a5 ldq s0,8(sp) 40: 10 00 de 23 lda sp,16(sp) 44: 01 80 fa 6b ret # gcc-4.5.0 -O2 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 30 a4 ldq t0,0(a0) 14: 08 00 10 a4 ldq v0,8(a0) 18: c0 04 21 44 cmovne t0,t0,v0 1c: a1 15 20 40 cmpeq t0,0,t0 20: 21 d7 20 48 sll t0,0x6,t0 24: 60 06 e0 73 cttz v0,v0 28: 00 04 01 40 addq v0,t0,v0 2c: 01 80 fa 6b ret # gcc-4.5.0 -O3 -mcpu=ev67 -c z.c && objdump -d z.o z.o: file format elf64-alpha Disassembly of section .text: 0000000000000000 <__ffs>: 0: 60 06 f0 73 cttz a0,v0 4: 01 80 fa 6b ret 8: 1f 04 ff 47 nop c: 00 00 fe 2f unop 0000000000000010 <foo>: 10: 00 00 30 a4 ldq t0,0(a0) 14: 08 00 10 a4 ldq v0,8(a0) 18: c0 04 21 44 cmovne t0,t0,v0 1c: a1 15 20 40 cmpeq t0,0,t0 20: 21 d7 20 48 sll t0,0x6,t0 24: 60 06 e0 73 cttz v0,v0 28: 00 04 01 40 addq v0,t0,v0 2c: 01 80 fa 6b ret 4.3.4 -Os: good 4.3.4 -O1: bad 4.3.4 -O2: good 4.3.4 -O3: good 4.4.3 -Os: bad 4.4.3 -O1: bad 4.4.3 -O2: bad 4.4.3 -O3: good 4.5.0 -Os: good 4.5.0 -O1: bad 4.5.0 -O2: good 4.5.0 -O3: good -- Summary: gcc produces bad code at -O1 Product: gcc Version: 4.5.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: rtl-optimization AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: mattst88 at gmail dot com GCC build triplet: alpha-unknown-linux-gnu GCC host triplet: alpha-unknown-linux-gnu GCC target triplet: alpha-unknown-linux-gnu http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44123