https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118360
Bug ID: 118360
Summary: [avr] Expensive shift instead of bit test
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: gjl at gcc dot gnu.org
Target Milestone: ---
$ avr-gcc-15 -Os -mmcu=atmega8 -S -dp
long fun1 (int a, long b)
{
if (a & 1)
b ^= 8;
return b;
}
compiles to:
fun1:
push r16 ; 39 [c=4 l=1] pushqi1/0
push r17 ; 40 [c=4 l=1] pushqi1/0
/* prologue: function */
/* frame size = 0 */
/* stack size = 2 */
.L__stack_usage = 2
andi r24,lo8(1) ; 48 [c=4 l=1] *andqi3/1
ldi r25,0 ; 49 [c=4 l=1] movqi_insn/0
ldi r26,0 ; 50 [c=4 l=1] movqi_insn/0
ldi r27,0 ; 51 [c=4 l=1] movqi_insn/0
ldi r18,3 ; 47 [c=28 l=7] *ashlsi3_const/3
1:
lsl r24
rol r25
rol r26
rol r27
dec r18
brne 1b
movw r16,r24 ; 60 [c=4 l=1] *movhi/0
movw r18,r26 ; 61 [c=4 l=1] *movhi/0
eor r16,r20 ; 34 [c=4 l=1] *xorqi3
eor r17,r21 ; 35 [c=4 l=1] *xorqi3
eor r18,r22 ; 36 [c=4 l=1] *xorqi3
eor r19,r23 ; 37 [c=4 l=1] *xorqi3
movw r22,r16 ; 62 [c=4 l=1] *movhi/0
movw r24,r18 ; 63 [c=4 l=1] *movhi/0
/* epilogue start */
pop r17 ; 43 [c=4 l=1] popqi
pop r16 ; 44 [c=4 l=1] popqi
ret ; 45 [c=0 l=1] return_from_epilogue
whereas the very similar function (which tests the inverted bit)
long fun_not1 (int a, long b)
{
if (!(a & 1))
b ^= 8;
return b;
}
gives
fun_not1:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
movw r18,r24 ; 52 [c=4 l=1] *movhi/0
mov r24,r22 ; 39 [c=4 l=1] movqi_insn/0
mov r22,r20 ; 40 [c=4 l=1] movqi_insn/0
mov r25,r23 ; 41 [c=4 l=1] movqi_insn/0
mov r23,r21 ; 42 [c=4 l=1] movqi_insn/0
sbrc r18,0 ; 43 [c=4 l=2] *sbrx_branchhi
rjmp .L3
ldi r18,lo8(8) ; 50 [c=4 l=1] movqi_insn/1
eor r22,r18 ; 51 [c=4 l=1] *xorqi3
.L3:
/* epilogue start */
ret ; 46 [c=0 l=1] return
Notice that AVR is an 8-bit architecture and doesn't have a barrel shifter,
hence 32-bit shifts are very expensive.
* Some of the middle-end RTL lowering or tree optimizers don't even consider
costs.
* Even when costs are considered, they may be a bad proxy for the real costs.
One example is when arithmetic is performed by a library call that appears to
be cheap with -Os ("only" one CALL), but a libcall is still unwanted and too
expensive when a simple bit-test can do.