https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109907
--- Comment #21 from Georg-Johann Lay <gjl at gcc dot gnu.org> --- One more test: unsigned char lfsr32_mpp_ge0 (unsigned long number) { unsigned char b = 0; if (number >= 0) b--; if (number & (1UL << 29)) b++; if (number & (1UL << 13)) b++; return b; } With -Os -mmcu=atmega128 -dp it generates: lfsr32_ppp_ge0: push r16 ; 85 [c=4 l=1] pushqi1/0 push r17 ; 86 [c=4 l=1] pushqi1/0 /* prologue: function */ /* frame size = 0 */ /* stack size = 2 */ movw r16,r22 ; 93 [c=4 l=1] *movhi/0 movw r18,r24 ; 94 [c=4 l=1] *movhi/0 movw r22,r18 ; 82 [c=4 l=2] *movsi/0 movw r20,r16 clr r20 ; 83 [c=16 l=4] *andsi3/1 clr r21 clr r22 andi r23,32 ldi r24,lo8(1) ; 68 [c=4 l=1] movqi_insn/1 sbrc r17,5 ; 84 [c=28 l=2] *sbrx_branchsi rjmp .L18 or r20,r21 ; 75 [c=16 l=3] *cmpsi/0 or r20,r22 or r20,r23 breq .L19 ; 77 [c=12 l=1] *branch ldi r24,0 ; 73 [c=4 l=1] movqi_insn/0 .L19: neg r24 ; 72 [c=4 l=1] *negqi2 .L17: /* epilogue start */ pop r17 ; 89 [c=4 l=1] popqi pop r16 ; 90 [c=4 l=1] popqi ret ; 91 [c=0 l=1] return_from_epilogue .L18: or r20,r21 ; 69 [c=16 l=3] *cmpsi/0 or r20,r22 or r20,r23 brne .L17 ; 71 [c=12 l=1] *branch ldi r24,0 ; 67 [c=4 l=1] movqi_insn/0 rjmp .L17 ; 95 [c=4 l=1] jump So it does arithmetic on 32-bit variables (one AND and two COMPAREs) in 28 instructions, uses more stack, and causes high register pressure. Optimal code would require just 8 instructions and an additional register pressure of just 1 byte for the output: lfsr32_mpp_ge0: /* prologue: function */ /* frame size = 0 */ /* stack size = 0 */ clr R26 ;; b lives in R26, number lives in R25:R22. sbrs R25, 7 ;; Skip next if number.31 = 1 dec R26 sbrc R25, 5 ;; Skip next if number.29 = 0 inc R26 sbrc R23, 5 ;; Skip next if number.13 = 0 inc R26 mov R24, r26 /* epilogue start */ ret