------- Comment #9 from vda dot linux at googlemail dot com 2006-07-16 18:47 -------
The test program below shows that, in this case, doing the division with a div insn takes more instructions than doing it with mul+shift.
Also, the mul+shift path has an absolutely useless "movl %edx, %eax" insn; shaving that off will make it even smaller. Need to build a newer gcc and retest...

# cat t.c
enum { B = 10 };
enum { shift_bits = 35 };
enum { K = (1ULL<<shift_bits)/B + 1 };
unsigned a,b;
void f(unsigned A) {
    asm("#1");
    a = A/B;
    asm("#2");
    b = (((unsigned long long)A) * K) >> shift_bits;
    asm("#3");
}
# gcc -Os -fomit-frame-pointer -S t.c
# cat t.s
        .file   "t.c"
        .text
.globl f
        .type   f, @function
f:
        pushl   %ebx
#APP
        #1
#NO_APP
        movl    $10, %edx
        movl    %edx, %ebx
        movl    8(%esp), %eax
        xorl    %edx, %edx
        divl    %ebx
        movl    %eax, a
#APP
        #2
#NO_APP
        movl    $-858993459, %eax
        mull    8(%esp)
        movl    %edx, %eax
        shrl    $3, %eax
        movl    %eax, b
#APP
        #3
#NO_APP
        popl    %ebx
        ret
        .size   f, .-f
        .comm   a,4,4
        .comm   b,4,4
        .section        .note.GNU-stack,"",@progbits
        .ident  "GCC: (GNU) 3.4.3"

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28395