Compiling the following function with -O3 gives the following assembly output:
void spin(int volatile* ptr) { while(*ptr); return; }

spin:
.LLFB1:
        .register       %g2, #scratch
        lduw    [%o0], %g1      ! 8     *zero_extendsidi2_insn_sp64/2   [length = 1]
        cmp     %g1, 0          ! 9     *cmpsi_insn     [length = 1]
        be,pn   %icc, .LL3      ! 10    *normal_branch  [length = 1]
         mov    0, %g1          ! 17    *movdi_insn_sp64/1      [length = 1]
.LL6:
        lduw    [%o0], %g2      ! 20    *zero_extendsidi2_insn_sp64/2   [length = 1]
        cmp     %g2, 0          ! 22    *cmpsi_insn     [length = 1]
        bne,pt  %icc, .LL6      ! 23    *normal_branch  [length = 1]
         add    %g1, 1, %g1     ! 19    *adddi3_sp64/1  [length = 1]
.LL3:
        jmp     %o7+8           ! 55    *return_internal        [length = 1]
         mov    %g1, %o0        ! 30    *movdi_insn_sp64/1      [length = 1]

Manually replacing the cmp/b* pairs with br* instructions gives 10-11% more iterations/sec on my machine:

        .global spin_brz
spin_brz:
        .register       %g2, #scratch
        ld      [%o0], %g1
        brz,pn  %g1, spin_brz_done
         clr    %g1
spin_brz_again:
        ld      [%o0], %g2
        brnz,pt %g2, spin_brz_again
         add    %g1, 0x1, %g1
spin_brz_done:
        retl
         mov    %g1, %o0
        .size   spin_brz, .- spin_brz

--
           Summary: gcc should use brz(brnz) instead of cmp/be(bne) when possible
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: scovich at gmail dot com
GCC target triplet: sparc-sun-solaris2.10

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40067