Compiling the function below with -O3 produces the assembly output shown after it:

/*
 * Busy-wait until the int that ptr points at reads as zero.
 *
 * ptr is volatile-qualified, so every evaluation of the loop condition
 * performs a fresh read of *ptr.  Nothing is returned.
 */
void spin(int volatile* ptr) {
    while (*ptr != 0) {
        /* empty body: all the work is the re-read in the condition */
    }
}

!-----------------------------------------------------------------------
! spin -- compiler-generated listing (see the -O3 note in the text).
! The trailing "! N  *pattern  [length = L]" annotations are the
! compiler's own per-instruction notes and are kept verbatim; two of
! them had been split across lines and are rejoined here so that every
! line is again either an instruction or a comment.
!
! In:    %o0 = ptr
! Out:   %o0 = value of the counter kept in %g1 (see below)
! Uses:  %g1 (counter), %g2 (loaded value, declared #scratch), %icc
!
! Shape: one load/compare ahead of the loop; if that first value is
! zero, control goes straight to .LL3.  Otherwise .LL6 reloads and
! compares until a zero is seen.  Neither branch is annulled, so each
! delay-slot instruction executes whether or not the branch is taken:
! %g1 is zeroed once before the loop and incremented once per pass
! through .LL6.
!
! NOTE(review): the C source quoted with this listing returns void,
! yet the listing maintains a counter and moves it to %o0 before
! returning -- presumably it was generated from a counting variant of
! the function; confirm with the reporter.
!-----------------------------------------------------------------------
spin:
.LLFB1:
        .register       %g2, #scratch
        lduw    [%o0], %g1      ! 8     *zero_extendsidi2_insn_sp64/2   [length = 1]
        cmp     %g1, 0  ! 9     *cmpsi_insn     [length = 1]
        be,pn   %icc, .LL3      ! 10    *normal_branch  [length = 1]
         mov    0, %g1  ! 17    *movdi_insn_sp64/1      [length = 1]
.LL6:
        lduw    [%o0], %g2      ! 20    *zero_extendsidi2_insn_sp64/2   [length = 1]
        cmp     %g2, 0  ! 22    *cmpsi_insn     [length = 1]
        bne,pt  %icc, .LL6      ! 23    *normal_branch  [length = 1]
         add    %g1, 1, %g1     ! 19    *adddi3_sp64/1  [length = 1]
.LL3:
        jmp     %o7+8   ! 55    *return_internal        [length = 1]
         mov    %g1, %o0        ! 30    *movdi_insn_sp64/1      [length = 1]

Manually replacing the cmp/b* pairs with br* instructions gives 10-11% more
iterations/sec on my machine:

        .section  ".text"
        .align    4
        .global   spin_brz
        .type     spin_brz, #function
!-----------------------------------------------------------------------
! spin_brz -- hand-written variant of spin that tests the loaded value
! with branch-on-register instructions (brz / brnz) instead of a
! cmp followed by a conditional branch on %icc.
!
! In:    %o0 = ptr (address of the int being polled)
! Out:   %o0 = number of reads of *ptr made inside the loop; the read
!              ahead of the loop is not counted, so the result is 0
!              when that first read already sees zero
! Uses:  %g1 (counter), %g2 (loaded value, declared #scratch)
! Leaf routine: no save/restore; returns with retl.
!
! Neither branch is annulled, so the instruction in each delay slot
! (indented by one extra space) executes whether or not the branch is
! taken.
!-----------------------------------------------------------------------
spin_brz:
        .register %g2, #scratch
        ld        [%o0], %g1              ! read *ptr once ahead of the loop
        brz,pn    %g1, spin_brz_done      ! already zero: skip the loop
         clr      %g1                     ! delay slot: counter = 0
spin_brz_again:
        ld        [%o0], %g2              ! re-read *ptr
        brnz,pt   %g2, spin_brz_again     ! still non-zero: poll again
         add      %g1, 0x1, %g1           ! delay slot: counter += 1
spin_brz_done:
        retl
         mov      %g1, %o0                ! delay slot: return the counter
        .size   spin_brz, .- spin_brz


-- 
           Summary: gcc should use brz(brnz) instead of cmp/be(bne) when
                    possible
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: scovich at gmail dot com
GCC target triplet: sparc-sun-solaris2.10


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40067

Reply via email to