------- Comment #8 from froydnj at gcc dot gnu dot org  2010-01-25 21:10 -------
First, something has gotten better; an arm-eabi gcc (-O2 -std=c99 -mcpu=arm9
-funroll-loops) from 20091209 gives:

Unroll:
        @ Function supports interworking.
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        @
        @ Reader annotation (not compiler output):
        @   Multiplies, in place, each 32-bit word in the 256 bytes starting
        @   at r1 by r0.  One pass of the loop handles 8 words (32 bytes):
        @   every word is loaded, multiplied by r0, and stored back at the
        @   offset it was loaded from.
        @   r0 = multiplier (never written), r1 = current base address,
        @   r8 = end address, r7 = temporary copy of r1 / last loaded word,
        @   r2-r6 and ip = temporaries.
        stmfd   sp!, {r4, r5, r6, r7, r8}        @ push r4-r8; all are overwritten below
        add     r8, r1, #256        @ r8 = end address (start + 256 bytes)
.L2:
        ldr     ip, [r1, #0]        @ word 0
        mov     r7, r1        @ copy of the base; the "spurious move to r7" discussed in the text
        mul     r2, ip, r0
        str     r2, [r7], #4        @ post-indexed: store word 0 at [r7], then r7 = r1 + 4
        ldr     r3, [r1, #4]        @ word 1
        ldr     r5, [r7, #4]        @ word 2 (r7 + 4 == r1 + 8)
        mul     r6, r3, r0
        str     r6, [r7, #0]        @ store word 1 (r7 == r1 + 4)
        ldr     r4, [r1, #12]        @ word 3
        ldr     ip, [r1, #16]        @ word 4
        add     r2, r1, #20        @ r2 = address of word 5, base for the ldmia
        ldmia   r2, {r2, r3, r7}        @ phole ldm
        @ After the ldmia: r2 = word 5, r3 = word 6, r7 = word 7
        @ (r2 is both the base and a destination; there is no writeback).
        mul     r6, r5, r0        @ word 2 * r0
        mul     r5, r4, r0        @ word 3 * r0
        mul     r4, ip, r0        @ word 4 * r0
        mul     ip, r2, r0        @ word 5 * r0
        mul     r2, r3, r0        @ word 6 * r0
        mul     r3, r7, r0        @ word 7 * r0
        str     r6, [r1, #8]
        str     r5, [r1, #12]
        str     r4, [r1, #16]
        str     ip, [r1, #20]
        str     r2, [r1, #24]
        add     r1, r1, #32        @ advance the base by the 8 words just processed
        cmp     r1, r8        @ reached the end address?
        str     r3, [r1, #-4]        @ store word 7 (new r1 - 4 == old r1 + 28); str leaves the flags alone
        bne     .L2        @ branches on the cmp above
        ldmfd   sp!, {r4, r5, r6, r7, r8}        @ pop the registers pushed on entry
        bx      lr
        .size   Unroll, .-Unroll
        .ident  "GCC: (GNU) 4.5.0 20091209 (experimental)"

which, if not close to ManualUnroll from the first comment, is much better than
the initial example.

Second, the problem Daniel mentioned concerning auto-inc/dec not doing the
right thing is caused by the cleverness of
loop-unroll.c:analyze_iv_to_split_insn, which breaks the code shape that
auto-inc/dec needs.  (You can see its effects in the assembly above: the
spurious move to r7 at the top of the loop.)  Even if you disable that bit of
RTL loop unrolling, you also need to disable the web pass, which otherwise
breaks the code shape for auto-inc/dec as well and introduces spurious moves
into the RTL.  Once you do both, you get:

Unroll:
        @ Function supports interworking.
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        @
        @ Reader annotation (not compiler output):
        @   Multiplies, in place, each 32-bit word in the 256 bytes starting
        @   at r1 by r0.  The loop body is eight identical
        @   load / multiply / post-indexed-store triples, so one pass handles
        @   8 words and r1 itself advances by 4 after every store.
        @   r0 = multiplier (never written), r1 = current word address,
        @   ip = end address, r2 = loaded word, r3 = product.
        @   Nothing is pushed or popped in this version.
        add     ip, r1, #256        @ ip = end address (start + 256 bytes)
.L2:
        ldr     r2, [r1, #0]        @ copy 1 of 8: load the current word
        mul     r3, r2, r0
        str     r3, [r1], #4        @ post-indexed: store the product at [r1], then r1 += 4
        ldr     r2, [r1, #0]        @ copy 2 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 3 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 4 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 5 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 6 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 7 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        ldr     r2, [r1, #0]        @ copy 8 of 8
        mul     r3, r2, r0
        str     r3, [r1], #4
        cmp     r1, ip        @ r1 has advanced 32 bytes this pass; reached the end address?
        bne     .L2
        bx      lr


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36712

Reply via email to