http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55769
Bug #: 55769
Summary: unnecessary spill/reload to compose register pair
Classification: Unclassified
Product: gcc
Version: 4.8.0
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: target
AssignedTo: [email protected]
ReportedBy: [email protected]
Target: arm-linux-gnueabi
Created attachment 29018
--> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29018
testcase
Compile the attached source code with options: -march=armv7-a -mthumb -O2
Trunk gcc generates:
@ sum_ror_mem -- compiler output quoted as evidence for this report.
@ In:  r0 = initial value (becomes the low word of the accumulator r5:r4,
@           high word starts at 0)
@      r1 = start address of the data; r1 + r2 is the end bound of the loop
@ Out: r0 = low word + high word of the final accumulator
@ Each loop iteration walks 64 bytes as eight 64-bit words and, for every
@ word, computes acc = rotate_right_64(acc + word, 8).
sum_ror_mem:
@ args = 0, pretend = 0, frame = 40
@ frame_needed = 0, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
@ r8 = end bound; the loop is skipped entirely when r1 >= r8 (unsigned).
add r8, r1, r2
cmp r1, r8
@ Stack area for the spill slots [sp] .. [sp, #36] used inside the loop.
sub sp, sp, #44
@ Accumulator r5:r4 = zero-extended r0.
mov r4, r0
mov r5, #0
bcs .L2
@ r9 = running data pointer.
mov r9, r1
.L3:
@ Call prefetch with r0 = current pointer + 1024, after advancing r9 by 64;
@ the words of this iteration are therefore addressed at [r9, #-64 .. #-8].
add r0, r9, #1024
add r9, r9, #64
bl prefetch
@ Word 0: r3:r2 = word + accumulator (r5:r4).
ldrd r2, [r9, #-64]
adds r2, r2, r4
adc r3, r3, r5
@ Rotate r3:r2 right by 8: the low result word lands in r1, the high in r3.
@ Both halves are stored to [sp] / [sp, #4] (A, B) and read back as the
@ pair r3:r2 by the ldrd marked C -- the spill/reload this report is about.
lsrs r1, r2, #8
orr r1, r1, r3, lsl #24
lsrs r3, r3, #8
str r1, [sp] // A
orr r3, r3, r2, lsl #24
str r3, [sp, #4] // B
@ Word 1: load into r1:r0, reload the rotated value, add, rotate again.
ldrd r0, [r9, #-56]
ldrd r2, [sp] // C
adds r2, r2, r0
adc r3, r3, r1
lsrs r1, r2, #8
orr r1, r1, r3, lsl #24
lsrs r3, r3, #8
@ Same spill/reload pattern, slots [sp, #8] / [sp, #12].
str r1, [sp, #8]
orr r3, r3, r2, lsl #24
str r3, [sp, #12]
@ Word 2.
ldrd r0, [r9, #-48]
ldrd r2, [sp, #8]
adds r2, r2, r0
adc r3, r3, r1
lsrs r1, r2, #8
orr r1, r1, r3, lsl #24
lsrs r3, r3, #8
@ Same spill/reload pattern, slots [sp, #16] / [sp, #20].
str r1, [sp, #16]
orr r3, r3, r2, lsl #24
str r3, [sp, #20]
@ Word 3.
ldrd r0, [r9, #-40]
ldrd r2, [sp, #16]
adds r2, r2, r0
adc r3, r3, r1
lsrs r1, r2, #8
orr r1, r1, r3, lsl #24
lsrs r3, r3, #8
@ Same spill/reload pattern, slots [sp, #24] / [sp, #28].
str r1, [sp, #24]
orr r3, r3, r2, lsl #24
str r3, [sp, #28]
@ Word 4: here the rotated result is written straight to fp:r10, so this
@ stage needs no stack round trip.
ldrd r0, [r9, #-32]
ldrd r2, [sp, #24]
adds r2, r2, r0
adc r3, r3, r1
lsrs r1, r2, #8
orr r10, r1, r3, lsl #24
lsrs r3, r3, #8
orr fp, r3, r2, lsl #24
@ Word 5: added to fp:r10 directly from registers.
ldrd r2, [r9, #-24]
adds r2, r2, r10
adc r3, r3, fp
lsrs r1, r2, #8
orr r1, r1, r3, lsl #24
lsrs r3, r3, #8
@ Spill/reload pattern again, slots [sp, #32] / [sp, #36].
str r1, [sp, #32]
orr r3, r3, r2, lsl #24
str r3, [sp, #36]
@ Word 6.
ldrd r0, [r9, #-16]
ldrd r2, [sp, #32]
adds r2, r2, r0
adc r3, r3, r1
@ Rotate into r7:r6 (no spill), interleaved with loading word 7 into r1:r0
@ and adding it; the orr between adds and adc does not set flags, so the
@ carry from adds still reaches adc.
lsr ip, r2, #8
ldrd r0, [r9, #-8]
orr r6, ip, r3, lsl #24
lsrs r3, r3, #8
adds r0, r0, r6
orr r7, r3, r2, lsl #24
adc r1, r1, r7
@ Loop test is issued early; the lsr/orr below are the non-flag-setting
@ forms, so bhi still sees the result of this cmp.
cmp r8, r9
@ Final rotate of the iteration: new accumulator r5:r4 from r1:r0.
lsr r2, r0, #8
lsr r3, r1, #8
orr r4, r2, r1, lsl #24
orr r5, r3, r0, lsl #24
@ Continue while end bound > pointer (unsigned).
bhi .L3
.L2:
@ Return value: sum of the two accumulator halves.
adds r0, r5, r4
add sp, sp, #44
@ sp needed
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
Note that instructions A and B spill two values onto the stack, and instruction C reads
them back to form a 64-bit register pair. If we swap the register usage of r1
and r2, then we can avoid these 3 instructions. There are also many similar
patterns in the following instructions that can be avoided.