arm-size-branch_cost.patch reduces BRANCH_COST for Thumb-2 at -Os to 1. With the lower branch cost, expand uses branches, rather than a branchless sequence, when expanding code like '(a == b || c == d)'.
The impact of arm-size-branch_cost.patch on the example from the bug report, for ARM Thumb-2 at -Os, is a size reduction of 15%, from 68 to 58 bytes. The reduction is illustrated by this side-by-side diff of the assembly (left: without the patch, 68 bytes; right: with the patch, 58 bytes):

...
        push    {r3, r4, r5, r6, r7,            push    {r3, r4, r5, r6, r7,
        mov     r7, r1                          mov     r7, r1
        mov     r6, r2                          mov     r6, r2
        movs    r1, #0                          movs    r1, #0
        movs    r2, #1                          movs    r2, #1
        mov     r5, r0                          mov     r5, r0
        bl      lseek                           bl      lseek
        movs    r2, #2                    <
        movs    r1, #0                          movs    r1, #0
                                          >     movs    r2, #2
        mov     r4, r0                          mov     r4, r0
        mov     r0, r5                          mov     r0, r5
        bl      lseek                           bl      lseek
        sub     r2, r4, #-1               |     adds    r2, r4, #1
        rsbs    r3, r2, #0                |     beq     .L3
        adc     r3, r3, r2                |     adds    r3, r0, #1
        cmp     r0, #-1                   |     beq     .L2
        it      eq                        <
        orreq   r3, r3, #1                <
        cbnz    r3, .L3                   <
        subs    r0, r0, r4                      subs    r0, r0, r4
        beq     .L4                       |     beq     .L5
        str     r4, [r7, #0]                    str     r4, [r7, #0]
        str     r0, [r6, #0]                    str     r0, [r6, #0]
        mov     r0, r3                    |     movs    r0, #0
        pop     {r3, r4, r5, r6, r7,            pop     {r3, r4, r5, r6, r7,
.L3:                                      .L3:
        mov     r0, #-1                   |     mov     r0, r4
        pop     {r3, r4, r5, r6, r7,            pop     {r3, r4, r5, r6, r7,
.L4:                                      | .L5:
        mov     r0, #-1                         mov     r0, #-1
                                          > .L2:
        pop     {r3, r4, r5, r6, r7,            pop     {r3, r4, r5, r6, r7,
...

(The push/pop register lists are cut off by the column width of the side-by-side diff.)

Thanks,
- Tom
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h (revision 293961)
+++ gcc/config/arm/arm.h (revision 293962)
@@ -2201,7 +2201,8 @@ typedef struct
 /* Try to generate sequences that don't involve branches, we can then use
    conditional instructions */
 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
+                : (optimize > 0 ? 2 : 0))
 
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and