https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60947
--- Comment #13 from amker at gcc dot gnu.org ---
OK, I compared the generated assembly before/after revision 206552.

BEFORE)
        @ frame_needed = 1, uses_anonymous_args = 0
        mov     ip, sp
        stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #20
        mov     r4, #0
        ldr     r8, [r0, #112]
.L1064:
        add     r0, r4, #232
        add     r0, r8, r0
        mov     r1, #255
        mov     r2, #8
        add     r4, r4, #32
        bl      memset
        cmp     r4, #96
        bne     .L1064

AFTER)
        @ frame_needed = 1, uses_anonymous_args = 0
        mov     ip, sp
        stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #20
        ldr     r8, [r0, #112]
        add     r3, r8, #232
        add     r4, r8, #328
.L1064:
        mov     r0, r3
        mov     r1, #255
        mov     r2, #8
        bl      memset
        add     r3, r0, #32     <---X
        cmp     r3, r4
        bne     .L1064

The only possibility I can see is that GCC now uses r3 as the induction variable, and r3 is a scratch (caller-saved) register according to the ARM procedure call standard (AAPCS), so its value is not preserved across the call to memset. However, according to the POSIX standard:

    #include <string.h>
    void *memset(void *s, int c, size_t n);

    RETURN VALUE: The memset() function shall return s; no return value is reserved to indicate an error.

So the r0 returned by memset and used at INSN X should set r3 back to the right value.
This can be confirmed by the RTL dumps before/after reload:

BEFORE reload)
   24: NOTE_INSN_BASIC_BLOCK 2
    2: NOTE_INSN_DELETED
    3: NOTE_INSN_FUNCTION_BEG
   26: r114:SI=[r0:SI+0x70]
      REG_DEAD r0:SI
   27: r165:SI=r114:SI+0xe8
   28: r162:SI=r114:SI+0x148
   32: L32:
   33: NOTE_INSN_BASIC_BLOCK 3
   34: r0:SI=r165:SI
  684: r1:SI=0xff
  685: r2:SI=0x8
   37: r0:SI=call [`memset'] argc:0
      REG_RETURNED r165:SI
      REG_DEAD r2:SI
      REG_DEAD r1:SI
      REG_UNUSED r0:SI
      REG_EH_REGION 0
   38: r165:SI=r165:SI+0x20
   40: cc:CC=cmp(r165:SI,r162:SI)
   41: pc={(cc:CC!=0)?L32:pc}
      REG_DEAD cc:CC
      REG_BR_PROB 6667

AFTER reload)
   24: NOTE_INSN_BASIC_BLOCK 2
    2: NOTE_INSN_DELETED
    3: NOTE_INSN_FUNCTION_BEG
   26: r8:SI=[r0:SI+0x70]
   27: r3:SI=r8:SI+0xe8
   28: r4:SI=r8:SI+0x148
   32: L32:
   33: NOTE_INSN_BASIC_BLOCK 3
   34: r0:SI=r3:SI
  684: r1:SI=0xff
  685: r2:SI=0x8
   37: r0:SI=call [`memset'] argc:0
      REG_RETURNED r3:SI
      REG_EH_REGION 0
  755: r3:SI=r0:SI        <--------restore r3 from r0
   38: r3:SI=r3:SI+0x20
   40: cc:CC=cmp(r3:SI,r4:SI)
   41: pc={(cc:CC!=0)?L32:pc}
      REG_BR_PROB 6667

So I really don't know why this program can fail.

As far as ivopts is concerned, the ivopts dumps look like this:

BEFORE patch)
  sizetype ivtmp.551;

  <bb 2>:
  runtime_9 = substream_8(D)->runtime;
  goto <bb 4>;

  <bb 3>:

  <bb 4>:
  # ivtmp.551_154 = PHI <ivtmp.551_150(3), 0(2)>
  _13 = ivtmp.551_154 + 232;
  _1 = runtime_9 + _13;
  _59 = _1;
  memset (_59, 255, 8);
  ivtmp.551_150 = ivtmp.551_154 + 32;
  if (ivtmp.551_150 != 96)
    goto <bb 3>;
  else
    goto <bb 48>;

AFTER patch)
  unsigned int ivtmp.550;

  <bb 2>:
  runtime_9 = substream_8(D)->runtime;
  _1 = runtime_9 + 232;
  ivtmp.550_13 = (unsigned int) _1;
  _143 = runtime_9 + 328;
  _142 = (unsigned int) _143;
  goto <bb 4>;

  <bb 3>:

  <bb 4>:
  # ivtmp.550_154 = PHI <ivtmp.550_150(3), ivtmp.550_13(2)>
  _59 = (struct snd_mask *) ivtmp.550_154;
  memset (_59, 255, 8);
  ivtmp.550_150 = ivtmp.550_154 + 32;
  if (ivtmp.550_150 != _142)
    goto <bb 3>;
  else
    goto <bb 48>;

The transformation looks fine to me, right?