Package: gcc
Version: 4:4.9.2-2

I am observing what appears to be gcc miscompiling a memcpy call. In the following transcript, note that the memcpy call should write only to the memory range [0x9ca17df, 0x9ca17e7), and yet the bytes in range [0x9ca17e7, 0x9ca17e9) also seem to get modified.
----- $ uname -a Linux packer-debian-8-amd64 3.16.0-4-amd64 #1 SMP Debian 3.16.7-ckt25-1 (2016-03-06) x86_64 GNU/Linux $ cat memcpytestcase.c #include <stdint.h> #include <stdio.h> #include <string.h> int main() { uint8_t *mem = malloc(4096); uint8_t *nrenames = (uint8_t*)mem, *mcp, *mcode; uint32_t renames[4] = {0, 0, 0x1234, 0}; mcode = mcp = mem + 2015; mcode[0] = mcode[1] = 0; *nrenames = 2; printf("Before %p: %02x %02x\n", mcode, mcode[0], mcode[1]); if (*nrenames) { unsigned sz = *nrenames * 4; mcp -= sz; printf("memcpy(%p, %p, %u);\n", mcp, renames, sz); memcpy(mcp, renames, sz); } printf("After %p: %02x %02x\n", mcode, mcode[0], mcode[1]); return 0; } $ gcc -m32 -O2 -march=i686 memcpytestcase.c && ./a.out Before 0x9ca17e7: 00 00 memcpy(0x9ca17df, 0xffec6a20, 8); After 0x9ca17e7: 34 12 ----- For reference, the asm output (via the -S flag to gcc) appears to be the following. If there are at least 8 bytes to copy (in this case, there are exactly 8 bytes to copy), then the code at label .L33 comes into play. Said code aligns the destination pointer (%edx) via .L36 and .L37, and then copies 8 bytes at a time via the loop at .L6. The copy loop always executes for at least one iteration, which leads to a problem if there were initially exactly 8 bytes to copy and the alignment logic already handled some of those 8: the loop still copies a full 8 bytes although fewer than 8 remain, and so writes past the end of the destination range. ----- ... call printf // The "Before" printf movzbl (%ebx), %eax testb %al, %al jne .L32 .L2: movzbl 2016(%ebx), %eax movl %esi, 4(%esp) movl $.LC2, (%esp) movl %eax, 12(%esp) movzbl 2015(%ebx), %eax movl %eax, 8(%esp) call printf // The "After" printf ... 
.L32: .cfi_restore_state leal 0(,%eax,4), %ecx movl %esi, %edx subl %ecx, %edx leal 32(%esp), %edi movl %ecx, 12(%esp) movl %edx, 4(%esp) movl %edi, 8(%esp) movl $.LC1, (%esp) movl %ecx, 28(%esp) movl %edx, 24(%esp) call printf movl 28(%esp), %ecx movl 24(%esp), %edx movl %ecx, %eax movl %edi, %ecx cmpl $8, %eax jnb .L33 .L3: xorl %edi, %edi testb $4, 28(%esp) jne .L34 .L8: testb $2, 28(%esp) jne .L35 .L9: testb $1, 28(%esp) je .L2 movzbl (%ecx,%edi), %eax movb %al, (%edx,%edi) jmp .L2 .L35: movzwl (%ecx,%edi), %eax movw %ax, (%edx,%edi) addl $2, %edi jmp .L9 .L34: movl (%ecx), %edi movl %edi, (%edx) movl $4, %edi jmp .L8 .L33: testb $1, %dl jne .L36 .L4: testb $2, %dl jne .L37 .L5: movl 28(%esp), %edi andl $-8, %edi movl %edi, 24(%esp) xorl %edi, %edi .L6: movl (%ecx,%edi), %eax movl %eax, (%edx,%edi) movl 4(%ecx,%edi), %eax movl %eax, 4(%edx,%edi) addl $8, %edi cmpl 24(%esp), %edi jb .L6 addl %edi, %edx addl %edi, %ecx jmp .L3 .L37: movzwl (%ecx), %edi addl $2, %edx addl $2, %ecx movw %di, -2(%edx) subl $2, 28(%esp) jmp .L5 .L36: movzbl (%edi), %ecx incl %edx movb %cl, -1(%edx) leal 33(%esp), %ecx decl 28(%esp) jmp .L4 -----