Package: gcc
Version: 4:4.9.2-2

I am observing what appears to be gcc miscompiling a memcpy call. In
the following transcript, note that the memcpy call should be writing
to the memory range [0x9ca17df, 0x9ca17e7), and yet the bytes in range
[0x9ca17e7, 0x9ca17e9) also seem to get modified.

-----

$ uname -a
Linux packer-debian-8-amd64 3.16.0-4-amd64 #1 SMP Debian
3.16.7-ckt25-1 (2016-03-06) x86_64 GNU/Linux

$ cat memcpytestcase.c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal reproduction: built with `gcc -m32 -O2 -march=i686`, the
 * compiler's inlined memcpy expansion writes past the end of the
 * 8-byte destination range and clobbers mcode[0]/mcode[1]. */
int main() {
  uint8_t *mem = malloc(4096);  /* NOTE(review): <stdlib.h> is not included in the
                                   original transcript, so malloc is implicitly
                                   declared -- UB since C99; add the include. */
  uint8_t *nrenames = (uint8_t*)mem, *mcp, *mcode;
  uint32_t renames[4] = {0, 0, 0x1234, 0};  /* legit copy covers renames[0..1];
                                               the overrun spills renames[2] bytes */
  mcode = mcp = mem + 2015;      /* odd offset => misaligned destination, which
                                    triggers the alignment prologue in the asm */
  mcode[0] = mcode[1] = 0;       /* the two bytes that must NOT be modified */
  *nrenames = 2;
  printf("Before %p: %02x %02x\n", mcode, mcode[0], mcode[1]);
  if (*nrenames) {
    unsigned sz = *nrenames * 4;  /* sz == 8: exactly at the 8-byte inline-copy threshold */
    mcp -= sz;                    /* destination range is [mcode - 8, mcode) */
    printf("memcpy(%p, %p, %u);\n", mcp, renames, sz);
    memcpy(mcp, renames, sz);     /* miscompiled expansion also writes mcode[0..1] */
  }
  printf("After  %p: %02x %02x\n", mcode, mcode[0], mcode[1]);  /* prints "34 12" when the bug fires */
  return 0;
}

$ gcc -m32 -O2 -march=i686 memcpytestcase.c && ./a.out
Before 0x9ca17e7: 00 00
memcpy(0x9ca17df, 0xffec6a20, 8);
After  0x9ca17e7: 34 12

-----

For reference, the asm output (via the -S flag to gcc) appears to be
the following. If there are at least 8 bytes to copy (in this case,
there are exactly 8 bytes to copy), then the code at label .L33 comes
into play. Said code aligns the destination pointer (%edx) via the
byte/word copies at .L36 and .L37, and then copies 8 bytes at a time
via the loop at .L6. That copy loop always executes at least one
iteration, which overruns the destination whenever there were
initially exactly 8 bytes to copy and the alignment prologue has
already consumed some of those 8.

-----

...
        call    printf // The "Before" printf
        movzbl  (%ebx), %eax
        testb   %al, %al
        jne     .L32
.L2:
        movzbl  2016(%ebx), %eax
        movl    %esi, 4(%esp)
        movl    $.LC2, (%esp)
        movl    %eax, 12(%esp)
        movzbl  2015(%ebx), %eax
        movl    %eax, 8(%esp)
        call    printf // The "After" printf
...
.L32:
        .cfi_restore_state
        leal    0(,%eax,4), %ecx
        movl    %esi, %edx
        subl    %ecx, %edx
        leal    32(%esp), %edi
        movl    %ecx, 12(%esp)
        movl    %edx, 4(%esp)
        movl    %edi, 8(%esp)
        movl    $.LC1, (%esp)
        movl    %ecx, 28(%esp)
        movl    %edx, 24(%esp)
        call    printf
        movl    28(%esp), %ecx
        movl    24(%esp), %edx
        movl    %ecx, %eax
        movl    %edi, %ecx
        cmpl    $8, %eax
        jnb     .L33
.L3:
        xorl    %edi, %edi
        testb   $4, 28(%esp)
        jne     .L34
.L8:
        testb   $2, 28(%esp)
        jne     .L35
.L9:
        testb   $1, 28(%esp)
        je      .L2
        movzbl  (%ecx,%edi), %eax
        movb    %al, (%edx,%edi)
        jmp     .L2
.L35:
        movzwl  (%ecx,%edi), %eax
        movw    %ax, (%edx,%edi)
        addl    $2, %edi
        jmp     .L9
.L34:
        movl    (%ecx), %edi
        movl    %edi, (%edx)
        movl    $4, %edi
        jmp     .L8
.L33:
        testb   $1, %dl
        jne     .L36
.L4:
        testb   $2, %dl
        jne     .L37
.L5:
        movl    28(%esp), %edi
        andl    $-8, %edi
        movl    %edi, 24(%esp)
        xorl    %edi, %edi
.L6:
        movl    (%ecx,%edi), %eax
        movl    %eax, (%edx,%edi)
        movl    4(%ecx,%edi), %eax
        movl    %eax, 4(%edx,%edi)
        addl    $8, %edi
        cmpl    24(%esp), %edi
        jb      .L6
        addl    %edi, %edx
        addl    %edi, %ecx
        jmp     .L3
.L37:
        movzwl  (%ecx), %edi
        addl    $2, %edx
        addl    $2, %ecx
        movw    %di, -2(%edx)
        subl    $2, 28(%esp)
        jmp     .L5
.L36:
        movzbl  (%edi), %ecx
        incl    %edx
        movb    %cl, -1(%edx)
        leal    33(%esp), %ecx
        decl    28(%esp)
        jmp     .L4

-----

Reply via email to