GCC 4.4 generates an extra load in a loop compared with GCC 4.3:

[EMAIL PROTECTED] gcc]$ cat /tmp/b.c 
#include <tmmintrin.h>

extern __m128i src[10];
extern __m128i resdst[10];

void
foo (void)
{
  int i;

  for (i = 0; i < 10; i++)
    resdst[i] = _mm_abs_epi16 (src[i]);
}
[EMAIL PROTECTED] gcc]$ gcc -O2 -S /tmp/b.c -o old.s -mssse3 -fno-asynchronous-unwind-tables
[EMAIL PROTECTED] gcc]$ gcc --version
gcc (GCC) 4.3.0 20080428 (Red Hat 4.3.0-8)
Copyright (C) 2008 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

[EMAIL PROTECTED] gcc]$ cat old.s
        .file   "b.c"
        .text
        .p2align 4,,15
.globl foo
        .type   foo, @function
foo:
        xorl    %eax, %eax
        .p2align 4,,10
        .p2align 3
.L2:
        pabsw   src(%rax), %xmm0
        movdqa  %xmm0, resdst(%rax)
        addq    $16, %rax
        cmpq    $160, %rax
        jne     .L2
        rep
        ret
        .size   foo, .-foo
        .ident  "GCC: (GNU) 4.3.0 20080428 (Red Hat 4.3.0-8)"
        .section        .note.GNU-stack,"",@progbits
[EMAIL PROTECTED] gcc]$  ./xgcc -B./ -O2 -mssse3 -S /tmp/b.c -fno-asynchronous-unwind-tables
[EMAIL PROTECTED] gcc]$ cat b.s
        .file   "b.c"
        .text
        .p2align 4,,15
.globl foo
        .type   foo, @function
foo:
        xorl    %eax, %eax
        .p2align 4,,10
        .p2align 3
.L2:
        movdqu  src(%rax), %xmm0
        pabsw   %xmm0, %xmm0
        movdqu  %xmm0, resdst(%rax)
        addq    $16, %rax
        cmpq    $160, %rax
        jne     .L2
        rep
        ret
        .size   foo, .-foo
        .ident  "GCC: (GNU) 4.4.0 20081006 (experimental) [trunk revision 140917]"

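There are 2 problems:

1. Alignment info is lost, so unaligned moves (movdqu) are generated for both
   the load and the store; GCC 4.3 used the aligned movdqa for the store.
2. The separate load isn't needed at all: GCC 4.3 folds it into pabsw as a
   memory operand (pabsw src(%rax), %xmm0).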


-- 
           Summary: [4.4 Regression] Unnecessary load instruction in a loop
           Product: gcc
           Version: 4.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: hjl dot tools at gmail dot com


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37774

Reply via email to