https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91994

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|[10 Regression] r276327     |[10 Regression] r276327
                   |miscompiled 557.xz_r in     |breaks -mvzeroupper
                   |SPEC CPU 2017               |

--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---
[hjl@gnu-skx-1 gcc]$ cat bad.c
#include <stdlib.h>
#include <immintrin.h>

__m256i x1, x2, x3;

/* Overwrite the global x1 with the global x2.  The noinline attribute asks
   the compiler to keep this as a separate out-of-line function, so callers
   reach it through a real call.  */
__attribute__ ((noinline))
static void
foo (void)
{
  x1 = x2;
}

/* Copy x1 to x3 by way of a local, calling foo () in between.  The local x
   is read before the call and written out after it, so its full 256-bit
   value is live across the call to foo ().  */
void
bar (void)
{
  __m256i x = x1;
  /* foo () overwrites x1; the local copy x must be unaffected.  */
  foo ();
  x3 = x;
}

/* Test driver: store a known 256-bit pattern in x1, run bar (), and abort
   unless x3 ends up holding the same 32 bytes.  Returns 0 on success.  */
__attribute__ ((noinline))
int
main (void)
{
  /* Every byte of x is 3.  */
  __m256i x = _mm256_set1_epi8 (3);
  x1 = x;
  bar ();
  /* bar () copies the original x1 into x3, so x3 is expected to equal x.  */
  if (__builtin_memcmp (&x3, &x, sizeof (x)))
    abort ();
  return 0;
}
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake  -O2  bad.c 
[hjl@gnu-skx-1 gcc]$ ./a.out
Aborted
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake  -O2  bad.c -S
[hjl@gnu-skx-1 gcc]$ cat bad.s
        .file   "bad.c"
        .text
        .p2align 4
        .type   foo, @function
# foo: compiled body of foo () from bad.c (x1 = x2).
# The 32-byte copy goes through %ymm0, and a vzeroupper is issued before
# returning.
foo:
.LFB5339:
        .cfi_startproc
        # x1 = x2, copied via %ymm0.
        vmovdqa x2(%rip), %ymm0
        vmovdqa %ymm0, x1(%rip)
        # vzeroupper zeroes the upper bits of every YMM register, not only
        # the %ymm0 used above; the reporter's annotation on the next line
        # marks the effect on the caller's %ymm1.
        vzeroupper <<< Clobber the upper bits of YMM1.
        ret
        .cfi_endproc
.LFE5339:
        .size   foo, .-foo
        .p2align 4
        .globl  bar
        .type   bar, @function
# bar: compiled body of bar () from bad.c.
# x1 is loaded into %ymm1 before the call to foo and stored to x3 from
# %ymm1 after it; nothing here saves or reloads %ymm1 around the call.
bar:
.LFB5340:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        # x = x1, held in %ymm1.
        vmovdqa x1(%rip), %ymm1
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        # Round %rsp down to a 32-byte boundary.
        andq    $-32, %rsp
        call    foo
        # x3 = x.  foo executes vzeroupper before returning, so the upper
        # half of %ymm1 no longer holds the value loaded above.
        vmovdqa %ymm1, x3(%rip)
        vzeroupper
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE5340:
        .size   bar, .-bar
        .section        .text.startup,"ax",@progbits
        .p2align 4
        .globl  main
        .type   main, @function
# main: compiled body of main () from bad.c.
# There is no call to bar here: the load / call foo / store-to-x3 sequence
# appears directly in this function (bar looks to have been inlined).
# As in bar, %ymm1 is held across the call to foo without being saved.
main:
.LFB5341:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        # 217020518514230019 == 0x0303030303030303: eight bytes of value 3.
        movabsq $217020518514230019, %rax
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        andq    $-32, %rsp
        subq    $32, %rsp
        # %ymm1 = the 32-byte constant at .LC0; it is also written to the
        # stack slot at (%rsp) and to x1.
        vmovdqa .LC0(%rip), %ymm1
        vmovdqa %ymm1, (%rsp)
        vmovdqa %ymm1, x1(%rip)
        call    foo
        # x3 is written from %ymm1 after foo has executed vzeroupper.
        vmovdqa %ymm1, x3(%rip)
        # 32-byte comparison against the stack copy, 16 bytes at a time:
        # XOR each 8-byte piece with its counterpart and OR the results;
        # a nonzero result means a mismatch.
        movq    x3+8(%rip), %rdx
        xorq    (%rsp), %rax
        xorq    8(%rsp), %rdx
        orq     %rax, %rdx
        jne     .L6
        # Upper 16 bytes (offsets 16..31).
        movq    x3+24(%rip), %rdx
        movq    x3+16(%rip), %rax
        xorq    24(%rsp), %rdx
        xorq    16(%rsp), %rax
        orq     %rax, %rdx
        je      .L9
.L6:
        # Mismatch path: abort ().
        vzeroupper
        call    abort
        .p2align 4,,10
        .p2align 3
.L9:
        # Match path: return 0.
        xorl    %eax, %eax
        vzeroupper
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE5341:
        .size   main, .-main
        .comm   x3,32,32
        .comm   x2,32,32
        .comm   x1,32,32
        .section        .rodata.cst32,"aM",@progbits,32
        .align 32
# .LC0: 32-byte constant loaded into %ymm1 by main.  Each quad is
# 0x0303030303030303, so every byte is 3.
.LC0:
        .quad   217020518514230019
        .quad   217020518514230019
        .quad   217020518514230019
        .quad   217020518514230019
        .ident  "GCC: (GNU) 10.0.0 20191003 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-skx-1 gcc]$

Reply via email to