https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91994
H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|[10 Regression] r276327     |[10 Regression] r276327
                   |miscompiled 557.xz_r in     |breaks -mvzeroupper
                   |SPEC CPU 2017               |

--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---
[hjl@gnu-skx-1 gcc]$ cat bad.c
#include <stdlib.h>
#include <immintrin.h>

__m256i x1, x2, x3;

__attribute__ ((noinline))
static void
foo (void)
{
  x1 = x2;
}

void
bar (void)
{
  __m256i x = x1;
  foo ();
  x3 = x;
}

__attribute__ ((noinline))
int
main (void)
{
  __m256i x = _mm256_set1_epi8 (3);
  x1 = x;
  bar ();
  if (__builtin_memcmp (&x3, &x, sizeof (x)))
    abort ();
  return 0;
}
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake -O2 bad.c
./a[hjl@gnu-skx-1 gcc]$ ./a.out
Aborted
[hjl@gnu-skx-1 gcc]$ ./xgcc -B./ -march=skylake -O2 bad.c -S
[hjl@gnu-skx-1 gcc]$ cat bad.s
        .file "bad.c"
        .text
        .p2align 4
        .type foo, @function
foo:
.LFB5339:
        .cfi_startproc
        vmovdqa x2(%rip), %ymm0
        vmovdqa %ymm0, x1(%rip)
        vzeroupper      <<< Clobber the upper bits of YMM1.
        ret
        .cfi_endproc
.LFE5339:
        .size foo, .-foo
        .p2align 4
        .globl bar
        .type bar, @function
bar:
.LFB5340:
        .cfi_startproc
        pushq %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        vmovdqa x1(%rip), %ymm1
        movq %rsp, %rbp
        .cfi_def_cfa_register 6
        andq $-32, %rsp
        call foo
        vmovdqa %ymm1, x3(%rip)
        vzeroupper
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE5340:
        .size bar, .-bar
        .section .text.startup,"ax",@progbits
        .p2align 4
        .globl main
        .type main, @function
main:
.LFB5341:
        .cfi_startproc
        pushq %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movabsq $217020518514230019, %rax
        movq %rsp, %rbp
        .cfi_def_cfa_register 6
        andq $-32, %rsp
        subq $32, %rsp
        vmovdqa .LC0(%rip), %ymm1
        vmovdqa %ymm1, (%rsp)
        vmovdqa %ymm1, x1(%rip)
        call foo
        vmovdqa %ymm1, x3(%rip)
        movq x3+8(%rip), %rdx
        xorq (%rsp), %rax
        xorq 8(%rsp), %rdx
        orq %rax, %rdx
        jne .L6
        movq x3+24(%rip), %rdx
        movq x3+16(%rip), %rax
        xorq 24(%rsp), %rdx
        xorq 16(%rsp), %rax
        orq %rax, %rdx
        je .L9
.L6:
        vzeroupper
        call abort
        .p2align 4,,10
        .p2align 3
.L9:
        xorl %eax, %eax
        vzeroupper
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE5341:
        .size main, .-main
        .comm x3,32,32
        .comm x2,32,32
        .comm x1,32,32
        .section .rodata.cst32,"aM",@progbits,32
        .align 32
.LC0:
        .quad 217020518514230019
        .quad 217020518514230019
        .quad 217020518514230019
        .quad 217020518514230019
        .ident "GCC: (GNU) 10.0.0 20191003 (experimental)"
        .section .note.GNU-stack,"",@progbits
[hjl@gnu-skx-1 gcc]$