https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68923
H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
         Resolution|---                         |FIXED
   Target Milestone|---                         |9.0

--- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> ---
Fixed for GCC 9:

[hjl@gnu-cfl-1 gcc]$ cat x.c
#include <immintrin.h>
#include <stdint.h>

#define USE_MOVQ

__m256 load_bytes_to_m256(uint8_t *p)
{
#ifdef USE_MOVQ // compiles to an actual movq then pmovzx xmm,xmm with gcc -O3
    __m128i small_load = _mm_cvtsi64_si128( *(uint64_t*)p );
#else // loadu compiles to a 128b load with gcc -O0, potentially segfaulting
    __m128i small_load = _mm_loadu_si128( (__m128i*)p );
#endif

    __m256i intvec = _mm256_cvtepu8_epi32( small_load );
    return _mm256_cvtepi32_ps(intvec);
}
[hjl@gnu-cfl-1 gcc]$ ./xgcc -B./ -S -O3 x.c -march=haswell
[hjl@gnu-cfl-1 gcc]$ cat x.s
	.file	"x.c"
	.text
	.p2align 4
	.globl	load_bytes_to_m256
	.type	load_bytes_to_m256, @function
load_bytes_to_m256:
.LFB5186:
	.cfi_startproc
	vpmovzxbd	(%rdi), %ymm0
	vcvtdq2ps	%ymm0, %ymm0
	ret
	.cfi_endproc
.LFE5186:
	.size	load_bytes_to_m256, .-load_bytes_to_m256
	.ident	"GCC: (GNU) 9.0.0 20190118 (experimental)"
	.section	.note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 gcc]$