https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88713
--- Comment #46 from H.J. Lu <hjl.tools at gmail dot com> --- We generate sqrtss for scalar sqrtf: [hjl@gnu-skx-1 pr88713]$ cat s.i extern float sqrtf(float x); float rsqrt(float r) { return sqrtf (r); } [hjl@gnu-skx-1 pr88713]$ gcc -Ofast -S s.i [hjl@gnu-skx-1 pr88713]$ cat s.s .file "s.i" .text .p2align 4,,15 .globl rsqrt .type rsqrt, @function rsqrt: .LFB0: .cfi_startproc sqrtss %xmm0, %xmm0 ret .cfi_endproc .LFE0: .size rsqrt, .-rsqrt .ident "GCC: (GNU) 8.2.1 20190109 (Red Hat 8.2.1-7)" .section .note.GNU-stack,"",@progbits [hjl@gnu-skx-1 pr88713]$ But why don't we generate sqrtps for vector sqrtf? [hjl@gnu-skx-1 pr88713]$ cat y.i extern float sqrtf(float x); void rsqrt(float* restrict r, float* restrict a){ for (int i = 0; i < 16; i++){ r[i] = sqrtf(a[i]); } } [hjl@gnu-skx-1 pr88713]$ gcc -S -Ofast y.i [hjl@gnu-skx-1 pr88713]$ cat y.s .file "y.i" .text .p2align 4,,15 .globl rsqrt .type rsqrt, @function rsqrt: .LFB0: .cfi_startproc movups (%rsi), %xmm1 pxor %xmm2, %xmm2 movaps .LC0(%rip), %xmm4 movaps %xmm2, %xmm3 rsqrtps %xmm1, %xmm0 cmpneqps %xmm1, %xmm3 movaps %xmm1, %xmm5 andps %xmm3, %xmm0 movaps .LC1(%rip), %xmm3 mulps %xmm0, %xmm5 mulps %xmm5, %xmm0 mulps %xmm3, %xmm5 movaps %xmm0, %xmm1 movups 16(%rsi), %xmm0 addps %xmm4, %xmm1 mulps %xmm5, %xmm1 movaps %xmm2, %xmm5 cmpneqps %xmm0, %xmm5 movups %xmm1, (%rdi) rsqrtps %xmm0, %xmm1 andps %xmm5, %xmm1 movaps %xmm2, %xmm5 mulps %xmm1, %xmm0 mulps %xmm0, %xmm1 mulps %xmm3, %xmm0 addps %xmm4, %xmm1 mulps %xmm0, %xmm1 movups 32(%rsi), %xmm0 cmpneqps %xmm0, %xmm5 movups %xmm1, 16(%rdi) rsqrtps %xmm0, %xmm1 andps %xmm5, %xmm1 mulps %xmm1, %xmm0 mulps %xmm0, %xmm1 mulps %xmm3, %xmm0 addps %xmm4, %xmm1 mulps %xmm0, %xmm1 movups %xmm1, 32(%rdi) movups 48(%rsi), %xmm1 rsqrtps %xmm1, %xmm0 cmpneqps %xmm1, %xmm2 andps %xmm2, %xmm0 mulps %xmm0, %xmm1 mulps %xmm1, %xmm0 mulps %xmm3, %xmm1 addps %xmm4, %xmm0 mulps %xmm1, %xmm0 movups %xmm0, 48(%rdi) ret .cfi_endproc .LFE0: .size rsqrt, .-rsqrt .section 
.rodata.cst16,"aM",@progbits,16 .align 16 .LC0: .long 3225419776 .long 3225419776 .long 3225419776 .long 3225419776 .align 16 .LC1: .long 3204448256 .long 3204448256 .long 3204448256 .long 3204448256 .ident "GCC: (GNU) 8.2.1 20190109 (Red Hat 8.2.1-7)" .section .note.GNU-stack,"",@progbits [hjl@gnu-skx-1 pr88713]$