https://gcc.gnu.org/bugzilla/show_bug.cgi?id=34678
--- Comment #48 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Clang gets this right, even without the pragma; the original test case
is compiled to:

        pushq   %r14
        pushq   %rbx
        subq    $24, %rsp
        movq    %rsi, %r14
        movq    %rdi, %rbx
        movsd   %xmm1, 16(%rsp)         # 8-byte Spill
        movsd   %xmm0, 8(%rsp)          # 8-byte Spill
        movl    $1024, %edi             # imm = 0x400
        callq   fesetround@PLT
        movsd   8(%rsp), %xmm0          # 8-byte Reload
        divsd   16(%rsp), %xmm0         # 8-byte Folded Reload
        movsd   %xmm0, (%rbx)
        movl    $2048, %edi             # imm = 0x800
        callq   fesetround@PLT
        movsd   8(%rsp), %xmm0          # 8-byte Reload
        divsd   16(%rsp), %xmm0         # 8-byte Folded Reload
        movsd   %xmm0, (%r14)
        addq    $24, %rsp
        popq    %rbx
        popq    %r14
        retq