Hello! Ondrej Certik <[EMAIL PROTECTED]> writes:
> $ ./a.out
> -4.1974624032366689e+117
> -8.4657370748010221e-47
> 4.9581771393902231e+163
>
> and with -O:
>
> $ gcc -W -Wall -O quot.c
> $ ./a.out
> -4.1974624032366689e+117
> -8.4657370748010221e-47
> 4.9581771393902237e+163
>
> produces different results in the last digit. The "-O" case is correct.
I think both results *are* correct. Long explanation follows.
$ gcc -O2 -march=k8 -mfpmath=sse -save-temps -o quot-sse quot.c
$ mv quot.s quot-sse.s
$ gcc -O2 -march=k8 -mfpmath=387 -save-temps -o quot-387 quot.c
$ mv quot.s quot-387.s
$ cat quot-387.s
.file "quot.c"
.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "%.16e\n%.16e\n%.16e\n"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB12:
subq $40, %rsp
.LCFI0:
movabsq $-2856793040191571536, %rax
movl $.LC2, %edi
movq %rax, 32(%rsp)
movabsq $-5305541054711142669, %rax
movq %rax, 24(%rsp)
movl $3, %eax
fldl 32(%rsp)
fldl 24(%rsp)
fdivrp %st, %st(1)
fstpl 16(%rsp)
movlpd 16(%rsp), %xmm2
movlpd 24(%rsp), %xmm1
movlpd 32(%rsp), %xmm0
call printf
xorl %eax, %eax
addq $40, %rsp
ret
.LFE12:
.size main, .-main
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x3
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB12
.long .LFE12-.LFB12
.uleb128 0x0
.byte 0x4
.long .LCFI0-.LFB12
.byte 0xe
.uleb128 0x30
.align 8
.LEFDE1:
.ident "GCC: (GNU) 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)"
.section .note.GNU-stack,"",@progbits
$ cat quot-sse.s
.file "quot.c"
.section .rodata.str1.1,"aMS",@progbits,1
.LC2:
.string "%.16e\n%.16e\n%.16e\n"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB12:
subq $40, %rsp
.LCFI0:
movabsq $-2856793040191571536, %rax
movl $.LC2, %edi
movq %rax, 32(%rsp)
movabsq $-5305541054711142669, %rax
movq %rax, 24(%rsp)
movl $3, %eax
movlpd 32(%rsp), %xmm0
movlpd 24(%rsp), %xmm1
divsd %xmm1, %xmm0
movsd %xmm0, 16(%rsp)
movlpd 16(%rsp), %xmm2
movlpd 24(%rsp), %xmm1
movlpd 32(%rsp), %xmm0
call printf
xorl %eax, %eax
addq $40, %rsp
ret
.LFE12:
.size main, .-main
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.string "zR"
.uleb128 0x1
.sleb128 -8
.byte 0x10
.uleb128 0x1
.byte 0x3
.byte 0xc
.uleb128 0x7
.uleb128 0x8
.byte 0x90
.uleb128 0x1
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB12
.long .LFE12-.LFB12
.uleb128 0x0
.byte 0x4
.long .LCFI0-.LFB12
.byte 0xe
.uleb128 0x30
.align 8
.LEFDE1:
.ident "GCC: (GNU) 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)"
.section .note.GNU-stack,"",@progbits
$ diff quot-387.s quot-sse.s
19,22c19,22
< fldl 32(%rsp)
< fldl 24(%rsp)
< fdivrp %st, %st(1)
< fstpl 16(%rsp)
---
> movlpd 32(%rsp), %xmm0
> movlpd 24(%rsp), %xmm1
> divsd %xmm1, %xmm0
> movsd %xmm0, 16(%rsp)
ix87 code *looks* equivalent to SSE one. Due to different precision used by SSE
(64-bit double) and ix87 (80-bit) the results of conversion (fldl vs movlpd)
are different (because of different truncation). So, results are different too.
Nevertheless, both *are* correct (unless I'm missing something, that is).
> This only happens on 32 bits
Not really (the above asm is clearly 64-bit). This happens if you use x87 FPU
(which is default for 32 bit code). In this regime all intermediate quantities
are actually double-extended (80 bit) FP numbers. Naturally, the results can be
different. AFAIK, IEEE 754 does NOT guarantee them to be the same on every
compliant platform. In particular, conversion of the *same* decimal number
results in different binary numbers.
> I'd like to trace this bug down and figure out if it's a bug in gcc, or
> somewhere else.
I don't think it's a bug. Rather, it's a wrong expectation from the user side.
You are comparing results of FP calculation done on 3 different platforms:
ix87, sse2, and MPFR, and expect them to be the same. I think your expectation
is wrong.
That said, I'm not really an expert. Feel free to point out which requirements
of IEEE 754 are violated.
Best regards,
Alexei
--
All science is either physics or stamp collecting.
signature.asc
Description: Digital signature

