Bug#470318: i387 versus SSE versus MPFR (gcc "bug")

Alexei Sheplyakov Thu, 17 Apr 2008 03:04:42 -0700

Hello!

Ondrej Certik <[EMAIL PROTECTED]> writes:


> $ ./a.out 
> -4.1974624032366689e+117
> -8.4657370748010221e-47
> 4.9581771393902231e+163
> 
> and with -O:
> 
> $ gcc -W -Wall -O quot.c 
> $ ./a.out 
> -4.1974624032366689e+117
> -8.4657370748010221e-47
> 4.9581771393902237e+163
> 
> produces different results in the last digit. The "-O" case is correct.

I think both results *are* correct. Long explanation follows.

$ gcc -O2 -march=k8 -mfpmath=sse -save-temps -o quot-sse quot.c
$ mv quot.s quot-sse.s
$ gcc -O2 -march=k8 -mfpmath=387 -save-temps -o quot-387 quot.c
$ mv quot.s quot-387.s
$ cat quot-387.s 
        .file   "quot.c"
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC2:
        .string "%.16e\n%.16e\n%.16e\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB12:
        subq    $40, %rsp
.LCFI0:
        movabsq $-2856793040191571536, %rax
        movl    $.LC2, %edi
        movq    %rax, 32(%rsp)
        movabsq $-5305541054711142669, %rax
        movq    %rax, 24(%rsp)
        movl    $3, %eax
        fldl    32(%rsp)
        fldl    24(%rsp)
        fdivrp  %st, %st(1)
        fstpl   16(%rsp)
        movlpd  16(%rsp), %xmm2
        movlpd  24(%rsp), %xmm1
        movlpd  32(%rsp), %xmm0
        call    printf
        xorl    %eax, %eax
        addq    $40, %rsp
        ret
.LFE12:
        .size   main, .-main
        .section        .eh_frame,"a",@progbits
.Lframe1:
        .long   .LECIE1-.LSCIE1
.LSCIE1:
        .long   0x0
        .byte   0x1
        .string "zR"
        .uleb128 0x1
        .sleb128 -8
        .byte   0x10
        .uleb128 0x1
        .byte   0x3
        .byte   0xc
        .uleb128 0x7
        .uleb128 0x8
        .byte   0x90
        .uleb128 0x1
        .align 8
.LECIE1:
.LSFDE1:
        .long   .LEFDE1-.LASFDE1
.LASFDE1:
        .long   .LASFDE1-.Lframe1
        .long   .LFB12
        .long   .LFE12-.LFB12
        .uleb128 0x0
        .byte   0x4
        .long   .LCFI0-.LFB12
        .byte   0xe
        .uleb128 0x30
        .align 8
.LEFDE1:
        .ident  "GCC: (GNU) 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)"
        .section        .note.GNU-stack,"",@progbits

$ cat quot-sse.s
        .file   "quot.c"
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC2:
        .string "%.16e\n%.16e\n%.16e\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB12:
        subq    $40, %rsp
.LCFI0:
        movabsq $-2856793040191571536, %rax
        movl    $.LC2, %edi
        movq    %rax, 32(%rsp)
        movabsq $-5305541054711142669, %rax
        movq    %rax, 24(%rsp)
        movl    $3, %eax
        movlpd  32(%rsp), %xmm0
        movlpd  24(%rsp), %xmm1
        divsd   %xmm1, %xmm0
        movsd   %xmm0, 16(%rsp)
        movlpd  16(%rsp), %xmm2
        movlpd  24(%rsp), %xmm1
        movlpd  32(%rsp), %xmm0
        call    printf
        xorl    %eax, %eax
        addq    $40, %rsp
        ret
.LFE12:
        .size   main, .-main
        .section        .eh_frame,"a",@progbits
.Lframe1:
        .long   .LECIE1-.LSCIE1
.LSCIE1:
        .long   0x0
        .byte   0x1
        .string "zR"
        .uleb128 0x1
        .sleb128 -8
        .byte   0x10
        .uleb128 0x1
        .byte   0x3
        .byte   0xc
        .uleb128 0x7
        .uleb128 0x8
        .byte   0x90
        .uleb128 0x1
        .align 8
.LECIE1:
.LSFDE1:
        .long   .LEFDE1-.LASFDE1
.LASFDE1:
        .long   .LASFDE1-.Lframe1
        .long   .LFB12
        .long   .LFE12-.LFB12
        .uleb128 0x0
        .byte   0x4
        .long   .LCFI0-.LFB12
        .byte   0xe
        .uleb128 0x30
        .align 8
.LEFDE1:
        .ident  "GCC: (GNU) 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)"
        .section        .note.GNU-stack,"",@progbits

$ diff quot-387.s quot-sse.s
19,22c19,22
<       fldl    32(%rsp)
<       fldl    24(%rsp)
<       fdivrp  %st, %st(1)
<       fstpl   16(%rsp)
---
>       movlpd  32(%rsp), %xmm0
>       movlpd  24(%rsp), %xmm1
>       divsd   %xmm1, %xmm0
>       movsd   %xmm0, 16(%rsp)


ix87 code *looks* equivalent to SSE one. Due to different precision used by SSE
(64-bit double) and ix87 (80-bit) the results of conversion (fldl vs movlpd)
are different (because of different truncation). So, results are different too.
Nevertheless, both *are* correct (unless I'm missing something, that is).

> This only happens on 32 bits 

Not really (the above asm is clearly 64-bit). This happens if you use x87 FPU
(which is default for 32 bit code). In this regime all intermediate quantities
are actually double-extended (80 bit) FP numbers. Naturally, the results can be
different. AFAIK, IEEE 754 does NOT guarantee them to be the same on every
compliant platform. In particular, conversion of the *same* decimal number
results in different binary numbers.

> I'd like to trace this bug down and figure out if it's a bug in gcc, or
> somewhere else.

I don't think it's a bug. Rather, it's a wrong expectation from the user side.
You are comparing results of FP calculation done on 3 different platforms:
ix87, sse2, and MPFR, and expect them to be the same. I think your expectation
is wrong.

That said, I'm not really an expert. Feel free to point out which requirements
of IEEE 754 are violated.

Best regards,
        Alexei

-- 
All science is either physics or stamp collecting.

signature.asc
Description: Digital signature

Bug#470318: i387 versus SSE versus MPFR (gcc "bug")

Reply via email to