On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote:
> Hello,
> 
> Gleb Natapov wrote:
> > If it does this after opal_atomic_lock() (which is an explicit memory
> > barrier) then it is broken.
> Then gcc 4.1.1 on the amd64 architecture is broken:
And can you please repeat the test, but make the "test" variable global,
to tell the compiler that it can actually be accessed by more than one
thread?

> 
> The test-cases were compiled in the test/asm directory, with -O3
> 
> Bert
> 
> 

> #define OMPI_BUILDING 0
> #include "ompi_config.h"
> 
> #include "opal/sys/atomic.h"
> 
> static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
> 
> int
> main(int argc, char *argv[])
> {
>     int test = (argc == 1);
> 
>     __asm__ ("# first if\n");
>     if (1 == test) {
>         return 1;
>     }
>     __asm__ ("# lock\n");
>     opal_atomic_lock(&lock);
> 
>     __asm__ ("# second if\n");
>     if (1 == test) {
>         __asm__ ("# if unlock\n");
>         opal_atomic_unlock(&lock);
>         return 2;
>     }
> 
>     test = 1;
>     __asm__ ("# unlock\n");
>     opal_atomic_unlock(&lock);
> 
>     return 0;
> }
> 

>       .file   "double_check.c"
>       .text
>       .p2align 4,,15
> .globl main
>       .type   main, @function
> main:
> .LFB30:
> #APP
>       # first if
> 
> #NO_APP
>       decl    %edi
>       movl    $1, %eax
>       je      .L4
> #APP
>       # lock
> 
>       .p2align 4,,7
> #NO_APP
> .L5:
>       xorl    %edx, %edx
>       movl    $1, %ecx
>       movl    %edx, %eax
> #APP
>       lock; cmpxchgl %ecx,lock(%rip)   
>       sete     %dl      
>       
> #NO_APP
>       testb   %dl, %dl
>       jne     .L13
>       .p2align 4,,7
> .L9:
>       movl    lock(%rip), %eax
>       decl    %eax
>       je      .L9
>       jmp     .L5
> .L13:
> #APP
>       # second if
> 
>       # unlock
> 
> #NO_APP
>       movl    $0, lock(%rip)
> .L4:
>       rep ; ret
> .LFE30:
>       .size   main, .-main
>       .local  lock
>       .comm   lock,4,4
>       .section        .eh_frame,"a",@progbits
> .Lframe1:
>       .long   .LECIE1-.LSCIE1
> .LSCIE1:
>       .long   0x0
>       .byte   0x1
>       .string "zR"
>       .uleb128 0x1
>       .sleb128 -8
>       .byte   0x10
>       .uleb128 0x1
>       .byte   0x3
>       .byte   0xc
>       .uleb128 0x7
>       .uleb128 0x8
>       .byte   0x90
>       .uleb128 0x1
>       .align 8
> .LECIE1:
> .LSFDE1:
>       .long   .LEFDE1-.LASFDE1
> .LASFDE1:
>       .long   .LASFDE1-.Lframe1
>       .long   .LFB30
>       .long   .LFE30-.LFB30
>       .uleb128 0x0
>       .align 8
> .LEFDE1:
>       .ident  "GCC: (GNU) 4.1.1"
>       .section        .note.GNU-stack,"",@progbits

> #define OMPI_BUILDING 0
> #include "ompi_config.h"
> 
> #include "opal/sys/atomic.h"
> 
> static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
> 
> int
> main(int argc, char *argv[])
> {
>     volatile int test = (argc == 1);
> 
>     __asm__ ("# first if\n");
>     if (1 == test) {
>         return 1;
>     }
>     __asm__ ("# lock\n");
>     opal_atomic_lock(&lock);
> 
>     __asm__ ("# second if\n");
>     if (1 == test) {
>         __asm__ ("# if unlock\n");
>         opal_atomic_unlock(&lock);
>         return 2;
>     }
> 
>     test = 1;
>     __asm__ ("# unlock\n");
>     opal_atomic_unlock(&lock);
> 
>     return 0;
> }
> 

>       .file   "double_check_volatile.c"
>       .text
>       .p2align 4,,15
> .globl main
>       .type   main, @function
> main:
> .LFB30:
>       xorl    %eax, %eax
>       cmpl    $1, %edi
>       sete    %al
>       movl    %eax, -4(%rsp)
> #APP
>       # first if
> 
> #NO_APP
>       movl    -4(%rsp), %eax
>       movl    $1, %edx
>       decl    %eax
>       je      .L4
> #APP
>       # lock
> 
>       .p2align 4,,7
> #NO_APP
> .L5:
>       xorl    %edx, %edx
>       movl    $1, %ecx
>       movl    %edx, %eax
> #APP
>       lock; cmpxchgl %ecx,lock(%rip)   
>       sete     %dl      
>       
> #NO_APP
>       testb   %dl, %dl
>       jne     .L15
>       .p2align 4,,7
> .L11:
>       movl    lock(%rip), %eax
>       decl    %eax
>       je      .L11
>       jmp     .L5
> .L15:
> #APP
>       # second if
> 
> #NO_APP
>       movl    -4(%rsp), %eax
>       decl    %eax
>       jne     .L8
> #APP
>       # if unlock
> 
> #NO_APP
>       movl    $0, lock(%rip)
>       movl    $2, %edx
> .L4:
>       movl    %edx, %eax
>       ret
> .L8:
>       movl    $1, -4(%rsp)
> #APP
>       # unlock
> 
> #NO_APP
>       xorl    %edx, %edx
>       movl    $0, lock(%rip)
>       jmp     .L4
> .LFE30:
>       .size   main, .-main
>       .local  lock
>       .comm   lock,4,4
>       .section        .eh_frame,"a",@progbits
> .Lframe1:
>       .long   .LECIE1-.LSCIE1
> .LSCIE1:
>       .long   0x0
>       .byte   0x1
>       .string "zR"
>       .uleb128 0x1
>       .sleb128 -8
>       .byte   0x10
>       .uleb128 0x1
>       .byte   0x3
>       .byte   0xc
>       .uleb128 0x7
>       .uleb128 0x8
>       .byte   0x90
>       .uleb128 0x1
>       .align 8
> .LECIE1:
> .LSFDE1:
>       .long   .LEFDE1-.LASFDE1
> .LASFDE1:
>       .long   .LASFDE1-.Lframe1
>       .long   .LFB30
>       .long   .LFE30-.LFB30
>       .uleb128 0x0
>       .align 8
> .LEFDE1:
>       .ident  "GCC: (GNU) 4.1.1"
>       .section        .note.GNU-stack,"",@progbits

> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

--
                        Gleb.

Reply via email to