It didn't work. Let's try with this small complete application:
#include <stdint.h>
/* x86 instruction prefix placed in front of the compare-exchange below so
 * that the read-modify-write is performed as a locked (atomic) operation. */
#define SMPLOCK "lock;"

/**
 * Atomic 32-bit compare-and-set using x86 `lock cmpxchgl`.
 *
 * If *addr equals oldval, newval is stored into *addr.
 *
 * @param addr    location to update
 * @param oldval  value *addr is expected to hold
 * @param newval  value to store when the expectation holds
 * @return 1 if the store happened, 0 if *addr did not match oldval
 */
static inline int opal_atomic_cmpset_32( volatile int32_t *addr,
                                         int32_t oldval, int32_t newval)
{
    unsigned char ret;
    /* Operand constraints matter here:
     *  - cmpxchg compares EAX with the memory operand and, on mismatch,
     *    loads the memory value into EAX, so `oldval` must be a read-write
     *    ("+a") operand; as input-only the compiler may assume EAX still
     *    holds oldval afterwards.
     *  - the memory operand is both read and written, hence "+m".
     *  - cmpxchg updates the flags (sete reads ZF), hence the "cc" clobber.
     */
    __asm__ __volatile__ (
        SMPLOCK "cmpxchgl %3,%2 \n\t"
                "sete %0 \n\t"
        : "=qm" (ret), "+a" (oldval), "+m" (*addr)
        : "q"(newval)
        : "memory", "cc");
    return (int)ret;
}
/*
 * Minimal driver for opal_atomic_cmpset_32: adds `step` to a local counter
 * with a compare-and-set retry loop and returns the resulting value as the
 * process exit status.
 */
int main(int argc, char* argv[] )
{
    int32_t counter = 0;
    int32_t step = 1;
    int32_t *target = &counter;
    int32_t seen;

    for (;;) {
        /* Snapshot the current value, then try to replace it with
         * snapshot + step; retry if the compare-and-set reports failure. */
        seen = *target;
        if (0 != opal_atomic_cmpset_32(target, seen, seen + step)) {
            break;
        }
    }
    return (seen + step);
}
Thanks,
george.
On Jun 8, 2010, at 14:42 , Jeff Squyres wrote:
> Look at my output -- I did...
>
> On Jun 8, 2010, at 10:40 AM, George Bosilca wrote:
>
>> Still no good: opal_atomic_cmpset_32 is not inlined. Try adding -O3 to
>> your command line; this helped with gcc.
>>
>> Thanks,
>> george.
>>
>> On Jun 8, 2010, at 14:14 , Jeff Squyres wrote:
>>
>>> On Jun 8, 2010, at 9:53 AM, George Bosilca wrote:
>>>
>>>> As you can see there is no explicit call, the opal_atomic_cmpset_32 is
>>>> really inlined. I think the problem is that you didn't specify the -O3
>>>> flag on your command line.
>>>
>>> Ah, you wanted me to compile the OMPI code itself and send you the
>>> assembly. That's not what you asked for. :-)
>>>
>>> (I just took the code you sent in the mail, stuffed it into george.c, and
>>> compiled that with -S -- outside of the context of the Open MPI code tree)
>>>
>>> Here's the new output. It still didn't inline, but you can see the code
>>> for the _cmpset function:
>>>
>>> -----
>>> [7:13] svbu-mpi:~/tmp % cat george.c
>>>
>>> #include <stdint.h>
>>>
>>> #include "opal/sys/atomic.h"
>>>
>>> int foo(void) {
>>> int32_t oldval, delta;
>>> int32_t *addr = 0;
>>> do {
>>> oldval = *addr;
>>> } while (0 == opal_atomic_cmpset_32(addr, oldval, oldval + delta));
>>> return (oldval + delta);
>>> }
>>>
>>> [7:13] svbu-mpi:~/tmp % pgcc -O3 -I /home/jsquyres/svn/ompi4
>>> -I/home/jsquyres/svn/ompi4/opal/include -c -s george.c
>>> [7:13] svbu-mpi:~/tmp % cat george.s
>>> .file "george.c"
>>> .version "01.01"
>>> ## PGC 7.0 -opt 1
>>> ## PGC 06/08/2010 05:10:04
>>> ## pgcc george.c -c -S
>>> ## /opt/pgi/7.0.7/linux86-64/7.0-7/bin/pgc
>>> ## george.c -opt 1 -terse 1 -inform warn -x 119 0xa10000 -x 122 0x40 -x 123
>>> 0x1000
>>> ## -x 127 4 -x 127 16 -x 19 0x400000 -x 28 0x40000 -x 70 0x8000 -x 122 1
>>> -quad
>>> ## -x 59 4 -x 59 4 -tp p7-64 -astype 0 -stdinc
>>> /opt/pgi/7.0.7/linux86-64/7.0-7/include:/usr/local/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include:/usr/include
>>> ## -def unix -def __unix -def __unix__ -def linux -def __linux -def
>>> __linux__
>>> ## -def __NO_MATH_INLINES -def __x86_64__ -def
>>> __LONG_MAX__=9223372036854775807L
>>> ## -def __SIZE_TYPE__=unsigned long int -def __PTRDIFF_TYPE__=long int -def
>>> __THROW=
>>> ## -def __extension__= -def __amd64__ -def __SSE__ -def __MMX__ -def
>>> __SSE2__
>>> ## -def __SSE3__ -predicate #machine(x86_64) #lint(off) #system(posix)
>>> #cpu(x86_64)
>>> ## -cmdline +pgcc george.c -c -S -x 123 4 -x 123 0x80000000 -alwaysinline
>>> /opt/pgi/7.0.7/linux86-64/7.0-7/lib/libintrinsics.il 4
>>> ## -asm george.s
>>> ## lineno: 3
>>> .text
>>> .align 16
>>> .globl foo
>>> foo:
>>> ..Dcfb0:
>>> pushq %rbp
>>> ..Dcfi0:
>>> movq %rsp, %rbp
>>> ..Dcfi1:
>>> subq $16, %rsp
>>> ..EN1:
>>> ## lineno: 5
>>> movq $0, -8(%rbp)
>>> .p2align 4,,3
>>> .LB157:
>>> ## lineno: 6
>>> movq -8(%rbp), %rdi
>>> movl (%rdi), %esi
>>> movl %esi, -12(%rbp)
>>> movl -16(%rbp), %edx
>>> addl %esi, %edx
>>> xorl %eax, %eax
>>> call opal_atomic_cmpset_32
>>> testl %eax, %eax
>>> je .LB157
>>> movl -16(%rbp), %eax
>>> addl -12(%rbp), %eax
>>> ## lineno: 10
>>> leave
>>> ret
>>> .type foo,@function
>>> .size foo,.-foo
>>> ..Dcfe0:
>>> __fooEND:
>>> .section .pgi_trace
>>> .align 8
>>> .quad foo ## address of routine
>>> .quad __fooEND - foo ## size of routine
>>> .2byte 0 ## flags for future use
>>> .2byte 3 ## length of following string
>>> ## name:foo:
>>> .byte 0x66,0x6f,0x6f,0x00
>>> .data
>>> .globl opal_atomic_cmpset_32
>>> .section .debug_frame
>>> ..Dcieb0:
>>> .4byte ..Dciee0-..Dcieb0-4 ## CIE length
>>> .4byte 0xffffffff ## CIE ID
>>> .byte 0x1 ## CIE version
>>> .byte 0x0 ## no augmentation
>>> .byte 0x1 ## ULEB128 1, code alignment factor
>>> .byte 0x78 ## SLEB128 -8, data alignment factor
>>> .byte 0x10 ## return address column
>>> .byte 0xc ## DW_CFA_def_cfa (col 7)
>>> .byte 0x7 ## ULEB128 7
>>> .byte 0x8 ## ULEB128 8
>>> .byte 0x90 ## DW_CFA_offset (col 16)
>>> .byte 0x1 ## ULEB128 1
>>> .align 8
>>> ..Dciee0:
>>> .4byte ..Dfdee0-..Dfdeb0 ## FDE length
>>> ..Dfdeb0:
>>> .4byte ..Dcieb0 ## CIE pointer
>>> .quad ..Dcfb0 ## initial location
>>> .quad ..Dcfe0-..Dcfb0 ## address range
>>> .byte 0x4 ## DW_CFA_advance_loc4
>>> .4byte ..Dcfi0-..Dcfb0
>>> .byte 0xe ## DW_CFA_def_cfa_offset
>>> .byte 0x10 ## ULEB128 16
>>> .byte 0x86 ## DW_CFA_offset (col 6)
>>> .byte 0x2 ## ULEB128 2
>>> .byte 0x4 ## DW_CFA_advance_loc4
>>> .4byte ..Dcfi1-..Dcfi0
>>> .byte 0xd ## DW_CFA_def_cfa_register (col 6)
>>> .byte 0x6 ## ULEB128 6
>>> .align 8
>>> ..Dfdee0:
>>> .ident "PGC 7.0-7"
>>> [7:13] svbu-mpi:~/tmp %
>>> -----
>>>
>>> --
>>> Jeff Squyres
>>> [email protected]
>>> For corporate legal information go to:
>>> http://www.cisco.com/web/about/doing_business/legal/cri/
>>>
>>>
>>> _______________________________________________
>>> devel mailing list
>>> [email protected]
>>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>>
>>
>> _______________________________________________
>> devel mailing list
>> [email protected]
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>>
>
>
> --
> Jeff Squyres
> [email protected]
> For corporate legal information go to:
> http://www.cisco.com/web/about/doing_business/legal/cri/
>
>
> _______________________________________________
> devel mailing list
> [email protected]
> http://www.open-mpi.org/mailman/listinfo.cgi/devel