[OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
Fix the double-check locking[1] by defining the cls_initialized member to volatile. Greetings Bert Wesarg [1]: http://en.wikipedia.org/wiki/Double-checked_locking --- opal/class/opal_object.h |2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h --- old/opal/class/opal_object.h +++ new/opal/class/opal_object.h @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob struct opal_class_t { const char *cls_name; /**< symbolic name for class */ opal_class_t *cls_parent; /**< parent class descriptor */ opal_construct_t cls_construct; /**< class constructor */ opal_destruct_t cls_destruct; /**< class destructor */ -int cls_initialized;/**< is class initialized */ +volatile int cls_initialized; /**< is class initialized */ int cls_depth; /**< depth of class hierarchy tree */ opal_construct_t *cls_construct_array; /**< array of parent class constructors */ opal_destruct_t *cls_destruct_array; /**< array of parent class destructors */
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote: > Fix the double-check locking[1] by defining the cls_initialized member to > volatile. > > Greetings > > Bert Wesarg > > [1]: http://en.wikipedia.org/wiki/Double-checked_locking Can you explain how the Java example from this page applies to the code in Open MPI? cls_initialized is set only after class is fully initialized and usable. > > > --- > > opal/class/opal_object.h |2 +- > 1 files changed, 1 insertion(+), 1 deletion(-) > > diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h > --- old/opal/class/opal_object.h > +++ new/opal/class/opal_object.h > @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob > struct opal_class_t { > const char *cls_name; /**< symbolic name for class */ > opal_class_t *cls_parent; /**< parent class descriptor */ > opal_construct_t cls_construct; /**< class constructor */ > opal_destruct_t cls_destruct; /**< class destructor */ > -int cls_initialized;/**< is class initialized */ > +volatile int cls_initialized; /**< is class initialized */ > int cls_depth; /**< depth of class hierarchy tree */ > opal_construct_t *cls_construct_array; > /**< array of parent class constructors > */ > opal_destruct_t *cls_destruct_array; > /**< array of parent class destructors */ > ___ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Gleb.
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
Gleb Natapov wrote: > On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote: >> Fix the double-check locking[1] by defining the cls_initialized member to >> volatile. >> >> Greetings >> >> Bert Wesarg >> >> [1]: http://en.wikipedia.org/wiki/Double-checked_locking > Can you explain how the Java example from this page applies to the code > in Open MPI? cls_initialized is set only after class is fully > initialized and usable. Sure: in opal_object.c:opal_class_initialize(): if (1 == cls->cls_initialized) { return; } opal_atomic_lock(&class_lock); /* If another thread initializing this same class came in at roughly the same time, it may have gotten the lock and initialized. So check again. */ if (1 == cls->cls_initialized) { opal_atomic_unlock(&class_lock); return; } the compiler can optimize the second if check away, without being declared as volatile. Bert > >> > >> --- >> >> opal/class/opal_object.h |2 +- >> 1 files changed, 1 insertion(+), 1 deletion(-) >> >> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h >> --- old/opal/class/opal_object.h >> +++ new/opal/class/opal_object.h >> @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob >> struct opal_class_t { >> const char *cls_name; /**< symbolic name for class */ >> opal_class_t *cls_parent; /**< parent class descriptor */ >> opal_construct_t cls_construct; /**< class constructor */ >> opal_destruct_t cls_destruct; /**< class destructor */ >> -int cls_initialized;/**< is class initialized */ >> +volatile int cls_initialized; /**< is class initialized */ >> int cls_depth; /**< depth of class hierarchy tree */ >> opal_construct_t *cls_construct_array; >> /**< array of parent class constructors >> */ >> opal_destruct_t *cls_destruct_array; >> /**< array of parent class destructors >> */ > >> ___ >> devel mailing list >> de...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/devel > > -- > Gleb. > ___ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
On Tue, Mar 06, 2007 at 10:44:53AM +0100, Bert Wesarg wrote: > > > Gleb Natapov wrote: > > On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote: > >> Fix the double-check locking[1] by defining the cls_initialized member to > >> volatile. > >> > >> Greetings > >> > >> Bert Wesarg > >> > >> [1]: http://en.wikipedia.org/wiki/Double-checked_locking > > Can you explain how the Java example from this page applies to the code > > in Open MPI? cls_initialized is set only after class is fully > > initialized and usable. > Sure: > > in opal_object.c:opal_class_initialize(): > > > if (1 == cls->cls_initialized) { > return; > } > opal_atomic_lock(&class_lock); > > /* If another thread initializing this same class came in at >roughly the same time, it may have gotten the lock and >initialized. So check again. */ > > if (1 == cls->cls_initialized) { > opal_atomic_unlock(&class_lock); > return; > } > > the compiler can optimize the second if check away, without being declared > as volatile. If it does this after opal_atomic_lock() (which is explicit memory barrier) then it is broken. > > Bert > > > > > >> > > > >> --- > >> > >> opal/class/opal_object.h |2 +- > >> 1 files changed, 1 insertion(+), 1 deletion(-) > >> > >> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h > >> --- old/opal/class/opal_object.h > >> +++ new/opal/class/opal_object.h > >> @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob > >> struct opal_class_t { > >> const char *cls_name; /**< symbolic name for class */ > >> opal_class_t *cls_parent; /**< parent class descriptor */ > >> opal_construct_t cls_construct; /**< class constructor */ > >> opal_destruct_t cls_destruct; /**< class destructor */ > >> -int cls_initialized;/**< is class initialized */ > >> +volatile int cls_initialized; /**< is class initialized */ > >> int cls_depth; /**< depth of class hierarchy tree */ > >> opal_construct_t *cls_construct_array; > >> /**< array of parent class > >> constructors */ > >> opal_destruct_t *cls_destruct_array; > >> /**< array of parent class > >> destructors */ > > > >> ___ > >> devel mailing list > >> de...@open-mpi.org > >> http://www.open-mpi.org/mailman/listinfo.cgi/devel > > > > -- > > Gleb. > > ___ > > devel mailing list > > de...@open-mpi.org > > http://www.open-mpi.org/mailman/listinfo.cgi/devel > ___ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Gleb.
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
Hello, Gleb Natapov wrote: > If it does this after opal_atomic_lock() (which is explicit memory > barrier) then it is broken. Than, gcc 4.1.1 on the amd64 architecture is broken: The test-cases were compiled in the test/asm directory, with -O3 Bert #define OMPI_BUILDING 0 #include "ompi_config.h" #include "opal/sys/atomic.h" static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } }; int main(int argc, char *argv[]) { int test = (argc == 1); __asm__ ("# first if\n"); if (1 == test) { return 1; } __asm__ ("# lock\n"); opal_atomic_lock(&lock); __asm__ ("# second if\n"); if (1 == test) { __asm__ ("# if unlock\n"); opal_atomic_unlock(&lock); return 2; } test = 1; __asm__ ("# unlock\n"); opal_atomic_unlock(&lock); return 0; } .file "double_check.c" .text .p2align 4,,15 .globl main .type main, @function main: .LFB30: #APP # first if #NO_APP decl %edi movl $1, %eax je .L4 #APP # lock .p2align 4,,7 #NO_APP .L5: xorl %edx, %edx movl $1, %ecx movl %edx, %eax #APP lock; cmpxchgl %ecx,lock(%rip) sete %dl #NO_APP testb %dl, %dl jne .L13 .p2align 4,,7 .L9: movl lock(%rip), %eax decl %eax je .L9 jmp .L5 .L13: #APP # second if # unlock #NO_APP movl $0, lock(%rip) .L4: rep ; ret .LFE30: .size main, .-main .local lock .comm lock,4,4 .section .eh_frame,"a",@progbits .Lframe1: .long .LECIE1-.LSCIE1 .LSCIE1: .long 0x0 .byte 0x1 .string "zR" .uleb128 0x1 .sleb128 -8 .byte 0x10 .uleb128 0x1 .byte 0x3 .byte 0xc .uleb128 0x7 .uleb128 0x8 .byte 0x90 .uleb128 0x1 .align 8 .LECIE1: .LSFDE1: .long .LEFDE1-.LASFDE1 .LASFDE1: .long .LASFDE1-.Lframe1 .long .LFB30 .long .LFE30-.LFB30 .uleb128 0x0 .align 8 .LEFDE1: .ident "GCC: (GNU) 4.1.1" .section .note.GNU-stack,"",@progbits #define OMPI_BUILDING 0 #include "ompi_config.h" #include "opal/sys/atomic.h" static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } }; int main(int argc, char *argv[]) { volatile int test = (argc == 1); __asm__ ("# first if\n"); if (1 == test) { return 1; } __asm__ ("# lock\n"); opal_atomic_lock(&lock); __asm__ ("# second if\n"); if (1 == test) { __asm__ ("# if unlock\n"); opal_atomic_unlock(&lock); return 2; } test = 1; __asm__ ("# unlock\n"); opal_atomic_unlock(&lock); return 0; } .file "double_check_volatile.c" .text .p2align 4,,15 .globl main .type main, @function main: .LFB30: xorl %eax, %eax cmpl $1, %edi sete %al movl %eax, -4(%rsp) #APP # first if #NO_APP movl -4(%rsp), %eax movl $1, %edx decl %eax je .L4 #APP # lock .p2align 4,,7 #NO_APP .L5: xorl %edx, %edx movl $1, %ecx movl %edx, %eax #APP lock; cmpxchgl %ecx,lock(%rip) sete %dl #NO_APP testb %dl, %dl jne .L15 .p2align 4,,7 .L11: movl lock(%rip), %eax decl %eax je .L11 jmp .L5 .L15: #APP # second if #NO_APP movl -4(%rsp), %eax decl %eax jne .L8 #APP # if unlock #NO_APP movl $0, lock(%rip) movl $2, %edx .L4: movl %edx, %eax ret .L8: movl $1, -4(%rsp) #APP # unlock #NO_APP xorl %edx, %edx movl $0, lock(%rip) jmp .L4 .LFE30: .size main, .-main .local lock .comm lock,4,4 .section .eh_frame,"a",@progbits .Lframe1: .long .LECIE1-.LSCIE1 .LSCIE1: .long 0x0 .byte 0x1 .string "zR" .uleb128 0x1 .sleb128 -8 .byte 0x10 .uleb128 0x1 .byte 0x3 .byte 0xc .uleb128 0x7 .uleb128 0x8 .byte 0x90 .uleb128 0x1 .align 8 .LECIE1: .LSFDE1: .long .LEFDE1-.LASFDE1 .LASFDE1: .long .LASFDE1-.Lframe1 .long .LFB30 .long .LFE30-.LFB30 .uleb128 0x0 .align 8 .LEFDE1: .ident "GCC: (GNU) 4.1.1" .section .note.GNU-stack,"",@progbits
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote: > Hello, > > Gleb Natapov wrote: > > If it does this after opal_atomic_lock() (which is explicit memory > > barrier) then it is broken. > Than, gcc 4.1.1 on the amd64 architecture is broken: And can you repeat the test please, but make "test" variable to be global to tell compiler that it can be actually accessed by more then one thread. > > The test-cases were compiled in the test/asm directory, with -O3 > > Bert > > > #define OMPI_BUILDING 0 > #include "ompi_config.h" > > #include "opal/sys/atomic.h" > > static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } }; > > int > main(int argc, char *argv[]) > { > int test = (argc == 1); > > __asm__ ("# first if\n"); > if (1 == test) { > return 1; > } > __asm__ ("# lock\n"); > opal_atomic_lock(&lock); > > __asm__ ("# second if\n"); > if (1 == test) { > __asm__ ("# if unlock\n"); > opal_atomic_unlock(&lock); > return 2; > } > > test = 1; > __asm__ ("# unlock\n"); > opal_atomic_unlock(&lock); > > return 0; > } > > .file "double_check.c" > .text > .p2align 4,,15 > .globl main > .type main, @function > main: > .LFB30: > #APP > # first if > > #NO_APP > decl%edi > movl$1, %eax > je .L4 > #APP > # lock > > .p2align 4,,7 > #NO_APP > .L5: > xorl%edx, %edx > movl$1, %ecx > movl%edx, %eax > #APP > lock; cmpxchgl %ecx,lock(%rip) > sete %dl > > #NO_APP > testb %dl, %dl > jne .L13 > .p2align 4,,7 > .L9: > movllock(%rip), %eax > decl%eax > je .L9 > jmp .L5 > .L13: > #APP > # second if > > # unlock > > #NO_APP > movl$0, lock(%rip) > .L4: > rep ; ret > .LFE30: > .size main, .-main > .local lock > .comm lock,4,4 > .section.eh_frame,"a",@progbits > .Lframe1: > .long .LECIE1-.LSCIE1 > .LSCIE1: > .long 0x0 > .byte 0x1 > .string "zR" > .uleb128 0x1 > .sleb128 -8 > .byte 0x10 > .uleb128 0x1 > .byte 0x3 > .byte 0xc > .uleb128 0x7 > .uleb128 0x8 > .byte 0x90 > .uleb128 0x1 > .align 8 > .LECIE1: > .LSFDE1: > .long .LEFDE1-.LASFDE1 > .LASFDE1: > .long .LASFDE1-.Lframe1 > .long .LFB30 > .long .LFE30-.LFB30 > .uleb128 0x0 > .align 8 > .LEFDE1: > .ident "GCC: (GNU) 4.1.1" > .section.note.GNU-stack,"",@progbits > #define OMPI_BUILDING 0 > #include "ompi_config.h" > > #include "opal/sys/atomic.h" > > static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } }; > > int > main(int argc, char *argv[]) > { > volatile int test = (argc == 1); > > __asm__ ("# first if\n"); > if (1 == test) { > return 1; > } > __asm__ ("# lock\n"); > opal_atomic_lock(&lock); > > __asm__ ("# second if\n"); > if (1 == test) { > __asm__ ("# if unlock\n"); > opal_atomic_unlock(&lock); > return 2; > } > > test = 1; > __asm__ ("# unlock\n"); > opal_atomic_unlock(&lock); > > return 0; > } > > .file "double_check_volatile.c" > .text > .p2align 4,,15 > .globl main > .type main, @function > main: > .LFB30: > xorl%eax, %eax > cmpl$1, %edi > sete%al > movl%eax, -4(%rsp) > #APP > # first if > > #NO_APP > movl-4(%rsp), %eax > movl$1, %edx > decl%eax > je .L4 > #APP > # lock > > .p2align 4,,7 > #NO_APP > .L5: > xorl%edx, %edx > movl$1, %ecx > movl%edx, %eax > #APP > lock; cmpxchgl %ecx,lock(%rip) > sete %dl > > #NO_APP > testb %dl, %dl > jne .L15 > .p2align 4,,7 > .L11: > movllock(%rip), %eax > decl%eax > je .L11 > jmp .L5 > .L15: > #APP > # second if > > #NO_APP > movl-4(%rsp), %eax > decl%eax > jne .L8 > #APP > # if unlock > > #NO_APP > movl$0, lock(%rip) > movl$2, %edx > .L4: > movl%edx, %eax > ret > .L8: > movl$1, -4(%rsp) > #APP > # unlock > > #NO_APP > xorl%edx, %edx > movl$0, lock(%rip) > jmp .L4 > .LFE30: > .size main, .-main > .local lock > .comm lock,4,4 > .section.eh_frame,"a",@progbits > .Lframe1: > .long .LECIE1-.LSCIE1 > .LSCIE1: > .long 0x0 > .byte 0x1 > .string "zR" > .uleb128 0x1 > .sleb128 -8 > .byte 0x10 > .uleb128 0x1 > .byte 0x3 > .byte 0xc > .uleb128 0x7 > .uleb128 0x8 > .byte 0x90 > .uleb128 0x1 > .align 8
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
Gleb Natapov wrote: > On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote: >> Hello, >> >> Gleb Natapov wrote: >>> If it does this after opal_atomic_lock() (which is explicit memory >>> barrier) then it is broken. >> Than, gcc 4.1.1 on the amd64 architecture is broken: > And can you repeat the test please, but make "test" variable to be global > to tell compiler that it can be actually accessed by more then one > thread. > ahh, now both produce nearly the same code. but who hurts it to declare the variable volatile? Bert --- double_check.s 2007-03-06 12:07:12.103478512 +0100 +++ double_check_volatile.s 2007-03-06 12:06:20.675315485 +0100 @@ -1,4 +1,4 @@ - .file "double_check.c" + .file "double_check_volatile.c" .text .p2align 4,,15 .globl main @@ -13,8 +13,9 @@ # first if #NO_APP - decl %eax + movl test(%rip), %eax movl $1, %edx + decl %eax je .L4 #APP # lock @@ -43,7 +44,8 @@ # second if #NO_APP - cmpl $1, test(%rip) + movl test(%rip), %eax + decl %eax jne .L8 #APP # if unlock
Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization
On Tue, Mar 06, 2007 at 12:13:16PM +0100, Bert Wesarg wrote: > Gleb Natapov wrote: > > On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote: > >> Hello, > >> > >> Gleb Natapov wrote: > >>> If it does this after opal_atomic_lock() (which is explicit memory > >>> barrier) then it is broken. > >> Than, gcc 4.1.1 on the amd64 architecture is broken: > > And can you repeat the test please, but make "test" variable to be global > > to tell compiler that it can be actually accessed by more then one > > thread. > > > > ahh, now both produce nearly the same code. > > but who hurts it to declare the variable volatile? > The disadvantage of using volatile is that compiler will produce inefficient code for every access to a variable and not only specific access that you care about, so it is better to do proper memory barrier in places where ordering is important and let compiler optimize all other cases. But what is even more important is that in most cases volatile is not enough. CPU may reorder accesses to memory and volatile doesn't help, so volatile is not a replacement for memory barriers. You can find Linus rants about volatile usage in LKML archives for entertainment reading. Volatile is overused in Open MPI anyway. > Bert > --- double_check.s2007-03-06 12:07:12.103478512 +0100 > +++ double_check_volatile.s 2007-03-06 12:06:20.675315485 +0100 > @@ -1,4 +1,4 @@ > - .file "double_check.c" > + .file "double_check_volatile.c" > .text > .p2align 4,,15 > .globl main > @@ -13,8 +13,9 @@ > # first if > > #NO_APP > - decl%eax > + movltest(%rip), %eax > movl$1, %edx > + decl%eax > je .L4 > #APP > # lock > @@ -43,7 +44,8 @@ > # second if > > #NO_APP > - cmpl$1, test(%rip) > + movltest(%rip), %eax > + decl%eax > jne .L8 > #APP > # if unlock > ___ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Gleb.
[OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()
Hello, I followed the call to test the rc1, but a simple test programm hangs, but non deterministic. all but one orted have quit. but no cpu eating from orted or mpirun. The test system is a xeon cluster with myrinet interconnect. the outouts are splited into two mails (100kb limit) Bert Wesarg /* -*- c -*- */ /* $ ~/opt/openmpi-1.2rc1/bin/mpicc -std=gnu99 -W -Wall -Wno-unused-parameter -O2 -c -o mpi_threaded.o mpi_threaded.c $ ~/opt/openmpi-1.2rc1/bin/mpicc -std=gnu99 mpi_threaded.o -o mpi_threaded $ cat host8_11 compute-0-8.local slots=2 max-slots=4 compute-0-9.local slots=2 max-slots=4 compute-0-10.local slots=2 max-slots=4 compute-0-11.local slots=2 max-slots=4 $ ~/opt/openmpi-1.2rc1/bin/mpirun -np 4 --hostfile host8_11 ./mpi_threaded thread level: MPI_THREAD_MULTIPLE thread level: MPI_THREAD_MULTIPLE thread level: MPI_THREAD_MULTIPLE thread level: MPI_THREAD_MULTIPLE # hangs none deterministic, must kill mpirun with kill -KILL $ ~/opt/openmpi-1.2rc1/bin/mpirun -np 2 ./mpi_threaded -- [0,1,0]: Myrinet/GM on host master.halc.informatik.uni-halle.de was unable to find any NICs. Another transport will be used instead, although this may result in lower performance. -- -- [0,1,1]: Myrinet/GM on host master.halc.informatik.uni-halle.de was unable to find any NICs. Another transport will be used instead, although this may result in lower performance. -- thread level: MPI_THREAD_MULTIPLE thread level: MPI_THREAD_MULTIPLE $ */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #define ENUM_TO_STRING(e) [e] = #e static const char *const thread_names[] = { ENUM_TO_STRING(MPI_THREAD_SINGLE), ENUM_TO_STRING(MPI_THREAD_FUNNELED), ENUM_TO_STRING(MPI_THREAD_SERIALIZED), ENUM_TO_STRING(MPI_THREAD_MULTIPLE) }; int main(int ac, char *av[]) { int thread_level; MPI_Init_thread(&ac, &av, MPI_THREAD_MULTIPLE, &thread_level); printf("thread level: %s\n", thread_names[thread_level]); MPI_Finalize(); return 0; } ompi-out1.tar.gz Description: GNU Zip compressed data
[OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init() (Part 2)
Part 2 of the output tar ompi-out2.tar.gz Description: GNU Zip compressed data
Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()
Hi Bert Wesarg, Thank you for your quick testing of 1.2rc1. 1.2 is expected to fail when using MPI_THREAD_MULTIPLE. I suspect that a working and tested MPI_THREAD_MULTIPLE will be one of our goals for 1.3. On 3/6/07, Bert Wesarg wrote: Hello, I followed the call to test the rc1, but a simple test programm hangs, but non deterministic. all but one orted have quit. but no cpu eating from orted or mpirun. The test system is a xeon cluster with myrinet interconnect. the outouts are splited into two mails (100kb limit) Bert Wesarg ___ devel mailing list de...@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Tim Mattox - http://homepage.mac.com/tmattox/ tmat...@gmail.com || timat...@open-mpi.org I'm a bright... http://www.the-brights.net/
Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()
Hi, this is realy sad, version 1.1.2 works quite good with threads (multiple threads which starts mpi requests), only 1 of 10 (or even less) kills with a SIGSEGV. And this this simple test program works even longer. Bert Tim Mattox wrote: > Hi Bert Wesarg, > Thank you for your quick testing of 1.2rc1. 1.2 is expected to fail when > using MPI_THREAD_MULTIPLE. I suspect that a working and tested > MPI_THREAD_MULTIPLE will be one of our goals for 1.3. > > On 3/6/07, Bert Wesarg wrote: >> Hello, >> >> I followed the call to test the rc1, but a simple test programm hangs, but >> non deterministic. all but one orted have quit. but no cpu eating from >> orted or mpirun. >> >> The test system is a xeon cluster with myrinet interconnect. >> >> the outouts are splited into two mails (100kb limit) >> >> Bert Wesarg >> >> ___ >> devel mailing list >> de...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/devel >> >> > >
Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()
Unfortunately, MPI_THREAD_MULTIPLE has never received a lot of testing in any version of OMPI (including v1.1). Various members tested the bozo cases (e.g., ensure we don't double lock, etc.), and periodically tested/debugged simple multi-threaded apps, but not much more than that. As such, it's probably pretty lucky that v1.1 works with THREAD_MULTIPLE. Indeed, v1.2 has changed quite a bit since v1.2, and only the same level of THREAD_MULTIPLE testing has occurred (so far) -- checking for the bozo cases. We do expect to improve this over the next several months; as Tim mentioned, for the v1.3 timeframe. On Mar 6, 2007, at 9:06 AM, Bert Wesarg wrote: Hi, this is realy sad, version 1.1.2 works quite good with threads (multiple threads which starts mpi requests), only 1 of 10 (or even less) kills with a SIGSEGV. And this this simple test program works even longer. Bert Tim Mattox wrote: Hi Bert Wesarg, Thank you for your quick testing of 1.2rc1. 1.2 is expected to fail when using MPI_THREAD_MULTIPLE. I suspect that a working and tested MPI_THREAD_MULTIPLE will be one of our goals for 1.3. On 3/6/07, Bert Wesarg wrote: Hello, I followed the call to test the rc1, but a simple test programm hangs, but non deterministic. all but one orted have quit. but no cpu eating from orted or mpirun. The test system is a xeon cluster with myrinet interconnect. the outouts are splited into two mails (100kb limit) Bert Wesarg ___ devel mailing list de...@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/devel ___ devel mailing list de...@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/devel -- Jeff Squyres Server Virtualization Business Unit Cisco Systems
Re: [OMPI devel] [PATCH] opal/class/opal_object.c: save some memory for the ctor/dtor arrays
Bert, Thanks for this patch. I apply it in the trunk as revision r13939. Thanks again. george. On Mar 5, 2007, at 12:10 PM, Bert Wesarg wrote: This saves some memory for the constructors and destructors arrays of a class by counting the constructors and destructors while we are counting the cls_depth. And the reversion of the constructor array can now be done without an extra loop. The patch is only compile tested. Greetings Bert Wesarg Index: opal/class/opal_object.c === --- opal/class/opal_object.c(revision 13923) +++ opal/class/opal_object.c(working copy) @@ -71,7 +71,9 @@ { opal_class_t *c; opal_construct_t* cls_construct_array; -opal_destruct_t* cls_destruct_array; +opal_destruct_t* cls_destruct_array; +int cls_construct_array_count; +int cls_destruct_array_count; int i; assert(cls); @@ -95,33 +97,51 @@ /* * First calculate depth of class hierarchy + * And the number of constructors and destructors */ cls->cls_depth = 0; +cls_construct_array_count = 0; +cls_destruct_array_count = 0; for (c = cls; c; c = c->cls_parent) { -cls->cls_depth += 1; +if( NULL != c->cls_construct ) { +cls_construct_array_count++; +} +if( NULL != c->cls_destruct ) { +cls_destruct_array_count++; +} +cls->cls_depth++; } /* * Allocate arrays for hierarchy of constructors and destructors + * plus for each a NULL-sentinel */ cls->cls_construct_array = -(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)* - sizeof(opal_construct_t) * 2 ); +(void (**)(opal_object_t*))malloc ((cls_construct_array_count + + cls_destruct_array_count + 2) * + sizeof(opal_construct_t) ); if (NULL == cls->cls_construct_array) { perror("Out of memory"); exit(-1); } -cls->cls_destruct_array = cls->cls_construct_array + cls- >cls_depth + 1; -cls_construct_array = cls->cls_construct_array; -cls_destruct_array = cls->cls_destruct_array; - +cls->cls_destruct_array = +cls->cls_construct_array + cls_construct_array_count + 1; + +/* + * The constructor array is reversed, so start at the end + */ + +cls_construct_array = cls->cls_construct_array + cls_construct_array_count; +cls_destruct_array = cls->cls_destruct_array; + c = cls; +*cls_construct_array = NULL; /* end marker for the constructors */ for (i = 0; i < cls->cls_depth; i++) { if( NULL != c->cls_construct ) { +--cls_construct_array; *cls_construct_array = c->cls_construct; -cls_construct_array++; } if( NULL != c->cls_destruct ) { *cls_destruct_array = c->cls_destruct; @@ -129,16 +149,7 @@ } c = c->cls_parent; } -*cls_construct_array = NULL; /* end marker for the constructors */ *cls_destruct_array = NULL; /* end marker for the destructors */ -/* Now we have to invert the constructors */ -for( i = 0, --cls_construct_array; - cls_construct_array > (cls->cls_construct_array + i); - i++, cls_construct_array-- ) { -opal_construct_t temp_construct = *cls_construct_array; -*cls_construct_array = cls->cls_construct_array[i]; -cls->cls_construct_array[i] = temp_construct; -} cls->cls_initialized = 1; save_class(cls); ___ devel mailing list de...@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/devel "Half of what I say is meaningless; but I say it so that the other half may reach you" Kahlil Gibran
Re: [OMPI devel] [PATCH] opal/class/opal_object.c: replace the classes array with a linked list
Bert, Your previous patch saves some memory while the current one use some more. I prefer to keep the array, as it's not only out of any critical path but it's not performance related. An array can do the job without any problems and use less memory than the linked list. Thanks, george. On Mar 5, 2007, at 12:33 PM, Bert Wesarg wrote: This replaces the classes array (which holds all ctor/dtor arrays) with a linked list. This patch is compile tested only. Greetings Bert Wesarg Index: opal/class/opal_object.c === --- opal/class/opal_object.c(revision 13923) +++ opal/class/opal_object.c(working copy) @@ -51,17 +51,12 @@ * Local variables */ static opal_atomic_lock_t class_lock = { { OPAL_ATOMIC_UNLOCKED } }; -static void** classes = NULL; -static int num_classes = 0; -static int max_classes = 0; -static const int increment = 10; +static void *classes; - /* * Local functions */ static void save_class(opal_class_t *cls); -static void expand_array(void); /* @@ -72,6 +67,7 @@ opal_class_t *c; opal_construct_t* cls_construct_array; opal_destruct_t* cls_destruct_array; +void *base_pointer; int i; assert(cls); @@ -104,15 +100,18 @@ /* * Allocate arrays for hierarchy of constructors and destructors + * Plus one void pointer for the classes list */ -cls->cls_construct_array = -(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)* - sizeof(opal_construct_t) * 2 ); -if (NULL == cls->cls_construct_array) { +base_pointer = malloc((cls->cls_depth + 1) * sizeof (opal_construct_t) * 2 + + sizeof(void *)); +if (NULL == base_pointer) { perror("Out of memory"); exit(-1); } +/* The arrays begin behind the void pointer */ +cls->cls_destruct_array = +(void (**)(opal_object_t*))((char *)base_pointer + sizeof (void *)); cls->cls_destruct_array = cls->cls_construct_array + cls- >cls_depth + 1; cls_construct_array = cls->cls_construct_array; cls_destruct_array = cls->cls_destruct_array; @@ -154,18 +153,10 @@ */ int opal_class_finalize(void) { -int i; - -if (NULL != classes) { -for (i = 0; i < num_classes; ++i) { -if (NULL != classes[i]) { -free(classes[i]); -} -} +while (NULL != classes) { +void *next = *(void **)classes; free(classes); -classes = NULL; -num_classes = 0; -max_classes = 0; +classes = next; } return OPAL_SUCCESS; @@ -174,27 +165,7 @@ static void save_class(opal_class_t *cls) { -if (num_classes >= max_classes) { -expand_array(); -} - -classes[num_classes] = cls->cls_construct_array; -++num_classes; +void *base_pointer = (char *)cls->cls_construct_array - sizeof (void *); +*(void **)base_pointer = classes; +classes = base_pointer; } - - -static void expand_array(void) -{ -int i; - -max_classes += increment; -classes = (void**)realloc(classes, sizeof(void *) * max_classes); -if (NULL == classes) { -perror("class malloc failed"); -exit(-1); -} -for (i = num_classes; i < max_classes; ++i) { -classes[i] = NULL; -} -} - ___ devel mailing list de...@open-mpi.org http://www.open-mpi.org/mailman/listinfo.cgi/devel "Half of what I say is meaningless; but I say it so that the other half may reach you" Kahlil Gibran
Re: [OMPI devel] [PATCH] opal/class/opal_object.c: save some memory for the ctor/dtor arrays
Hello, thanks, but I was in preparation to submit a superseded patch, which is more intrusive to the class object system. Bert George Bosilca wrote: > Bert, > > Thanks for this patch. I apply it in the trunk as revision r13939. > Thanks again. > >george. > > On Mar 5, 2007, at 12:10 PM, Bert Wesarg wrote: > >> This saves some memory for the constructors and destructors arrays >> of a >> class by counting the constructors and destructors while we are >> counting >> the cls_depth. And the reversion of the constructor array can now >> be done >> without an extra loop. >> >> The patch is only compile tested. >> >> Greetings >> >> Bert Wesarg >> >> Index: opal/class/opal_object.c >> === >> --- opal/class/opal_object.c (revision 13923) >> +++ opal/class/opal_object.c (working copy) >> @@ -71,7 +71,9 @@ >> { >> opal_class_t *c; >> opal_construct_t* cls_construct_array; >> -opal_destruct_t* cls_destruct_array; >> +opal_destruct_t* cls_destruct_array; >> +int cls_construct_array_count; >> +int cls_destruct_array_count; >> int i; >> >> assert(cls); >> @@ -95,33 +97,51 @@ >> >> /* >> * First calculate depth of class hierarchy >> + * And the number of constructors and destructors >> */ >> >> cls->cls_depth = 0; >> +cls_construct_array_count = 0; >> +cls_destruct_array_count = 0; >> for (c = cls; c; c = c->cls_parent) { >> -cls->cls_depth += 1; >> +if( NULL != c->cls_construct ) { >> +cls_construct_array_count++; >> +} >> +if( NULL != c->cls_destruct ) { >> +cls_destruct_array_count++; >> +} >> +cls->cls_depth++; >> } >> >> /* >> * Allocate arrays for hierarchy of constructors and destructors >> + * plus for each a NULL-sentinel >> */ >> >> cls->cls_construct_array = >> -(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)* >> - sizeof(opal_construct_t) >> * 2 ); >> +(void (**)(opal_object_t*))malloc >> ((cls_construct_array_count + >> + >> cls_destruct_array_count + 2) * >> + sizeof(opal_construct_t) ); >> if (NULL == cls->cls_construct_array) { >> perror("Out of memory"); >> exit(-1); >> } >> -cls->cls_destruct_array = cls->cls_construct_array + cls- >>> cls_depth + 1; >> -cls_construct_array = cls->cls_construct_array; >> -cls_destruct_array = cls->cls_destruct_array; >> - >> +cls->cls_destruct_array = >> +cls->cls_construct_array + cls_construct_array_count + 1; >> + >> +/* >> + * The constructor array is reversed, so start at the end >> + */ >> + >> +cls_construct_array = cls->cls_construct_array + >> cls_construct_array_count; >> +cls_destruct_array = cls->cls_destruct_array; >> + >> c = cls; >> +*cls_construct_array = NULL; /* end marker for the >> constructors */ >> for (i = 0; i < cls->cls_depth; i++) { >> if( NULL != c->cls_construct ) { >> +--cls_construct_array; >> *cls_construct_array = c->cls_construct; >> -cls_construct_array++; >> } >> if( NULL != c->cls_destruct ) { >> *cls_destruct_array = c->cls_destruct; >> @@ -129,16 +149,7 @@ >> } >> c = c->cls_parent; >> } >> -*cls_construct_array = NULL; /* end marker for the >> constructors */ >> *cls_destruct_array = NULL; /* end marker for the destructors */ >> -/* Now we have to invert the constructors */ >> -for( i = 0, --cls_construct_array; >> - cls_construct_array > (cls->cls_construct_array + i); >> - i++, cls_construct_array-- ) { >> -opal_construct_t temp_construct = *cls_construct_array; >> -*cls_construct_array = cls->cls_construct_array[i]; >> -cls->cls_construct_array[i] = temp_construct; >> -} >> >> cls->cls_initialized = 1; >> save_class(cls); >> ___ >> devel mailing list >> de...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/devel > > "Half of what I say is meaningless; but I say it so that the other > half may reach you" >Kahlil Gibran > > > ___ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel
[OMPI devel] [RFC][PATCH] option to use libumem for opal object system
Hello, this gives the option to use the umem cache feature from the libumem[1] for the opal object system. It is full backward compatible to the old system. But the patch exists of more changes: (1) reorder opal_class_t, in the hope that vital members fit in the first cache line (2) a per class lock for initialization (3) the global class list is now a linked list embeded in the opal_class_t (this can be reduced to a stack/single linked list) (4) new contructors/destructors for the one time cache initialization To complile with this new feature you must configure open-mpi with "-DUSE_UMEM" in your CFLAGS, and all other needed build flags to find the header and library of lubumem (LDFLAGS, LIBS). To full use the object caching of libumem you can use the new macro OBJ_CLASS_INSTANCE_CACHE() which have arguments for the cache contructors/destructors. In the followup mail, I convert the opal_free_list_t and orte_pointer_array_t to use the cache for the initialization of the opal_mutex_t and opal_condition_t members. I have just compiled it with and without USE_UMEM but no benchmarking. Comments welcome. Greetings Bert Wesarg PS: I know that you are busy with the OMPI 1.2 release, I just want to send it out, befor I forget it. [1] solaris: built-in other: http://sourceforge.net/projects/umem --- opal/class/opal_object.c | 210 +++ opal/class/opal_object.h | 201 2 files changed, 324 insertions(+), 87 deletions(-) diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h --- old/opal/class/opal_object.h +++ new/opal/class/opal_object.h @@ -122,10 +122,17 @@ #if OMPI_HAVE_THREAD_SUPPORT #include "opal/sys/atomic.h" #endif /* OMPI_HAVE_THREAD_SUPPORT */ +#ifdef USE_UMEM +# include +# ifndef UMEM_CACHE_NAMELEN +# define UMEM_CACHE_NAMELEN 31 +# endif +#endif + #if OMPI_ENABLE_DEBUG /* Any kind of unique ID should do the job */ #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL) #endif @@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob * * There should be a single instance of this descriptor for each class * definition. */ struct opal_class_t { -const char *cls_name; /**< symbolic name for class */ -opal_class_t *cls_parent; /**< parent class descriptor */ -opal_construct_t cls_construct; /**< class constructor */ -opal_destruct_t cls_destruct; /**< class destructor */ -int cls_initialized;/**< is class initialized */ -int cls_depth; /**< depth of class hierarchy tree */ +#ifdef USE_UMEM +umem_cache_t *cls_cache;/**< object cache */ +#endif +size_t cls_sizeof;/**< size of an object instance */ +opal_construct_t *cls_cache_construct_array; +/**< array of parent class cache constructors */ opal_construct_t *cls_construct_array; /**< array of parent class constructors */ -opal_destruct_t *cls_destruct_array; +opal_destruct_t *cls_destruct_array; /**< array of parent class destructors */ -size_t cls_sizeof; /**< size of an object instance */ +opal_destruct_t *cls_cache_destruct_array; +/**< array of parent class cache destructors */ +int cls_initialized;/**< is class initialized */ +int cls_depth; /**< depth of class hierarchy tree */ +const char *cls_name; /**< symbolic name for class */ +opal_class_t *cls_parent; /**< parent class descriptor */ +opal_construct_t cls_construct; /**< class constructor */ +opal_destruct_t cls_destruct; /**< class destructor */ +opal_construct_t cls_cache_construct; +/**< class object cache constructor */ +opal_destruct_t cls_cache_destruct; +/**< class cache destructor */ +opal_atomic_lock_t cls_init_lock; +/**< class init mutex */ +opal_class_t*cls_next, *cls_prev; +/**< linked list of all classes */ }; /** * For static initializations of OBJects. * @@ -198,30 +220,97 @@ struct opal_object_t { * @param NAME Name of class * @return Pointer to class descriptor */ #define OBJ_CLASS(NAME) (&(NAME ## _class)) - /** * Static initializer for a class descriptor * * @param NAME Name of class * @param PARENTName of parent class * @param CONSTRUCTOR Pointer to constructor * @param DESTRUCTORPointer to destructor * * Put this in NAME.c */ +#ifdef USE_UMEM #define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR) \ opal_class_t NAME ## _class = { \ +NULL,
[OMPI devel] [RFC][PATCH] use the new object caching (examples)
--- opal/class/opal_free_list.c | 24 ++-- 1 files changed, 18 insertions(+), 6 deletions(-) diff --quilt old/opal/class/opal_free_list.c new/opal/class/opal_free_list.c --- old/opal/class/opal_free_list.c +++ new/opal/class/opal_free_list.c @@ -22,23 +22,25 @@ #include "opal/sys/cache.h" static void opal_free_list_construct(opal_free_list_t* fl); static void opal_free_list_destruct(opal_free_list_t* fl); +static void opal_free_list_cache_construct(opal_free_list_t* fl); +static void opal_free_list_cache_destruct(opal_free_list_t* fl); -OBJ_CLASS_INSTANCE(opal_free_list_t, - opal_list_t, - opal_free_list_construct, - opal_free_list_destruct); +OBJ_CLASS_INSTANCE_CACHE(opal_free_list_t, + opal_list_t, + opal_free_list_construct, + opal_free_list_destruct, + opal_free_list_cache_construct, + opal_free_list_cache_destruct); OBJ_CLASS_INSTANCE(opal_free_list_item_t, opal_list_item_t, NULL, NULL); static void opal_free_list_construct(opal_free_list_t* fl) { -OBJ_CONSTRUCT(&fl->fl_lock, opal_mutex_t); -OBJ_CONSTRUCT(&fl->fl_condition, opal_condition_t); fl->fl_max_to_alloc = 0; fl->fl_num_allocated = 0; fl->fl_num_per_alloc = 0; fl->fl_num_waiting = 0; fl->fl_elem_size = 0; @@ -55,10 +57,20 @@ static void opal_free_list_destruct(opal OBJ_DESTRUCT(item); free(item); } OBJ_DESTRUCT(&fl->fl_allocations); +} + +static void opal_free_list_cache_construct(opal_free_list_t* fl) +{ +OBJ_CONSTRUCT(&fl->fl_lock, opal_mutex_t); +OBJ_CONSTRUCT(&fl->fl_condition, opal_condition_t); +} + +static void opal_free_list_cache_destruct(opal_free_list_t* fl) +{ OBJ_DESTRUCT(&fl->fl_condition); OBJ_DESTRUCT(&fl->fl_lock); } int opal_free_list_init( --- orte/class/orte_pointer_array.c | 23 --- 1 files changed, 20 insertions(+), 3 deletions(-) diff --quilt old/orte/class/orte_pointer_array.c new/orte/class/orte_pointer_array.c --- old/orte/class/orte_pointer_array.c +++ new/orte/class/orte_pointer_array.c @@ -27,25 +27,28 @@ #include "orte/class/orte_pointer_array.h" #include "opal/util/output.h" static void orte_pointer_array_construct(orte_pointer_array_t *); static void orte_pointer_array_destruct(orte_pointer_array_t *); +static void orte_pointer_array_cache_construct(orte_pointer_array_t *); +static void orte_pointer_array_cache_destruct(orte_pointer_array_t *); static bool grow_table(orte_pointer_array_t *table); -OBJ_CLASS_INSTANCE( +OBJ_CLASS_INSTANCE_CACHE( orte_pointer_array_t, opal_object_t, orte_pointer_array_construct, -orte_pointer_array_destruct +orte_pointer_array_destruct, +orte_pointer_array_cache_construct, +orte_pointer_array_cache_destruct ); /* * orte_pointer_array constructor */ void orte_pointer_array_construct(orte_pointer_array_t *array) { -OBJ_CONSTRUCT(&array->lock, opal_mutex_t); array->lowest_free = 0; array->number_free = 0; array->size = 0; array->max_size = 0; array->block_size = 0; @@ -59,11 +62,25 @@ void orte_pointer_array_destruct(orte_po { /* free table */ if( NULL != array->addr) { free(array->addr); } +} +/* + * orte_pointer_array cache constructor + */ +void orte_pointer_array_cache_construct(orte_pointer_array_t *array) +{ +OBJ_CONSTRUCT(&array->lock, opal_mutex_t); +} + +/* + * orte_pointer_array cache destructor + */ +void orte_pointer_array_cache_destruct(orte_pointer_array_t *array) +{ OBJ_DESTRUCT(&array->lock); } /** * initialize an array object
Re: [OMPI devel] [RFC][PATCH] option to use libumem for opal object system
This is a self fix reply > > > --- > > opal/class/opal_object.c | 210 > +++ > opal/class/opal_object.h | 201 > 2 files changed, 324 insertions(+), 87 deletions(-) > > diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h > --- old/opal/class/opal_object.h > +++ new/opal/class/opal_object.h > @@ -122,10 +122,17 @@ > > #if OMPI_HAVE_THREAD_SUPPORT > #include "opal/sys/atomic.h" > #endif /* OMPI_HAVE_THREAD_SUPPORT */ > > +#ifdef USE_UMEM > +# include > +# ifndef UMEM_CACHE_NAMELEN > +# define UMEM_CACHE_NAMELEN 31 > +# endif > +#endif > + > #if OMPI_ENABLE_DEBUG > /* Any kind of unique ID should do the job */ > #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL) > #endif > > @@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob > * > * There should be a single instance of this descriptor for each class > * definition. > */ > struct opal_class_t { > -const char *cls_name; /**< symbolic name for class */ > -opal_class_t *cls_parent; /**< parent class descriptor */ > -opal_construct_t cls_construct; /**< class constructor */ > -opal_destruct_t cls_destruct; /**< class destructor */ > -int cls_initialized;/**< is class initialized */ > -int cls_depth; /**< depth of class hierarchy tree */ > +#ifdef USE_UMEM > +umem_cache_t *cls_cache;/**< object cache */ > +#endif > +size_t cls_sizeof;/**< size of an object instance */ > +opal_construct_t *cls_cache_construct_array; > +/**< array of parent class cache constructors */ > opal_construct_t *cls_construct_array; > /**< array of parent class constructors > */ > -opal_destruct_t *cls_destruct_array; > +opal_destruct_t *cls_destruct_array; > /**< array of parent class destructors */ > -size_t cls_sizeof; /**< size of an object instance */ > +opal_destruct_t *cls_cache_destruct_array; > +/**< array of parent class cache destructors */ > +int cls_initialized;/**< is class initialized */ > +int cls_depth; /**< depth of class hierarchy tree */ > +const char *cls_name; /**< symbolic name for class */ > +opal_class_t *cls_parent; /**< parent class descriptor */ > +opal_construct_t cls_construct; /**< class constructor */ > +opal_destruct_t cls_destruct; /**< class destructor */ > +opal_construct_t cls_cache_construct; > +/**< class object cache constructor */ > +opal_destruct_t cls_cache_destruct; > +/**< class cache destructor */ > +opal_atomic_lock_t cls_init_lock; > +/**< class init mutex */ > +opal_class_t*cls_next, *cls_prev; > +/**< linked list of all classes */ > }; > > /** > * For static initializations of OBJects. > * > @@ -198,30 +220,97 @@ struct opal_object_t { > * @param NAME Name of class > * @return Pointer to class descriptor > */ > #define OBJ_CLASS(NAME) (&(NAME ## _class)) > > - > /** > * Static initializer for a class descriptor > * > * @param NAME Name of class > * @param PARENTName of parent class > * @param CONSTRUCTOR Pointer to constructor > * @param DESTRUCTORPointer to destructor > * > * Put this in NAME.c > */ > +#ifdef USE_UMEM > #define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR) \ > opal_class_t NAME ## _class = { \ > +NULL, \ > +sizeof(NAME), \ > +NULL, NULL, NULL, NULL, \ > +0, 0, \ > +# NAME, \ > +OBJ_CLASS(PARENT), \ > +(opal_construct_t) CONSTRUCTOR, \ > +(opal_destruct_t) DESTRUCTOR, \ > +(opal_construct_t) NULL,\ > +(opal_destruct_t) NULL, \ > +{ { OPAL_ATOMIC_UNLOCKED } }, \ > +OBJ_CLASS(NAME), OBJ_CLASS(NAME)\ > +} > +#else > +#define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR) \ > +opal_class_t NAME ## _class = {
Re: [OMPI devel] [RFC][PATCH] option to use libumem for opal object system
Bert, Reordering the members in the opal_class structure can be discussed. In general, we don't focus on improving anything that's outside the critical path (such as the class sub-system). Even if all Open MPI objects are derived from this class, it is definitively not performance aware, and making it so don't give us any benefit. Moreover, it will make the code more difficult to read/understand and maintain. There are already some features similar with umem in Open MPI. Activate the memory debugging to get all debug features. Use the free list to get the alignment. We have a more powerful approach, as we can align not only on the whole structure, but on a particular item. These features are already in the trunk. Moreover, if any layer inside Open MPI need aligned memory, then it should allocate the objects by itself and call OBJ_CONSTRUCT instead of OBJ_NEW. I'm reluctant to include these 2 patches into the Open MPI trunk. At least not until it is proven that there is any benefit (mostly performance). However, if someone else from the community think they are fine/required, then it might happens that they will get included. Thanks, george. On Mar 6, 2007, at 12:22 PM, Bert Wesarg wrote: Hello, this gives the option to use the umem cache feature from the libumem [1] for the opal object system. It is full backward compatible to the old system. But the patch exists of more changes: (1) reorder opal_class_t, in the hope that vital members fit in the first cache line (2) a per class lock for initialization (3) the global class list is now a linked list embeded in the opal_class_t (this can be reduced to a stack/single linked list) (4) new contructors/destructors for the one time cache initialization To complile with this new feature you must configure open-mpi with "-DUSE_UMEM" in your CFLAGS, and all other needed build flags to find the header and library of lubumem (LDFLAGS, LIBS). To full use the object caching of libumem you can use the new macro OBJ_CLASS_INSTANCE_CACHE() which have arguments for the cache contructors/destructors. In the followup mail, I convert the opal_free_list_t and orte_pointer_array_t to use the cache for the initialization of the opal_mutex_t and opal_condition_t members. I have just compiled it with and without USE_UMEM but no benchmarking. Comments welcome. Greetings Bert Wesarg PS: I know that you are busy with the OMPI 1.2 release, I just want to send it out, befor I forget it. [1] solaris: built-in other: http://sourceforge.net/projects/umem --- opal/class/opal_object.c | 210 ++ + opal/class/opal_object.h | 201 +++ + 2 files changed, 324 insertions(+), 87 deletions(-) diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h --- old/opal/class/opal_object.h +++ new/opal/class/opal_object.h @@ -122,10 +122,17 @@ #if OMPI_HAVE_THREAD_SUPPORT #include "opal/sys/atomic.h" #endif /* OMPI_HAVE_THREAD_SUPPORT */ +#ifdef USE_UMEM +# include +# ifndef UMEM_CACHE_NAMELEN +# define UMEM_CACHE_NAMELEN 31 +# endif +#endif + #if OMPI_ENABLE_DEBUG /* Any kind of unique ID should do the job */ #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL) #endif @@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob * * There should be a single instance of this descriptor for each class * definition. */ struct opal_class_t { -const char *cls_name; /**< symbolic name for class */ -opal_class_t *cls_parent; /**< parent class descriptor */ -opal_construct_t cls_construct; /**< class constructor */ -opal_destruct_t cls_destruct; /**< class destructor */ -int cls_initialized;/**< is class initialized */ -int cls_depth; /**< depth of class hierarchy tree */ +#ifdef USE_UMEM +umem_cache_t *cls_cache;/**< object cache */ +#endif +size_t cls_sizeof;/**< size of an object instance */ +opal_construct_t *cls_cache_construct_array; +/**< array of parent class cache constructors */ opal_construct_t *cls_construct_array; /**< array of parent class constructors */ -opal_destruct_t *cls_destruct_array; +opal_destruct_t *cls_destruct_array; /**< array of parent class destructors */ -size_t cls_sizeof; /**< size of an object instance */ +opal_destruct_t *cls_cache_destruct_array; +/**< array of parent class cache destructors */ +int cls_initialized;/**< is class initialized */ +int cls_depth; /**< depth of class hierarchy tree */ +const char *cls_name; /**< symbolic name for class */ +opal_class_t *cls_parent; /**< parent class descriptor