[OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Bert Wesarg
Fix the double-check locking[1] by defining the cls_initialized member to
volatile.

Greetings

Bert Wesarg

[1]: http://en.wikipedia.org/wiki/Double-checked_locking


---

 opal/class/opal_object.h |2 +-
 1 files changed, 1 insertion(+), 1 deletion(-)

diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
--- old/opal/class/opal_object.h
+++ new/opal/class/opal_object.h
@@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob
 struct opal_class_t {
 const char *cls_name;   /**< symbolic name for class */
 opal_class_t *cls_parent;   /**< parent class descriptor */
 opal_construct_t cls_construct; /**< class constructor */
 opal_destruct_t cls_destruct;   /**< class destructor */
-int cls_initialized;/**< is class initialized */
+volatile int cls_initialized;   /**< is class initialized */
 int cls_depth;  /**< depth of class hierarchy tree */
 opal_construct_t *cls_construct_array;
 /**< array of parent class constructors */
 opal_destruct_t *cls_destruct_array;
 /**< array of parent class destructors */


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Gleb Natapov
On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote:
> Fix the double-check locking[1] by defining the cls_initialized member to
> volatile.
> 
> Greetings
> 
> Bert Wesarg
> 
> [1]: http://en.wikipedia.org/wiki/Double-checked_locking
Can you explain how the Java example from this page applies to the code
in Open MPI? cls_initialized is set only after class is fully
initialized and usable.

> 
> 

> ---
> 
>  opal/class/opal_object.h |2 +-
>  1 files changed, 1 insertion(+), 1 deletion(-)
> 
> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
> --- old/opal/class/opal_object.h
> +++ new/opal/class/opal_object.h
> @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob
>  struct opal_class_t {
>  const char *cls_name;   /**< symbolic name for class */
>  opal_class_t *cls_parent;   /**< parent class descriptor */
>  opal_construct_t cls_construct; /**< class constructor */
>  opal_destruct_t cls_destruct;   /**< class destructor */
> -int cls_initialized;/**< is class initialized */
> +volatile int cls_initialized;   /**< is class initialized */
>  int cls_depth;  /**< depth of class hierarchy tree */
>  opal_construct_t *cls_construct_array;
>  /**< array of parent class constructors 
> */
>  opal_destruct_t *cls_destruct_array;
>  /**< array of parent class destructors */

> ___
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

--
Gleb.


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Bert Wesarg


Gleb Natapov wrote:
> On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote:
>> Fix the double-check locking[1] by defining the cls_initialized member to
>> volatile.
>>
>> Greetings
>>
>> Bert Wesarg
>>
>> [1]: http://en.wikipedia.org/wiki/Double-checked_locking
> Can you explain how the Java example from this page applies to the code
> in Open MPI? cls_initialized is set only after class is fully
> initialized and usable.
Sure:

in opal_object.c:opal_class_initialize():


if (1 == cls->cls_initialized) {
return;
}
opal_atomic_lock(&class_lock);

/* If another thread initializing this same class came in at
   roughly the same time, it may have gotten the lock and
   initialized.  So check again. */

if (1 == cls->cls_initialized) {
opal_atomic_unlock(&class_lock);
return;
}

the compiler can optimize the second if check away, without being declared
as volatile.

Bert


> 
>>
> 
>> ---
>>
>>  opal/class/opal_object.h |2 +-
>>  1 files changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
>> --- old/opal/class/opal_object.h
>> +++ new/opal/class/opal_object.h
>> @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob
>>  struct opal_class_t {
>>  const char *cls_name;   /**< symbolic name for class */
>>  opal_class_t *cls_parent;   /**< parent class descriptor */
>>  opal_construct_t cls_construct; /**< class constructor */
>>  opal_destruct_t cls_destruct;   /**< class destructor */
>> -int cls_initialized;/**< is class initialized */
>> +volatile int cls_initialized;   /**< is class initialized */
>>  int cls_depth;  /**< depth of class hierarchy tree */
>>  opal_construct_t *cls_construct_array;
>>  /**< array of parent class constructors 
>> */
>>  opal_destruct_t *cls_destruct_array;
>>  /**< array of parent class destructors 
>> */
> 
>> ___
>> devel mailing list
>> de...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
> 
> --
>   Gleb.
> ___
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Gleb Natapov
On Tue, Mar 06, 2007 at 10:44:53AM +0100, Bert Wesarg wrote:
> 
> 
> Gleb Natapov wrote:
> > On Tue, Mar 06, 2007 at 10:10:44AM +0100, Bert Wesarg wrote:
> >> Fix the double-check locking[1] by defining the cls_initialized member to
> >> volatile.
> >>
> >> Greetings
> >>
> >> Bert Wesarg
> >>
> >> [1]: http://en.wikipedia.org/wiki/Double-checked_locking
> > Can you explain how the Java example from this page applies to the code
> > in Open MPI? cls_initialized is set only after class is fully
> > initialized and usable.
> Sure:
> 
> in opal_object.c:opal_class_initialize():
> 
> 
> if (1 == cls->cls_initialized) {
> return;
> }
> opal_atomic_lock(&class_lock);
> 
> /* If another thread initializing this same class came in at
>roughly the same time, it may have gotten the lock and
>initialized.  So check again. */
> 
> if (1 == cls->cls_initialized) {
> opal_atomic_unlock(&class_lock);
> return;
> }
> 
> the compiler can optimize the second if check away, without being declared
> as volatile.
If it does this after opal_atomic_lock() (which is explicit memory
barrier) then it is broken.

> 
> Bert
> 
> 
> > 
> >>
> > 
> >> ---
> >>
> >>  opal/class/opal_object.h |2 +-
> >>  1 files changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
> >> --- old/opal/class/opal_object.h
> >> +++ new/opal/class/opal_object.h
> >> @@ -148,11 +148,11 @@ typedef void (*opal_destruct_t) (opal_ob
> >>  struct opal_class_t {
> >>  const char *cls_name;   /**< symbolic name for class */
> >>  opal_class_t *cls_parent;   /**< parent class descriptor */
> >>  opal_construct_t cls_construct; /**< class constructor */
> >>  opal_destruct_t cls_destruct;   /**< class destructor */
> >> -int cls_initialized;/**< is class initialized */
> >> +volatile int cls_initialized;   /**< is class initialized */
> >>  int cls_depth;  /**< depth of class hierarchy tree */
> >>  opal_construct_t *cls_construct_array;
> >>  /**< array of parent class 
> >> constructors */
> >>  opal_destruct_t *cls_destruct_array;
> >>  /**< array of parent class 
> >> destructors */
> > 
> >> ___
> >> devel mailing list
> >> de...@open-mpi.org
> >> http://www.open-mpi.org/mailman/listinfo.cgi/devel
> > 
> > --
> > Gleb.
> > ___
> > devel mailing list
> > de...@open-mpi.org
> > http://www.open-mpi.org/mailman/listinfo.cgi/devel
> ___
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

--
Gleb.


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Bert Wesarg
Hello,

Gleb Natapov wrote:
> If it does this after opal_atomic_lock() (which is explicit memory
> barrier) then it is broken.
Than, gcc 4.1.1 on the amd64 architecture is broken:

The test-cases were compiled in the test/asm directory, with -O3

Bert


#define OMPI_BUILDING 0
#include "ompi_config.h"

#include "opal/sys/atomic.h"

static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };

int
main(int argc, char *argv[])
{
int test = (argc == 1);

__asm__ ("# first if\n");
if (1 == test) {
return 1;
}
__asm__ ("# lock\n");
opal_atomic_lock(&lock);

__asm__ ("# second if\n");
if (1 == test) {
__asm__ ("# if unlock\n");
opal_atomic_unlock(&lock);
return 2;
}

test = 1;
__asm__ ("# unlock\n");
opal_atomic_unlock(&lock);

return 0;
}

	.file	"double_check.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
main:
.LFB30:
#APP
	# first if

#NO_APP
	decl	%edi
	movl	$1, %eax
	je	.L4
#APP
	# lock

	.p2align 4,,7
#NO_APP
.L5:
	xorl	%edx, %edx
	movl	$1, %ecx
	movl	%edx, %eax
#APP
	lock; cmpxchgl %ecx,lock(%rip)   
	sete %dl  

#NO_APP
	testb	%dl, %dl
	jne	.L13
	.p2align 4,,7
.L9:
	movl	lock(%rip), %eax
	decl	%eax
	je	.L9
	jmp	.L5
.L13:
#APP
	# second if

	# unlock

#NO_APP
	movl	$0, lock(%rip)
.L4:
	rep ; ret
.LFE30:
	.size	main, .-main
	.local	lock
	.comm	lock,4,4
	.section	.eh_frame,"a",@progbits
.Lframe1:
	.long	.LECIE1-.LSCIE1
.LSCIE1:
	.long	0x0
	.byte	0x1
	.string	"zR"
	.uleb128 0x1
	.sleb128 -8
	.byte	0x10
	.uleb128 0x1
	.byte	0x3
	.byte	0xc
	.uleb128 0x7
	.uleb128 0x8
	.byte	0x90
	.uleb128 0x1
	.align 8
.LECIE1:
.LSFDE1:
	.long	.LEFDE1-.LASFDE1
.LASFDE1:
	.long	.LASFDE1-.Lframe1
	.long	.LFB30
	.long	.LFE30-.LFB30
	.uleb128 0x0
	.align 8
.LEFDE1:
	.ident	"GCC: (GNU) 4.1.1"
	.section	.note.GNU-stack,"",@progbits
#define OMPI_BUILDING 0
#include "ompi_config.h"

#include "opal/sys/atomic.h"

static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };

int
main(int argc, char *argv[])
{
volatile int test = (argc == 1);

__asm__ ("# first if\n");
if (1 == test) {
return 1;
}
__asm__ ("# lock\n");
opal_atomic_lock(&lock);

__asm__ ("# second if\n");
if (1 == test) {
__asm__ ("# if unlock\n");
opal_atomic_unlock(&lock);
return 2;
}

test = 1;
__asm__ ("# unlock\n");
opal_atomic_unlock(&lock);

return 0;
}

	.file	"double_check_volatile.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
main:
.LFB30:
	xorl	%eax, %eax
	cmpl	$1, %edi
	sete	%al
	movl	%eax, -4(%rsp)
#APP
	# first if

#NO_APP
	movl	-4(%rsp), %eax
	movl	$1, %edx
	decl	%eax
	je	.L4
#APP
	# lock

	.p2align 4,,7
#NO_APP
.L5:
	xorl	%edx, %edx
	movl	$1, %ecx
	movl	%edx, %eax
#APP
	lock; cmpxchgl %ecx,lock(%rip)   
	sete %dl  

#NO_APP
	testb	%dl, %dl
	jne	.L15
	.p2align 4,,7
.L11:
	movl	lock(%rip), %eax
	decl	%eax
	je	.L11
	jmp	.L5
.L15:
#APP
	# second if

#NO_APP
	movl	-4(%rsp), %eax
	decl	%eax
	jne	.L8
#APP
	# if unlock

#NO_APP
	movl	$0, lock(%rip)
	movl	$2, %edx
.L4:
	movl	%edx, %eax
	ret
.L8:
	movl	$1, -4(%rsp)
#APP
	# unlock

#NO_APP
	xorl	%edx, %edx
	movl	$0, lock(%rip)
	jmp	.L4
.LFE30:
	.size	main, .-main
	.local	lock
	.comm	lock,4,4
	.section	.eh_frame,"a",@progbits
.Lframe1:
	.long	.LECIE1-.LSCIE1
.LSCIE1:
	.long	0x0
	.byte	0x1
	.string	"zR"
	.uleb128 0x1
	.sleb128 -8
	.byte	0x10
	.uleb128 0x1
	.byte	0x3
	.byte	0xc
	.uleb128 0x7
	.uleb128 0x8
	.byte	0x90
	.uleb128 0x1
	.align 8
.LECIE1:
.LSFDE1:
	.long	.LEFDE1-.LASFDE1
.LASFDE1:
	.long	.LASFDE1-.Lframe1
	.long	.LFB30
	.long	.LFE30-.LFB30
	.uleb128 0x0
	.align 8
.LEFDE1:
	.ident	"GCC: (GNU) 4.1.1"
	.section	.note.GNU-stack,"",@progbits


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Gleb Natapov
On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote:
> Hello,
> 
> Gleb Natapov wrote:
> > If it does this after opal_atomic_lock() (which is explicit memory
> > barrier) then it is broken.
> Than, gcc 4.1.1 on the amd64 architecture is broken:
And can you repeat the test please, but make "test" variable to be global
to tell compiler that it can be actually accessed by more then one
thread.

> 
> The test-cases were compiled in the test/asm directory, with -O3
> 
> Bert
> 
> 

> #define OMPI_BUILDING 0
> #include "ompi_config.h"
> 
> #include "opal/sys/atomic.h"
> 
> static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
> 
> int
> main(int argc, char *argv[])
> {
> int test = (argc == 1);
> 
> __asm__ ("# first if\n");
> if (1 == test) {
> return 1;
> }
> __asm__ ("# lock\n");
> opal_atomic_lock(&lock);
> 
> __asm__ ("# second if\n");
> if (1 == test) {
> __asm__ ("# if unlock\n");
> opal_atomic_unlock(&lock);
> return 2;
> }
> 
> test = 1;
> __asm__ ("# unlock\n");
> opal_atomic_unlock(&lock);
> 
> return 0;
> }
> 

>   .file   "double_check.c"
>   .text
>   .p2align 4,,15
> .globl main
>   .type   main, @function
> main:
> .LFB30:
> #APP
>   # first if
> 
> #NO_APP
>   decl%edi
>   movl$1, %eax
>   je  .L4
> #APP
>   # lock
> 
>   .p2align 4,,7
> #NO_APP
> .L5:
>   xorl%edx, %edx
>   movl$1, %ecx
>   movl%edx, %eax
> #APP
>   lock; cmpxchgl %ecx,lock(%rip)   
>   sete %dl  
>   
> #NO_APP
>   testb   %dl, %dl
>   jne .L13
>   .p2align 4,,7
> .L9:
>   movllock(%rip), %eax
>   decl%eax
>   je  .L9
>   jmp .L5
> .L13:
> #APP
>   # second if
> 
>   # unlock
> 
> #NO_APP
>   movl$0, lock(%rip)
> .L4:
>   rep ; ret
> .LFE30:
>   .size   main, .-main
>   .local  lock
>   .comm   lock,4,4
>   .section.eh_frame,"a",@progbits
> .Lframe1:
>   .long   .LECIE1-.LSCIE1
> .LSCIE1:
>   .long   0x0
>   .byte   0x1
>   .string "zR"
>   .uleb128 0x1
>   .sleb128 -8
>   .byte   0x10
>   .uleb128 0x1
>   .byte   0x3
>   .byte   0xc
>   .uleb128 0x7
>   .uleb128 0x8
>   .byte   0x90
>   .uleb128 0x1
>   .align 8
> .LECIE1:
> .LSFDE1:
>   .long   .LEFDE1-.LASFDE1
> .LASFDE1:
>   .long   .LASFDE1-.Lframe1
>   .long   .LFB30
>   .long   .LFE30-.LFB30
>   .uleb128 0x0
>   .align 8
> .LEFDE1:
>   .ident  "GCC: (GNU) 4.1.1"
>   .section.note.GNU-stack,"",@progbits

> #define OMPI_BUILDING 0
> #include "ompi_config.h"
> 
> #include "opal/sys/atomic.h"
> 
> static opal_atomic_lock_t lock = { { OPAL_ATOMIC_UNLOCKED } };
> 
> int
> main(int argc, char *argv[])
> {
> volatile int test = (argc == 1);
> 
> __asm__ ("# first if\n");
> if (1 == test) {
> return 1;
> }
> __asm__ ("# lock\n");
> opal_atomic_lock(&lock);
> 
> __asm__ ("# second if\n");
> if (1 == test) {
> __asm__ ("# if unlock\n");
> opal_atomic_unlock(&lock);
> return 2;
> }
> 
> test = 1;
> __asm__ ("# unlock\n");
> opal_atomic_unlock(&lock);
> 
> return 0;
> }
> 

>   .file   "double_check_volatile.c"
>   .text
>   .p2align 4,,15
> .globl main
>   .type   main, @function
> main:
> .LFB30:
>   xorl%eax, %eax
>   cmpl$1, %edi
>   sete%al
>   movl%eax, -4(%rsp)
> #APP
>   # first if
> 
> #NO_APP
>   movl-4(%rsp), %eax
>   movl$1, %edx
>   decl%eax
>   je  .L4
> #APP
>   # lock
> 
>   .p2align 4,,7
> #NO_APP
> .L5:
>   xorl%edx, %edx
>   movl$1, %ecx
>   movl%edx, %eax
> #APP
>   lock; cmpxchgl %ecx,lock(%rip)   
>   sete %dl  
>   
> #NO_APP
>   testb   %dl, %dl
>   jne .L15
>   .p2align 4,,7
> .L11:
>   movllock(%rip), %eax
>   decl%eax
>   je  .L11
>   jmp .L5
> .L15:
> #APP
>   # second if
> 
> #NO_APP
>   movl-4(%rsp), %eax
>   decl%eax
>   jne .L8
> #APP
>   # if unlock
> 
> #NO_APP
>   movl$0, lock(%rip)
>   movl$2, %edx
> .L4:
>   movl%edx, %eax
>   ret
> .L8:
>   movl$1, -4(%rsp)
> #APP
>   # unlock
> 
> #NO_APP
>   xorl%edx, %edx
>   movl$0, lock(%rip)
>   jmp .L4
> .LFE30:
>   .size   main, .-main
>   .local  lock
>   .comm   lock,4,4
>   .section.eh_frame,"a",@progbits
> .Lframe1:
>   .long   .LECIE1-.LSCIE1
> .LSCIE1:
>   .long   0x0
>   .byte   0x1
>   .string "zR"
>   .uleb128 0x1
>   .sleb128 -8
>   .byte   0x10
>   .uleb128 0x1
>   .byte   0x3
>   .byte   0xc
>   .uleb128 0x7
>   .uleb128 0x8
>   .byte   0x90
>   .uleb128 0x1
>   .align 8

Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Bert Wesarg
Gleb Natapov wrote:
> On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote:
>> Hello,
>>
>> Gleb Natapov wrote:
>>> If it does this after opal_atomic_lock() (which is explicit memory
>>> barrier) then it is broken.
>> Than, gcc 4.1.1 on the amd64 architecture is broken:
> And can you repeat the test please, but make "test" variable to be global
> to tell compiler that it can be actually accessed by more then one
> thread.
> 

ahh, now both produce nearly the same code.

but who hurts it to declare the variable volatile?

Bert
--- double_check.s	2007-03-06 12:07:12.103478512 +0100
+++ double_check_volatile.s	2007-03-06 12:06:20.675315485 +0100
@@ -1,4 +1,4 @@
-	.file	"double_check.c"
+	.file	"double_check_volatile.c"
 	.text
 	.p2align 4,,15
 .globl main
@@ -13,8 +13,9 @@
 	# first if

 #NO_APP
-	decl	%eax
+	movl	test(%rip), %eax
 	movl	$1, %edx
+	decl	%eax
 	je	.L4
 #APP
 	# lock
@@ -43,7 +44,8 @@
 	# second if

 #NO_APP
-	cmpl	$1, test(%rip)
+	movl	test(%rip), %eax
+	decl	%eax
 	jne	.L8
 #APP
 	# if unlock


Re: [OMPI devel] [PATCH] opal/class/opal_object: fix double-check locking for class initialization

2007-03-06 Thread Gleb Natapov
On Tue, Mar 06, 2007 at 12:13:16PM +0100, Bert Wesarg wrote:
> Gleb Natapov wrote:
> > On Tue, Mar 06, 2007 at 11:24:06AM +0100, Bert Wesarg wrote:
> >> Hello,
> >>
> >> Gleb Natapov wrote:
> >>> If it does this after opal_atomic_lock() (which is explicit memory
> >>> barrier) then it is broken.
> >> Than, gcc 4.1.1 on the amd64 architecture is broken:
> > And can you repeat the test please, but make "test" variable to be global
> > to tell compiler that it can be actually accessed by more then one
> > thread.
> > 
> 
> ahh, now both produce nearly the same code.
> 
> but who hurts it to declare the variable volatile?
> 
The disadvantage of using volatile is that compiler will produce
inefficient code for every access to a variable and not only specific
access that you care about, so it is better to do proper memory barrier
in places where ordering is important and let compiler optimize all
other cases. But what is even more important is that in most cases
volatile is not enough. CPU may reorder accesses to memory and volatile
doesn't help, so volatile is not a replacement for memory barriers.
You can find Linus rants about volatile usage in LKML archives for
entertainment reading. Volatile is overused in Open MPI anyway.

> Bert

> --- double_check.s2007-03-06 12:07:12.103478512 +0100
> +++ double_check_volatile.s   2007-03-06 12:06:20.675315485 +0100
> @@ -1,4 +1,4 @@
> - .file   "double_check.c"
> + .file   "double_check_volatile.c"
>   .text
>   .p2align 4,,15
>  .globl main
> @@ -13,8 +13,9 @@
>   # first if
>  
>  #NO_APP
> - decl%eax
> + movltest(%rip), %eax
>   movl$1, %edx
> + decl%eax
>   je  .L4
>  #APP
>   # lock
> @@ -43,7 +44,8 @@
>   # second if
>  
>  #NO_APP
> - cmpl$1, test(%rip)
> + movltest(%rip), %eax
> + decl%eax
>   jne .L8
>  #APP
>   # if unlock

> ___
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel

--
Gleb.


[OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()

2007-03-06 Thread Bert Wesarg
Hello,

I followed the call to test the rc1, but a simple test programm hangs, but
non deterministic. all but one orted have quit. but no cpu eating from
orted or mpirun.

The test system is a xeon cluster with myrinet interconnect.

the outouts are splited into two mails (100kb limit)

Bert Wesarg
/* -*- c -*- */

/*
 $ ~/opt/openmpi-1.2rc1/bin/mpicc -std=gnu99 -W -Wall -Wno-unused-parameter -O2 -c -o mpi_threaded.o mpi_threaded.c
 $ ~/opt/openmpi-1.2rc1/bin/mpicc -std=gnu99 mpi_threaded.o -o mpi_threaded
 $ cat host8_11
compute-0-8.local slots=2  max-slots=4
compute-0-9.local slots=2  max-slots=4
compute-0-10.local slots=2  max-slots=4
compute-0-11.local slots=2  max-slots=4
 $ ~/opt/openmpi-1.2rc1/bin/mpirun -np 4 --hostfile host8_11 ./mpi_threaded
thread level: MPI_THREAD_MULTIPLE
thread level: MPI_THREAD_MULTIPLE
thread level: MPI_THREAD_MULTIPLE
thread level: MPI_THREAD_MULTIPLE
 # hangs none deterministic, must kill mpirun with kill -KILL
 $ ~/opt/openmpi-1.2rc1/bin/mpirun -np 2 ./mpi_threaded
--
[0,1,0]: Myrinet/GM on host master.halc.informatik.uni-halle.de was unable to find any NICs.
Another transport will be used instead, although this may result in
lower performance.
--
--
[0,1,1]: Myrinet/GM on host master.halc.informatik.uni-halle.de was unable to find any NICs.
Another transport will be used instead, although this may result in
lower performance.
--
thread level: MPI_THREAD_MULTIPLE
thread level: MPI_THREAD_MULTIPLE
 $
 */

#define _GNU_SOURCE
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 

#include 

#define ENUM_TO_STRING(e) [e] = #e

static const char *const thread_names[] = {
ENUM_TO_STRING(MPI_THREAD_SINGLE),
ENUM_TO_STRING(MPI_THREAD_FUNNELED),
ENUM_TO_STRING(MPI_THREAD_SERIALIZED),
ENUM_TO_STRING(MPI_THREAD_MULTIPLE)
};

int
main(int ac, char *av[])
{
int thread_level;

MPI_Init_thread(&ac, &av, MPI_THREAD_MULTIPLE, &thread_level);
printf("thread level: %s\n", thread_names[thread_level]);
MPI_Finalize();

return 0;
}


ompi-out1.tar.gz
Description: GNU Zip compressed data


[OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init() (Part 2)

2007-03-06 Thread Bert Wesarg
Part 2 of the output tar


ompi-out2.tar.gz
Description: GNU Zip compressed data


Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()

2007-03-06 Thread Tim Mattox

Hi Bert Wesarg,
Thank you for your quick testing of 1.2rc1.  1.2 is expected to fail when
using MPI_THREAD_MULTIPLE.  I suspect that a working and tested
MPI_THREAD_MULTIPLE will be one of our goals for 1.3.

On 3/6/07, Bert Wesarg  wrote:

Hello,

I followed the call to test the rc1, but a simple test programm hangs, but
non deterministic. all but one orted have quit. but no cpu eating from
orted or mpirun.

The test system is a xeon cluster with myrinet interconnect.

the outouts are splited into two mails (100kb limit)

Bert Wesarg

___
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel





--
Tim Mattox - http://homepage.mac.com/tmattox/
tmat...@gmail.com || timat...@open-mpi.org
   I'm a bright... http://www.the-brights.net/


Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()

2007-03-06 Thread Bert Wesarg
Hi,

this is realy sad, version 1.1.2 works quite good with threads (multiple
threads which starts mpi requests), only 1 of 10 (or even less) kills with
a SIGSEGV. And this this simple test program works even longer.

Bert

Tim Mattox wrote:
> Hi Bert Wesarg,
> Thank you for your quick testing of 1.2rc1.  1.2 is expected to fail when
> using MPI_THREAD_MULTIPLE.  I suspect that a working and tested
> MPI_THREAD_MULTIPLE will be one of our goals for 1.3.
> 
> On 3/6/07, Bert Wesarg  wrote:
>> Hello,
>>
>> I followed the call to test the rc1, but a simple test programm hangs, but
>> non deterministic. all but one orted have quit. but no cpu eating from
>> orted or mpirun.
>>
>> The test system is a xeon cluster with myrinet interconnect.
>>
>> the outouts are splited into two mails (100kb limit)
>>
>> Bert Wesarg
>>
>> ___
>> devel mailing list
>> de...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
>>
>>
> 
> 


Re: [OMPI devel] 1.2rc1: hangs non deterministic with simple MPI_thread_init()

2007-03-06 Thread Jeff Squyres
Unfortunately, MPI_THREAD_MULTIPLE has never received a lot of  
testing in any version of OMPI (including v1.1).  Various members  
tested the bozo cases (e.g., ensure we don't double lock, etc.), and  
periodically tested/debugged simple multi-threaded apps, but not much  
more than that.


As such, it's probably pretty lucky that v1.1 works with  
THREAD_MULTIPLE.  Indeed, v1.2 has changed quite a bit since v1.2,  
and only the same level of THREAD_MULTIPLE testing has occurred (so  
far) -- checking for the bozo cases.


We do expect to improve this over the next several months; as Tim  
mentioned, for the v1.3 timeframe.



On Mar 6, 2007, at 9:06 AM, Bert Wesarg wrote:


Hi,

this is realy sad, version 1.1.2 works quite good with threads  
(multiple
threads which starts mpi requests), only 1 of 10 (or even less)  
kills with

a SIGSEGV. And this this simple test program works even longer.

Bert

Tim Mattox wrote:

Hi Bert Wesarg,
Thank you for your quick testing of 1.2rc1.  1.2 is expected to  
fail when

using MPI_THREAD_MULTIPLE.  I suspect that a working and tested
MPI_THREAD_MULTIPLE will be one of our goals for 1.3.

On 3/6/07, Bert Wesarg  wrote:

Hello,

I followed the call to test the rc1, but a simple test programm  
hangs, but
non deterministic. all but one orted have quit. but no cpu eating  
from

orted or mpirun.

The test system is a xeon cluster with myrinet interconnect.

the outouts are splited into two mails (100kb limit)

Bert Wesarg

___
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel






___
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel



--
Jeff Squyres
Server Virtualization Business Unit
Cisco Systems



Re: [OMPI devel] [PATCH] opal/class/opal_object.c: save some memory for the ctor/dtor arrays

2007-03-06 Thread George Bosilca

Bert,

Thanks for this patch. I apply it in the trunk as revision r13939.  
Thanks again.


  george.

On Mar 5, 2007, at 12:10 PM, Bert Wesarg wrote:

This saves some memory for the constructors and destructors arrays  
of a
class by counting the constructors and destructors while we are  
counting
the cls_depth. And the reversion of the constructor array can now  
be done

without an extra loop.

The patch is only compile tested.

Greetings

Bert Wesarg

Index: opal/class/opal_object.c
===
--- opal/class/opal_object.c(revision 13923)
+++ opal/class/opal_object.c(working copy)
@@ -71,7 +71,9 @@
 {
 opal_class_t *c;
 opal_construct_t* cls_construct_array;
-opal_destruct_t* cls_destruct_array;
+opal_destruct_t* cls_destruct_array;
+int cls_construct_array_count;
+int cls_destruct_array_count;
 int i;

 assert(cls);
@@ -95,33 +97,51 @@

 /*
  * First calculate depth of class hierarchy
+ * And the number of constructors and destructors
  */

 cls->cls_depth = 0;
+cls_construct_array_count = 0;
+cls_destruct_array_count  = 0;
 for (c = cls; c; c = c->cls_parent) {
-cls->cls_depth += 1;
+if( NULL != c->cls_construct ) {
+cls_construct_array_count++;
+}
+if( NULL != c->cls_destruct ) {
+cls_destruct_array_count++;
+}
+cls->cls_depth++;
 }

 /*
  * Allocate arrays for hierarchy of constructors and destructors
+ * plus for each a NULL-sentinel
  */

 cls->cls_construct_array =
-(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)*
-  sizeof(opal_construct_t)  
* 2 );
+(void (**)(opal_object_t*))malloc 
((cls_construct_array_count +
+
cls_destruct_array_count + 2) *

+  sizeof(opal_construct_t) );
 if (NULL == cls->cls_construct_array) {
 perror("Out of memory");
 exit(-1);
 }
-cls->cls_destruct_array = cls->cls_construct_array + cls- 
>cls_depth + 1;

-cls_construct_array = cls->cls_construct_array;
-cls_destruct_array  = cls->cls_destruct_array;
-
+cls->cls_destruct_array =
+cls->cls_construct_array + cls_construct_array_count + 1;
+
+/*
+ * The constructor array is reversed, so start at the end
+ */
+
+cls_construct_array = cls->cls_construct_array +  
cls_construct_array_count;

+cls_destruct_array  = cls->cls_destruct_array;
+
 c = cls;
+*cls_construct_array = NULL;  /* end marker for the  
constructors */

 for (i = 0; i < cls->cls_depth; i++) {
 if( NULL != c->cls_construct ) {
+--cls_construct_array;
 *cls_construct_array = c->cls_construct;
-cls_construct_array++;
 }
 if( NULL != c->cls_destruct ) {
 *cls_destruct_array = c->cls_destruct;
@@ -129,16 +149,7 @@
 }
 c = c->cls_parent;
 }
-*cls_construct_array = NULL;  /* end marker for the  
constructors */

 *cls_destruct_array = NULL;  /* end marker for the destructors */
-/* Now we have to invert the constructors */
-for( i = 0, --cls_construct_array;
- cls_construct_array > (cls->cls_construct_array + i);
- i++, cls_construct_array-- ) {
-opal_construct_t temp_construct = *cls_construct_array;
-*cls_construct_array = cls->cls_construct_array[i];
-cls->cls_construct_array[i] = temp_construct;
-}

 cls->cls_initialized = 1;
 save_class(cls);
___
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel


"Half of what I say is meaningless; but I say it so that the other  
half may reach you"

  Kahlil Gibran




Re: [OMPI devel] [PATCH] opal/class/opal_object.c: replace the classes array with a linked list

2007-03-06 Thread George Bosilca

Bert,

Your previous patch saves some memory while the current one use some  
more. I prefer to keep the array, as it's not only out of any  
critical path but it's not performance related. An array can do the  
job without any problems and use less memory than the linked list.


  Thanks,
george.

On Mar 5, 2007, at 12:33 PM, Bert Wesarg wrote:

This replaces the classes array (which holds all ctor/dtor arrays)  
with a

linked list.

This patch is compile tested only.

Greetings
Bert Wesarg
Index: opal/class/opal_object.c
===
--- opal/class/opal_object.c(revision 13923)
+++ opal/class/opal_object.c(working copy)
@@ -51,17 +51,12 @@
  * Local variables
  */
 static opal_atomic_lock_t class_lock = { { OPAL_ATOMIC_UNLOCKED } };
-static void** classes = NULL;
-static int num_classes = 0;
-static int max_classes = 0;
-static const int increment = 10;
+static void *classes;

-
 /*
  * Local functions
  */
 static void save_class(opal_class_t *cls);
-static void expand_array(void);


 /*
@@ -72,6 +67,7 @@
 opal_class_t *c;
 opal_construct_t* cls_construct_array;
 opal_destruct_t* cls_destruct_array;
+void *base_pointer;
 int i;

 assert(cls);
@@ -104,15 +100,18 @@

 /*
  * Allocate arrays for hierarchy of constructors and destructors
+ * Plus one void pointer for the classes list
  */

-cls->cls_construct_array =
-(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)*
-  sizeof(opal_construct_t)  
* 2 );

-if (NULL == cls->cls_construct_array) {
+base_pointer = malloc((cls->cls_depth + 1) * sizeof 
(opal_construct_t) * 2 +

+  sizeof(void *));
+if (NULL == base_pointer) {
 perror("Out of memory");
 exit(-1);
 }
+/* The arrays begin behind the void pointer */
+cls->cls_destruct_array =
+(void (**)(opal_object_t*))((char *)base_pointer + sizeof 
(void *));
 cls->cls_destruct_array = cls->cls_construct_array + cls- 
>cls_depth + 1;

 cls_construct_array = cls->cls_construct_array;
 cls_destruct_array  = cls->cls_destruct_array;
@@ -154,18 +153,10 @@
  */
 int opal_class_finalize(void)
 {
-int i;
-
-if (NULL != classes) {
-for (i = 0; i < num_classes; ++i) {
-if (NULL != classes[i]) {
-free(classes[i]);
-}
-}
+while (NULL != classes) {
+void *next = *(void **)classes;
 free(classes);
-classes = NULL;
-num_classes = 0;
-max_classes = 0;
+classes = next;
 }

 return OPAL_SUCCESS;
@@ -174,27 +165,7 @@

 static void save_class(opal_class_t *cls)
 {
-if (num_classes >= max_classes) {
-expand_array();
-}
-
-classes[num_classes] = cls->cls_construct_array;
-++num_classes;
+void *base_pointer = (char *)cls->cls_construct_array - sizeof 
(void *);

+*(void **)base_pointer = classes;
+classes = base_pointer;
 }
-
-
-static void expand_array(void)
-{
-int i;
-
-max_classes += increment;
-classes = (void**)realloc(classes, sizeof(void *) * max_classes);
-if (NULL == classes) {
-perror("class malloc failed");
-exit(-1);
-}
-for (i = num_classes; i < max_classes; ++i) {
-classes[i] = NULL;
-}
-}
-
___
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel


"Half of what I say is meaningless; but I say it so that the other  
half may reach you"

  Kahlil Gibran




Re: [OMPI devel] [PATCH] opal/class/opal_object.c: save some memory for the ctor/dtor arrays

2007-03-06 Thread Bert Wesarg
Hello,

thanks, but I was in preparation to submit a superseded patch, which is
more intrusive to the class object system.

Bert

George Bosilca wrote:
> Bert,
> 
> Thanks for this patch. I apply it in the trunk as revision r13939.  
> Thanks again.
> 
>george.
> 
> On Mar 5, 2007, at 12:10 PM, Bert Wesarg wrote:
> 
>> This saves some memory for the constructors and destructors arrays  
>> of a
>> class by counting the constructors and destructors while we are  
>> counting
>> the cls_depth. And the reversion of the constructor array can now  
>> be done
>> without an extra loop.
>>
>> The patch is only compile tested.
>>
>> Greetings
>>
>> Bert Wesarg
>>
>> Index: opal/class/opal_object.c
>> ===
>> --- opal/class/opal_object.c (revision 13923)
>> +++ opal/class/opal_object.c (working copy)
>> @@ -71,7 +71,9 @@
>>  {
>>  opal_class_t *c;
>>  opal_construct_t* cls_construct_array;
>> -opal_destruct_t* cls_destruct_array;
>> +opal_destruct_t* cls_destruct_array;
>> +int cls_construct_array_count;
>> +int cls_destruct_array_count;
>>  int i;
>>
>>  assert(cls);
>> @@ -95,33 +97,51 @@
>>
>>  /*
>>   * First calculate depth of class hierarchy
>> + * And the number of constructors and destructors
>>   */
>>
>>  cls->cls_depth = 0;
>> +cls_construct_array_count = 0;
>> +cls_destruct_array_count  = 0;
>>  for (c = cls; c; c = c->cls_parent) {
>> -cls->cls_depth += 1;
>> +if( NULL != c->cls_construct ) {
>> +cls_construct_array_count++;
>> +}
>> +if( NULL != c->cls_destruct ) {
>> +cls_destruct_array_count++;
>> +}
>> +cls->cls_depth++;
>>  }
>>
>>  /*
>>   * Allocate arrays for hierarchy of constructors and destructors
>> + * plus for each a NULL-sentinel
>>   */
>>
>>  cls->cls_construct_array =
>> -(void (**)(opal_object_t*))malloc((cls->cls_depth + 1)*
>> -  sizeof(opal_construct_t)  
>> * 2 );
>> +(void (**)(opal_object_t*))malloc 
>> ((cls_construct_array_count +
>> +
>> cls_destruct_array_count + 2) *
>> +  sizeof(opal_construct_t) );
>>  if (NULL == cls->cls_construct_array) {
>>  perror("Out of memory");
>>  exit(-1);
>>  }
>> -cls->cls_destruct_array = cls->cls_construct_array + cls- 
>>> cls_depth + 1;
>> -cls_construct_array = cls->cls_construct_array;
>> -cls_destruct_array  = cls->cls_destruct_array;
>> -
>> +cls->cls_destruct_array =
>> +cls->cls_construct_array + cls_construct_array_count + 1;
>> +
>> +/*
>> + * The constructor array is reversed, so start at the end
>> + */
>> +
>> +cls_construct_array = cls->cls_construct_array +  
>> cls_construct_array_count;
>> +cls_destruct_array  = cls->cls_destruct_array;
>> +
>>  c = cls;
>> +*cls_construct_array = NULL;  /* end marker for the  
>> constructors */
>>  for (i = 0; i < cls->cls_depth; i++) {
>>  if( NULL != c->cls_construct ) {
>> +--cls_construct_array;
>>  *cls_construct_array = c->cls_construct;
>> -cls_construct_array++;
>>  }
>>  if( NULL != c->cls_destruct ) {
>>  *cls_destruct_array = c->cls_destruct;
>> @@ -129,16 +149,7 @@
>>  }
>>  c = c->cls_parent;
>>  }
>> -*cls_construct_array = NULL;  /* end marker for the  
>> constructors */
>>  *cls_destruct_array = NULL;  /* end marker for the destructors */
>> -/* Now we have to invert the constructors */
>> -for( i = 0, --cls_construct_array;
>> - cls_construct_array > (cls->cls_construct_array + i);
>> - i++, cls_construct_array-- ) {
>> -opal_construct_t temp_construct = *cls_construct_array;
>> -*cls_construct_array = cls->cls_construct_array[i];
>> -cls->cls_construct_array[i] = temp_construct;
>> -}
>>
>>  cls->cls_initialized = 1;
>>  save_class(cls);
>> ___
>> devel mailing list
>> de...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
> 
> "Half of what I say is meaningless; but I say it so that the other  
> half may reach you"
>Kahlil Gibran
> 
> 
> ___
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


[OMPI devel] [RFC][PATCH] option to use libumem for opal object system

2007-03-06 Thread Bert Wesarg
Hello,

this gives the option to use the umem cache feature from the libumem[1]
for the opal object system.

It is full backward compatible to the old system.

But the patch exists of more changes:

(1) reorder opal_class_t, in the hope that vital members fit in the first
cache line
(2) a per class lock for initialization
(3) the global class list is now a linked list embeded in the opal_class_t
(this can be reduced to a stack/single linked list)
(4) new contructors/destructors for the one time cache initialization

To complile with this new feature you must configure open-mpi with
"-DUSE_UMEM" in your CFLAGS, and all other needed build flags to find the
header and library of lubumem (LDFLAGS, LIBS).

To full use the object caching of libumem you can use the new macro
OBJ_CLASS_INSTANCE_CACHE() which have arguments for the cache
contructors/destructors.

In the followup mail, I convert the opal_free_list_t and
orte_pointer_array_t to use the cache for the initialization of the
opal_mutex_t and opal_condition_t members.

I have just compiled it with and without USE_UMEM but no benchmarking.

Comments welcome.

Greetings

Bert Wesarg

PS: I know that you are busy with the OMPI 1.2 release, I just want to
send it out, befor I forget it.

[1] solaris: built-in
other:   http://sourceforge.net/projects/umem
---

 opal/class/opal_object.c |  210 +++
 opal/class/opal_object.h |  201 
 2 files changed, 324 insertions(+), 87 deletions(-)

diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
--- old/opal/class/opal_object.h
+++ new/opal/class/opal_object.h
@@ -122,10 +122,17 @@

 #if OMPI_HAVE_THREAD_SUPPORT
 #include "opal/sys/atomic.h"
 #endif  /* OMPI_HAVE_THREAD_SUPPORT */

+#ifdef USE_UMEM
+# include 
+# ifndef UMEM_CACHE_NAMELEN
+#  define UMEM_CACHE_NAMELEN 31
+# endif
+#endif
+
 #if OMPI_ENABLE_DEBUG
 /* Any kind of unique ID should do the job */
 #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL)
 #endif

@@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob
  *
  * There should be a single instance of this descriptor for each class
  * definition.
  */
 struct opal_class_t {
-const char *cls_name;   /**< symbolic name for class */
-opal_class_t *cls_parent;   /**< parent class descriptor */
-opal_construct_t cls_construct; /**< class constructor */
-opal_destruct_t cls_destruct;   /**< class destructor */
-int cls_initialized;/**< is class initialized */
-int cls_depth;  /**< depth of class hierarchy tree */
+#ifdef USE_UMEM
+umem_cache_t *cls_cache;/**< object cache */
+#endif
+size_t   cls_sizeof;/**< size of an object instance */
+opal_construct_t *cls_cache_construct_array;
+/**< array of parent class cache constructors */
 opal_construct_t *cls_construct_array;
 /**< array of parent class constructors */
-opal_destruct_t *cls_destruct_array;
+opal_destruct_t  *cls_destruct_array;
 /**< array of parent class destructors */
-size_t cls_sizeof;  /**< size of an object instance */
+opal_destruct_t  *cls_cache_destruct_array;
+/**< array of parent class cache destructors */
+int cls_initialized;/**< is class initialized */
+int cls_depth;  /**< depth of class hierarchy tree */
+const char   *cls_name; /**< symbolic name for class */
+opal_class_t *cls_parent;   /**< parent class descriptor */
+opal_construct_t cls_construct; /**< class constructor */
+opal_destruct_t  cls_destruct;  /**< class destructor */
+opal_construct_t cls_cache_construct;
+/**< class object cache constructor */
+opal_destruct_t  cls_cache_destruct;
+/**< class cache destructor */
+opal_atomic_lock_t cls_init_lock;
+/**< class init mutex */
+opal_class_t*cls_next, *cls_prev;
+/**< linked list of all classes */
 };

 /**
  * For static initializations of OBJects.
  *
@@ -198,30 +220,97 @@ struct opal_object_t {
  * @param NAME  Name of class
  * @return  Pointer to class descriptor
  */
 #define OBJ_CLASS(NAME) (&(NAME ## _class))

-
 /**
  * Static initializer for a class descriptor
  *
  * @param NAME  Name of class
  * @param PARENTName of parent class
  * @param CONSTRUCTOR   Pointer to constructor
  * @param DESTRUCTORPointer to destructor
  *
  * Put this in NAME.c
  */
+#ifdef USE_UMEM
 #define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR)   \
 opal_class_t NAME ## _class = { \
+NULL, 

[OMPI devel] [RFC][PATCH] use the new object caching (examples)

2007-03-06 Thread Bert Wesarg

---

 opal/class/opal_free_list.c |   24 ++--
 1 files changed, 18 insertions(+), 6 deletions(-)

diff --quilt old/opal/class/opal_free_list.c new/opal/class/opal_free_list.c
--- old/opal/class/opal_free_list.c
+++ new/opal/class/opal_free_list.c
@@ -22,23 +22,25 @@
 #include "opal/sys/cache.h"


 static void opal_free_list_construct(opal_free_list_t* fl);
 static void opal_free_list_destruct(opal_free_list_t* fl);
+static void opal_free_list_cache_construct(opal_free_list_t* fl);
+static void opal_free_list_cache_destruct(opal_free_list_t* fl);

-OBJ_CLASS_INSTANCE(opal_free_list_t,
-   opal_list_t,
-   opal_free_list_construct,
-   opal_free_list_destruct);
+OBJ_CLASS_INSTANCE_CACHE(opal_free_list_t,
+ opal_list_t,
+ opal_free_list_construct,
+ opal_free_list_destruct,
+ opal_free_list_cache_construct,
+ opal_free_list_cache_destruct);
 OBJ_CLASS_INSTANCE(opal_free_list_item_t,
opal_list_item_t,
NULL, NULL);

 static void opal_free_list_construct(opal_free_list_t* fl)
 {
-OBJ_CONSTRUCT(&fl->fl_lock, opal_mutex_t);
-OBJ_CONSTRUCT(&fl->fl_condition, opal_condition_t);
 fl->fl_max_to_alloc = 0;
 fl->fl_num_allocated = 0;
 fl->fl_num_per_alloc = 0;
 fl->fl_num_waiting = 0;
 fl->fl_elem_size = 0;
@@ -55,10 +57,20 @@ static void opal_free_list_destruct(opal
 OBJ_DESTRUCT(item);
 free(item);
 }

 OBJ_DESTRUCT(&fl->fl_allocations);
+}
+
+static void opal_free_list_cache_construct(opal_free_list_t* fl)
+{
+OBJ_CONSTRUCT(&fl->fl_lock, opal_mutex_t);
+OBJ_CONSTRUCT(&fl->fl_condition, opal_condition_t);
+}
+
+static void opal_free_list_cache_destruct(opal_free_list_t* fl)
+{
 OBJ_DESTRUCT(&fl->fl_condition);
 OBJ_DESTRUCT(&fl->fl_lock);
 }

 int opal_free_list_init(
---

 orte/class/orte_pointer_array.c |   23 ---
 1 files changed, 20 insertions(+), 3 deletions(-)

diff --quilt old/orte/class/orte_pointer_array.c new/orte/class/orte_pointer_array.c
--- old/orte/class/orte_pointer_array.c
+++ new/orte/class/orte_pointer_array.c
@@ -27,25 +27,28 @@
 #include "orte/class/orte_pointer_array.h"
 #include "opal/util/output.h"

 static void orte_pointer_array_construct(orte_pointer_array_t *);
 static void orte_pointer_array_destruct(orte_pointer_array_t *);
+static void orte_pointer_array_cache_construct(orte_pointer_array_t *);
+static void orte_pointer_array_cache_destruct(orte_pointer_array_t *);
 static bool grow_table(orte_pointer_array_t *table);

-OBJ_CLASS_INSTANCE(
+OBJ_CLASS_INSTANCE_CACHE(
 orte_pointer_array_t,
 opal_object_t,
 orte_pointer_array_construct,
-orte_pointer_array_destruct
+orte_pointer_array_destruct,
+orte_pointer_array_cache_construct,
+orte_pointer_array_cache_destruct
 );

 /*
  * orte_pointer_array constructor
  */
 void orte_pointer_array_construct(orte_pointer_array_t *array)
 {
-OBJ_CONSTRUCT(&array->lock, opal_mutex_t);
 array->lowest_free = 0;
 array->number_free = 0;
 array->size = 0;
 array->max_size = 0;
 array->block_size = 0;
@@ -59,11 +62,25 @@ void orte_pointer_array_destruct(orte_po
 {
 /* free table */
 if( NULL != array->addr) {
 free(array->addr);
 }
+}

+/*
+ * orte_pointer_array cache constructor
+ */
+void orte_pointer_array_cache_construct(orte_pointer_array_t *array)
+{
+OBJ_CONSTRUCT(&array->lock, opal_mutex_t);
+}
+
+/*
+ * orte_pointer_array cache destructor
+ */
+void orte_pointer_array_cache_destruct(orte_pointer_array_t *array)
+{
 OBJ_DESTRUCT(&array->lock);
 }

 /**
  * initialize an array object


Re: [OMPI devel] [RFC][PATCH] option to use libumem for opal object system

2007-03-06 Thread Bert Wesarg
This is a self fix reply

> 
> 
> ---
> 
>  opal/class/opal_object.c |  210 
> +++
>  opal/class/opal_object.h |  201 
>  2 files changed, 324 insertions(+), 87 deletions(-)
> 
> diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
> --- old/opal/class/opal_object.h
> +++ new/opal/class/opal_object.h
> @@ -122,10 +122,17 @@
>  
>  #if OMPI_HAVE_THREAD_SUPPORT
>  #include "opal/sys/atomic.h"
>  #endif  /* OMPI_HAVE_THREAD_SUPPORT */
>  
> +#ifdef USE_UMEM
> +# include 
> +# ifndef UMEM_CACHE_NAMELEN
> +#  define UMEM_CACHE_NAMELEN 31
> +# endif
> +#endif
> +
>  #if OMPI_ENABLE_DEBUG
>  /* Any kind of unique ID should do the job */
>  #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL)
>  #endif
>  
> @@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob
>   *
>   * There should be a single instance of this descriptor for each class
>   * definition.
>   */
>  struct opal_class_t {
> -const char *cls_name;   /**< symbolic name for class */
> -opal_class_t *cls_parent;   /**< parent class descriptor */
> -opal_construct_t cls_construct; /**< class constructor */
> -opal_destruct_t cls_destruct;   /**< class destructor */
> -int cls_initialized;/**< is class initialized */
> -int cls_depth;  /**< depth of class hierarchy tree */
> +#ifdef USE_UMEM
> +umem_cache_t *cls_cache;/**< object cache */
> +#endif
> +size_t   cls_sizeof;/**< size of an object instance */
> +opal_construct_t *cls_cache_construct_array;
> +/**< array of parent class cache constructors */
>  opal_construct_t *cls_construct_array;
>  /**< array of parent class constructors 
> */
> -opal_destruct_t *cls_destruct_array;
> +opal_destruct_t  *cls_destruct_array;
>  /**< array of parent class destructors */
> -size_t cls_sizeof;  /**< size of an object instance */
> +opal_destruct_t  *cls_cache_destruct_array;
> +/**< array of parent class cache destructors */
> +int cls_initialized;/**< is class initialized */
> +int cls_depth;  /**< depth of class hierarchy tree */
> +const char   *cls_name; /**< symbolic name for class */
> +opal_class_t *cls_parent;   /**< parent class descriptor */
> +opal_construct_t cls_construct; /**< class constructor */
> +opal_destruct_t  cls_destruct;  /**< class destructor */
> +opal_construct_t cls_cache_construct;
> +/**< class object cache constructor */
> +opal_destruct_t  cls_cache_destruct;
> +/**< class cache destructor */
> +opal_atomic_lock_t cls_init_lock;
> +/**< class init mutex */
> +opal_class_t*cls_next, *cls_prev;
> +/**< linked list of all classes */
>  };
>  
>  /**
>   * For static initializations of OBJects.
>   *
> @@ -198,30 +220,97 @@ struct opal_object_t {
>   * @param NAME  Name of class
>   * @return  Pointer to class descriptor
>   */
>  #define OBJ_CLASS(NAME) (&(NAME ## _class))
>  
> -
>  /**
>   * Static initializer for a class descriptor
>   *
>   * @param NAME  Name of class
>   * @param PARENTName of parent class
>   * @param CONSTRUCTOR   Pointer to constructor
>   * @param DESTRUCTORPointer to destructor
>   *
>   * Put this in NAME.c
>   */
> +#ifdef USE_UMEM
>  #define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR)   \
>  opal_class_t NAME ## _class = { \
> +NULL,   \
> +sizeof(NAME),   \
> +NULL, NULL, NULL, NULL, \
> +0, 0,   \
> +# NAME, \
> +OBJ_CLASS(PARENT),  \
> +(opal_construct_t) CONSTRUCTOR, \
> +(opal_destruct_t) DESTRUCTOR,   \
> +(opal_construct_t) NULL,\
> +(opal_destruct_t) NULL, \
> +{ { OPAL_ATOMIC_UNLOCKED } },   \
> +OBJ_CLASS(NAME), OBJ_CLASS(NAME)\
> +}
> +#else
> +#define OBJ_CLASS_INSTANCE(NAME, PARENT, CONSTRUCTOR, DESTRUCTOR)   \
> +opal_class_t NAME ## _class = {  

Re: [OMPI devel] [RFC][PATCH] option to use libumem for opal object system

2007-03-06 Thread George Bosilca

Bert,

Reordering the members in the opal_class structure can be discussed.  
In general, we don't focus on improving anything that's outside the  
critical path (such as the class sub-system). Even if all Open MPI  
objects are derived from this class, it is definitively not  
performance aware, and making it so don't give us any benefit.  
Moreover, it will make the code more difficult to read/understand and  
maintain.


There are already some features similar with umem in Open MPI.  
Activate the memory debugging to get all debug features. Use the free  
list to get the alignment. We have a more powerful approach, as we  
can align not only on the whole structure, but on a particular item.  
These features are already in the trunk. Moreover, if any layer  
inside Open MPI need aligned memory, then it should allocate the  
objects by itself and call OBJ_CONSTRUCT instead of OBJ_NEW.


I'm reluctant to include these 2 patches into the Open MPI trunk. At  
least not until it is proven that there is any benefit (mostly  
performance). However, if someone else from the community think they  
are fine/required, then it might happens that they will get included.


  Thanks,
george.

On Mar 6, 2007, at 12:22 PM, Bert Wesarg wrote:


Hello,

this gives the option to use the umem cache feature from the libumem 
[1]

for the opal object system.

It is full backward compatible to the old system.

But the patch exists of more changes:

(1) reorder opal_class_t, in the hope that vital members fit in the  
first

cache line
(2) a per class lock for initialization
(3) the global class list is now a linked list embeded in the  
opal_class_t

(this can be reduced to a stack/single linked list)
(4) new contructors/destructors for the one time cache initialization

To complile with this new feature you must configure open-mpi with
"-DUSE_UMEM" in your CFLAGS, and all other needed build flags to  
find the

header and library of lubumem (LDFLAGS, LIBS).

To full use the object caching of libumem you can use the new macro
OBJ_CLASS_INSTANCE_CACHE() which have arguments for the cache
contructors/destructors.

In the followup mail, I convert the opal_free_list_t and
orte_pointer_array_t to use the cache for the initialization of the
opal_mutex_t and opal_condition_t members.

I have just compiled it with and without USE_UMEM but no benchmarking.

Comments welcome.

Greetings

Bert Wesarg

PS: I know that you are busy with the OMPI 1.2 release, I just want to
send it out, befor I forget it.

[1] solaris: built-in
other:   http://sourceforge.net/projects/umem
---

 opal/class/opal_object.c |  210 ++ 
+
 opal/class/opal_object.h |  201 +++ 
+

 2 files changed, 324 insertions(+), 87 deletions(-)

diff --quilt old/opal/class/opal_object.h new/opal/class/opal_object.h
--- old/opal/class/opal_object.h
+++ new/opal/class/opal_object.h
@@ -122,10 +122,17 @@

 #if OMPI_HAVE_THREAD_SUPPORT
 #include "opal/sys/atomic.h"
 #endif  /* OMPI_HAVE_THREAD_SUPPORT */

+#ifdef USE_UMEM
+# include 
+# ifndef UMEM_CACHE_NAMELEN
+#  define UMEM_CACHE_NAMELEN 31
+# endif
+#endif
+
 #if OMPI_ENABLE_DEBUG
 /* Any kind of unique ID should do the job */
 #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL)
 #endif

@@ -144,21 +151,36 @@ typedef void (*opal_destruct_t) (opal_ob
  *
  * There should be a single instance of this descriptor for each  
class

  * definition.
  */
 struct opal_class_t {
-const char *cls_name;   /**< symbolic name for class */
-opal_class_t *cls_parent;   /**< parent class descriptor */
-opal_construct_t cls_construct; /**< class constructor */
-opal_destruct_t cls_destruct;   /**< class destructor */
-int cls_initialized;/**< is class initialized */
-int cls_depth;  /**< depth of class hierarchy  
tree */

+#ifdef USE_UMEM
+umem_cache_t *cls_cache;/**< object cache */
+#endif
+size_t   cls_sizeof;/**< size of an object  
instance */

+opal_construct_t *cls_cache_construct_array;
+/**< array of parent class cache  
constructors */

 opal_construct_t *cls_construct_array;
 /**< array of parent class  
constructors */

-opal_destruct_t *cls_destruct_array;
+opal_destruct_t  *cls_destruct_array;
 /**< array of parent class  
destructors */
-size_t cls_sizeof;  /**< size of an object  
instance */

+opal_destruct_t  *cls_cache_destruct_array;
+/**< array of parent class cache  
destructors */

+int cls_initialized;/**< is class initialized */
+int cls_depth;  /**< depth of class hierarchy  
tree */

+const char   *cls_name; /**< symbolic name for class */
+opal_class_t *cls_parent;   /**< parent class descriptor