Greetings. We have seen some bus errors when compiling a user application with certain compiler flags and running it on a SPARC-based server. The issue is that some structures are not word or double-word aligned, which causes a bus error. I have tracked down two places where I can make a minor change and everything seems to work fine. However, I want to see whether anyone has issues with these changes. The two changes are shown below.

burl-ct-v440-0 206 =>svn diff
Index: ompi/mca/btl/sm/btl_sm_frag.h
===================================================================
--- ompi/mca/btl/sm/btl_sm_frag.h    (revision 17039)
+++ ompi/mca/btl/sm/btl_sm_frag.h    (working copy)
@@ -9,6 +9,7 @@
*                         University of Stuttgart.  All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
*                         All rights reserved.
+ * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
* $COPYRIGHT$
*  * Additional copyrights may follow
@@ -41,6 +42,10 @@
   struct mca_btl_sm_frag_t *frag;
   size_t len;
   mca_btl_base_tag_t tag;
+   /* Add a 4 byte pad to round out structure to 16 bytes for 32-bit
+    * and to 24 bytes for 64-bit.  Helps prevent bus errors for strict
+    * alignment cases like SPARC. */
+    char pad[4];
};
typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t;


Index: ompi/mca/pml/ob1/pml_ob1_recvfrag.h
===================================================================
--- ompi/mca/pml/ob1/pml_ob1_recvfrag.h    (revision 17039)
+++ ompi/mca/pml/ob1/pml_ob1_recvfrag.h    (working copy)
@@ -9,6 +9,7 @@
*                         University of Stuttgart.  All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
*                         All rights reserved.
+ * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
* $COPYRIGHT$
*  * Additional copyrights may follow
@@ -67,7 +68,8 @@
   unsigned char* _ptr = (unsigned char*)frag->addr;                   \
   /* init recv_frag */                                                \
   frag->btl = btl;                                                    \
-    frag->hdr = *(mca_pml_ob1_hdr_t*)hdr;                               \
+    memcpy(&frag->hdr, (void *)((mca_pml_ob1_hdr_t*)hdr),               \
+           sizeof(mca_pml_ob1_hdr_t));                                  \
   frag->num_segments = 1;                                             \
   _size = segs[0].seg_len;                                            \
   for( i = 1; i < cnt; i++ ) {                                        \
burl-ct-v440-0 207 =>


The ticket associated with this issue is https://svn.open-mpi.org/trac/ompi/ticket/1148

Rolf

Reply via email to