diff --git a/ompi/mca/btl/smcuda/btl_smcuda.c b/ompi/mca/btl/smcuda/btl_smcuda.c
index 495c392..210c9cc 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda.c
@@ -755,7 +755,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
 
     }
 #endif /* OMPI_CUDA_SUPPORT */
-    frag->base.des_src = &(frag->segment);
+    frag->base.des_src = &(frag->segment.base);
     frag->base.des_src_cnt = 1;
     frag->base.order = MCA_BTL_NO_ORDER;
     frag->base.des_dst = NULL;
@@ -954,7 +954,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
 
     frag->base.des_src = NULL;
     frag->base.des_src_cnt = 0;
-    frag->base.des_dst = &frag->segment;
+    frag->base.des_dst = &frag->segment.base;
     frag->base.des_dst_cnt = 1;
     frag->base.des_flags = flags;
     return &frag->base;
@@ -982,7 +982,7 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
      * garbage in the debugger.  */
     
     memset(&rget_reg, 0, sizeof(rget_reg));
-    memcpy(&rget_reg.memHandle, dst_seg->key, sizeof(dst_seg->key));
+    memcpy(&rget_reg.memHandle, src_seg->key, sizeof(src_seg->key));
 
     /* Open the memory handle to the remote memory.  If it is cached, then
      * we just retrieve it from cache and avoid a call to open the handle.  That
@@ -1007,7 +1007,7 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
      * not equal the address that was used to retrieve the block.
      * Therefore, compute the offset and add it to the address of the
      * memory handle. */
-    offset = src_seg->base.seg_addr.lval - reg_ptr->base.base;
+    offset = (unsigned char *)src_seg->base.seg_addr.lval - reg_ptr->base.base;
     remote_memory_address = (unsigned char *)reg_ptr->base.alloc_base + offset;
     if (0 != offset) {
         opal_output(-1, "OFFSET=%d", (int)offset);
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_component.c b/ompi/mca/btl/smcuda/btl_smcuda_component.c
index ca99846..b9a6095 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_component.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda_component.c
@@ -171,7 +171,7 @@ static int smcuda_register(void)
 #if OMPI_CUDA_SUPPORT
     mca_btl_smcuda.super.btl_flags |= MCA_BTL_FLAGS_CUDA_GET;
 #endif /* OMPI_CUDA_SUPPORT */
-    mca_btl_smcuda.super.btl_seg_size = sizeof (mca_btl_sm_segment_t);
+    mca_btl_smcuda.super.btl_seg_size = sizeof (mca_btl_smcuda_segment_t);
     mca_btl_smcuda.super.btl_bandwidth = 9000;  /* Mbs */
     mca_btl_smcuda.super.btl_latency   = 1;     /* Microsecs */
 
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_frag.c b/ompi/mca/btl/smcuda/btl_smcuda_frag.c
index b05eeb3..4b16a4e 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_frag.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda_frag.c
@@ -32,9 +32,9 @@ static inline void mca_btl_smcuda_frag_common_constructor(mca_btl_smcuda_frag_t*
         frag->hdr->my_smp_rank = mca_btl_smcuda_component.my_smp_rank;
     }
     frag->segment.base.seg_len = frag->size;
-    frag->base.des_src = &frag->segment;
+    frag->base.des_src = &frag->segment.base;
     frag->base.des_src_cnt = 1;
-    frag->base.des_dst = &frag->segment;
+    frag->base.des_dst = &frag->segment.base;
     frag->base.des_dst_cnt = 1;
     frag->base.des_flags = 0;
 #if OMPI_CUDA_SUPPORT
