Index: ompi/mca/bcol/base/bcol_base_frame.c
===================================================================
--- ompi/mca/bcol/base/bcol_base_frame.c	(revision 30606)
+++ ompi/mca/bcol/base/bcol_base_frame.c	(working copy)
@@ -28,7 +28,6 @@
 #include "ompi/mca/bcol/base/base.h"
 #include "ompi/include/ompi/constants.h"
 #include "ompi/mca/mpool/mpool.h"
-#include "ompi/mca/coll/ml/coll_ml.h" /*frag and full message descriptors defined here*/
 #include "opal/class/opal_list.h"
 /*
  * The following file was created by configure.  It contains extern
@@ -341,3 +340,19 @@
                    opal_list_item_t,
                    NULL,
                    NULL);
+
+/* Constructor: a freshly constructed block has no base address yet. */
+static void lmngr_block_constructor(mca_bcol_base_lmngr_block_t *item)
+{
+    item->base_addr = NULL;
+}
+
+/* Destructor: intentionally empty, there is nothing to release here. */
+static void lmngr_block_destructor(mca_bcol_base_lmngr_block_t *item)
+{
+    (void) item; /* unused */
+}
+OBJ_CLASS_INSTANCE(mca_bcol_base_lmngr_block_t,
+                   opal_list_item_t,
+                   lmngr_block_constructor,
+                   lmngr_block_destructor);
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma.h
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma.h	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma.h	(working copy)
@@ -19,8 +19,6 @@
 #include "ompi_config.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "ompi/mca/mpool/mpool.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
-#include "ompi/mca/coll/ml/coll_ml_allocation.h"
 #include "ompi/request/request.h"
 #include "ompi/proc/proc.h"
 #include "ompi/patterns/net/netpatterns.h"
@@ -406,7 +404,7 @@
     /* pool index */
     int pool_index;
 
-    /* pointer to the ml_memory_block_desc_t structure
+    /* pointer to the mca_bcol_base_memory_block_desc_t structure
      * that is actually managing this registration.
      * This is meaningful when these control structures
      * are used in conjunction with the user payload
@@ -717,7 +715,7 @@
 struct mca_bcol_basesmuma_local_mlmem_desc_t {
 
     uint32_t bank_index_for_release;
-    struct ml_memory_block_desc_t *ml_mem_desc;
+    struct mca_bcol_base_memory_block_desc_t *ml_mem_desc;
     uint32_t     num_banks;
     uint32_t     num_buffers_per_bank;
     uint32_t     size_buffer;
@@ -934,7 +932,7 @@
     size_t size_ctl_structure;
     size_t data_seg_alignment;
     bcol_basesmuma_smcm_mmap_t *sm_mmap; /* shared memory map struct */
-    mca_coll_ml_release_buff_fn_t buff_release_cb; /* buffer release
+    mca_bcol_base_release_buff_fn_t buff_release_cb; /* buffer release
                                                       call back */
 } bcol_basesmuma_registration_data_t;
 
@@ -1014,7 +1012,7 @@
 
 /* shared memory recusive double barrier */
 int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args);
+                                            mca_bcol_base_function_t *c_input_args);
 /* shared memory fanin */
 int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super);
 
@@ -1028,36 +1026,36 @@
 int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super);
 
 int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
-                         coll_ml_function_t *c_input_args);
+                         mca_bcol_base_function_t *c_input_args);
 
 /* Shared memory non-blocking broadcast */
 int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-                                          coll_ml_function_t *c_input_args);
+                                          mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args);
+                                            mca_bcol_base_function_t *c_input_args);
 
 /* Shared memory non-blocking broadcast - Large message anyroot */
 int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args,
-                                                    coll_ml_function_t *c_input_args);
+                                                    mca_bcol_base_function_t *c_input_args);
 
 #if 0
 /*FIXME: having fun here*/
 int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
-                                      coll_ml_function_t   *c_input_args);
+                                      mca_bcol_base_function_t   *c_input_args);
 #endif
 
 int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-                                               coll_ml_function_t *c_input_args);
+                                               mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args,
-                                                        coll_ml_function_t *c_input_args);
+                                                        mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args,
-                                                           coll_ml_function_t *c_input_args);
+                                                           mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args,
-                                                                     coll_ml_function_t *c_input_args);
+                                                                     mca_bcol_base_function_t *c_input_args);
 
 /*
  *  shared memory scatter
@@ -1067,23 +1065,23 @@
 /* shared memory nonblocking scatter - known root */
 int bcol_basesmuma_nb_scatter_k_array_knownroot(
                                                 bcol_function_args_t *input_args,
-                                                coll_ml_function_t *c_input_args);
+                                                mca_bcol_base_function_t *c_input_args);
 
 /* shared memory non-blocking k-nomial barrier init */
 int bcol_basesmuma_k_nomial_barrier_init(bcol_function_args_t *input_args,
-                                         struct coll_ml_function_t *const_args);
+                                         struct mca_bcol_base_function_t *const_args);
 
 /* shared memory non-blocking k-nomial barrier progress */
 int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args,
-                                             struct coll_ml_function_t *const_args);
+                                             struct mca_bcol_base_function_t *const_args);
 
 /*shared memory non-blocking k-nomial allgather init */
 int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args,
-                                           struct coll_ml_function_t *const_args);
+                                           struct mca_bcol_base_function_t *const_args);
 
 /* shared memory non-blocking k-nomial allgather progress */
 int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args,
-                                               struct coll_ml_function_t *const_args);
+                                               struct mca_bcol_base_function_t *const_args);
 
 /* shared memory allgather -- selection logic api */
 int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super);
@@ -1090,15 +1088,15 @@
 
 /* shared memory blocking k-nomial gather */
 int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args,
-                                   coll_ml_function_t *c_input_args);
+                                   mca_bcol_base_function_t *c_input_args);
 
 /* shared memory non blocking k-nomial gather */
 int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args,
-                                        coll_ml_function_t *c_input_args);
+                                        mca_bcol_base_function_t *c_input_args);
 
 /* shared memory non blocking k-nomial gather progress*/
 int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args);
+                                            mca_bcol_base_function_t *c_input_args);
 
 /* shared memory init */
 int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super);
@@ -1110,9 +1108,9 @@
 /* Shared memory basesmuma reduce */
 int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super);
 int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args,
-                                      coll_ml_function_t *c_input_args);
+                                      mca_bcol_base_function_t *c_input_args);
 int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args,
-                                          coll_ml_function_t *c_input_args);
+                                          mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf,
                                                      int count, struct ompi_datatype_t *dtype,
@@ -1125,10 +1123,10 @@
 int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super);
 
 int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args,
-                                                coll_ml_function_t *c_input_args);
+                                                mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args,
-                                                      coll_ml_function_t *c_input_args);
+                                                      mca_bcol_base_function_t *c_input_args);
 
 /* initialize non-blocking barrier for recycling the memory buffers.
  *  This is not a general purpose nb_barrier, and relies on the
@@ -1168,19 +1166,13 @@
 int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block,
                               uint64_t buff_id );
 
-/* This function does bcol_basesmuma specific memory registration and
-   issues call back for ml level bank recycling
-*/
-int bcol_basesmuma_bank_init(struct mca_coll_ml_module_t *ml_module,
-                             mca_bcol_base_module_t *bcol_module,
-                             void *reg_data);
-
-/* bank init which is used for shared memory optimization, fall back to
- * the bank init above if this causes problems
- */
-int bcol_basesmuma_bank_init_opti(struct mca_coll_ml_module_t *ml_module,
-                                  mca_bcol_base_module_t *bcol_module,
-                                  void *reg_data);
+/* bank init which is used for shared memory optimization: the control
+ * struct is placed at the top of each payload buffer
+ */
+int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block,
+        uint32_t data_offset,
+        mca_bcol_base_module_t *bcol_module,
+        void *reg_data);
 
 /* used for shared memory offset exchange */
 int base_bcol_basesmuma_exchange_offsets(
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c	(working copy)
@@ -14,7 +14,7 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
+#include "ompi/mca/bcol/base/base.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
 /*
@@ -51,7 +51,7 @@
  *
  */
 int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args,
-                                           struct coll_ml_function_t *const_args)
+                                           struct mca_bcol_base_function_t *const_args)
 {
     /* local variables */
     int8_t  flag_offset;
@@ -286,7 +286,7 @@
 /* allgather progress function */
 
 int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args,
-                                               struct coll_ml_function_t *const_args)
+                                               struct mca_bcol_base_function_t *const_args)
 {
 
 
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c	(working copy)
@@ -20,9 +20,10 @@
 
 #include "opal/include/opal_stdint.h"
 
+#include "ompi/mca/bcol/base/base.h"
 #include "bcol_basesmuma.h"
 
-static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, coll_ml_function_t *c_input_args);
+static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super)
 {
@@ -185,7 +186,7 @@
 
 }
 
-static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, coll_ml_function_t *c_input_args)
+static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args)
 {
     mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
     int buff_idx = buff_idx = input_args->src_desc->buffer_index;
@@ -256,7 +257,7 @@
 /**
  * Shared memory blocking allreduce.
  */
-int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, coll_ml_function_t *c_input_args)
+int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
@@ -307,7 +308,7 @@
 
 /* this thing uses the old bcol private control structures */
 int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args,
-                                                      coll_ml_function_t *c_input_args)
+                                                      mca_bcol_base_function_t *c_input_args)
 {
 
     int my_rank,group_size,my_node_index;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c	(working copy)
@@ -17,6 +17,9 @@
 #include "ompi/datatype/ompi_datatype.h"
 #include "ompi/communicator/communicator.h"
 
+#include "ompi/mca/bcol/bcol.h"
+#include "ompi/mca/bcol/base/base.h"
+
 #include "bcol_basesmuma.h"
 
 #define __TEST_BLOCKING__   1
@@ -117,7 +120,7 @@
  * @param module - basesmuma module.
  */
 int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int group_size, process_shift, my_node_index;
@@ -254,7 +257,7 @@
 /*zero-copy large massage communication methods*/
 #if 0
 int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
-                                  coll_ml_function_t   *c_input_args)
+                                  mca_bcol_base_function_t   *c_input_args)
 {
     /* local variables */
     int group_size, process_shift, my_node_index;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c	(working copy)
@@ -96,7 +96,7 @@
   } while (0)
 
 int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args)
+                                            mca_bcol_base_function_t *c_input_args)
 {
   /* local variables */
   mca_bcol_basesmuma_module_t* bcol_module=
@@ -242,7 +242,7 @@
  * @param module - basesmuma module.
  */
 int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-                                          coll_ml_function_t *c_input_args)
+                                          mca_bcol_base_function_t *c_input_args)
 {
   /* local variables */
   mca_bcol_basesmuma_module_t* bcol_module=
@@ -387,7 +387,7 @@
  * algorithms.
  */
 int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args,
-                                                    coll_ml_function_t *c_input_args)
+                                                    mca_bcol_base_function_t *c_input_args)
 {
 
   /* local variables */
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c	(working copy)
@@ -21,7 +21,6 @@
 #include "ompi/constants.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "ompi/mca/bcol/base/base.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/patterns/comm/coll_ops.h"
 
 #include "opal/dss/dss.h"
@@ -166,127 +165,6 @@
     return ret;
 }
 
-#if 0
-/* Basesmuma interface function used for buffer bank resource recycling and
-   bcol specific registration information
-*/
-int bcol_basesmuma_bank_init(struct mca_coll_ml_module_t *ml_module,
-                             mca_bcol_base_module_t *bcol_module,
-                             void *reg_data)
-{
-    /* assumption here is that the block has been registered with
-     * sm bcol hence has been mapped by each process, need to be
-     * sure that memory is mapped amongst sm peers
-     */
-
-    /* local variables */
-    int ret = OMPI_SUCCESS, i;
-    uint32_t j;
-    sm_buffer_mgmt *pload_mgmt;
-    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
-    bcol_basesmuma_registration_data_t *sm_reg_data =
-        (bcol_basesmuma_registration_data_t *) reg_data;
-    mca_bcol_basesmuma_module_t *sm_bcol =
-        (mca_bcol_basesmuma_module_t *) bcol_module;
-    ml_memory_block_desc_t *ml_block =
-        ml_module->payload_block;
-    size_t malloc_size;
-    ompi_common_sm_file_t input_file;
-    uint64_t mem_offset;
-    int leading_dim,loop_limit,buf_id;
-    unsigned char *base_ptr;
-    mca_bcol_basesmuma_module_t *sm_bcol_module=
-        (mca_bcol_basesmuma_module_t *)bcol_module;
-
-    fprintf(stderr,"test opti test\n");
-
-    /* first, we get a pointer to the payload buffer management struct */
-    pload_mgmt = &(sm_bcol->colls_with_user_data);
-
-    /* allocate memory for pointers to mine and my peers' payload buffers
-     */
-    malloc_size = ml_block->num_banks*ml_block->num_buffers_per_bank*
-        pload_mgmt->size_of_group *sizeof(void *);
-    pload_mgmt->data_buffs = malloc(malloc_size);
-    if( !pload_mgmt->data_buffs) {
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-        goto exit_ERROR;
-    }
-
-    /* setup the input file for the shared memory connection manager */
-    input_file.file_name = sm_reg_data->file_name;
-    input_file.size = sm_reg_data->size;
-    input_file.size_ctl_structure = 0;
-    input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE;
-    input_file.mpool_size = sm_reg_data->size;
-
-    /* call the connection manager and map my shared memory peers' file
-     */
-    ret = ompi_common_smcm_allgather_connection(
-        sm_bcol,
-        sm_bcol->super.sbgp_partner_module,
-        &(cs->sm_connections_list),
-        &(sm_bcol->payload_backing_files_info),
-        sm_bcol->super.sbgp_partner_module->group_comm,
-        input_file,
-        false);
-    if( OMPI_SUCCESS != ret ) {
-        goto exit_ERROR;
-    }
-
-    /* now we exchange offset info - don't assume symmetric virtual memory
-     */
-    mem_offset = (uint64_t)(ml_block->block->base_addr) -
-        (uint64_t)(cs->sm_payload_structs->data_addr);
-
-    /* call into the exchange offsets function */
-    ret = base_bcol_basesmuma_exchange_offsets(sm_bcol_module,
-                                               (void **)pload_mgmt->data_buffs, mem_offset, 0,
-                                               pload_mgmt->size_of_group);
-    if( OMPI_SUCCESS != ret ) {
-        goto exit_ERROR;
-    }
-
-    /* convert memory offset to virtual address in current rank */
-    leading_dim = pload_mgmt->size_of_group;
-    loop_limit =  ml_block->num_banks*ml_block->num_buffers_per_bank;
-    for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) {
-
-        /* get the base pointer */
-        int array_id=SM_ARRAY_INDEX(leading_dim,0,i);
-        if( i == sm_bcol_module->super.sbgp_partner_module->my_index) {
-            /* me */
-            base_ptr=cs->sm_payload_structs->map_addr;
-        } else {
-            base_ptr=sm_bcol_module->payload_backing_files_info[i]->
-                sm_mmap->map_addr;
-        }
-        pload_mgmt->data_buffs[array_id]=(void *)
-            (((uint64_t)pload_mgmt->data_buffs[array_id])+(uint64_t)base_ptr);
-        for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
-            int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
-            array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i);
-            pload_mgmt->data_buffs[array_id]=(void *) ((uint64_t)(pload_mgmt->data_buffs[array_id_m1])+
-                                                       (uint64_t)ml_block->size_buffer);
-        }
-    }
-
-    /* setup the data structures needed for releasing the payload
-     * buffers back to the ml level
-     */
-    for(j = 0; j < ml_block->num_banks; j++) {
-        sm_bcol->colls_with_user_data.
-            ctl_buffs_mgmt[j].nb_barrier_desc.ml_memory_block_descriptor=
-            ml_block;
-    }
-
-    return OMPI_SUCCESS;
-
-exit_ERROR:
-    return ret;
-}
-#endif
-
 /*
  * Allocate buffers for storing non-blocking collective descriptions, required
  * for making code re-entrant
@@ -337,9 +215,10 @@
 /* New init function used for new control scheme where we put the control
  * struct at the top of the payload buffer
  */
-int bcol_basesmuma_bank_init_opti(struct mca_coll_ml_module_t *ml_module,
-                                  mca_bcol_base_module_t *bcol_module,
-                                  void *reg_data)
+int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block,
+        uint32_t data_offset,
+        mca_bcol_base_module_t *bcol_module,
+        void *reg_data)
 {
     /* assumption here is that the block has been registered with
      * sm bcol hence has been mapped by each process, need to be
@@ -354,8 +233,7 @@
         (bcol_basesmuma_registration_data_t *) reg_data;
     mca_bcol_basesmuma_module_t *sm_bcol =
         (mca_bcol_basesmuma_module_t *) bcol_module;
-    ml_memory_block_desc_t *ml_block =
-        ml_module->payload_block;
+    mca_bcol_base_memory_block_desc_t *ml_block = payload_block;
     size_t malloc_size;
     bcol_basesmuma_smcm_file_t input_file;
     uint64_t mem_offset;
@@ -374,7 +252,7 @@
 
     /* go ahead and get the header size that is cached on the payload block
      */
-    sm_bcol->total_header_size = ml_module->data_offset;
+    sm_bcol->total_header_size = data_offset;
 
     /* allocate memory for pointers to mine and my peers' payload buffers
      * difference here is that now we use our new data struct
@@ -449,7 +327,7 @@
         /* second, calculate where to set the data pointer */
         pload_mgmt->data_buffs[array_id].payload=(void *)
             (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
-                        (uint64_t)(uintptr_t) ml_module->data_offset);
+                        (uint64_t)(uintptr_t) data_offset);
 
         for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
             int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
@@ -464,7 +342,7 @@
             /* second, set the payload pointer */
             pload_mgmt->data_buffs[array_id].payload =(void *)
                 (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
-                            (uint64_t)(uintptr_t) ml_module->data_offset);
+                            (uint64_t)(uintptr_t) data_offset);
         }
 
     }
@@ -513,7 +391,7 @@
                                                ml_mem->num_banks,
                                                ml_mem->num_buffers_per_bank,
                                                ml_mem->size_buffer,
-                                               ml_module->data_offset,
+                                               data_offset,
                                                sm_bcol_module->super.sbgp_partner_module->group_size,
                                                sm_bcol_module->pow_k)) {
 
@@ -542,7 +420,7 @@
  */
 mca_bcol_basesmuma_module_t *sm_bcol_module
 int bcol_basesmuma_free_payload_buff(
-    struct ml_memory_block_desc_t *block,
+    struct mca_bcol_base_memory_block_desc_t *block,
     sm_buffer_mgmt *ctl_mgmt,
     uint64_t buff_id)
 {
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c	(working copy)
@@ -23,7 +23,6 @@
 #include "ompi/mca/mpool/base/base.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "ompi/mca/bcol/base/base.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 
 #include "bcol_basesmuma.h"
 /*
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c	(working copy)
@@ -18,6 +18,7 @@
 
 #include "opal/sys/atomic.h"
 
+#include "ompi/mca/bcol/base/base.h"
 #include "bcol_basesmuma.h"
 
 /********************************************************************************/
@@ -25,7 +26,7 @@
 /********************************************************************************/
 
 static int bcol_basesmuma_fanin_new(bcol_function_args_t *input_args,
-                                    coll_ml_function_t *c_input_args)
+                                    mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int64_t sequence_number;
@@ -107,7 +108,7 @@
 }
 
 static int bcol_basesmuma_fanin_new_progress(bcol_function_args_t *input_args,
-                                    coll_ml_function_t *c_input_args)
+                                    mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int64_t sequence_number;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c	(working copy)
@@ -18,6 +18,7 @@
 
 #include "opal/sys/atomic.h"
 
+#include "ompi/mca/bcol/base/base.h"
 #include "bcol_basesmuma.h"
 
 /***********************************************************************************/
@@ -26,7 +27,7 @@
 
 static int bcol_basesmuma_fanout_new(
                 bcol_function_args_t *input_args,
-                coll_ml_function_t *c_input_args)
+                mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int64_t sequence_number;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c	(working copy)
@@ -12,6 +12,7 @@
  */
 
 #include "ompi_config.h"
+#include "ompi/mca/bcol/base/base.h"
 #include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
 #include "ompi/constants.h"
 #include "ompi/datatype/ompi_datatype.h"
@@ -53,7 +54,7 @@
 }
 
 int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args,
-                                        coll_ml_function_t *c_input_args)
+                                        mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int leading_dim, buff_idx, idx;
@@ -217,7 +218,7 @@
 
 
 int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args)
+                                            mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int group_size;
@@ -457,7 +458,7 @@
 /* original, fully blocking, fully synchronous gather - should result in worst performance when used */
 #if 0
 int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args,
-                                   coll_ml_function_t *c_input_args)
+                                   mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int group_size;
@@ -753,7 +754,7 @@
 #if 0
 /* blocking, asynchronous polling gather routine */
 int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args,
-                                   coll_ml_function_t *c_input_args)
+                                   mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int group_size;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c	(working copy)
@@ -43,7 +43,7 @@
 static int completed_scatter = 0;
 #if 0
 int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast_old(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
 
     /* local variables */
@@ -589,7 +589,7 @@
  */
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
 
     /* local variables */
@@ -1113,7 +1113,7 @@
  */
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
 	int i;
 	mca_bcol_basesmuma_portal_proc_info_t *portals_info;
@@ -1526,7 +1526,7 @@
 
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
 
 	int i;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h	(working copy)
@@ -615,12 +615,12 @@
 }
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args);
+    mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args);
+    mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args);
+    mca_bcol_base_function_t *c_input_args);
 
 #endif
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c	(working copy)
@@ -46,7 +46,7 @@
  * @param module - basesmuma module.
  */
 int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
 #if 0
 		/* local variables */
@@ -268,7 +268,7 @@
 
 
 int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-    coll_ml_function_t *c_input_args)
+    mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     mca_bcol_basesmuma_module_t* bcol_module=
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c	(working copy)
@@ -24,7 +24,7 @@
 
 #if 0
 int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args,
-                                            coll_ml_function_t *c_input_args)
+                                            mca_bcol_base_function_t *c_input_args)
 {
 
     /* local variables */
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c	(working copy)
@@ -21,6 +21,7 @@
 #include "bcol_basesmuma.h"
 #include "opal/sys/atomic.h"
 #include "ompi/patterns/net/netpatterns.h"
+#include "ompi/mca/bcol/base/base.h"
 
 /*
  * Initialize nonblocking barrier.  This is code specific for handling
@@ -369,7 +370,7 @@
 }
 
 static int bcol_basesmuma_memsync(bcol_function_args_t *input_args,
-                coll_ml_function_t *c_input_args)
+                mca_bcol_base_function_t *c_input_args)
 {
     int rc;
     int memory_bank = input_args->root;
@@ -414,7 +415,7 @@
 }
 
 static int bcol_basesmuma_memsync_progress(bcol_function_args_t *input_args,
-                coll_ml_function_t *c_input_args)
+                mca_bcol_base_function_t *c_input_args)
 {
     int memory_bank = input_args->root;
 
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c	(working copy)
@@ -29,7 +29,7 @@
  */
 
 static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args,
-                                                      coll_ml_function_t *c_input_args);
+                                                      mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super)
 {
@@ -112,7 +112,7 @@
 }
 
 static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args,
-                                                      coll_ml_function_t *c_input_args)
+                                                      mca_bcol_base_function_t *c_input_args)
 {
     mca_bcol_basesmuma_module_t* bcol_module =
         (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
@@ -157,7 +157,7 @@
 }
 
 int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args,
-                                      coll_ml_function_t *c_input_args)
+                                      mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int rc=BCOL_FN_COMPLETE;
@@ -261,7 +261,7 @@
  * complete?
  */
 int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args,
-                                          coll_ml_function_t *c_input_args)
+                                          mca_bcol_base_function_t *c_input_args)
 {
     /* local variables */
     int rc=OMPI_SUCCESS;
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h	(working copy)
@@ -70,10 +70,10 @@
 						volatile mca_bcol_basesmuma_payload_t *data_buffs);
 
 int bcol_basesmuma_lmsg_reduce(bcol_function_args_t *input_args,
-        coll_ml_function_t *c_input_args);
+        mca_bcol_base_function_t *c_input_args);
 
 int bcol_basesmuma_lmsg_reduce_extra(bcol_function_args_t *input_args,
-        coll_ml_function_t *c_input_args);
+        mca_bcol_base_function_t *c_input_args);
 
 void basesmuma_reduce_recv(int my_group_index, int peer,
 						   void *recv_buffer,
Index: ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c
===================================================================
--- ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c	(revision 30606)
+++ ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c	(working copy)
@@ -11,8 +11,8 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/bcol.h"
+#include "ompi/mca/bcol/base/base.h"
 #include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
 
 /*
@@ -49,7 +49,7 @@
  *
  */
 int bcol_basesmuma_k_nomial_barrier_init(bcol_function_args_t *input_args,
-                struct coll_ml_function_t *const_args)
+                struct mca_bcol_base_function_t *const_args)
 {
     /* local variables */
     int flag_offset = 0;
@@ -236,7 +236,7 @@
 /* allgather progress function */
 
 int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
 
 
Index: ompi/mca/bcol/bcol.h
===================================================================
--- ompi/mca/bcol/bcol.h	(revision 30606)
+++ ompi/mca/bcol/bcol.h	(working copy)
@@ -34,8 +34,6 @@
 #endif
 
 /* Forward declaration - please do not remove it */
-struct ml_memory_block_desc_t;
-struct mca_coll_ml_module_t;
 struct ml_buffers_t;
 
 struct mca_bcol_base_coll_fn_comm_attributes_t;
@@ -318,8 +316,8 @@
 
 /* forward declaration */
 struct mca_coll_ml_descriptor_t;
-struct ml_payload_buffer_desc_t;
-struct mca_coll_ml_route_info_t;
+struct mca_bcol_base_payload_buffer_desc_t;
+struct mca_bcol_base_route_info_t;
 
 typedef struct {
     int order_num;           /* Seq num of collective fragment */
@@ -352,7 +350,7 @@
      * parameters */
     /* Pasha: We don need this one for new flow - remove it */
     struct mca_coll_ml_descriptor_t *full_message_descriptor;
-    struct mca_coll_ml_route_info_t *root_route;
+    struct mca_bcol_base_route_info_t *root_route;
     /* function status */
     int function_status;
     /* root, for rooted operations */
@@ -361,8 +359,8 @@
     void *sbuf;
     void *rbuf;
     void *userbuf;
-    struct ml_payload_buffer_desc_t *src_desc;
-    struct ml_payload_buffer_desc_t *dst_desc;
+    struct mca_bcol_base_payload_buffer_desc_t *src_desc;
+    struct mca_bcol_base_payload_buffer_desc_t *dst_desc;
    /* ml buffer size */
     uint32_t buffer_size;
     /* index of buffer in ml payload cache */
@@ -389,6 +387,82 @@
 
 };
 
+struct mca_bcol_base_route_info_t {    /* type of the root_route pointer member declared earlier in this header */
+    int level;    /* NOTE(review): presumably a hierarchy level -- confirm */
+    int rank;     /* NOTE(review): presumably a rank within that level -- confirm */
+};
+typedef struct mca_bcol_base_route_info_t mca_bcol_base_route_info_t;
+
+struct mca_bcol_base_lmngr_block_t {   /* list-linkable descriptor of one memory block */
+    opal_list_item_t super;            /* parent class: instances are OPAL list items */
+    struct mca_coll_ml_lmngr_t *lmngr; /* NOTE(review): presumably the list manager owning this block -- confirm */
+    void* base_addr;                   /* set to NULL by the class constructor */
+};
+typedef struct mca_bcol_base_lmngr_block_t mca_bcol_base_lmngr_block_t;
+OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_lmngr_block_t); /* class is instantiated in bcol/base; export it like mca_bcol_base_module_t */
+
+struct mca_bcol_base_memory_block_desc_t {   /* bookkeeping for one payload memory block */
+
+    /* memory block for payload buffers */
+    struct mca_bcol_base_lmngr_block_t *block;
+
+    /* Address offset in bytes -- indicates free memory in the block */
+    uint64_t   block_addr_offset;
+
+    /* size of the memory block */
+    size_t     size_block;
+
+    /* number of memory banks */
+    uint32_t     num_banks;
+
+    /* number of buffers per bank */
+    uint32_t    num_buffers_per_bank;
+
+    /* size of a payload buffer */
+    uint32_t     size_buffer;
+
+    /* pointer to the initialized buffer descriptors */
+    struct mca_bcol_base_payload_buffer_desc_t *buffer_descs;
+
+    /* index of the next free buffer in the block */
+    uint64_t next_free_buffer;
+
+    uint32_t *bank_release_counters; /* NOTE(review): presumably one release counter per bank -- confirm */
+
+    /* Counter that defines which bank should be synchronized next.
+     * Since collectives can complete out of order, we have to make
+     * sure that memory synchronization collectives are started in order! */
+    int memsync_counter; 
+
+    /* Array of flags used to signal that a bank is ready for recycling */
+    bool *ready_for_memsync;
+
+    /* These flags monitor whether a bank is open for usage. Usually we expect
+     * that the user will do the check only on buffer-zero allocation */
+    bool *bank_is_busy;
+
+};
+
+/* convenience typedef */
+typedef struct mca_bcol_base_memory_block_desc_t mca_bcol_base_memory_block_desc_t;
+/* Callback type taking a memory block descriptor and a buffer id; NOTE(review): presumably releases that buffer -- confirm */
+typedef void (*mca_bcol_base_release_buff_fn_t)(struct mca_bcol_base_memory_block_desc_t *ml_memblock, uint32_t buff_id);
+
+struct mca_bcol_base_payload_buffer_desc_t {   /* describes one payload buffer (element type of buffer_descs in the memory block descriptor) */
+    void         *base_data_addr;   /* buffer address */
+    void         *data_addr;         /* buffer address + header offset */
+    uint64_t     generation_number;  /* generation of this buffer */
+    uint64_t     bank_index;         /* bank of this buffer */
+    uint64_t     buffer_index;       /* index of this buffer */
+};
+/* convenience typedef */
+typedef struct mca_bcol_base_payload_buffer_desc_t mca_bcol_base_payload_buffer_desc_t;
+
+
+
+
+
+
 typedef struct bcol_function_args_t bcol_function_args_t;
 
 
@@ -411,9 +485,9 @@
 
 /* collective function prototype - all functions have the same interface
  * so that we can call them via a function pointer */
-struct coll_ml_function_t;
+struct mca_bcol_base_function_t;
 typedef int (*mca_bcol_base_module_collective_fn_primitives_t)
-    (bcol_function_args_t *input_args, struct coll_ml_function_t *const_args);
+    (bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args);
 
 typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t)
     (struct mca_bcol_base_module_t *bcol_module);
@@ -533,11 +607,12 @@
  *
  */
 /*typedef int (*mca_bcol_base_init_memory_fn_t)
-    (struct ml_memory_block_desc_t *ml_block, void *reg_data);*/
+    (struct mca_bcol_base_memory_block_desc_t *ml_block, void *reg_data);*/
 
 typedef int (*mca_bcol_base_init_memory_fn_t)
-    (struct mca_coll_ml_module_t *ml_module,
-     struct mca_bcol_base_module_t *bcol_module,
+     (struct mca_bcol_base_memory_block_desc_t *payload_block,
+     uint32_t data_offset,
+     struct mca_bcol_base_module_t *bcol,
      void *reg_data);
 
 typedef int (*mca_common_allgather_init_fn_t)
@@ -656,6 +731,41 @@
 typedef struct mca_bcol_base_module_t mca_bcol_base_module_t;
 OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t);
 
+/* Function description passed to the bcol collective primitives as their const_args argument. */
+struct mca_bcol_base_function_t {
+    int fn_idx;    /* NOTE(review): presumably the index of this function in its schedule -- confirm */
+    /* bcol module */
+    struct mca_bcol_base_module_t *bcol_module;
+
+    /*
+     *  The following two fields are used by bcol modules that
+     *  want to do some optimizations based on the fact that n
+     *  functions from the same bcol module are called in a row.
+     *  For example, in the iboffload case, on the first call one
+     *  will want to initialize the MWR, and start to instantiate
+     *  it, but only post it at the end of the last call.
+     *  This field is the index of this function in a sequence
+     *  of consecutive functions from the same bcol.
+     */
+    int index_in_consecutive_same_bcol_calls;
+
+    /* number of times functions from this bcol are
+     * called in order
+     */
+    int n_of_this_type_in_a_row;
+
+    /*
+     * number of times functions from this module are called in the
+     * collective operation.
+     */
+    int n_of_this_type_in_collective;
+    int index_of_this_type_in_collective; /* NOTE(review): presumably this call's index among those -- confirm */
+};
+typedef struct mca_bcol_base_function_t mca_bcol_base_function_t;
+
+
+
+
 struct mca_bcol_base_descriptor_t {
     ompi_free_list_item_t super;
 /* Vasily: will be described in the future */
Index: ompi/mca/bcol/iboffload/bcol_iboffload.h
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload.h	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload.h	(working copy)
@@ -28,8 +28,6 @@
 #include "ompi/datatype/ompi_datatype.h"
 #include "ompi/datatype/ompi_datatype_internal.h"
 
-#include "ompi/mca/coll/ml/coll_ml.h"
-
 #include "ompi/mca/bcol/bcol.h"
 #include "ompi/mca/bcol/base/base.h"
 
@@ -135,7 +133,7 @@
     /* IB related information first */
     struct mca_bcol_iboffload_rdma_info_t ib_info;
     /* back pointer to original ML memory descriptor */
-    struct ml_memory_block_desc_t *ml_mem_desc;
+    struct mca_bcol_base_memory_block_desc_t *ml_mem_desc;
     /* Pasha: do we really need this one ?*/
     /* caching ml memory descriptor configurations localy */
     mca_bcol_iboffload_rdma_block_desc_t bdesc;
@@ -579,7 +577,7 @@
  */
 
 int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t
+                                                   struct mca_bcol_base_function_t
                                                    *const_args);
 
 int mca_bcol_iboffload_barrier_intra_recursive_doubling_start(
@@ -613,7 +611,7 @@
                 struct mca_bcol_iboffload_collreq_t *coll_request);
 
 int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name);
 int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type);
Index: ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c	(working copy)
@@ -1170,7 +1170,7 @@
 #if 0
 static int mca_bcol_iboffload_neighbor_allgather_userbuffer_intra(
                                             bcol_function_args_t *fn_arguments,
-                                            struct coll_ml_function_t *const_args)
+                                            struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *)const_args->bcol_module;
@@ -1201,7 +1201,7 @@
 
 #if 1
 static int mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *)const_args->bcol_module;
@@ -1232,7 +1232,7 @@
 #endif
 
 static int mca_bcol_iboffload_k_nomial_allgather_mlbuffer_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *)const_args->bcol_module;
@@ -1266,7 +1266,7 @@
  */
 static int mca_bcol_iboffload_collreq_mlbuffer_progress(
             bcol_function_args_t *input_args,
-            struct coll_ml_function_t *const_args)
+            struct mca_bcol_base_function_t *const_args)
 {
     int i;
     mca_bcol_iboffload_collreq_t *coll_request =
@@ -1294,7 +1294,7 @@
 
 static int mca_bcol_iboffload_collreq_userbuffer_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     int i;
     mca_bcol_iboffload_collreq_t *coll_request =
Index: ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c	(working copy)
@@ -1261,7 +1261,7 @@
 }
 
 static int mca_bcol_iboffload_allreduce_intra(bcol_function_args_t *fn_arguments,
-                                              struct coll_ml_function_t *const_args)
+                                              struct mca_bcol_base_function_t *const_args)
 {
     /* local variables */
     int rc;
@@ -1304,7 +1304,7 @@
 
 static int mca_bcol_iboffload_allreduce_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_collreq_t *coll_request =
                  (mca_bcol_iboffload_collreq_t *)
Index: ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c	(working copy)
@@ -324,7 +324,7 @@
 }
 
 int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
 
     /* local variables */
@@ -818,7 +818,7 @@
 
 static int mca_bcol_iboffload_new_style_barrier_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_collreq_t *coll_request =
                  (mca_bcol_iboffload_collreq_t *)
@@ -840,7 +840,7 @@
 
 static int mca_bcol_iboffload_new_style_barrier_intra(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
     /* local variables */
     int rc;
Index: ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c	(working copy)
@@ -177,7 +177,7 @@
 
 int mca_bcol_iboffload_small_msg_bcast_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_collreq_t *coll_request =
                  (mca_bcol_iboffload_collreq_t *)
@@ -369,7 +369,7 @@
 }
 
 int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *) const_args->bcol_module;
@@ -501,7 +501,7 @@
 }
 
 int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *)const_args->bcol_module;
@@ -528,7 +528,7 @@
 
 /* Large message scatter-allgather with zero copy */
 int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     int i;
     mca_bcol_iboffload_collreq_t *coll_request =
@@ -830,7 +830,7 @@
 }
 
 int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *) const_args->bcol_module;
@@ -977,7 +977,7 @@
 }
 
 int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments,
-                                                   struct coll_ml_function_t *const_args)
+                                                   struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_module_t *iboffload_module =
         (mca_bcol_iboffload_module_t *) const_args->bcol_module;
Index: ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h	(working copy)
@@ -26,17 +26,17 @@
 
 int mca_bcol_iboffload_small_msg_bcast_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args);
+                        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super);
 
 static inline __opal_attribute_always_inline__ int
Index: ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c	(working copy)
@@ -233,7 +233,7 @@
  ***********************************************************************/
 static int mca_bcol_iboffload_new_style_fanin_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_collreq_t *coll_request =
                  (mca_bcol_iboffload_collreq_t *)
@@ -293,7 +293,7 @@
 
 static int mca_bcol_iboffload_new_style_fanin_intra(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
     int rc = OMPI_SUCCESS;
 
Index: ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c	(working copy)
@@ -232,7 +232,7 @@
  ***********************************************************************/
 static int mca_bcol_iboffload_new_style_fanout_progress(
                         bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_iboffload_collreq_t *coll_request =
                  (mca_bcol_iboffload_collreq_t *)
@@ -292,7 +292,7 @@
 
 static int mca_bcol_iboffload_new_style_fanout_intra(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
     int rc = OMPI_SUCCESS;
 
Index: ompi/mca/bcol/iboffload/bcol_iboffload_module.c
===================================================================
--- ompi/mca/bcol/iboffload/bcol_iboffload_module.c	(revision 30606)
+++ ompi/mca/bcol/iboffload/bcol_iboffload_module.c	(working copy)
@@ -614,7 +614,7 @@
     mca_bcol_iboffload_module_t *iboffload_module = (mca_bcol_iboffload_module_t *) bcol;
     mca_bcol_iboffload_local_rdma_block_t *rdma_block = &iboffload_module->rdma_block;
 
-    struct ml_memory_block_desc_t *desc = ml_module->payload_block;
+    struct mca_bcol_base_memory_block_desc_t *desc = ml_module->payload_block;
     struct ibv_mr *mr = (struct ibv_mr *) desc->block->lmngr->reg_desc[bcol->context_index];
     int i;
 
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h	(working copy)
@@ -247,8 +247,6 @@
 struct mca_bcol_ptpcoll_local_mlmem_desc_t {
     /* Bank index to release */
     uint32_t bank_index_for_release;
-    /* back pointer to original ML memory descriptor */
-    struct ml_memory_block_desc_t *ml_mem_desc;
     /* number of memory banks */
     uint32_t     num_banks;
     /* number of buffers per bank */
@@ -382,9 +380,9 @@
 
 /* barrier routines */
 int bcol_ptpcoll_barrier_recurs_dbl(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_barrier_recurs_knomial(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super);
 int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super);
 void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment, 
@@ -398,16 +396,16 @@
 int bcol_ptpcoll_fanin( bcol_function_args_t *input_args,
         struct mca_bcol_base_module_t *module);
 int bcol_ptpcoll_fanout( bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 
 /* allgather routine */
 int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args);
+                        struct mca_bcol_base_function_t *const_args);
 
 /* allgather progress */
 int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args);
+                        struct mca_bcol_base_function_t *const_args);
 /* allgather register */
 int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super);
 
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c	(working copy)
@@ -11,10 +11,8 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "bcol_ptpcoll_allreduce.h"
-#include "ompi/mca/coll/base/coll_tags.h" /* debug */
 /*
  * Recursive K-ing allgather
  */
@@ -31,7 +29,7 @@
  */
 
 int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args,
-                struct coll_ml_function_t *const_args)
+                struct mca_bcol_base_function_t *const_args)
 {
     /* local variables */
 
@@ -307,7 +305,7 @@
 /* allgather progress function */
 
 int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args,
-                        struct coll_ml_function_t *const_args)
+                        struct mca_bcol_base_function_t *const_args)
 {
 
 
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c	(working copy)
@@ -14,7 +14,6 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "bcol_ptpcoll_allreduce.h"
 
@@ -75,7 +74,7 @@
 }
 
 static int bcol_ptpcoll_allreduce_narraying_progress (bcol_function_args_t *input_args,
-                                                      struct coll_ml_function_t *const_args)
+                                                      struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
     void *data_buffer = (void *) ( (unsigned char *) input_args->sbuf +
@@ -228,7 +227,7 @@
 }
 
 int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args,
-                                          struct coll_ml_function_t *const_args){
+                                          struct mca_bcol_base_function_t *const_args){
 
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
     uint64_t sequence_number = input_args->sequence_num;
@@ -696,7 +695,7 @@
 
 
 int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args,
-                                                                    struct coll_ml_function_t *const_args){
+                                                                    struct mca_bcol_base_function_t *const_args){
 
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
     struct ompi_op_t *op = input_args->op;
@@ -895,7 +894,7 @@
 }
 
 int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args,
-                                                                          struct coll_ml_function_t *const_args){
+                                                                          struct mca_bcol_base_function_t *const_args){
 
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
     struct ompi_op_t *op = input_args->op;
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h	(working copy)
@@ -34,7 +34,7 @@
 
 
 int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module,
 						const int buffer_index, void *sbuf,
@@ -52,7 +52,7 @@
 				const int padded_start_byte);
 
 int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 
 int compute_knomial_allgather_offsets(int group_index, int count, struct
@@ -74,7 +74,7 @@
 						const int count, struct ompi_datatype_t *dtype);
 
 int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super);
 
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c	(working copy)
@@ -14,7 +14,6 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "bcol_ptpcoll.h"
 #include "bcol_ptpcoll_utils.h"
 
@@ -30,7 +29,7 @@
 /*****************************************************************************************/
 static int bcol_ptpcoll_barrier_recurs_knomial_new(
                 bcol_function_args_t *input_args,
-                struct coll_ml_function_t *const_args)
+                struct mca_bcol_base_function_t *const_args)
 {
     /* local variable */
     uint64_t sequence_number;
@@ -220,7 +219,7 @@
 
 static int bcol_ptpcoll_barrier_recurs_knomial_new_progress(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
     /* local variable */
     mca_bcol_ptpcoll_module_t *ptpcoll_module =
@@ -364,7 +363,7 @@
 
 static int bcol_ptpcoll_barrier_recurs_knomial_extra_new(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
     /* local variable */
     uint64_t sequence_number;
@@ -450,7 +449,7 @@
 
 static int bcol_ptpcoll_barrier_recurs_dbl_new(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
    /* local variable */
     uint64_t sequence_number;
@@ -625,7 +624,7 @@
 
 static int bcol_ptpcoll_barrier_recurs_dbl_new_progress(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
    /* local variable */
     mca_bcol_ptpcoll_module_t *ptp_module =
@@ -758,7 +757,7 @@
 
 static int bcol_ptpcoll_barrier_recurs_dbl_extra_new(
                                 bcol_function_args_t *input_args,
-                                struct coll_ml_function_t *const_args)
+                                struct mca_bcol_base_function_t *const_args)
 {
    /* local variable */
     uint64_t sequence_number;
@@ -837,7 +836,7 @@
 /* We have the same progress func for both cases (R-D and K-Nominal) */
 static int bcol_ptpcoll_barrier_extra_node_progress(
                             bcol_function_args_t *input_args,
-                            struct coll_ml_function_t *const_args)
+                            struct mca_bcol_base_function_t *const_args)
 {
    /* local variable */
     ompi_request_t **requests;
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c	(working copy)
@@ -11,7 +11,6 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "bcol_ptpcoll_bcast.h"
 #include "bcol_ptpcoll_utils.h"
@@ -83,7 +82,7 @@
 
 
 int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     int completed = 0;
     int rc;
@@ -112,7 +111,7 @@
 
 /* K-nomial tree ( with any root ) algorithm */
 int bcol_ptpcoll_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
     mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
@@ -293,7 +292,7 @@
 }
 
 static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -392,7 +391,7 @@
 }
 
 static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     int rc;
     int completed = 0; /* not completed */
@@ -467,7 +466,7 @@
 
 
 int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -596,7 +595,7 @@
 }
 
 int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -767,7 +766,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -866,7 +865,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     int rc;
     int completed = 0; /* not completed */
@@ -920,7 +919,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1014,7 +1013,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1147,7 +1146,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1254,7 +1253,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1435,7 +1434,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1522,7 +1521,7 @@
 }
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     int rc;
     int completed = 0; /* not completed */
@@ -1544,7 +1543,7 @@
 }
 
 static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress(
-        bcol_function_args_t *input_args, struct coll_ml_function_t *const_args)
+        bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1676,7 +1675,7 @@
 
 
 static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1899,7 +1898,7 @@
 
 /* Pasha : need to move this code to some common function */
 static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -1984,7 +1983,7 @@
 }
 
 static int bcol_ptpcoll_bcast_known_root_extra_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     int rc;
     int completed = 0; /* not completed */
@@ -2008,7 +2007,7 @@
 
 
 static int bcol_ptpcoll_bcast_narray_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -2093,7 +2092,7 @@
 }
 
 static int bcol_ptpcoll_bcast_narray(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h	(working copy)
@@ -20,34 +20,34 @@
 int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super);
 
 int bcol_ptpcoll_bcast_k_nomial_anyroot (bcol_function_args_t *input_args, 
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 
 /* macros */
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c	(working copy)
@@ -10,7 +10,6 @@
 
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h"
 
 /*
@@ -18,7 +17,7 @@
  */
 
 int bcol_ptpcoll_fanout( bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     /* local variable */
     int ret = OMPI_SUCCESS;
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c	(working copy)
@@ -30,8 +30,6 @@
 #include "ompi/mca/bcol/base/base.h"
 #include "ompi/mca/pml/pml.h"  /* need this for the max tag size */
 
-#include "ompi/mca/coll/ml/coll_ml.h"
-#include "ompi/mca/coll/ml/coll_ml_allocation.h"
 #include "bcol_ptpcoll.h"
 #include "bcol_ptpcoll_utils.h"
 #include "bcol_ptpcoll_bcast.h"
@@ -244,13 +242,14 @@
 /*
  * Cache information about ML memory
  */
-static int mca_bcol_ptpcoll_cache_ml_memory_info(struct mca_coll_ml_module_t *ml_module,
+static int mca_bcol_ptpcoll_cache_ml_memory_info(struct mca_bcol_base_memory_block_desc_t *payload_block,
+                                                 uint32_t data_offset,
                                                  struct mca_bcol_base_module_t *bcol,
                                                  void *reg_data)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) bcol;
     mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem;
-    struct ml_memory_block_desc_t *desc = ml_module->payload_block;
+    struct mca_bcol_base_memory_block_desc_t *desc = payload_block;
     int group_size = ptpcoll_module->super.sbgp_partner_module->group_size;
 
     PTPCOLL_VERBOSE(10, ("mca_bcol_ptpcoll_init_buffer_memory was called"));
@@ -263,9 +262,6 @@
     PTPCOLL_VERBOSE(10, ("ML buffer configuration num banks %d num_per_bank %d size %d base addr %p",
                          desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, desc->block->base_addr));
 
-    /* pointer to ml level descriptor */
-    ml_mem->ml_mem_desc = desc;
-
     /* Set first bank index for release */
     ml_mem->bank_index_for_release = 0;
 
@@ -274,7 +270,7 @@
                                          ml_mem->num_banks,
                                          ml_mem->num_buffers_per_bank,
                                          ml_mem->size_buffer,
-                                         ml_module->data_offset,
+                                         data_offset,
                                          group_size,
                                          ptpcoll_module->pow_k)) {
         PTPCOLL_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n"));
@@ -281,8 +277,8 @@
         return OMPI_ERROR;
     }
 
-    PTPCOLL_VERBOSE(10, ("ml_module = %p, ptpcoll_module = %p, ml_mem_desc = %p.\n",
-                         ml_module, ptpcoll_module, ml_mem->ml_mem_desc));
+    PTPCOLL_VERBOSE(10, ("ptpcoll_module = %p, ml_mem_desc = %p.\n",
+                         (void *) ptpcoll_module, (void *) desc));
 
     return OMPI_SUCCESS;
 }
Index: ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c
===================================================================
--- ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c	(revision 30606)
+++ ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c	(working copy)
@@ -14,16 +14,15 @@
 #include "ompi_config.h"
 
 #include "ompi/include/ompi/constants.h"
-#include "ompi/mca/coll/ml/coll_ml.h"
 #include "ompi/mca/bcol/bcol.h"
 #include "bcol_ptpcoll_reduce.h"
 #include "bcol_ptpcoll_utils.h"
 
 static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args);
+        struct mca_bcol_base_function_t *const_args);
 
 
 #define NARRAY_RECV_NB(narray_node, process_shift, group_size,                            \
@@ -85,7 +84,7 @@
     return OMPI_SUCCESS;
 }
 static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
@@ -226,7 +225,7 @@
 }
 
 static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args,
-        struct coll_ml_function_t *const_args)
+        struct mca_bcol_base_function_t *const_args)
 {
     mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
 
Index: ompi/mca/coll/ml/coll_ml.h
===================================================================
--- ompi/mca/coll/ml/coll_ml.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml.h	(working copy)
@@ -101,40 +101,7 @@
 };
 
 struct mca_bcol_base_module_t;
-/* function description */
-struct coll_ml_function_t {
-    int fn_idx;
-    /* module */
-    struct mca_bcol_base_module_t *bcol_module;
 
-    /*
-     *  The following two parameters are used for bcol modules
-     *  that want to do some optimizations based on the fact that
-     *  n functions from the same bcol module are called in a row.
-     *  For example, in the iboffload case, on the first call one
-     *  will want to initialize the MWR, and start to instantiate
-     *  it, but only post it at the end of the last call.
-     *  The index of this function in a sequence of consecutive
-     *  functions from the same bcol
-     */
-    int index_in_consecutive_same_bcol_calls;
-
-    /* number of times functions from this bcol are
-     * called in order
-     */
-    int n_of_this_type_in_a_row;
-
-    /*
-     * number of times functions from this module are called in the
-     * collective operation.
-     */
-    int n_of_this_type_in_collective;
-    int index_of_this_type_in_collective;
-};
-typedef struct coll_ml_function_t coll_ml_function_t;
-
-
-
 /* collective function arguments - gives
  * one function signature for calling all collective setup
  * routines, with the initial call to a collective function having
@@ -205,7 +172,7 @@
     mpi_coll_algorithm_params_t alg_params;
 
     /* list of functions */
-    coll_ml_function_t *functions;
+    mca_bcol_base_function_t *functions;
 
     /* function names - for debugging */
     char **function_names;
@@ -599,12 +566,6 @@
  */
 OMPI_MODULE_DECLSPEC extern mca_coll_ml_component_t mca_coll_ml_component;
 
-struct mca_coll_ml_route_info_t {
-    int level;
-    int rank;
-};
-typedef struct mca_coll_ml_route_info_t mca_coll_ml_route_info_t;
-
 struct mca_coll_ml_leader_offset_info_t {
     size_t offset;
     int level_one_index;
@@ -623,7 +584,7 @@
     int n_levels;
     /* bcols bits that describe supported features/modes */
     uint64_t all_bcols_mode;
-    mca_coll_ml_route_info_t *route_vector;
+    mca_bcol_base_route_info_t *route_vector;
     coll_ml_collective_description_t *hierarchical_algorithms[BCOL_NUM_OF_FUNCTIONS];
     sub_group_params_t *array_of_all_subgroups;
     /* (sbgp, bcol) pairs */
@@ -691,7 +652,7 @@
     ompi_free_list_t fragment_descriptors;
 
     /** pointer to the payload memory block **/
-    struct ml_memory_block_desc_t *payload_block;
+    struct mca_bcol_base_memory_block_desc_t *payload_block;
 
     /** the maximum size of collective function description */
     int max_dag_size;
Index: ompi/mca/coll/ml/coll_ml_allgather.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_allgather.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_allgather.c	(working copy)
@@ -133,7 +133,7 @@
     size_t frag_len, dt_size;
 
     void *buf;
-    ml_payload_buffer_desc_t *src_buffer_desc;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
     mca_coll_ml_collective_operation_progress_t *new_op;
 
     mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
@@ -296,7 +296,7 @@
     int ret, n_fragments = 1, comm_size;
 
     mca_coll_ml_topology_t *topo_info;
-    ml_payload_buffer_desc_t *src_buffer_desc;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
 
     mca_coll_ml_component_t *cm = &mca_coll_ml_component;
 
Index: ompi/mca/coll/ml/coll_ml_allocation.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_allocation.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_allocation.c	(working copy)
@@ -21,11 +21,11 @@
 
 long memory_buffer_index;
 
-ml_memory_block_desc_t *mca_coll_ml_allocate_block(struct mca_coll_ml_component_t *ml_component,
-                                                   ml_memory_block_desc_t *ml_memblock)
+mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(struct mca_coll_ml_component_t *ml_component,
+                                                   mca_bcol_base_memory_block_desc_t *ml_memblock)
 {
-    ml_memory_block_desc_t *ret = NULL;
-    ml_memory_block_desc_t *memory_block = NULL;
+    mca_bcol_base_memory_block_desc_t *ret = NULL;
+    mca_bcol_base_memory_block_desc_t *memory_block = NULL;
     mca_coll_ml_lmngr_t *memory_manager = NULL;
 
     if (ml_memblock) {
@@ -32,7 +32,7 @@
         ML_ERROR(("Memory already allocated - expecting NULL pointer"));
         return ret;
     }
-    memory_block = (ml_memory_block_desc_t*) calloc(1, sizeof(ml_memory_block_desc_t));
+    memory_block = (mca_bcol_base_memory_block_desc_t*) calloc(1, sizeof(mca_bcol_base_memory_block_desc_t));
 
     if (NULL == memory_block){
         ML_ERROR(("Couldn't allocate memory for ml_memblock"));
@@ -60,7 +60,7 @@
     return ret;
 }
 
-void mca_coll_ml_free_block (ml_memory_block_desc_t *ml_memblock)
+void mca_coll_ml_free_block (mca_bcol_base_memory_block_desc_t *ml_memblock)
 {
     if (!ml_memblock)
         return;
@@ -76,7 +76,7 @@
     free(ml_memblock);
 }
 
-int mca_coll_ml_initialize_block(ml_memory_block_desc_t *ml_memblock,
+int mca_coll_ml_initialize_block(mca_bcol_base_memory_block_desc_t *ml_memblock,
                                  uint32_t num_buffers,
                                  uint32_t num_banks,
                                  uint32_t buffer_size,
@@ -86,7 +86,7 @@
     int ret = OMPI_SUCCESS;
     uint32_t bank_loop, buff_loop;
     uint64_t addr_offset = 0;
-    ml_payload_buffer_desc_t *pbuff_descs = NULL,*pbuff_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,*pbuff_desc = NULL;
 
     if (NULL == ml_memblock){
         ML_ERROR(("Memory block not initialized"));
@@ -100,7 +100,7 @@
         goto exit_ERROR;
     }
 
-    pbuff_descs = (ml_payload_buffer_desc_t*) malloc(sizeof(ml_payload_buffer_desc_t)
+    pbuff_descs = (mca_bcol_base_payload_buffer_desc_t*) malloc(sizeof(mca_bcol_base_payload_buffer_desc_t)
             * num_banks * num_buffers);
 
     for(bank_loop = 0; bank_loop < num_banks; bank_loop++)
@@ -167,12 +167,12 @@
     return ret;
 }
 
-ml_payload_buffer_desc_t *mca_coll_ml_alloc_buffer (mca_coll_ml_module_t *module)
+mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer (mca_coll_ml_module_t *module)
 {
     uint64_t bindex;
     uint32_t bank, buffer, num_buffers;
-    ml_memory_block_desc_t *ml_memblock = module->payload_block;
-    ml_payload_buffer_desc_t *pbuff_descs = NULL,
+    mca_bcol_base_memory_block_desc_t *ml_memblock = module->payload_block;
+    mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,
         *ml_membuffer = NULL;
 
     /* Return a buffer */
Index: ompi/mca/coll/ml/coll_ml_allocation.h
===================================================================
--- ompi/mca/coll/ml/coll_ml_allocation.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_allocation.h	(working copy)
@@ -20,67 +20,6 @@
 #include "ompi/mca/mpool/base/base.h"
 #include "coll_ml_lmngr.h"
 
-typedef void (*mca_coll_ml_release_buff_fn_t)(struct ml_memory_block_desc_t *ml_memblock, uint32_t buff_id);
-
-struct ml_payload_buffer_desc_t {
-    void         *base_data_addr;   /* buffer address */
-    void         *data_addr;         /* buffer address  + header offset */
-    uint64_t     generation_number;  /* my generation */
-    uint64_t     bank_index;         /* my bank */
-    uint64_t     buffer_index;       /* my buff index */
-};
-/* convenience typedef */
-typedef struct ml_payload_buffer_desc_t ml_payload_buffer_desc_t;
-
-
-struct mca_coll_ml_lmngr_block_t;
-struct ml_memory_block_desc_t {
-
-    /* memory block for payload buffers */
-    struct mca_coll_ml_lmngr_block_t *block;
-
-    /* Address offset in bytes -- Indicates free memory in the block */
-    uint64_t   block_addr_offset;
-
-    /* size of the memory block */
-    size_t     size_block;
-
-    /* number of memory banks */
-    uint32_t     num_banks;
-
-    /* number of buffers per bank */
-    uint32_t    num_buffers_per_bank;
-
-    /* size of a payload buffer */
-    uint32_t     size_buffer;
-
-    /* pointer to buffer descriptors initialized */
-    ml_payload_buffer_desc_t *buffer_descs;
-
-    /* index of the next free buffer in the block */
-    uint64_t next_free_buffer;
-
-    uint32_t *bank_release_counters;
-
-    /* Counter that defines what bank should be synchronized next
-     * since collectives could be completed out of order, we have to make
-     * sure that memory synchronization collectives started in order ! */
-    int memsync_counter; 
-
-    /* This arrays of flags used to signal that the bank is ready for recycling */
-    bool *ready_for_memsync;
-
-    /* This flags monitors if bank is open for usage. Usually we expect that user
-     * will do the check only on buffer-zero allocation */
-    bool *bank_is_busy;
-
-};
-/* convenience typedef */
-typedef struct ml_memory_block_desc_t ml_memory_block_desc_t;
-
-
-
-
 /*
   Returns a block of memory from mpool
 
@@ -99,15 +38,15 @@
 struct mca_coll_ml_component_t;
 struct mca_coll_ml_module_t;
 
-ml_memory_block_desc_t *mca_coll_ml_allocate_block(
+mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(
                 struct mca_coll_ml_component_t  *ml_component,
-                struct ml_memory_block_desc_t *ml_memblock
+                struct mca_bcol_base_memory_block_desc_t *ml_memblock
                 );
     /* Allocate the memory from mpool */
     /* Register the memory block with bcols */
 
 void mca_coll_ml_free_block(
-                 ml_memory_block_desc_t *ml_memblock
+                 mca_bcol_base_memory_block_desc_t *ml_memblock
                 );
 
 
@@ -125,7 +64,7 @@
    On Failure: OMPI_ERROR
  */
 int mca_coll_ml_initialize_block(
-        ml_memory_block_desc_t *ml_memblock,
+        mca_bcol_base_memory_block_desc_t *ml_memblock,
         uint32_t num_buffers,
         uint32_t num_banks,
         uint32_t buffer_size,
@@ -146,12 +85,12 @@
    On Sucess: OMPI_SUCCESS
    On Failure: OMPI_ERROR
  */
-ml_payload_buffer_desc_t *mca_coll_ml_alloc_buffer(
+mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer(
             struct mca_coll_ml_module_t *module);
 
 int mca_coll_ml_free_buffer(
-        ml_memory_block_desc_t *ml_memblock,
-        struct ml_payload_buffer_desc_t *ml_membuffer
+        mca_bcol_base_memory_block_desc_t *ml_memblock,
+        struct mca_bcol_base_payload_buffer_desc_t *ml_membuffer
         );
 
 /*
@@ -166,7 +105,7 @@
 
   */
 int mca_coll_ml_register_block_bcol(
-                ml_memory_block_desc_t *ml_memblock
+                mca_bcol_base_memory_block_desc_t *ml_memblock
                 );
 
 #endif /* MCA_ML_ALLOC_H */
Index: ompi/mca/coll/ml/coll_ml_allreduce.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_allreduce.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_allreduce.c	(working copy)
@@ -91,7 +91,7 @@
 
     ptrdiff_t lb, extent;
 
-    ml_payload_buffer_desc_t *src_buffer_desc;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
     mca_coll_ml_collective_operation_progress_t *new_op;
 
     mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
@@ -253,7 +253,7 @@
     ptrdiff_t lb, extent;
     size_t pack_len, dt_size;
 
-    ml_payload_buffer_desc_t *src_buffer_desc;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
     mca_coll_ml_collective_operation_progress_t *coll_op;
 
     mca_coll_ml_component_t *cm = &mca_coll_ml_component;
Index: ompi/mca/coll/ml/coll_ml_barrier.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_barrier.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_barrier.c	(working copy)
@@ -37,7 +37,7 @@
 {
     ompi_free_list_item_t *item;
     mca_coll_ml_collective_operation_progress_t *coll_op;
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     
     /* allocate an ml buffer for signaling purposes */
     src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
Index: ompi/mca/coll/ml/coll_ml_bcast.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_bcast.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_bcast.c	(working copy)
@@ -177,7 +177,7 @@
     int ret, frag_len;
     size_t max_data = 0;
 
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     mca_coll_ml_collective_operation_progress_t *new_op = NULL;
     mca_coll_ml_task_setup_fn_t task_setup = NULL;
     mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
@@ -315,7 +315,7 @@
     size_t dt_size;
     void *buf;
 
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     mca_coll_ml_collective_operation_progress_t *new_op = NULL;
     mca_coll_ml_task_setup_fn_t task_setup = NULL;
 
@@ -448,7 +448,7 @@
 
     mca_coll_ml_collective_operation_progress_t * coll_op = NULL;
     mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     mca_coll_ml_task_setup_fn_t task_setup;
     OPAL_PTRDIFF_TYPE lb, extent;
 
@@ -710,7 +710,7 @@
     mca_coll_ml_collective_operation_progress_t * coll_op = NULL;
     mca_coll_ml_compound_functions_t *fixed_schedule;
     mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     mca_bcol_base_coll_fn_desc_t *func;
     OPAL_PTRDIFF_TYPE lb, extent;
 
@@ -813,7 +813,7 @@
 
         func = fixed_schedule[fn_idx].bcol_function;
         ret = func->coll_fn(&coll_op->variable_fn_params,
-                (struct coll_ml_function_t *) &fixed_schedule[fn_idx].constant_group_data);
+                (struct mca_bcol_base_function_t *) &fixed_schedule[fn_idx].constant_group_data);
         /* set the coll_fn_started flag to true */
         if (BCOL_FN_COMPLETE == ret) {
             /* done with this routine, bump the active counter */
Index: ompi/mca/coll/ml/coll_ml_colls.h
===================================================================
--- ompi/mca/coll/ml/coll_ml_colls.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_colls.h	(working copy)
@@ -25,7 +25,7 @@
 
     /* RLG - temp fix  !!!! - really need to remove this, but right now
      do not want to change the signature of the collective primitives to
-     use coll_ml_utility_data_t rather than coll_ml_function_t */
+     use coll_ml_utility_data_t rather than mca_bcol_base_function_t */
     int dummy;
 
     /* module */
@@ -239,7 +239,7 @@
 } mca_coll_ml_pending_type_t;
 
 /* Forward declaration */
-struct ml_payload_buffer_desc_t;
+struct mca_bcol_base_payload_buffer_desc_t;
 /* Data structure used to track ML level collective operation
  * progress.
  */
@@ -348,7 +348,7 @@
         struct full_message_t *message_descriptor;
 
         /* ML buffer descriptor attached to this buffer */
-        struct ml_payload_buffer_desc_t *buffer_desc;
+        struct mca_bcol_base_payload_buffer_desc_t *buffer_desc;
         /* handle for collective progress, e.g. alltoall */
         bcol_fragment_descriptor_t bcol_fragment_desc;
 
Index: ompi/mca/coll/ml/coll_ml_component.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_component.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_component.c	(working copy)
@@ -159,9 +159,9 @@
 
             const_args  = &seq_coll_op->coll_schedule->component_functions[fn_idx].constant_group_data;
             /* RLG - note need to move to useing coll_ml_utility_data_t as
-             * collective argument, rather than  coll_ml_function_t
+             * collective argument, rather than mca_bcol_base_function_t
              */
-            rc = progress_fn(&(seq_coll_op->variable_fn_params), (coll_ml_function_t *)const_args);
+            rc = progress_fn(&(seq_coll_op->variable_fn_params), (mca_bcol_base_function_t *)const_args);
             if (BCOL_FN_COMPLETE == rc) {
                 /* done with this routine */
                 seq_coll_op->sequential_routine.current_active_bcol_fn++;
@@ -205,7 +205,7 @@
         const_args = &task_status->ml_coll_operation->coll_schedule->
             component_functions[INDEX(task_status)].constant_group_data;
         rc = progress_fn(&(task_status->ml_coll_operation->variable_fn_params),
-                (coll_ml_function_t *)const_args);
+                (mca_bcol_base_function_t *)const_args);
         if (BCOL_FN_COMPLETE == rc) {
             ML_VERBOSE(3, ("GOT BCOL_COMPLETED!!!!"));
             rc = mca_coll_ml_task_completion_processing(&task_status, ACTIVE_L);
@@ -233,7 +233,7 @@
             const_args = &task_status->ml_coll_operation->coll_schedule->
                 component_functions[INDEX(task_status)].constant_group_data;
             rc = coll_fn(&(task_status->ml_coll_operation->variable_fn_params),
-                    (coll_ml_function_t *)const_args);
+                    (mca_bcol_base_function_t *)const_args);
             if (BCOL_FN_COMPLETE == rc) {
                 ML_VERBOSE(3, ("GOT BCOL_COMPLETED!"));
                 rc = mca_coll_ml_task_completion_processing(&task_status, PENDING_L);
Index: ompi/mca/coll/ml/coll_ml_hier_algorithms_ibarrier.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_hier_algorithms_ibarrier.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_hier_algorithms_ibarrier.c	(working copy)
@@ -66,8 +66,8 @@
     hierarchical_algorithms[BCOL_IBARRIER]->n_buffers=0;
 
     /* allocate space for the functions */
-    hierarchical_algorithms[BCOL_IBARRIER][0].functions=(coll_ml_function_t *)
-        malloc(sizeof(coll_ml_function_t)*
+    hierarchical_algorithms[BCOL_IBARRIER][0].functions=(mca_bcol_base_function_t *)
+        malloc(sizeof(mca_bcol_base_function_t)*
             hierarchical_algorithms[BCOL_IBARRIER][0].n_functions);
     if( NULL == hierarchical_algorithms[BCOL_IBARRIER][0].functions) {
         ret=OMPI_ERROR;
Index: ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c	(working copy)
@@ -75,8 +75,8 @@
                 collective_alg->n_functions ));
 
     /* allocate space for the functions */
-    collective_alg->functions = (coll_ml_function_t *)
-        malloc(sizeof(coll_ml_function_t) * collective_alg->n_functions);
+    collective_alg->functions = (mca_bcol_base_function_t *)
+        malloc(sizeof(mca_bcol_base_function_t) * collective_alg->n_functions);
     if( NULL == collective_alg->functions) {
         ML_ERROR(("Can't allocate memory.\n"));
         ret = OMPI_ERR_OUT_OF_RESOURCE;
Index: ompi/mca/coll/ml/coll_ml_inlines.h
===================================================================
--- ompi/mca/coll/ml/coll_ml_inlines.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_inlines.h	(working copy)
@@ -45,7 +45,7 @@
         int mca_coll_ml_buffer_recycling(mca_coll_ml_collective_operation_progress_t *ml_request)
 {
     mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)ml_request->coll_module;
-    ml_memory_block_desc_t *ml_memblock = ml_module->payload_block;
+    mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block;
     uint64_t bank_index = ml_request->fragment_data.buffer_desc->bank_index;
     int rc;
 
@@ -417,7 +417,7 @@
             rc = func->bcol_function->coll_fn(&op_prog->variable_fn_params,
                     /* Pasha: Need to update the prototype of the func,
                        right now it is ugly hack for compilation */
-                    (struct coll_ml_function_t *)&func->constant_group_data);
+                    (struct mca_bcol_base_function_t *)&func->constant_group_data);
             switch(rc) {
                 case BCOL_FN_NOT_STARTED:
                     /* put it on pending list */
@@ -604,7 +604,7 @@
 
         bcol_func = (sched->component_functions[ifunc].bcol_function);
         ret = bcol_func->coll_fn(&coll_op->variable_fn_params,
-                    (struct coll_ml_function_t *) &sched->component_functions[ifunc].constant_group_data);
+                    (struct mca_bcol_base_function_t *) &sched->component_functions[ifunc].constant_group_data);
 
         if (BCOL_FN_COMPLETE == ret) {
             if (ifunc == n_fn - 1) {
Index: ompi/mca/coll/ml/coll_ml_lmngr.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_lmngr.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_lmngr.c	(working copy)
@@ -89,22 +89,6 @@
         construct_lmngr,
         destruct_lmngr);
 
-static void lmngr_block_constructor(mca_coll_ml_lmngr_block_t *item) 
-{
-    item->base_addr = NULL;
-}
-
-static void lnmgr_block_destructor(mca_coll_ml_lmngr_block_t *item) 
-{
-    /* I have nothing to do here */
-}
-
-OBJ_CLASS_INSTANCE(mca_coll_ml_lmngr_block_t,
-        opal_list_item_t,
-        lmngr_block_constructor,
-        lnmgr_block_destructor);
-
-
 int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr, 
         size_t block_size, size_t list_size, size_t alignment)
 {
@@ -246,7 +230,7 @@
     /* slice the memory to blocks */
     addr = (unsigned char *) lmngr->base_addr;
     for(num_blocks = 0; num_blocks < (int)lmngr->list_size; num_blocks++) {
-        mca_coll_ml_lmngr_block_t *item = OBJ_NEW(mca_coll_ml_lmngr_block_t);
+        mca_bcol_base_lmngr_block_t *item = OBJ_NEW(mca_bcol_base_lmngr_block_t);
         item->base_addr = (void *)addr;
         item->lmngr = lmngr;
         /* ML_VERBOSE(10, ("Appending block # %d %p", num_blocks, (void *)addr)); */
@@ -260,7 +244,7 @@
     return OMPI_SUCCESS;
 }
 
-mca_coll_ml_lmngr_block_t* mca_coll_ml_lmngr_alloc (
+mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
         mca_coll_ml_lmngr_t *lmngr)
 {
     int rc;
@@ -282,10 +266,10 @@
         return NULL;
     }
 
-    return (mca_coll_ml_lmngr_block_t *)opal_list_remove_first(list);
+    return (mca_bcol_base_lmngr_block_t *)opal_list_remove_first(list);
 }
 
-void mca_coll_ml_lmngr_free(mca_coll_ml_lmngr_block_t *block)
+void mca_coll_ml_lmngr_free(mca_bcol_base_lmngr_block_t *block)
 {
     opal_list_append(&block->lmngr->blocks_list, (opal_list_item_t *)block);
 }
Index: ompi/mca/coll/ml/coll_ml_lmngr.h
===================================================================
--- ompi/mca/coll/ml/coll_ml_lmngr.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_lmngr.h	(working copy)
@@ -55,14 +55,6 @@
 typedef struct mca_coll_ml_lmngr_t mca_coll_ml_lmngr_t;
 OBJ_CLASS_DECLARATION(mca_coll_ml_lmngr_t);
 
-struct mca_coll_ml_lmngr_block_t {
-    opal_list_item_t super;
-    mca_coll_ml_lmngr_t *lmngr;
-    void* base_addr;
-};
-typedef struct mca_coll_ml_lmngr_block_t mca_coll_ml_lmngr_block_t;
-OBJ_CLASS_DECLARATION(mca_coll_ml_lmngr_block_t);
-
 /* read user defined parametres for list manager */
 int mca_coll_ml_lmngr_reg(void);
 /* If programmer want to user other than default mca
@@ -76,10 +68,10 @@
 int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc);
 
 /* Allocate a block from memory list manager */
-mca_coll_ml_lmngr_block_t* mca_coll_ml_lmngr_alloc (
+mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
         mca_coll_ml_lmngr_t *lmngr);
 
 /* Return block to list memory manager */
-void mca_coll_ml_lmngr_free (mca_coll_ml_lmngr_block_t *block);
+void mca_coll_ml_lmngr_free (mca_bcol_base_lmngr_block_t *block);
 
 #endif
Index: ompi/mca/coll/ml/coll_ml_memsync.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_memsync.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_memsync.c	(working copy)
@@ -27,7 +27,7 @@
 static int mca_coll_ml_memsync_recycle_memory(mca_coll_ml_collective_operation_progress_t *coll_op)
 {
     mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)coll_op->coll_module;
-    ml_memory_block_desc_t *ml_memblock = ml_module->payload_block;
+    mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block;
     mca_coll_ml_collective_operation_progress_t *pending_op = NULL;
     int bank = coll_op->full_message.bank_index_to_recycle;
     int rc;
Index: ompi/mca/coll/ml/coll_ml_module.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_module.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_module.c	(working copy)
@@ -219,7 +219,7 @@
         /* gvm Leak FIX Remove fragment free list */
         OBJ_DESTRUCT(&(module->fragment_descriptors));
         OBJ_DESTRUCT(&(module->message_descriptors));
-        /* push ml_memory_block_desc_t back on list manager */
+        /* push mca_bcol_base_memory_block_desc_t back on list manager */
         mca_coll_ml_free_block(module->payload_block);
         /* release the cinvertor if it was allocated */
         if (NULL != module->reference_convertor) {
@@ -510,7 +510,8 @@
             for (j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) {
                 bcol_module = topo->component_pairs[i].bcol_modules[j];
                 if (NULL != bcol_module->bcol_memory_init) {
-                    ret = bcol_module->bcol_memory_init(ml_module,
+                    ret = bcol_module->bcol_memory_init(ml_module->payload_block,
+                                                        ml_module->data_offset,
                                                         bcol_module,
                                                         (NULL != bcol_module->network_context) ?
                                                         bcol_module->network_context->context_data: NULL);
@@ -2378,8 +2379,8 @@
         goto exit_ERROR;
     }
 
-    topo->route_vector = (mca_coll_ml_route_info_t *)
-        calloc(comm_size, sizeof(mca_coll_ml_route_info_t));
+    topo->route_vector = (mca_bcol_base_route_info_t *)
+        calloc(comm_size, sizeof(mca_bcol_base_route_info_t));
     if (NULL == topo->route_vector) {
         ML_VERBOSE(10, ("Cannot allocate memory.\n"));
         rc = OMPI_ERR_OUT_OF_RESOURCE;
Index: ompi/mca/coll/ml/coll_ml_reduce.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_reduce.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_reduce.c	(working copy)
@@ -120,7 +120,7 @@
 
     ptrdiff_t lb, extent;
 
-    ml_payload_buffer_desc_t *src_buffer_desc;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
     mca_coll_ml_collective_operation_progress_t *new_op;
 
     mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
@@ -278,7 +278,7 @@
                            int large_data_reduce) {
     ptrdiff_t lb, extent;
     size_t pack_len, dt_size;
-    ml_payload_buffer_desc_t *src_buffer_desc = NULL;
+    mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
     mca_coll_ml_collective_operation_progress_t * coll_op = NULL;
     bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count);
     mca_coll_ml_component_t *cm = &mca_coll_ml_component;
Index: ompi/mca/coll/ml/coll_ml_select.c
===================================================================
--- ompi/mca/coll/ml/coll_ml_select.c	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_select.c	(working copy)
@@ -310,7 +310,7 @@
 int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module,
                 int bcoll_type,
                 bcol_function_args_t *bcol_fn_arguments,
-                coll_ml_function_t *ml_fn_arguments )
+                mca_bcol_base_function_t *ml_fn_arguments )
 {
         
     struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL;
Index: ompi/mca/coll/ml/coll_ml_select.h
===================================================================
--- ompi/mca/coll/ml/coll_ml_select.h	(revision 30606)
+++ ompi/mca/coll/ml/coll_ml_select.h	(working copy)
@@ -19,7 +19,7 @@
 int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module,
                 int bcoll_type,
                 bcol_function_args_t *bcol_fn_arguments,
-                coll_ml_function_t *ml_fn_arguments );
+                mca_bcol_base_function_t *ml_fn_arguments );
 /* 
  *  Goes through the function table and filters the collectives functions
  *  based on comm-time attributes.
