Thanks for the comment. Fixed it. On Mon, Aug 8, 2011 at 6:28 PM, Jeff Squyres <jsquy...@cisco.com> wrote:
> Mike -- > > Does mxm_init() do Reasonable Things to check to see if the local > OpenFabrics-capable devices are unsuitable for MXM? E.g., does it check to > see if the local OpenFabrics devices are MXM-capable, and if not, fail > gracefully? > > Also, I would suggest NOT showing a show_help message if there are OF > devices available such that CM/MXM can (probably) fail over to OB1/openib. > I.e., only show a show_help message if devices are available for MXM, but > an actual error occurs during the MXM initialization. > > Otherwise, if I mpirun (with the MXM MTL installed) on a system with only > RoCE or iWARP devices present, MXM will complain but then fail over to > OB1/openib. That would probably be confusing. > > > > On Aug 7, 2011, at 8:06 AM, mi...@osl.iu.edu wrote: > > > Author: miked > > Date: 2011-08-07 08:06:49 EDT (Sun, 07 Aug 2011) > > New Revision: 25005 > > URL: https://svn.open-mpi.org/trac/ompi/changeset/25005 > > > > Log: > > better mxm selection mechanism, some refactoring > > Text files modified: > > trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 4 ++-- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c | 32 > ++++++++++++++------------------ > > trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c | 6 +++--- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h | 6 +++++- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c | 4 ++-- > > 5 files changed, 26 insertions(+), 26 deletions(-) > > > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c > > > ============================================================================== > > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c (original) > > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c 2011-08-07 08:06:49 EDT > (Sun, 07 Aug 2011) > > @@ -18,9 +18,9 @@ > > mxm_error_t err; > > mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) > mtl_request; > > > > - err = mxm_req_cancel(mtl_mxm_request->mxm_base_request); > > + err = mxm_req_cancel(&mtl_mxm_request->mxm.base); > > if (MXM_OK == err) { > > - err = 
mxm_req_test(mtl_mxm_request->mxm_base_request); > > + err = mxm_req_test(&mtl_mxm_request->mxm.base); > > if (MXM_OK == err) { > > mtl_request->ompi_req->req_status._cancelled = true; > > > mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); > > > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c > > > ============================================================================== > > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c (original) > > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c 2011-08-07 08:06:49 > EDT (Sun, 07 Aug 2011) > > @@ -72,18 +72,27 @@ > > > > static int ompi_mtl_mxm_component_open(void) > > { > > - struct stat st; > > > > - /* Component available only if IB hardware is present */ > > - if (0 == stat("/dev/infiniband/uverbs0", &st)) { > > - return OMPI_SUCCESS; > > - } else { > > + mxm_context_opts_t mxm_opts; > > + mxm_error_t err; > > + > > + mca_mtl_mxm_output = opal_output_open(NULL); > > + opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose); > > + > > + mxm_fill_context_opts(&mxm_opts); > > + err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context); > > + if (MXM_OK != err) { > > + orte_show_help("help-mtl-mxm.txt", "mxm init", true, > > + mxm_error_string(err)); > > return OPAL_ERR_NOT_AVAILABLE; > > } > > + return OMPI_SUCCESS; > > } > > > > static int ompi_mtl_mxm_component_close(void) > > { > > + mxm_cleanup(ompi_mtl_mxm.mxm_context); > > + ompi_mtl_mxm.mxm_context = NULL; > > return OMPI_SUCCESS; > > } > > > > @@ -91,21 +100,8 @@ > > ompi_mtl_mxm_component_init(bool enable_progress_threads, > > bool enable_mpi_threads) > > { > > - mxm_context_opts_t mxm_opts; > > - mxm_error_t err; > > int rc; > > > > - mca_mtl_mxm_output = opal_output_open(NULL); > > - opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose); > > - > > - mxm_fill_context_opts(&mxm_opts); > > - err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context); > > - if (MXM_OK != err) { > > - orte_show_help("help-mtl-mxm.txt", "mxm 
init", true, > > - mxm_error_string(err)); > > - return NULL; > > - } > > - > > rc = ompi_mtl_mxm_module_init(); > > if (OMPI_SUCCESS != rc) { > > return NULL; > > > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c > > > ============================================================================== > > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c (original) > > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c 2011-08-07 08:06:49 EDT > (Sun, 07 Aug 2011) > > @@ -22,12 +22,12 @@ > > { > > mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context; > > struct ompi_request_t *ompi_req = req->super.ompi_req; > > - mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t > *)req->mxm_base_request; > > + mxm_recv_req_t *mxm_recv_req = &req->mxm.recv; > > > > /* Set completion status and envelope */ > > ompi_req->req_status.MPI_TAG = > mxm_recv_req->completion.sender_tag; > > ompi_req->req_status.MPI_SOURCE = > mxm_recv_req->completion.sender_imm; > > - ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error); > > + ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error); > > ompi_req->req_status._ucount = > mxm_recv_req->completion.actual_len; > > > > /* Copy data */ > > @@ -63,7 +63,7 @@ > > return ret; > > } > > > > - mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request; > > + mxm_recv_req = &mtl_mxm_request->mxm.recv; > > > > /* prepare a receive request embedded in the MTL request */ > > mxm_recv_req->base.state = MXM_REQ_NEW; > > > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h > > > ============================================================================== > > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h (original) > > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h 2011-08-07 08:06:49 EDT > (Sun, 07 Aug 2011) > > @@ -16,7 +16,11 @@ > > > > struct mca_mtl_mxm_request_t { > > struct mca_mtl_request_t super; > > - mxm_req_base_t *mxm_base_request; > > + union { > > + mxm_req_base_t base; > > + 
mxm_send_req_t send; > > + mxm_recv_req_t recv; > > + } mxm; > > /* mxm_segment_t mxm_segment[1]; */ > > void *buf; > > size_t length; > > > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c > > > ============================================================================== > > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c (original) > > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c 2011-08-07 08:06:49 EDT > (Sun, 07 Aug 2011) > > @@ -25,7 +25,7 @@ > > free(mtl_mxm_request->buf); > > } > > > > - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error); > > + mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error); > > > > mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); > > } > > @@ -93,7 +93,7 @@ > > return ret; > > } > > > > - mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request; > > + mxm_send_req = &mtl_mxm_request->mxm.send; > > > > /* prepare a send request embedded in the MTL request */ > > mxm_send_req->base.state = MXM_REQ_NEW; > > _______________________________________________ > > svn-full mailing list > > svn-f...@open-mpi.org > > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full > > > -- > Jeff Squyres > jsquy...@cisco.com > For corporate legal information go to: > http://www.cisco.com/web/about/doing_business/legal/cri/ > > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel >