The following patch makes orte-checkpoint communicate with orterun again:

diff --git a/orte/tools/orte-checkpoint/orte-checkpoint.c b/orte/tools/orte-checkpoint/orte-checkpoint.c
index 7106342..8539f34 100644
--- a/orte/tools/orte-checkpoint/orte-checkpoint.c
+++ b/orte/tools/orte-checkpoint/orte-checkpoint.c
@@ -834,7 +834,7 @@ static int notify_process_for_checkpoint(opal_crs_base_ckpt_options_t *options)
     }
 
     if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&(orterun_hnp->name), buffer,
-                                                       ORTE_RML_TAG_CKPT, hnp_receiver,
+                                                       ORTE_RML_TAG_CKPT, orte_rml_send_callback,
                                                        NULL))) {
         exit_status = ret;
         goto cleanup;
@@ -845,11 +845,6 @@ static int notify_process_for_checkpoint(opal_crs_base_ckpt_options_t *options)
                          ORTE_JOBID_PRINT(jobid));
 
  cleanup:
-    if( NULL != buffer) {
-        OBJ_RELEASE(buffer);
-        buffer = NULL;
-    }
-
     if( ORTE_SUCCESS != exit_status ) {
         opal_show_help("help-orte-checkpoint.txt", "unable_to_connect",
                        true, orte_checkpoint_globals.pid);

Before committing the code into the repository I wanted to make sure it is the correct way to fix it.

The first change switches the callback to orte_rml_send_callback(). When I initially made the code compile again I used hnp_receiver() to change the code from blocking to non-blocking, and that was wrong.

The second change (removal of OBJ_RELEASE(buffer)) is necessary because this seems to delete the buffer during communication, and then everything breaks badly.

Adrian