Hi, I am planning to commit the following patch, which removes two progress() calls. Those two calls are responsible for most of our deep recursion troubles, and I also think they are completely unnecessary.
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
index 5899243..641176e 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
@@ -704,9 +704,6 @@ int mca_pml_ob1_recv_request_schedule_once(
             mca_bml_base_free(bml_btl,dst);
             continue;
         }
-
-        /* run progress as the prepare (pinning) can take some time */
-        mca_bml.bml_progress();
     }
 
     return OMPI_SUCCESS;
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
index 0998a05..9d7f3f9 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
@@ -968,7 +968,6 @@ cannot_pack:
             mca_bml_base_free(bml_btl,des);
             continue;
         }
-        mca_bml.bml_progress();
     }
 
     return OMPI_SUCCESS;
--
Gleb.