Found a pretty nasty frag leak (and a minor one) in ob1 (see commit below). If
this fix addresses some of the hangs we are seeing on InfiniBand, LANL might
want a 1.4.6 rolled (or a faster rollout for 1.6.0).

-Nathan

---------- Forwarded message ----------
List-Post: devel@lists.open-mpi.org
Date: Thu, 1 Mar 2012 08:53:39 -0700
From: hje...@osl.iu.edu
Reply-To: de...@open-mpi.org
To: s...@open-mpi.org
Subject: [OMPI svn] svn:open-mpi r26077

Author: hjelmn
List-Post: devel@lists.open-mpi.org
Date: 2012-03-01 10:53:39 EST (Thu, 01 Mar 2012)
New Revision: 26077
URL: https://svn.open-mpi.org/trac/ompi/changeset/26077

Log:
ob1: fix two fragment leaks
 - MAJOR! The get src descriptor leaks if mca_bml_base_send fails.
 - Minor: the descriptor is leaked in mca_pml_send_request_start_copy if the
   btl returns OMPI_ERR_RESOURCE_BUSY.
Text files modified:
   trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c |    27 ++++++++++++++++-----------
   1 files changed, 16 insertions(+), 11 deletions(-)

Modified: trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c
==============================================================================
--- trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c    (original)
+++ trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c    2012-03-01 10:53:39 EST (Thu, 
01 Mar 2012)
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
@@ -12,6 +13,8 @@
  * Copyright (c) 2008      UT-Battelle, LLC. All rights reserved.
  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
  * Copyright (c) 2012      NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2012      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -546,15 +549,14 @@
         }
         return OMPI_SUCCESS;
     }
-    switch(OPAL_SOS_GET_ERROR_CODE(rc)) {
-        case OMPI_ERR_RESOURCE_BUSY:
-            /* No more resources. Allow the upper level to queue the send */
-            rc = OMPI_ERR_OUT_OF_RESOURCE;
-            break;
-        default:
-            mca_bml_base_free(bml_btl, des);
-            break;
+
+    if (OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc)) {
+        /* No more resources. Allow the upper level to queue the send */
+        rc = OMPI_ERR_OUT_OF_RESOURCE;
     }
+
+    mca_bml_base_free (bml_btl, des);
+
     return rc;
 }

@@ -631,7 +633,7 @@
      * operation is achieved.
      */

-    mca_btl_base_descriptor_t* des;
+    mca_btl_base_descriptor_t *des, *src = NULL;
     mca_btl_base_segment_t* segment;
     mca_pml_ob1_hdr_t* hdr;
     bool need_local_cb = false;
@@ -640,7 +642,6 @@
     bml_btl = sendreq->req_rdma[0].bml_btl;
     if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & 
(MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
         mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
-        mca_btl_base_descriptor_t* src;
         size_t i;
         size_t old_position = 
sendreq->req_send.req_base.req_convertor.bConverted;

@@ -781,6 +782,10 @@
         return OMPI_SUCCESS;
     }
     mca_bml_base_free(bml_btl, des);
+    if (NULL != src) {
+        mca_bml_base_free (bml_btl, src);
+    }
+
     return rc;
 }

@@ -1144,7 +1149,7 @@
                               0,
                               &frag->rdma_length,
                               MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
-                             MCA_BTL_DES_FLAGS_PUT,
+                              MCA_BTL_DES_FLAGS_PUT,
                               &des );

     if( OPAL_UNLIKELY(NULL == des) ) {
_______________________________________________
svn mailing list
s...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/svn

Reply via email to