Hi Thuan,

Ack.

B.R/Thang 

-----Original Message-----
From: Thuan Tran <thuan.t...@dektech.com.au> 
Sent: Thursday, September 24, 2020 4:05 PM
To: Minh Hon Chau <minh.c...@dektech.com.au>; Thang Duc Nguyen 
<thang.d.ngu...@dektech.com.au>; Thanh Nguyen <thanh.ngu...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net; Thuan Tran <thuan.t...@dektech.com.au>
Subject: [PATCH 1/1] smf: improve admin operation from serial to parallel 
[#3221]

- In one-step upgrade with new install applications, SMF invoke admin operation 
(sync mode) serially. This way may stuck due to SU dependencies cause upgrade 
fail in the end as admin operation timeout.
- Improve admin operation from serial to parallel to avoid SU dependencies 
issue, also help speed up upgrade time.
---
 src/smf/smfd/SmfAdminState.cc |  70 ++++++++++++---
 src/smf/smfd/SmfAdminState.h  |   7 +-
 src/smf/smfd/SmfUtils.cc      | 161 +++++++++++++++++++++++++++-------
 src/smf/smfd/SmfUtils.h       |  37 ++++++++
 4 files changed, 232 insertions(+), 43 deletions(-)

diff --git a/src/smf/smfd/SmfAdminState.cc b/src/smf/smfd/SmfAdminState.cc 
index 473021521..bc2bb1b85 100755
--- a/src/smf/smfd/SmfAdminState.cc
+++ b/src/smf/smfd/SmfAdminState.cc
@@ -511,8 +511,8 @@ bool 
SmfAdminStateHandler::changeAdminState(SaAmfAdminStateT fromState,
              saf_error(errno_));
     }
   } else if (nodeList_.size() == 1) {
-    TRACE("%s: Use serialized for one node", __FUNCTION__);
-    rc = adminOperationSerialized(toState, nodeList_);
+    TRACE("%s: admin op for one node", __FUNCTION__);
+    rc = adminOperationParallel(toState, nodeList_);
     if (rc == false) {
       LOG_NO("%s: setAdminStateNode() Fail %s", __FUNCTION__,
              saf_error(errno_));
@@ -521,9 +521,9 @@ bool 
SmfAdminStateHandler::changeAdminState(SaAmfAdminStateT fromState,
 
   // Set admin state for SUs
   if ((rc == true) && (!suList_.empty())) {
-    TRACE("%s: Use serialized for SUs", __FUNCTION__);
+    TRACE("%s: admin op for SUs", __FUNCTION__);
     // Do only if setting admin state for nodes did not fail
-    rc = adminOperationSerialized(toState, suList_);
+    rc = adminOperationParallel(toState, suList_);
     if (rc == false) {
       LOG_NO("%s: setAdminStateSUs() Fail %s", __FUNCTION__,
              saf_error(errno_));
@@ -576,22 +576,54 @@ bool SmfAdminStateHandler::adminOperationNodeGroup(
 // Set given admin state to all units in the given unitList  // Return false 
if Fail. errno_ is set  // -bool SmfAdminStateHandler::adminOperationSerialized(
+bool SmfAdminStateHandler::adminOperationParallel(
     SaAmfAdminOperationIdT adminState,
     const std::list<unitNameAndState> &i_unitList) {
   bool rc = true;
-  errno_ = SA_AIS_OK;
 
   TRACE_ENTER();
 
+  timespec now = base::ReadMonotonicClock();  timespec timeout = now + 
+ base::NanosToTimespec(smfd_cb->adminOpTimeout);
+
   if (!i_unitList.empty()) {
-    for (auto &unit : i_unitList) {
-      rc = adminOperation(adminState, unit.name);
-      if (rc == false) {
-        // Failed to set admin state
-        break;
+    for (auto &unit : i_unitList)
+      adminOperationAsync(adminState, unit.name);
+
+    bool adminFail = false;
+    while (base::ReadMonotonicClock() < timeout) {
+      auto it = adminAsyncList_.begin();
+      while (it != adminAsyncList_.end()) {
+        if ((*it)->isAdminAsyncDone()) {
+          if ((*it)->getAdminAsyncResult() != SA_AIS_OK) {
+            LOG_ER("Admin op FAIL %s for %s",
+                   saf_error((*it)->getAdminAsyncResult()),
+                   (*it)->getAdminAsyncObject().c_str());
+            adminFail = true;
+            rc = false;
+            break;
+          } else {
+            delete (*it);
+            it = adminAsyncList_.erase(it);
+            continue;
+          }
+        }
+        it++;
       }
+      if (adminFail) break;  // one admin operation fail
+      if (adminAsyncList_.empty()) break;  // all admin operations done OK
+      sleep(1);
     }
+
+    for (auto &immUtil : adminAsyncList_) {
+      if (adminFail == false) {
+        LOG_ER("Admin op TIMEOUT for %s",
+              immUtil->getAdminAsyncObject().c_str());
+        rc = false;
+      }
+      delete immUtil;
+    }
+    adminAsyncList_.clear();
   }
 
   TRACE_LEAVE();
@@ -667,6 +699,8 @@ SaAmfAdminStateT SmfAdminStateHandler::getAdminState(
   SaAmfAdminStateT adminState = SA_AMF_ADMIN_UNLOCKED;
   if (p_adminState != nullptr) adminState = *p_adminState;
 
+  immutil_saImmOmAccessorFinalize(accessorHandle_);
+
   TRACE_LEAVE();
   return adminState;
 }
@@ -972,3 +1006,17 @@ bool 
SmfAdminStateHandler::adminOperation(SaAmfAdminOperationIdT adminOperation,
   TRACE_LEAVE2("%s", rc ? "OK" : "FAIL");
   return rc;
 }
+
+///
+/// Set given admin state (Async) to one unit /// void 
+SmfAdminStateHandler::adminOperationAsync(
+          SaAmfAdminOperationIdT adminOperation, const std::string 
+&unitName) {
+  TRACE_ENTER();
+  SmfImmUtils *immUtil = new SmfImmUtils();
+  TRACE("\t Unit name '%s', adminOperation=%d", unitName.c_str(),
+        adminOperation);
+  immUtil->callAdminOperationAsync(unitName, adminOperation);
+  adminAsyncList_.push_back(immUtil);
+  TRACE_LEAVE();
+}
diff --git a/src/smf/smfd/SmfAdminState.h b/src/smf/smfd/SmfAdminState.h index 
6d6836df7..da518ce57 100644
--- a/src/smf/smfd/SmfAdminState.h
+++ b/src/smf/smfd/SmfAdminState.h
@@ -84,11 +84,13 @@ class SmfAdminStateHandler {
   bool adminOperationNodeGroup(SaAmfAdminStateT fromState,
                                  SaAmfAdminOperationIdT toState);
   // Using m_suList
-  bool adminOperationSerialized(SaAmfAdminOperationIdT adminState,
+  bool adminOperationParallel(SaAmfAdminOperationIdT adminState,
                                 const std::list<unitNameAndState>& i_nodeList);
   bool nodeGroupAdminOperation(SaAmfAdminOperationIdT adminState);
   bool adminOperation(SaAmfAdminOperationIdT adminState,
                       const std::string& unitName);
+  void adminOperationAsync(SaAmfAdminOperationIdT adminState,
+                      const std::string& unitName);
 
   SaAmfAdminStateT getAdminState(const std::string& i_unit);
   bool setNodeGroupParentDn();
@@ -96,6 +98,7 @@ class SmfAdminStateHandler {
   std::list<unitNameAndState>* allUnits_;
   std::list<unitNameAndState> suList_;
   std::list<unitNameAndState> nodeList_;
+  std::list<class SmfImmUtils*> adminAsyncList_;
 
   const SaVersionT immVersion_{'A', 2, 17};
 
@@ -127,5 +130,5 @@ class SmfAdminStateHandler {
   DELETE_COPY_AND_MOVE_OPERATORS(SmfAdminStateHandler);
 };
 
-#endif /* SMF_SMFD_SMFADMINSTATE_H_ */
+#endif  // SMF_SMFD_SMFADMINSTATE_H_
 
diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index 
9858a798e..a7f0cf895 100644
--- a/src/smf/smfd/SmfUtils.cc
+++ b/src/smf/smfd/SmfUtils.cc
@@ -21,8 +21,10 @@
  */
 #include "smf/smfd/SmfUtils.h"
 
+#include <poll.h>
 #include <unistd.h>
 #include <string.h>
+#include <thread>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
@@ -218,7 +220,7 @@ std::string replaceAllCopy(const std::string &i_haystack,  
//================================================================================
 
 SmfImmUtils::SmfImmUtils()
-    : m_omHandle(0), m_ownerHandle(0), m_accessorHandle(0) {
+    : m_omHandle(0), m_ownerHandle(0), m_accessorHandle(0), 
+ m_adminOperId(0) {
   initialize();
 }
 
@@ -227,13 +229,34 @@ SmfImmUtils::SmfImmUtils()  // 
------------------------------------------------------------------------------
 SmfImmUtils::~SmfImmUtils() { finalize(); }
 
+SmfImmUtils *SmfImmUtils::me(0);
+std::mutex SmfImmUtils::m_mutex;
+void SmfImmUtils::adminOperationInvokeCallback(
+    SaInvocationT invocation, SaAisErrorT opRetVal, SaAisErrorT error) 
+{
+  TRACE("Admin async operation %s inv=%llu return error=%d opRetVal=%d",
+        me->getAdminAsyncObject().c_str(), invocation, error, 
+opRetVal);
+  me->m_admOiReturn = SA_AIS_ERR_TRY_AGAIN;
+  if (error == SA_AIS_ERR_TRY_AGAIN) {
+    return;
+  } else if (error == SA_AIS_OK) {
+    if (opRetVal == SA_AIS_ERR_BUSY || opRetVal == SA_AIS_ERR_TRY_AGAIN)
+      return;
+    me->m_admOiReturn = opRetVal;
+  } else {
+    me->m_admOiReturn = error;
+  }
+  me->m_asyncThreadRunning = false;
+}
+
+
 // 
------------------------------------------------------------------------------
 // initialize()
 // 
------------------------------------------------------------------------------
 bool SmfImmUtils::initialize(void) {
   SaVersionT local_version = s_immVersion;
+  static SaImmCallbacksT immOmCb = 
+ {SmfImmUtils::adminOperationInvokeCallback};
   if (m_omHandle == 0) {
-    (void)immutil_saImmOmInitialize(&m_omHandle, NULL, &local_version);
+    (void)immutil_saImmOmInitialize(&m_omHandle, &immOmCb, 
+ &local_version);
   }
 
   if (m_ownerHandle == 0) {
@@ -245,6 +268,9 @@ bool SmfImmUtils::initialize(void) {
     (void)immutil_saImmOmAccessorInitialize(m_omHandle, &m_accessorHandle);
   }
 
+  m_asyncThreadRunning = false;
+  m_admOiReturn = SA_AIS_OK;
+
   return true;
 }
 
@@ -252,6 +278,9 @@ bool SmfImmUtils::initialize(void) {  // finalize()  // 
------------------------------------------------------------------------------
 bool SmfImmUtils::finalize(void) {
+
+  m_asyncThreadRunning = false;
+
   if (m_ownerHandle != 0) {
     (void)immutil_saImmOmAdminOwnerFinalize(m_ownerHandle);
   }
@@ -547,6 +576,95 @@ done:
   return rc;
 }
 
+static std::atomic<SaInvocationT> smfd_adminAsyncInv{0}; // 
+-----------------------------------------------------------------------
+-----
+// adminOperationAsyncThread()
+// 
+-----------------------------------------------------------------------
+----- void SmfImmUtils::adminOperationAsyncThread(void) {
+  struct pollfd fds;
+  SaAisErrorT rc;
+  SaSelectionObjectT selObj = -1;
+
+  TRACE_ENTER();
+  rc = immutil_saImmOmSelectionObjectGet(m_omHandle, &selObj);  if (rc 
+ != SA_AIS_OK) {
+    LOG_ER("saImmOmSelectionObjectGet FAILED: %s", saf_error(rc));
+    return;
+  }
+
+  fds.fd = (int)selObj;
+  fds.events = POLLIN;
+  m_asyncThreadRunning = true;
+  m_admOiReturn = SA_AIS_ERR_TRY_AGAIN;
+
+  while (m_asyncThreadRunning) {
+    if (m_admOiReturn == SA_AIS_ERR_TRY_AGAIN) {
+      SaNameT objectName;
+      osaf_extended_name_lend(m_objectDn.c_str(), &objectName);
+      const SaNameT *objectNames[2];
+      objectNames[0] = &objectName;
+      objectNames[1] = NULL;
+      rc = immutil_saImmOmAdminOwnerSet(
+                  m_ownerHandle, objectNames, SA_IMM_ONE);
+      if (rc != SA_AIS_OK) {
+        LOG_NO("Fail to set admin owner, rc=%s, dn=[%s]", saf_error(rc),
+              m_objectDn.c_str());
+        continue;
+      }
+      const SaImmAdminOperationParamsT_2 *params[1] = {NULL};
+      rc = immutil_saImmOmAdminOperationInvokeAsync_2(
+              m_ownerHandle, smfd_adminAsyncInv++,
+              &objectName, 0, m_adminOperId, params);
+      immutil_saImmOmAdminOwnerRelease(m_ownerHandle, objectNames, SA_IMM_ONE);
+      if (rc != SA_AIS_OK) {
+        LOG_NO("Fail to invoke admin operation, rc=%s. dn=[%s], opId=[%llu]",
+              saf_error(rc), m_objectDn.c_str(), m_adminOperId);
+        continue;
+      }
+      TRACE("Invoked admin operation async on %s", m_objectDn.c_str());
+      m_admOiReturn = SA_AIS_OK;
+    }
+
+    int ret = poll(&fds, 1, 1000);
+    if (ret == -1) {
+      LOG_ER("poll error: %s\n", strerror(errno));
+      break;
+    }
+
+    if ((ret > 0) && (fds.revents & POLLIN)) {
+      TRACE("Callback for admin async: %s", m_objectDn.c_str());
+      std::lock_guard<std::mutex> guard(m_mutex);
+      me = this;
+      saImmOmDispatch(m_omHandle, SA_DISPATCH_ONE);
+      if (m_admOiReturn == SA_AIS_ERR_TRY_AGAIN) sleep(1);
+    }
+  }
+
+  m_asyncThreadRunning = false;
+  TRACE_LEAVE();
+}
+
+// 
+-----------------------------------------------------------------------
+-------
+// callAdminOperationAsync()
+// 
+-----------------------------------------------------------------------
+------- void SmfImmUtils::callAdminOperationAsync(
+    const std::string &i_dn, SaAmfAdminOperationIdT i_operationId) {
+  if (m_asyncThreadRunning) {
+    LOG_ER("Already invoke admin async with this instance");
+    return;
+  }
+
+  m_objectDn = i_dn;
+  m_adminOperId = i_operationId;
+  try {
+    std::thread(&SmfImmUtils::adminOperationAsyncThread, 
+this).detach();
+  } catch (const std::system_error& e) {
+    LOG_ER("Failed to create thread adminOperationAsyncThread");
+    exit(EXIT_FAILURE);
+  }
+  while (!m_asyncThreadRunning) { base::Sleep(base::kOneMillisecond); } 
+}
+
 // 
------------------------------------------------------------------------------
 // callAdminOperation()
 // 
------------------------------------------------------------------------------
@@ -588,42 +706,25 @@ SaAisErrorT SmfImmUtils::callAdminOperation(
   }
 
   /* Call the admin operation */
-
   TRACE("call immutil_saImmOmAdminOperationInvoke_2");
-  rc = immutil_saImmOmAdminOperationInvoke_2(m_ownerHandle, &objectName, 0,
-                                             i_operationId, i_params,
-                                             &returnValue, i_timeout);
-  while ((rc == SA_AIS_OK) && (returnValue == SA_AIS_ERR_TRY_AGAIN) &&
-         (retry > 0)) {
-    sleep(2);
+  while (1) {
     rc = immutil_saImmOmAdminOperationInvoke_2(m_ownerHandle, &objectName, 0,
-                                               i_operationId, i_params,
-                                               &returnValue, i_timeout);
-    retry--;
-  }
-
-  if (retry <= 0) {
-    LOG_NO(
-        "Fail to invoke admin operation, too many SA_AIS_ERR_TRY_AGAIN, giving 
up. dn=[%s], opId=[%u]",
-        i_dn.c_str(), i_operationId);
-    rc = SA_AIS_ERR_TRY_AGAIN;
-    goto done;
-  }
-
-  if (rc != SA_AIS_OK) {
-    LOG_NO("Fail to invoke admin operation, rc=%s. dn=[%s], opId=[%u]",
-           saf_error(rc), i_dn.c_str(), i_operationId);
-    goto done;
+                                              i_operationId, i_params,
+                                              &returnValue, i_timeout);
+    if ((rc != SA_AIS_OK) || (returnValue != SA_AIS_ERR_TRY_AGAIN) ||
+        (--retry <= 0)) break;
+    sleep(2);
   }
 
-  if ((returnValue != SA_AIS_OK) &&
-      (returnValue != SA_AIS_ERR_REPAIR_PENDING)) {
+  if (rc == SA_AIS_OK) {
     TRACE("Admin operation %u on %s, return value=%s", i_operationId,
           i_dn.c_str(), saf_error(returnValue));
+    rc = returnValue;
+  } else {
+    LOG_ER("Fail to invoke admin operation, rc=%s. dn=[%s], opId=[%u]",
+           saf_error(rc), i_dn.c_str(), i_operationId);
   }
 
-  rc = returnValue;
-
 done:
   SaAisErrorT release_rc = immutil_saImmOmAdminOwnerRelease(m_ownerHandle,
                                                             objectNames, diff 
--git a/src/smf/smfd/SmfUtils.h b/src/smf/smfd/SmfUtils.h index 
83ce6eca5..67170c796 100644
--- a/src/smf/smfd/SmfUtils.h
+++ b/src/smf/smfd/SmfUtils.h
@@ -25,6 +25,7 @@
 
 #include <string>
 #include <list>
+#include <mutex>
 
 #include "ais/include/saImmOm.h"
 #include "ais/include/saAis.h"
@@ -189,6 +190,32 @@ class SmfImmUtils {
       const SaImmAdminOperationParamsT_2** i_params = NULL,
       SaTimeT i_timeout = SA_TIME_ONE_MINUTE);
 
+  ///
+  /// Purpose: Call administrative operation async on an object.
+  /// @param   i_dn DN of object to call.
+  /// @param   i_operationId The operation id.
+  /// @return  void
+  ///
+  void callAdminOperationAsync(
+      const std::string& i_dn, SaAmfAdminOperationIdT i_operationId);
+
+  ///
+  /// Purpose: Check admin operation async done or not.
+  /// @return  True if successful, otherwise false  ///  bool 
+ isAdminAsyncDone() { return !m_asyncThreadRunning; }
+
+  ///
+  /// Purpose: Check admin operation async result.
+  /// @return  SaAisErrorT
+  ///
+  SaAisErrorT getAdminAsyncResult() { return m_admOiReturn; }
+
+  ///
+  /// Purpose: Get object name of admin async operation.
+  /// @return  std::string
+  ///
+  std::string getAdminAsyncObject() { return m_objectDn; }
 
   // Fill in a CCB descriptor (immccb.h), store rollback data for
   // each operation in the CCB and apply the modifications @@ -214,12 +241,22 
@@ class SmfImmUtils {
  private:
   bool initialize(void);
   bool finalize(void);
+  void adminOperationAsyncThread(void);  static SmfImmUtils* me;  
+ static std::mutex m_mutex;  static void adminOperationInvokeCallback(
+      SaInvocationT invocation, SaAisErrorT opRetVal, SaAisErrorT 
+ error);
 
   static SaVersionT s_immVersion;
 
   SaImmHandleT m_omHandle;
   SaImmAdminOwnerHandleT m_ownerHandle;
   SaImmAccessorHandleT m_accessorHandle;
+
+  SaImmAdminOperationIdT m_adminOperId;  std::string m_objectDn;  bool 
+ m_asyncThreadRunning;  SaAisErrorT m_admOiReturn;
 };
 
 #endif  // SMF_SMFD_SMFUTILS_H_
--
2.17.1



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to