Hi Thang,

Some comments inline with [Thuan]

Best Regards,
ThuanTr

-----Original Message-----
From: thang.d.nguyen <thang.d.ngu...@dektech.com.au> 
Sent: Tuesday, January 29, 2019 4:53 AM
To: gary....@dektech.com.au; minh.c...@dektech.com.au
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1/1] clmd: not send sync respond to client if node
down [#3004]

clmd will not send sync respond to client if the node that client resided on
down. This will avoid timeout when clmd send via mds.
---
 src/clm/clmd/clms_cb.h    |  5 ++++
 src/clm/clmd/clms_evt.cc  | 35 ++++++++++++++++++++++++++-
 src/clm/clmd/clms_evt.h   |  1 +
 src/clm/clmd/clms_main.cc |  4 ++++
 src/clm/clmd/clms_mds.cc  | 61
+++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/clm/clmd/clms_cb.h b/src/clm/clmd/clms_cb.h index
4d7fdc7..6999761 100644
--- a/src/clm/clmd/clms_cb.h
+++ b/src/clm/clmd/clms_cb.h
@@ -22,6 +22,7 @@
 #include "osaf/config.h"
 #endif
 #include <pthread.h>
+#include <list>
 #include <saImm.h>
 #include <saImmOi.h>
 #include <saPlm.h>
@@ -238,6 +239,8 @@ typedef struct clms_cb_t {
       *node_down_list_head; /*NODE_DOWN record - Fix when active node goes
down
                              */
   NODE_DOWN_LIST *node_down_list_tail;
+  // Node down list - Updated by MDS thread  std::list<SaClmNodeIdT *> 
+ mds_node_down_list;

[Thuan]: The element is simple and small, I suggest to use
std::set<NODE_ID>. We can avoid new/delete and SET to avoid duplicate
element

   bool is_impl_set;
   bool nid_started;         /**< true if started by NID */
   NCS_PATRICIA_TREE iplist; /* To temporarily store ipaddress information
@@ -245,6 +248,8 @@ typedef struct clms_cb_t {
 
   /* Mutex protecting shared data used by the scale-out functionality */
   pthread_mutex_t scale_out_data_mutex;
+  /* Mutex protecting shared data used by the delete/add node-id */  
+ pthread_mutex_t node_down_list_mutex;
   /* Number of occupied indices in the vectors pending_nodes[] and
    * pending_node_ids[] */
   size_t no_of_pending_nodes;
diff --git a/src/clm/clmd/clms_evt.cc b/src/clm/clmd/clms_evt.cc index
c2b83c2..08d4acd 100644
--- a/src/clm/clmd/clms_evt.cc
+++ b/src/clm/clmd/clms_evt.cc
@@ -17,7 +17,6 @@
  *
  */
 
-#include "osaf/configmake.h"
 #include <limits.h>
 #include <pthread.h>
 #include <sys/resource.h>
@@ -31,6 +30,9 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <list>
+#include <algorithm>
+#include "osaf/configmake.h"
 #include "base/logtrace.h"
 #include "base/ncsgl_defs.h"
 #include "base/osaf_utility.h"
@@ -1514,6 +1516,31 @@ static uint32_t proc_node_get_async_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {  }
 
 /**
+ * Return true if mds node down exist
+ * @param node id
+ *
+ * @return bool
+ */
+bool clms_is_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  bool found = false;
+  std::list<NODE_ID *>::iterator it;
+  osaf_mutex_lock_ordie(&clms_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin();
+            it != clms_cb->mds_node_down_list.end(); ++it) {
+    if (*(*it) == node_id) {
+      found = true;
+      break;
+    }
+  }
+
+  osaf_mutex_unlock_ordie(&clms_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+  return found;
+}
+
+/**
  * Handle a initialize message
  * @param cb
  * @param evt
@@ -1556,6 +1583,12 @@ static uint32_t proc_initialize_msg(CLMS_CB *cb,
CLMSV_CLMS_EVT *evt) {
   if (client != nullptr)
     msg.info.api_resp_info.param.client_id = client->client_id;
 
+  if (clms_is_node_down(node_id) == true) {
+    LOG_NO("node_id = %d already down, no need sending sync respond",
node_id);
+    if (client != nullptr) clms_client_delete(client->client_id);
+    return (uint32_t)ais_rc;
+  }
+
[Thuan] Why don't move this block before clms_client_new() then not need to
clms_client_delete()

   rc = clms_mds_msg_send(cb, &msg, &evt->fr_dest, &evt->mds_ctxt,
                          MDS_SEND_PRIORITY_HIGH, NCSMDS_SVC_ID_CLMA);
   if (rc != NCSCC_RC_SUCCESS) {
diff --git a/src/clm/clmd/clms_evt.h b/src/clm/clmd/clms_evt.h index
1005456..ef35cbc 100644
--- a/src/clm/clmd/clms_evt.h
+++ b/src/clm/clmd/clms_evt.h
@@ -92,6 +92,7 @@ extern uint32_t clms_clmresp_ok(CLMS_CB *cb,
CLMS_CLUSTER_NODE *op_node,
                                 CLMS_TRACK_INFO *trkrec);  extern uint32_t
clms_remove_clma_down_rec(CLMS_CB *cb, MDS_DEST mds_dest);  extern void
clms_remove_node_down_rec(SaClmNodeIdT node_id);
+extern bool clms_is_node_down(SaClmNodeIdT node_id);
 extern uint32_t clms_node_add(CLMS_CLUSTER_NODE *node, int i);  extern void
clms_clmresp_error_timeout(CLMS_CB *cb, CLMS_CLUSTER_NODE *node);  extern
bool clms_clma_entry_valid(CLMS_CB *cb, MDS_DEST mds_dest); diff --git
a/src/clm/clmd/clms_main.cc b/src/clm/clmd/clms_main.cc index
ad6e12e..e2c4f21 100644
--- a/src/clm/clmd/clms_main.cc
+++ b/src/clm/clmd/clms_main.cc
@@ -245,6 +245,10 @@ uint32_t clms_cb_init(CLMS_CB *clms_cb) {
   if (pthread_mutex_init(&clms_cb->scale_out_data_mutex, nullptr) != 0) {
     return NCSCC_RC_FAILURE;
   }
+  if (pthread_mutex_init(&clms_cb->node_down_list_mutex, nullptr) != 0) {
+    return NCSCC_RC_FAILURE;
+  }
+
   clms_cb->no_of_pending_nodes = 0;
   clms_cb->no_of_inprogress_nodes = 0;
   for (int i = 0; i != (MAX_PENDING_NODES + 1); ++i) { diff --git
a/src/clm/clmd/clms_mds.cc b/src/clm/clmd/clms_mds.cc index 58552cc..a9e004b
100644
--- a/src/clm/clmd/clms_mds.cc
+++ b/src/clm/clmd/clms_mds.cc
@@ -18,10 +18,13 @@
 
 #include <cinttypes>
 #include <cstring>
+#include <list>
+#include <algorithm>
 #include "base/logtrace.h"
 #include "base/ncsencdec_pub.h"
 #include "clm/clmd/clms.h"
 #include "clm/common/clmsv_enc_dec.h"
+#include "base/osaf_utility.h"
 
 #define CLMS_SVC_PVT_SUBPART_VERSION 1
 #define CLMS_WRT_CLMA_SUBPART_VER_AT_MIN_MSG_FMT 1 @@ -962,6 +965,60 @@
static uint32_t clms_mds_enc_flat(struct ncsmds_callback_info *info) {  }
 
 
/***************************************************************************
*
+ * Name          : clms_add_mds_node_down
+ *
+ * Description   : This function adds a node when MDS node down arrives.
+ *
+ * Arguments     : node_id   : node down id.
+ *
+ * Return Values : None.
+ *
+ * Notes         : None.
+ 
+***********************************************************************
+******/ void clms_add_mds_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  osaf_mutex_lock_ordie(&clms_cb->node_down_list_mutex);
+
+  NODE_ID *node = new NODE_ID;
+  *node = node_id;
+  clms_cb->mds_node_down_list.push_back(node);
+
+  osaf_mutex_unlock_ordie(&clms_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+}
+
+/**************************************************************************
**
+ * Name          : clms_delete_mds_node_down
+ *
+ * Description   : This function adds a node when MDS node down arrives.
+ *
+ * Arguments     : node_id   : node down id.
+ *
+ * Return Values : None.
+ *
+ * Notes         : None.
+ 
+***********************************************************************
+******/ void clms_delete_mds_node_down(uint32_t node_id) {
+  TRACE_ENTER();
+  std::list<NODE_ID *>::iterator it;
+  osaf_mutex_lock_ordie(&clms_cb->node_down_list_mutex);
+
+  for (it = clms_cb->mds_node_down_list.begin(); it !=
+         clms_cb->mds_node_down_list.end(); ++it) {
+    NODE_ID *node = *it;
+    if (*node == node_id) {
+      clms_cb->mds_node_down_list.erase(it);
+      TRACE_2("Deleted:%x", *node);
+      delete node;
+      break;
+    }
+  }
+
+  osaf_mutex_unlock_ordie(&clms_cb->node_down_list_mutex);
+  TRACE_LEAVE();
+}
+
+/**********************************************************************
+******
  * Name          : clms_mds_dec_flat
  *
  * Description   : MDS decode and flatten
@@ -1026,6 +1083,9 @@ static uint32_t clms_mds_node_event(struct
ncsmds_callback_info *mds_info) {
     TRACE("Adding ipinformation to the ip list: %u", node_id);
     TRACE("addr_family:%u", mds_info->info.node_evt.addr_family);
     TRACE("ip_addr:%s", mds_info->info.node_evt.ip_addr);
+
+    clms_delete_mds_node_down(mds_info->info.node_evt.node_id);
+
[Thuan] why not direct input local var node_id is already set before, e.g:
clms_delete_mds_node_down(node_id)

     node_id = mds_info->info.node_evt.node_id;
     if ((ip = (IPLIST *)ncs_patricia_tree_get(
              &clms_cb->iplist, (uint8_t *)&node_id)) == nullptr) { @@
-1097,6 +1157,7 @@ static uint32_t clms_mds_node_event(struct
ncsmds_callback_info *mds_info) {
     clmsv_evt->info.node_mds_info.node_id =
mds_info->info.node_evt.node_id;
     clmsv_evt->info.node_mds_info.nodeup = SA_TRUE;
 
+    clms_add_mds_node_down(mds_info->info.node_evt.node_id);
[Thuan] can be shorten like clms_add_mds_node_down(node_id);

     rc = m_NCS_IPC_SEND(&clms_cb->mbx, clmsv_evt, NCS_IPC_PRIORITY_HIGH);
     if (rc != NCSCC_RC_SUCCESS) {
       TRACE("IPC send failed %d", rc);
--
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to