Hi Thanh,

Thanks for reviewing.
Yes, it is possible use that global variable instead of running, but I prefer 
running.
Because I need a local variable to control while loop, not mess up with 
is_fctrl_enabled (global var).

Best Regards,
ThuanTr

-----Original Message-----
From: Thanh Nguyen <thanh.ngu...@dektech.com.au> 
Sent: Tuesday, March 31, 2020 10:01 AM
To: Thuan Tran <thuan.t...@dektech.com.au>; Minh Hon Chau 
<minh.c...@dektech.com.au>; Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net
Subject: RE: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck 
forever [#3169]

Hi Thuan,

I have a minor comment and also an ACK.

Dealing with event interactions, we try not to introduce new variable such as 
running. We tend to use existing variable as much as we can. This will make 
further interactions update simpler.
The existing variable is_fctrl_enabled can be used without introducing the new 
variable running.

>From my observation of this code, on the logical side, 
is_fctrl_enabled == true ==> running == true
is_fctrl_enabled == false ==> running == false
and
running == true ==> is_fctrl_enabled == true
running == false ==> is_fctrl_enabled == false

Thus is_fctrl_enabled is identical to running.

Here is the practical side
1) is_fctrl_enabled == true; the process_all_events() loop runs.
is_fctrl_enable == false; the loop stops or does not start.
Instead of using while (running), it can just be while (is_fctrl_enabled). 
Surrounding code needs minor adjustment.

2) Initially is_fctrl_enable == false.
When the app call mds_tipc_fctrl_initialize(), it will call create_ncs_task() 
which set is_fctrl_enable to true and start the loop.

Best Regards,
Thanh

-----Original Message-----
From: thuan.tran <thuan.t...@dektech.com.au> 
Sent: Monday, 23 March 2020 8:59 PM
To: Minh Hon Chau <minh.c...@dektech.com.au>; Thang Duc Nguyen 
<thang.d.ngu...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net
Subject: [devel] [PATCH 1/1] mds: fix mds_tipc_fctrl_shutdown() stuck forever 
[#3169]

- Deadlock of portid_map_mutex locking: mds_tipc_fctrl_shutdown() take lock 
then wait for thread process_all_events() to be canceled.
But that thread also want get lock then it is keep waiting for lock.
- Create safe method to cancel process_all_events() thread similar as the way 
MDS destroy legacy receiving thread.
And getting portid_map_mutex lock only after that thread released.
---
 src/mds/mds_tipc_fctrl_intf.cc | 24 ++++++++++++++++++++++--
 src/mds/mds_tipc_fctrl_msg.h   |  8 ++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc 
index 6ce00782e..93bfce51c 100644
--- a/src/mds/mds_tipc_fctrl_intf.cc
+++ b/src/mds/mds_tipc_fctrl_intf.cc
@@ -27,6 +27,8 @@
 
 #include "base/ncssysf_def.h"
 #include "base/ncssysf_tsk.h"
+#include "base/ncs_osprm.h"
+#include "base/osaf_poll.h"
 
 #include "mds/mds_log.h"
 #include "mds/mds_tipc_fctrl_portid.h"
@@ -194,6 +196,7 @@ bool mds_fctrl_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT msg) 
{  }
 
 uint32_t process_all_events(void) {
+  bool running = true;
   enum { FD_FCTRL = 0, NUM_FDS };
 
   int poll_tmo = chunk_ack_timeout;
@@ -203,7 +206,7 @@ uint32_t process_all_events(void) {
       ncs_ipc_get_sel_obj(&mbx_events).rmv_obj;
   pfd[FD_FCTRL].events = POLLIN;
 
-  while (true) {
+  while (running) {
     int pollres;
 
     pollres = poll(pfd, NUM_FDS, poll_tmo); @@ -231,9 +234,13 @@ uint32_t 
process_all_events(void) {
         if (evt->IsFlowEvent()) {
           process_flow_event(*evt);
         }
+        if (evt->IsShutDownEvent()) {
+          running = false;
+        }
 
         delete evt;
         portid_map_mutex.unlock();
+        if (!running) m_NCS_SEL_OBJ_IND(&evt->destroy_ack_obj_);
       }
     }
     // timeout, scan all portid and send ack msgs @@ -243,6 +250,7 @@ uint32_t 
process_all_events(void) {
       portid_map_mutex.unlock();
     }
   }  /* while */
+  m_MDS_LOG_DBG("FCTRL: process_all_events() thread end");
   return NCSCC_RC_SUCCESS;
 }
 
@@ -305,7 +313,18 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct 
tipc_portid id,  uint32_t mds_tipc_fctrl_shutdown(void) {
   if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS;
 
-  portid_map_mutex.lock();
+  NCS_SEL_OBJ destroy_ack_obj;
+  m_NCS_SEL_OBJ_CREATE(&destroy_ack_obj);
+  Event* pevt = new Event(Event::Type::kEvtShutDown, destroy_ack_obj);  
+ if (m_NCS_IPC_SEND(&mbx_events, pevt,
+      NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) {
+    m_MDS_LOG_ERR("FCTRL: Failed to send shutdown, Error[%s]",
+        strerror(errno));
+    abort();
+  }
+  osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(destroy_ack_obj), 10000);  
+ m_NCS_SEL_OBJ_DESTROY(&destroy_ack_obj);
+  memset(&destroy_ack_obj, 0, sizeof(destroy_ack_obj));
 
   if (ncs_task_release(p_task_hdl) != NCSCC_RC_SUCCESS) {
     m_MDS_LOG_ERR("FCTRL: Stop of the Created Task-failed, Error[%s]", @@ 
-315,6 +334,7 @@ uint32_t mds_tipc_fctrl_shutdown(void) {
   m_NCS_IPC_DETACH(&mbx_events, mds_fctrl_mbx_cleanup, nullptr);
   m_NCS_IPC_RELEASE(&mbx_events, nullptr);
 
+  portid_map_mutex.lock();
   for (auto i : portid_map) delete i.second;
   portid_map.clear();
 
diff --git a/src/mds/mds_tipc_fctrl_msg.h b/src/mds/mds_tipc_fctrl_msg.h index 
c4641ed4e..1ba625650 100644
--- a/src/mds/mds_tipc_fctrl_msg.h
+++ b/src/mds/mds_tipc_fctrl_msg.h
@@ -49,10 +49,13 @@ class Event {
     kEvtTmrAll,
     kEvtTmrTxProb,    // event that tx probation timer expired for once
     kEvtTmrChunkAck,  // event to send the chunk ack
+    kEvtShutDown,     // event to shutdown flow control thread
   };
   NCS_IPC_MSG next_{0};
   Type type_;
 
+  // Used for shutdown
+  NCS_SEL_OBJ destroy_ack_obj_;
   // Used for flow event only
   struct tipc_portid id_;
   uint16_t svc_id_{0};
@@ -69,10 +72,15 @@ class Event {
     mseq_(mseq), mfrag_(mfrag), fseq_(f_seq_num) {
     type_ = type;
   }
+  Event(Type type, NCS_SEL_OBJ destroy_ack_obj):
+    destroy_ack_obj_(destroy_ack_obj) {
+    type_ = type;
+  }
   bool IsTimerEvent() const { return (type_ > Type::kEvtTmrAll); }
   bool IsFlowEvent() const {
     return (Type::kEvtDataFlowAll < type_ && type_ < Type::kEvtTmrAll);
   }
+  bool IsShutDownEvent() const { return (type_ == Type::kEvtShutDown); 
+ }
 };
 
 class BaseMessage {
--
2.17.1



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to