Hi Thuan,
See my comment inline.

From: Thuan Tran <thuan.t...@dektech.com.au>
Sent: Wednesday, August 12, 2020 10:25 PM
To: Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>; Minh Hon Chau 
<minh.c...@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] rde: allow node start first to be promoted in relax 
mode [#3191]

Hi Thang,

It is for the following case. For example:
Peer B sees peer A up, calculates B's pending time, and calls SendPeerInfoResp() to peer A.
If peer A gets peer B's info before it sees peer B up, A's pending time is still
zero at that point.
[Thang]: Maybe this is not a real case, because rde will always receive the
peer up event first and then get the peer info response.

Then A needs to calculate its pending time (in SetPeerState, if the pending time
is zero) to decide whether to give up or not.
This calculated pending time is later sent to peer B as peer A's info.
This way, the decisions made by the two peers do not conflict.

Best Regards,
Thuan
________________________________
From: Thang Duc Nguyen 
<thang.d.ngu...@dektech.com.au<mailto:thang.d.ngu...@dektech.com.au>>
Sent: Wednesday, August 12, 2020 3:50:24 PM
To: Thuan Tran <thuan.t...@dektech.com.au<mailto:thuan.t...@dektech.com.au>>; 
Minh Hon Chau <minh.c...@dektech.com.au<mailto:minh.c...@dektech.com.au>>
Cc: 
opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net> 
<opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net>>
Subject: RE: [PATCH 1/1] rde: allow node start first to be promoted in relax 
mode [#3191]

Hi Thuan,
I think the pending time only needs to be updated/calculated in SendPeerInfoResp(),
when it receives the peer up event.
There is no need to update it again in SetPeerState().

B.R/Thang

-----Original Message-----
From: Thuan Tran <thuan.t...@dektech.com.au<mailto:thuan.t...@dektech.com.au>>
Sent: Monday, May 25, 2020 11:27 AM
To: Minh Hon Chau <minh.c...@dektech.com.au<mailto:minh.c...@dektech.com.au>>; 
Thang Duc Nguyen 
<thang.d.ngu...@dektech.com.au<mailto:thang.d.ngu...@dektech.com.au>>
Cc: 
opensaf-devel@lists.sourceforge.net<mailto:opensaf-devel@lists.sourceforge.net>;
 Thuan Tran <thuan.t...@dektech.com.au<mailto:thuan.t...@dektech.com.au>>
Subject: [PATCH 1/1] rde: allow node start first to be promoted in relax mode 
[#3191]

- In relax mode with consensus unavailable, SC-2 sometimes cannot become active
even if it started a long time before SC-1, because the current promotion
strategy is based only on node id (the lower one is chosen).
- Change the promotion decision to compare promotion pending durations: the node
whose promotion has been pending longer gets promoted and the other node gives up.
This helps the node that started first become active.
If the promotion pending durations are equal, the node with the lower node id is promoted.
---
 src/rde/rded/rde_cb.h    |  3 +++
 src/rde/rded/rde_main.cc | 10 +++++++++-  src/rde/rded/rde_mds.cc  | 12 
++++++++++++
 src/rde/rded/role.cc     | 20 ++++++++++++++++++--
 src/rde/rded/role.h      |  3 ++-
 5 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/src/rde/rded/rde_cb.h b/src/rde/rded/rde_cb.h index 
e35fdab2b..50a0a0d26 100644
--- a/src/rde/rded/rde_cb.h
+++ b/src/rde/rded/rde_cb.h
@@ -54,6 +54,8 @@ struct RDE_CONTROL_BLOCK {
   State state{State::kNotActive};
   std::atomic<ConsensusState> 
consensus_service_state{ConsensusState::kUnknown};
   std::atomic<bool> state_refresh_thread_started{false}; // consensus service
+  struct timespec promote_start{0};
+  uint64_t promote_pending{0};
 };

 enum RDE_MSG_TYPE {
@@ -72,6 +74,7 @@ enum RDE_MSG_TYPE {

 struct rde_peer_info {
   PCS_RDA_ROLE ha_role;
+  uint64_t promote_pending;
 };

 struct rde_msg {
diff --git a/src/rde/rded/rde_main.cc b/src/rde/rded/rde_main.cc index 
6594b3d49..e6bd759ec 100644
--- a/src/rde/rded/rde_main.cc
+++ b/src/rde/rded/rde_main.cc
@@ -30,6 +30,7 @@
 #include <cstdlib>
 #include <cstring>
 #include "base/conf.h"
+#include "base/time.h"
 #include "base/daemon.h"
 #include "base/logtrace.h"
 #include "base/ncs_main_papi.h"
@@ -108,7 +109,8 @@ static void handle_mbx_event() {
              msg->type == RDE_MSG_PEER_INFO_RESP ? "response" : "request",
              msg->fr_node_id, Role::to_string(msg->info.peer_info.ha_role));
       CheckForSplitBrain(msg);
-      role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id);
+      role->SetPeerState(msg->info.peer_info.ha_role, msg->fr_node_id,
+                         msg->info.peer_info.promote_pending);
       break;
     }
     case RDE_MSG_PEER_UP: {
@@ -283,9 +285,15 @@ static void CheckForSplitBrain(const rde_msg *msg) {  }

 static void SendPeerInfoResp(MDS_DEST mds_dest) {
+  RDE_CONTROL_BLOCK *cb = rde_get_control_block();
   rde_msg peer_info_req;
   peer_info_req.type = RDE_MSG_PEER_INFO_RESP;
   peer_info_req.info.peer_info.ha_role = role->role();
+  if (role->role() == PCS_RDA_UNDEFINED && cb->promote_pending == 0) {
+    struct timespec now = base::ReadMonotonicClock();
+    cb->promote_pending = base::TimespecToMillis(now -
+ cb->promote_start);  }  peer_info_req.info.peer_info.promote_pending =
+ cb->promote_pending;
   rde_mds_send(&peer_info_req, mds_dest);  }

diff --git a/src/rde/rded/rde_mds.cc b/src/rde/rded/rde_mds.cc index 
bc335f090..a32f54082 100644
--- a/src/rde/rded/rde_mds.cc
+++ b/src/rde/rded/rde_mds.cc
@@ -48,6 +48,10 @@ static uint32_t msg_encode(MDS_CALLBACK_ENC_INFO *enc_info) {
       assert(data);
       ncs_encode_32bit(&data, msg->info.peer_info.ha_role);
       ncs_enc_claim_space(uba, sizeof(uint32_t));
+      data = ncs_enc_reserve_space(uba, sizeof(uint64_t));
+      assert(data);
+      ncs_encode_64bit(&data, msg->info.peer_info.promote_pending);
+      ncs_enc_claim_space(uba, sizeof(uint64_t));
       break;

     default:
@@ -94,6 +98,14 @@ static uint32_t msg_decode(MDS_CALLBACK_DEC_INFO *dec_info) {
       msg->info.peer_info.ha_role =
           static_cast<PCS_RDA_ROLE>(ncs_decode_32bit(&data));
       ncs_dec_skip_space(uba, sizeof(uint32_t));
+      msg->info.peer_info.promote_pending = 0;
+      if (msg->info.peer_info.ha_role == PCS_RDA_UNDEFINED) {
+        data = ncs_dec_flatten_space(uba, data_buff, sizeof(uint64_t));
+        assert(data);
+        msg->info.peer_info.promote_pending =
+            static_cast<uint64_t>(ncs_decode_64bit(&data));
+        ncs_dec_skip_space(uba, sizeof(uint64_t));
+      }
       break;

     default:
diff --git a/src/rde/rded/role.cc b/src/rde/rded/role.cc index 
06c346ced..a3a969b66 100644
--- a/src/rde/rded/role.cc
+++ b/src/rde/rded/role.cc
@@ -288,6 +288,9 @@ uint32_t Role::SetRole(PCS_RDA_ROLE new_role) {
       (old_role == PCS_RDA_UNDEFINED || old_role == PCS_RDA_QUIESCED)) {
     LOG_NO("Requesting ACTIVE role");
     new_role = PCS_RDA_UNDEFINED;
+    RDE_CONTROL_BLOCK* cb = rde_get_control_block();
+    cb->promote_start = base::ReadMonotonicClock();
+    cb->promote_pending = 0;
   }
   if (new_role != old_role) {
     LOG_NO("RDE role set to %s", to_string(new_role)); @@ -347,10 +350,23 @@ 
uint32_t Role::UpdateMdsRegistration(PCS_RDA_ROLE new_role,
   return rc;
 }

-void Role::SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id) {
+void Role::SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id,
+                        uint64_t peer_promote_pending) {
   if (role() == PCS_RDA_UNDEFINED) {
+    bool give_up = false;
+    RDE_CONTROL_BLOCK *cb = rde_get_control_block();
+    if (node_role == PCS_RDA_UNDEFINED) {
+      if (cb->promote_pending == 0 && peer_promote_pending > 0) {
+        struct timespec now = base::ReadMonotonicClock();
+        cb->promote_pending = base::TimespecToMillis(now - cb->promote_start);
+      }
+      if ((cb->promote_pending < peer_promote_pending) ||
+          (cb->promote_pending == peer_promote_pending &&
+           node_id < own_node_id_))
+        give_up = true;
+    }
     if (node_role == PCS_RDA_ACTIVE || node_role == PCS_RDA_STANDBY ||
-        (node_role == PCS_RDA_UNDEFINED && node_id < own_node_id_)) {
+        give_up) {
       SetRole(PCS_RDA_QUIESCED);
       LOG_NO("Giving up election against 0x%" PRIx32
              " with role %s. "
diff --git a/src/rde/rded/role.h b/src/rde/rded/role.h index 
9c63cbe7b..8eb3abe36 100644
--- a/src/rde/rded/role.h
+++ b/src/rde/rded/role.h
@@ -38,7 +38,8 @@ class Role {
   void AddPeer(NODE_ID node_id);
   bool IsCandidate();
   bool IsPeerPresent();
-  void SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id);
+  void SetPeerState(PCS_RDA_ROLE node_role, NODE_ID node_id,
+                    uint64_t peer_promote_pending);
   timespec* Poll(timespec* ts);
   uint32_t SetRole(PCS_RDA_ROLE new_role);
   PCS_RDA_ROLE role() const;
--
2.17.1

_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to