[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]

2019-11-26 Thread thuan.tran
- When sending a response message to an Adest that no longer exists (crashed/terminated),
MDS currently waits for 1.5 seconds before concluding that there is no route to send the RSP.

- Here are the scenarios that may waste 1.5s waiting:
   SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s)
   get SNDRSP -> SVCs DOWN (dead adest or vdest role change) -> send RSP (wait 1.5s)
This long wait causes trouble for higher-layer services (e.g. ntf, imm, etc.),
where many agents send initialize requests (which use the SNDRSP message type).

- Solution: create an adest list, and start a timer when the last SVC of an adest goes DOWN.
When sending an RSP to this adest, the wait time is reduced to only 10ms.
Note that the following original behavior is kept:
   No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s)

- A new test case, tet_send_response_tp_13(), is added to verify this scenario.
---
 src/mds/apitest/mdstipc.h  |   1 +
 src/mds/apitest/mdstipc_api.c  | 107 ++
 src/mds/apitest/mdstipc_conf.c |   1 -
 src/mds/mds_c_api.c| 199 +++--
 src/mds/mds_c_sndrcv.c |  38 ---
 src/mds/mds_core.h |  30 -
 src/mds/mds_dt2c.h |   2 +-
 src/mds/mds_dt_common.c|  24 +++-
 src/mds/mds_main.c |   4 +
 9 files changed, 350 insertions(+), 56 deletions(-)

diff --git a/src/mds/apitest/mdstipc.h b/src/mds/apitest/mdstipc.h
index b56940ea6..2b9fcf2fd 100644
--- a/src/mds/apitest/mdstipc.h
+++ b/src/mds/apitest/mdstipc.h
@@ -157,6 +157,7 @@ TET_VDEST
 gl_tet_vdest[4]; /*change it to 6 to run VDS Redundancy: 101 for Stress*/
 TET_SVC gl_tet_svc;
 TET_MDS_RECVD_MSG_INFO gl_rcvdmsginfo, gl_direct_rcvmsginfo;
+TET_EVENT_INFO gl_event_data;
 int gl_vdest_indx;
 MDS_DIRECT_BUFF direct_buff;
 
diff --git a/src/mds/apitest/mdstipc_api.c b/src/mds/apitest/mdstipc_api.c
index 847f9a7f1..4a97f99e9 100644
--- a/src/mds/apitest/mdstipc_api.c
+++ b/src/mds/apitest/mdstipc_api.c
@@ -28,6 +28,7 @@
 #include "mdstipc.h"
 #include "base/ncssysf_tmr.h"
 #include "base/osaf_poll.h"
+#include "base/osaf_time.h"
 
 #define MSG_SIZE MDS_DIRECT_BUF_MAXSIZE
 static MDS_CLIENT_MSG_FORMAT_VER gl_set_msg_fmt_ver;
@@ -5633,6 +5634,109 @@ TODO: Check this testcase, it was outcomment already in 
the "tet"-files
 }
 #endif
 
+void tet_send_response_tp_13()
+{
+   int FAIL = 1;
+   mds_shutdown();
+
+   printf("\nTest Case 13: Now send_response"
+   " to dead Adest don't stuck in waiting 1.5s\n");
+   /**/
+   pid_t pid = fork();
+   if (pid == 0) {
+   /* child as sender */
+   MDS_SVC_ID to_svcids[] = {NCSMDS_SVC_ID_EXTERNAL_MIN};
+   mds_startup();
+   if (adest_get_handle() == NCSCC_RC_SUCCESS) {
+   if (mds_service_install(
+   gl_tet_adest.mds_pwe1_hdl,
+   NCSMDS_SVC_ID_INTERNAL_MIN, 1,
+   NCSMDS_SCOPE_NONE, false, false)
+   == NCSCC_RC_SUCCESS) {
+   if (mds_service_subscribe(
+   gl_tet_adest.mds_pwe1_hdl,
+   NCSMDS_SVC_ID_INTERNAL_MIN,
+   NCSMDS_SCOPE_INTRANODE,
+   1, to_svcids)
+   == NCSCC_RC_SUCCESS) {
+   sleep(1);
+   TET_MDS_MSG msg;
+   mds_send_get_response(
+   gl_tet_adest.mds_pwe1_hdl,
+   NCSMDS_SVC_ID_INTERNAL_MIN,
+   NCSMDS_SVC_ID_EXTERNAL_MIN,
+   gl_tet_adest.svc[0].svcevt[0].dest,
+   1000, MDS_SEND_PRIORITY_HIGH,
+   &msg);
+   }
+   }
+   }
+   mds_shutdown();
+   } else if (pid > 0) {
+   /* parent as receiver */
+   struct timespec time1, time2, wait_time;
+   MDS_SVC_ID to_svcids[] = {NCSMDS_SVC_ID_INTERNAL_MIN};
+   mds_startup();
+   if (adest_get_handle() == NCSCC_RC_SUCCESS) {
+   if (mds_service_install(
+   gl_tet_adest.mds_pwe1_hdl,
+   NCSMDS_SVC_ID_EXTERNAL_MIN, 1,
+   NCSMDS_SCOPE_NONE, true, false)
+   == NCSCC_RC_SUCCESS) {
+   if (mds_service_subscribe(
+   gl_tet_adest.mds_pwe1_hdl,
+   NCSMDS_SVC_ID_EXTERNAL_MIN,
+   

[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]

2019-11-05 Thread thuan.tran
- When sending a response message to an Adest that no longer exists (crashed/terminated),
MDS currently waits for 1.5 seconds before concluding that there is no route to send the RSP.

- Here are the scenarios that may waste 1.5s waiting:
   SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s)
   get SNDRSP -> SVCs DOWN (dead adest or vdest role change) -> send RSP (wait 1.5s)
This long wait causes trouble for higher-layer services (e.g. ntf, imm, etc.),
where many agents send initialize requests (which use the SNDRSP message type).

- Solution: create an adest list, and start a timer when the last SVC of an adest goes DOWN.
When sending an RSP to this adest, the wait time is reduced to only 10ms.
Note that the following original behavior is kept:
   No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s)
---
 src/mds/mds_c_api.c | 180 
 src/mds/mds_c_sndrcv.c  |  38 +
 src/mds/mds_core.h  |  30 ++-
 src/mds/mds_dt2c.h  |   2 +-
 src/mds/mds_dt_common.c |  22 -
 src/mds/mds_main.c  |   4 +
 6 files changed, 221 insertions(+), 55 deletions(-)

diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c
index 132555b8e..24b524d92 100644
--- a/src/mds/mds_c_api.c
+++ b/src/mds/mds_c_api.c
@@ -27,6 +27,62 @@
 #include "mds_log.h"
 #include "mds_core.h"
 
+/* Internal use functions */
+static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID svc_id)
+{
+   MDS_TMR_REQ_INFO *tmr_req_info = calloc(1, sizeof(MDS_TMR_REQ_INFO));
+   if (tmr_req_info == NULL) {
+   m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n");
+   abort();
+   }
+
+   tmr_req_info->type = MDS_DOWN_TMR;
+   tmr_req_info->info.down_event_tmr_info.adest = adest;
+   tmr_req_info->info.down_event_tmr_info.svc_id = svc_id;
+
+   tmr_t tmr_id = ncs_tmr_alloc(__FILE__, __LINE__);
+   if (tmr_id == NULL) {
+   m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n");
+   abort();
+   }
+
+   tmr_req_info->info.down_event_tmr_info.tmr_id = tmr_id;
+
+   uint32_t tmr_hdl =
+   ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_COMMON,
+(NCSCONTEXT)(tmr_req_info));
+
+   if (svc_id == 0) {
+   MDS_ADEST_INFO *adest_info =
+   (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+   &gl_mds_mcm_cb->adest_list,
+   (uint8_t *)&adest);
+   if (adest_info) {
+   adest_info->tmr_req_info = tmr_req_info;
+   adest_info->tmr_hdl = tmr_hdl;
+   }
+   }
+   tmr_id = ncs_tmr_start(tmr_id, MDS_DOWN_TMR_VAL,
+  (TMR_CALLBACK)mds_tmr_callback,
+  (void *)(long)(tmr_hdl), __FILE__, __LINE__);
+   assert(tmr_id != NULL);
+}
+
+static void stop_mds_down_tmr(MDS_ADEST_INFO *adest_info)
+{
+   assert(adest_info != NULL);
+   if (adest_info->tmr_req_info) {
+   MDS_TMR_REQ_INFO *tmr_req_info = adest_info->tmr_req_info;
+   ncs_tmr_stop(tmr_req_info->info.down_event_tmr_info.tmr_id);
+   ncs_tmr_free(tmr_req_info->info.down_event_tmr_info.tmr_id);
+   m_MMGR_FREE_TMR_INFO(tmr_req_info);
+   adest_info->tmr_req_info = NULL;
+   ncshm_destroy_hdl(
+   NCS_SERVICE_ID_COMMON,
+   (uint32_t)adest_info->tmr_hdl);
+   }
+}
+
 /*
 
   Function NAME: mds_validate_pwe_hdl
@@ -1449,6 +1505,25 @@ uint32_t mds_mcm_svc_unsubscribe(NCSMDS_INFO *info)
subscr_req_hdl);
}
 
+   /* Find and delete related adest from adest list */
+   MDS_SUBSCRIPTION_RESULTS_INFO *s_info = NULL;
+   mds_subtn_res_tbl_getnext_any(svc_hdl,
+   info->info.svc_cancel.i_svc_ids[i],
+   &s_info);
+   if (s_info) {
+   MDS_ADEST_INFO *adest_info =
+   (MDS_ADEST_INFO *)
+   ncs_patricia_tree_get(
+   &gl_mds_mcm_cb->adest_list,
+   (uint8_t *)&s_info->key.adest);
+   if (adest_info) {
+   stop_mds_down_tmr(adest_info);
+   ncs_patricia_tree_del(
+   &gl_mds_mcm_cb->adest_list,
+   (NCS_PATRICIA_NODE *)adest_info);
+   }
+   }
+
/* Delete all MDTM entries */
mds_subtn_res_tbl_del_all(svc_hdl,
  info->info.svc_cancel.i_svc_ids[i]);
@@ -1900,6 +1975,29 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
 
/*** Validation for SCOPE 

[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]

2019-10-25 Thread thuan.tran
- When sending a response message to an Adest that no longer exists (already down),
MDS currently waits for 1.5 seconds before concluding that there is no route to
send the response message.

- Here is the scenario that may waste 1.5s:
  SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s)

- With this change, MDS no longer wastes 1.5s, a delay which can cause trouble
for higher-layer services (e.g. ntf, imm, etc.):
  SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 10ms)

- Note that the following original behavior is kept:
  No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s)
---
 src/mds/mds_c_api.c | 72 +++--
 src/mds/mds_c_sndrcv.c  | 38 ++
 src/mds/mds_core.h  | 29 +++--
 src/mds/mds_dt2c.h  |  2 +-
 src/mds/mds_dt_common.c | 22 -
 src/mds/mds_main.c  |  4 +++
 6 files changed, 145 insertions(+), 22 deletions(-)

diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c
index 132555b8e..1078be35a 100644
--- a/src/mds/mds_c_api.c
+++ b/src/mds/mds_c_api.c
@@ -1900,6 +1900,28 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
 
/*** Validation for SCOPE **/
 
+   if (adest != m_MDS_GET_ADEST) {
+   MDS_ADEST_INFO *adest_info =
+   (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+   &gl_mds_mcm_cb->adest_list,
+   (uint8_t *)&adest);
+   if (!adest_info) {
+   /* Add adest to adest list */
+   adest_info = m_MMGR_ALLOC_ADEST_INFO;
+   memset(adest_info, 0, sizeof(MDS_ADEST_INFO));
+   adest_info->adest = adest;
+   adest_info->node.key_info =
+   (uint8_t *)&adest_info->adest;
+   adest_info->svc_cnt = 1;
+   adest_info->tmr_start = false;
+   ncs_patricia_tree_add(
+   &gl_mds_mcm_cb->adest_list,
+   (NCS_PATRICIA_NODE *)adest_info);
+   } else {
+   adest_info->svc_cnt++;
+   }
+   }
+
status = mds_get_subtn_res_tbl_by_adest(local_svc_hdl, svc_id, vdest_id,
adest, _subtn_result_info);
 
@@ -3379,6 +3401,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
 
 static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID svc_id)
 {
+   int64_t timeout = MDS_DOWN_TMR_VAL;
MDS_TMR_REQ_INFO *tmr_req_info = calloc(1, sizeof(MDS_TMR_REQ_INFO));
if (tmr_req_info == NULL) {
m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n");
@@ -3400,8 +3423,9 @@ static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID 
svc_id)
uint32_t tmr_hdl =
ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_COMMON,
 (NCSCONTEXT)(tmr_req_info));
-
-   tmr_id = ncs_tmr_start(tmr_id, 1000, // 10ms unit
+   if (svc_id == 0)
+   timeout = MDS_ADEST_DOWN_TMR_VAL;
+   tmr_id = ncs_tmr_start(tmr_id, timeout,
   (TMR_CALLBACK)mds_tmr_callback,
   (void *)(long)(tmr_hdl), __FILE__, __LINE__);
assert(tmr_id != NULL);
@@ -3571,6 +3595,24 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID 
svc_id, V_DEST_RL role,
/* Discard : Getting down before getting up */
} else { /* Entry exist in subscription result table */
 
+   MDS_ADEST_INFO *adest_info =
+   (MDS_ADEST_INFO *)ncs_patricia_tree_get(
+   &gl_mds_mcm_cb->adest_list,
+   (uint8_t *)&adest);
+   if (adest_info) {
+   adest_info->svc_cnt--;
+   if (adest_info->svc_cnt == 0 &&
+   adest_info->tmr_start == false) {
+   m_MDS_LOG_INFO(
+   "MCM:API: Adest <0x%08x, %u>"
+   " down timer start",
+   m_MDS_GET_NODE_ID_FROM_ADEST(adest),
+   m_MDS_GET_PROCESS_ID_FROM_ADEST(adest));
+   adest_info->tmr_start = true;
+   start_mds_down_tmr(adest, 0);
+   }
+   }
+
if (vdest_id == m_VDEST_ID_FOR_ADEST_ENTRY) {
status = mds_subtn_res_tbl_del(
local_svc_hdl, svc_id, vdest_id, adest,
@@ -4956,6 +4998,17 @@ uint32_t mds_mcm_init(void)
return NCSCC_RC_FAILURE;
}
 
+   /* ADEST TREE */
+   memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS));
+   pat_tree_params.key_size = sizeof(MDS_DEST);
+   if