[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]
- When sending a response message to an Adest that no longer exists (crashed/terminated), MDS currently waits for 1.5 seconds before concluding that there is no route to send the RSP. - Here are the scenarios that may waste 1.5s waiting: SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s) get SNDRSP -> SVCs DOWN (dead adest or vdest role change) -> send RSP (wait 1.5s) This long wait time causes trouble for higher layer services, e.g. ntf, imm, etc., where many agents send initialize requests (using the SNDRSP message type). - Solution: create an adest list and start a timer when the last SVC of an adest goes DOWN. When sending an RSP to this adest, the wait time is reduced to only 10ms. Note that the following original behavior is kept: No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s) - New TC tet_send_response_tp_13() is created to verify this scenario. --- src/mds/apitest/mdstipc.h | 1 + src/mds/apitest/mdstipc_api.c | 107 ++ src/mds/apitest/mdstipc_conf.c | 1 - src/mds/mds_c_api.c| 199 +++-- src/mds/mds_c_sndrcv.c | 38 --- src/mds/mds_core.h | 30 - src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c| 24 +++- src/mds/mds_main.c | 4 + 9 files changed, 350 insertions(+), 56 deletions(-) diff --git a/src/mds/apitest/mdstipc.h b/src/mds/apitest/mdstipc.h index b56940ea6..2b9fcf2fd 100644 --- a/src/mds/apitest/mdstipc.h +++ b/src/mds/apitest/mdstipc.h @@ -157,6 +157,7 @@ TET_VDEST gl_tet_vdest[4]; /*change it to 6 to run VDS Redundancy: 101 for Stress*/ TET_SVC gl_tet_svc; TET_MDS_RECVD_MSG_INFO gl_rcvdmsginfo, gl_direct_rcvmsginfo; +TET_EVENT_INFO gl_event_data; int gl_vdest_indx; MDS_DIRECT_BUFF direct_buff; diff --git a/src/mds/apitest/mdstipc_api.c b/src/mds/apitest/mdstipc_api.c index 847f9a7f1..4a97f99e9 100644 --- a/src/mds/apitest/mdstipc_api.c +++ b/src/mds/apitest/mdstipc_api.c @@ -28,6 +28,7 @@ #include "mdstipc.h" #include "base/ncssysf_tmr.h" #include "base/osaf_poll.h" +#include "base/osaf_time.h" #define MSG_SIZE MDS_DIRECT_BUF_MAXSIZE static MDS_CLIENT_MSG_FORMAT_VER 
gl_set_msg_fmt_ver; @@ -5633,6 +5634,109 @@ TODO: Check this testcase, it was outcomment already in the "tet"-files } #endif +void tet_send_response_tp_13() +{ + int FAIL = 1; + mds_shutdown(); + + printf("\nTest Case 13: Now send_response" + " to dead Adest don't stuck in waiting 1.5s\n"); + /**/ + pid_t pid = fork(); + if (pid == 0) { + /* child as sender */ + MDS_SVC_ID to_svcids[] = {NCSMDS_SVC_ID_EXTERNAL_MIN}; + mds_startup(); + if (adest_get_handle() == NCSCC_RC_SUCCESS) { + if (mds_service_install( + gl_tet_adest.mds_pwe1_hdl, + NCSMDS_SVC_ID_INTERNAL_MIN, 1, + NCSMDS_SCOPE_NONE, false, false) + == NCSCC_RC_SUCCESS) { + if (mds_service_subscribe( + gl_tet_adest.mds_pwe1_hdl, + NCSMDS_SVC_ID_INTERNAL_MIN, + NCSMDS_SCOPE_INTRANODE, + 1, to_svcids) + == NCSCC_RC_SUCCESS) { + sleep(1); + TET_MDS_MSG msg; + mds_send_get_response( + gl_tet_adest.mds_pwe1_hdl, + NCSMDS_SVC_ID_INTERNAL_MIN, + NCSMDS_SVC_ID_EXTERNAL_MIN, + gl_tet_adest.svc[0].svcevt[0].dest, + 1000, MDS_SEND_PRIORITY_HIGH, + &msg); + } + } + } + mds_shutdown(); + } else if (pid > 0) { + /* parent as receiver */ + struct timespec time1, time2, wait_time; + MDS_SVC_ID to_svcids[] = {NCSMDS_SVC_ID_INTERNAL_MIN}; + mds_startup(); + if (adest_get_handle() == NCSCC_RC_SUCCESS) { + if (mds_service_install( + gl_tet_adest.mds_pwe1_hdl, + NCSMDS_SVC_ID_EXTERNAL_MIN, 1, + NCSMDS_SCOPE_NONE, true, false) + == NCSCC_RC_SUCCESS) { + if (mds_service_subscribe( + gl_tet_adest.mds_pwe1_hdl, + NCSMDS_SVC_ID_EXTERNAL_MIN, +
[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]
- When sending a response message to an Adest that no longer exists (crashed/terminated), MDS currently waits for 1.5 seconds before concluding that there is no route to send the RSP. - Here are the scenarios that may waste 1.5s waiting: SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s) get SNDRSP -> SVCs DOWN (dead adest or vdest role change) -> send RSP (wait 1.5s) This long wait time causes trouble for higher layer services, e.g. ntf, imm, etc., where many agents send initialize requests (using the SNDRSP message type). - Solution: create an adest list and start a timer when the last SVC of an adest goes DOWN. When sending an RSP to this adest, the wait time is reduced to only 10ms. Note that the following original behavior is kept: No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s) --- src/mds/mds_c_api.c | 180 src/mds/mds_c_sndrcv.c | 38 + src/mds/mds_core.h | 30 ++- src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c | 22 - src/mds/mds_main.c | 4 + 6 files changed, 221 insertions(+), 55 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index 132555b8e..24b524d92 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -27,6 +27,62 @@ #include "mds_log.h" #include "mds_core.h" +/* Internal use functions */ +static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID svc_id) +{ + MDS_TMR_REQ_INFO *tmr_req_info = calloc(1, sizeof(MDS_TMR_REQ_INFO)); + if (tmr_req_info == NULL) { + m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n"); + abort(); + } + + tmr_req_info->type = MDS_DOWN_TMR; + tmr_req_info->info.down_event_tmr_info.adest = adest; + tmr_req_info->info.down_event_tmr_info.svc_id = svc_id; + + tmr_t tmr_id = ncs_tmr_alloc(__FILE__, __LINE__); + if (tmr_id == NULL) { + m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n"); + abort(); + } + + tmr_req_info->info.down_event_tmr_info.tmr_id = tmr_id; + + uint32_t tmr_hdl = + ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_COMMON, +(NCSCONTEXT)(tmr_req_info)); + + if (svc_id == 0) { + MDS_ADEST_INFO 
*adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (adest_info) { + adest_info->tmr_req_info = tmr_req_info; + adest_info->tmr_hdl = tmr_hdl; + } + } + tmr_id = ncs_tmr_start(tmr_id, MDS_DOWN_TMR_VAL, + (TMR_CALLBACK)mds_tmr_callback, + (void *)(long)(tmr_hdl), __FILE__, __LINE__); + assert(tmr_id != NULL); +} + +static void stop_mds_down_tmr(MDS_ADEST_INFO *adest_info) +{ + assert(adest_info != NULL); + if (adest_info->tmr_req_info) { + MDS_TMR_REQ_INFO *tmr_req_info = adest_info->tmr_req_info; + ncs_tmr_stop(tmr_req_info->info.down_event_tmr_info.tmr_id); + ncs_tmr_free(tmr_req_info->info.down_event_tmr_info.tmr_id); + m_MMGR_FREE_TMR_INFO(tmr_req_info); + adest_info->tmr_req_info = NULL; + ncshm_destroy_hdl( + NCS_SERVICE_ID_COMMON, + (uint32_t)adest_info->tmr_hdl); + } +} + /* Function NAME: mds_validate_pwe_hdl @@ -1449,6 +1505,25 @@ uint32_t mds_mcm_svc_unsubscribe(NCSMDS_INFO *info) subscr_req_hdl); } + /* Find and delete related adest from adest list */ + MDS_SUBSCRIPTION_RESULTS_INFO *s_info = NULL; + mds_subtn_res_tbl_getnext_any(svc_hdl, + info->info.svc_cancel.i_svc_ids[i], + &s_info); + if (s_info) { + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *) + ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&s_info->key.adest); + if (adest_info) { + stop_mds_down_tmr(adest_info); + ncs_patricia_tree_del( + &gl_mds_mcm_cb->adest_list, + (NCS_PATRICIA_NODE *)adest_info); + } + } + /* Delete all MDTM entries */ mds_subtn_res_tbl_del_all(svc_hdl, info->info.svc_cancel.i_svc_ids[i]); @@ -1900,6 +1975,29 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /*** Valid
[devel] [PATCH 1/1] mds: not waste 1.5s in waiting dead Adest to send RSP [#3102]
- When sending a response message to an Adest that does not exist (already down), MDS currently waits for 1.5 seconds before concluding that there is no route to send the response message. - Here is the scenario that may waste 1.5s: SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s) - With this change, MDS no longer wastes 1.5s, a delay which can cause trouble for higher layer services, e.g. ntf, imm, etc.: SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 10ms) - Note that the following original behavior is kept: No SVC UP before -> get SNDRSP -> send RSP (wait 1.5s) --- src/mds/mds_c_api.c | 72 +++-- src/mds/mds_c_sndrcv.c | 38 ++ src/mds/mds_core.h | 29 +++-- src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c | 22 - src/mds/mds_main.c | 4 +++ 6 files changed, 145 insertions(+), 22 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index 132555b8e..1078be35a 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -1900,6 +1900,28 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /*** Validation for SCOPE **/ + if (adest != m_MDS_GET_ADEST) { + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (!adest_info) { + /* Add adest to adest list */ + adest_info = m_MMGR_ALLOC_ADEST_INFO; + memset(adest_info, 0, sizeof(MDS_ADEST_INFO)); + adest_info->adest = adest; + adest_info->node.key_info = + (uint8_t *)&adest_info->adest; + adest_info->svc_cnt = 1; + adest_info->tmr_start = false; + ncs_patricia_tree_add( + &gl_mds_mcm_cb->adest_list, + (NCS_PATRICIA_NODE *)adest_info); + } else { + adest_info->svc_cnt++; + } + } + status = mds_get_subtn_res_tbl_by_adest(local_svc_hdl, svc_id, vdest_id, adest, &log_subtn_result_info); @@ -3379,6 +3401,7 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID svc_id) { + int64_t timeout = MDS_DOWN_TMR_VAL; 
MDS_TMR_REQ_INFO *tmr_req_info = calloc(1, sizeof(MDS_TMR_REQ_INFO)); if (tmr_req_info == NULL) { m_MDS_LOG_ERR("mds_mcm_svc_down out of memory\n"); @@ -3400,8 +3423,9 @@ static void start_mds_down_tmr(MDS_DEST adest, MDS_SVC_ID svc_id) uint32_t tmr_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_COMMON, (NCSCONTEXT)(tmr_req_info)); - - tmr_id = ncs_tmr_start(tmr_id, 1000, // 10ms unit + if (svc_id == 0) + timeout = MDS_ADEST_DOWN_TMR_VAL; + tmr_id = ncs_tmr_start(tmr_id, timeout, (TMR_CALLBACK)mds_tmr_callback, (void *)(long)(tmr_hdl), __FILE__, __LINE__); assert(tmr_id != NULL); @@ -3571,6 +3595,24 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /* Discard : Getting down before getting up */ } else { /* Entry exist in subscription result table */ + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (adest_info) { + adest_info->svc_cnt--; + if (adest_info->svc_cnt == 0 && + adest_info->tmr_start == false) { + m_MDS_LOG_INFO( + "MCM:API: Adest <0x%08x, %u>" + " down timer start", + m_MDS_GET_NODE_ID_FROM_ADEST(adest), + m_MDS_GET_PROCESS_ID_FROM_ADEST(adest)); + adest_info->tmr_start = true; + start_mds_down_tmr(adest, 0); + } + } + if (vdest_id == m_VDEST_ID_FOR_ADEST_ENTRY) { status = mds_subtn_res_tbl_del( local_svc_hdl, svc_id, vdest_id, adest, @@ -4956,6 +4998,17 @@ uint32_t mds_mcm_init(void) return NCSCC_RC_FAILURE; } + /* ADEST TREE */ + memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS)); + pat_tree_params.key_size