Re: [devel] [PATCH 1/1] lgs: fix memory leak reeported by valgrind [#3195]
Oh, I see. Thanks Thang. On 6/2/20 2:00 PM, Thang Duc Nguyen wrote: Hi Vu, Thanks for your comments. See my respond inline. B.R/Thang -Original Message- From: Vu Minh Nguyen Sent: Tuesday, June 2, 2020 12:14 PM To: Thang Duc Nguyen ; Thien Minh Huynh ; Thuan Tran ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [devel] [PATCH 1/1] lgs: fix memory leak reeported by valgrind [#3195] Hi Thang, See my comments inline. /Vu On 6/2/20 11:11 AM, thang.d.nguyen wrote: Fix definitely lost reported by valgrind. --- src/base/daemon.c | 2 -- src/log/logd/lgs_imm.cc | 8 src/log/logd/lgs_mbcsv.cc | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/base/daemon.c b/src/base/daemon.c index 48a0665f2..56f5aa8ff 100644 --- a/src/base/daemon.c +++ b/src/base/daemon.c @@ -57,7 +57,6 @@ #define DEFAULT_RUNAS_USERNAME "opensaf" -static const char *internal_version_id_; static char fifo_file[NAME_MAX]; static char __pidfile[NAME_MAX]; @@ -294,7 +293,6 @@ void daemonize(int argc, char *argv[]) char buf1[256 + sizeof("_SCHED_PRIORITY")] = {0}; char buf2[256 + sizeof("_SCHED_POLICY")] = {0}; - internal_version_id_ = strdup("@(#) $Id: " INTERNAL_VERSION_ID " $"); [Vu] I think this line is intentional. Refer to this commit for more info. [Thang]: OK. Will keep. Will find another way. commit 892322c6810e892e12c1042f076069c33745c6ca Author: Hans Nordeback Date: Tue Jan 7 14:57:46 2014 +0100 osaf: Improve fault analyse by using current changeset when configuring osaf [#676] Current changeset is visible in the syslog, in corefiles using ident, and also in the amfd and amfnd binaries (use ident). Updated wih review comments. if (argc > 0 && argv != NULL) { __parse_options(argc, argv); diff --git a/src/log/logd/lgs_imm.cc b/src/log/logd/lgs_imm.cc index 9094be5f3..1889a57b3 100644 --- a/src/log/logd/lgs_imm.cc +++ b/src/log/logd/lgs_imm.cc @@ -3027,6 +3027,14 @@ SaAisErrorT lgs_imm_init_configStreams(lgs_cb_t *cb) { } done: + /* Free memory allocated for attribute descriptions */ om_rc = + saImmOmClassDescriptionMemoryFree_2(omHandle, attr_definitions); if + (om_rc != SA_AIS_OK) { +LOG_NO("saImmOmClassDescriptionMemoryFree_2() Fail %s", + saf_error(om_rc)); +goto done; + } [Vu] I think the memory is freed when the search handle or om handle is finalized in below lines. I is probably mentioned in SAF spec. You can refer to it for more info. [Thang]: I think these memory allocate for these attribtutes can not release by finalize. There are some services still free like that. And mem leak was reported if not free. A sample code that invoked free explicitely. You can refer to get_SaLogStreamConfig_default( ) /Vu + /* Do not abort if error when finalizing */ om_rc = immutil_saImmOmSearchFinalize(immSearchHandle); if (om_rc != SA_AIS_OK) { diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..ad98b1036 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -341,6 +341,7 @@ uint32_t edp_ed_open_stream_rec(EDU_HDL *edu_hdl, EDU_TKN *edu_tkn, } else { ckpt_open_stream_msg_ptr = static_cast(ptr); } +osafassert(ckpt_open_stream_msg_ptr != NULL); rc = m_NCS_EDU_RUN_RULES(edu_hdl, edu_tkn, ckpt_open_stream_rec_ed_rules, ckpt_open_stream_msg_ptr, ptr_data_len, buf_env, ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] lgs: fix memory leak reeported by valgrind [#3195]
Hi Thang, See my comments inline. /Vu On 6/2/20 11:11 AM, thang.d.nguyen wrote: Fix definitely lost reported by valgrind. --- src/base/daemon.c | 2 -- src/log/logd/lgs_imm.cc | 8 src/log/logd/lgs_mbcsv.cc | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/base/daemon.c b/src/base/daemon.c index 48a0665f2..56f5aa8ff 100644 --- a/src/base/daemon.c +++ b/src/base/daemon.c @@ -57,7 +57,6 @@ #define DEFAULT_RUNAS_USERNAME "opensaf" -static const char *internal_version_id_; static char fifo_file[NAME_MAX]; static char __pidfile[NAME_MAX]; @@ -294,7 +293,6 @@ void daemonize(int argc, char *argv[]) char buf1[256 + sizeof("_SCHED_PRIORITY")] = {0}; char buf2[256 + sizeof("_SCHED_POLICY")] = {0}; - internal_version_id_ = strdup("@(#) $Id: " INTERNAL_VERSION_ID " $"); [Vu] I think this line is intentional. Refer to this commit for more info. commit 892322c6810e892e12c1042f076069c33745c6ca Author: Hans Nordeback Date: Tue Jan 7 14:57:46 2014 +0100 osaf: Improve fault analyse by using current changeset when configuring osaf [#676] Current changeset is visible in the syslog, in corefiles using ident, and also in the amfd and amfnd binaries (use ident). Updated wih review comments. if (argc > 0 && argv != NULL) { __parse_options(argc, argv); diff --git a/src/log/logd/lgs_imm.cc b/src/log/logd/lgs_imm.cc index 9094be5f3..1889a57b3 100644 --- a/src/log/logd/lgs_imm.cc +++ b/src/log/logd/lgs_imm.cc @@ -3027,6 +3027,14 @@ SaAisErrorT lgs_imm_init_configStreams(lgs_cb_t *cb) { } done: + /* Free memory allocated for attribute descriptions */ + om_rc = saImmOmClassDescriptionMemoryFree_2(omHandle, attr_definitions); + if (om_rc != SA_AIS_OK) { +LOG_NO("saImmOmClassDescriptionMemoryFree_2() Fail %s", + saf_error(om_rc)); +goto done; + } [Vu] I think the memory is freed when the search handle or om handle is finalized in below lines. I is probably mentioned in SAF spec. You can refer to it for more info. /Vu + /* Do not abort if error when finalizing */ om_rc = immutil_saImmOmSearchFinalize(immSearchHandle); if (om_rc != SA_AIS_OK) { diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..ad98b1036 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -341,6 +341,7 @@ uint32_t edp_ed_open_stream_rec(EDU_HDL *edu_hdl, EDU_TKN *edu_tkn, } else { ckpt_open_stream_msg_ptr = static_cast(ptr); } +osafassert(ckpt_open_stream_msg_ptr != NULL); rc = m_NCS_EDU_RUN_RULES(edu_hdl, edu_tkn, ckpt_open_stream_rec_ed_rules, ckpt_open_stream_msg_ptr, ptr_data_len, buf_env, ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183]
Ack with minor comments. On 5/11/20 2:11 PM, thien.m.huynh wrote: When NFS is unavailable, client try to write to log. Lgs server will put it into the queue with the time. At this time, standby node startup and cold sync. Cause of coredump due to duplicate data (CkptPushAsync) to put queue is NULL. The fix is adding a parametar CkptPushAsync into DecodeColdSync to get data for ckpt_proc_push_async more correctly. [Vu] The description is unclear to me. Here is my suggestion: The standby logsv is crashed during cold sync if having pending write requests in the queue. That happens because the CkptPushAsync data for decoding is referring to wrong data. The fix is to map the CkptPushAsync to the right memory. [Vu] The slogan should be updated too. --- src/log/logd/lgs_cache.cc | 10 +++--- src/log/logd/lgs_cache.h | 4 ++-- src/log/logd/lgs_mbcsv.cc | 6 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc index e3583e97c..ca25e681c 100644 --- a/src/log/logd/lgs_cache.cc +++ b/src/log/logd/lgs_cache.cc @@ -324,8 +324,8 @@ int Cache::EncodeColdSync(NCS_UBAID* uba) const { } int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, - void* vdata, void** vckpt_rec, - size_t ckpt_rec_size) const { + CkptPushAsync* pasync, void* vdata, + void** vckpt_rec, size_t ckpt_rec_size) const { TRACE_ENTER(); assert(is_active() == false && "This instance does not run with standby role"); if (lgs_is_peer_v8() == false) return NCSCC_RC_SUCCESS; @@ -344,14 +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, uint32_t num_rec = header->num_ckpt_records; int rc = NCSCC_RC_SUCCESS; EDU_ERR ederror; - lgsv_ckpt_msg_v8_t msg_v8; - auto data = &msg_v8.ckpt_rec.push_async; - CkptPushAsync* cache_data; while (num_rec) { -cache_data = data; rc = m_NCS_EDU_EXEC(&lgs_cb->edu_hdl, EncodeDecodePushAsync, uba, EDP_OP_TYPE_DEC, -&cache_data, &ederror); +&pasync, &ederror); if (rc != NCSCC_RC_SUCCESS) { m_NCS_EDU_PRINT_ERROR_STRING(ederror); return rc; diff --git a/src/log/logd/lgs_cache.h b/src/log/logd/lgs_cache.h index a5d6181fb..98ea6791b 100644 --- a/src/log/logd/lgs_cache.h +++ b/src/log/logd/lgs_cache.h @@ -251,8 +251,8 @@ class Cache { int EncodeColdSync(NCS_UBAID* uba) const; // Decode the queue on stanby side. int DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, - void* vdata, void** vckpt_rec, - size_t ckpt_rec_size) const; + CkptPushAsync* pasync, void* vdata, + void** vckpt_rec, size_t ckpt_rec_size) const; private: // Private constructor to not allow to instantiate this object directly, diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..7d097fc28 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -1677,6 +1677,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG *cbk_arg) { size_t ckpt_rec_size; void *vdata; EDU_PROG_HANDLER edp_function_reg = edp_ed_reg_rec; + CkptPushAsync *pasync{nullptr}; TRACE_ENTER(); /* @@ -1690,6 +1691,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG *cbk_arg) { initialize_client_rec_ptr = &data_v9->ckpt_rec.initialize_client; stream_open_rec_ptr = &data_v9->ckpt_rec.stream_open; vdata = data_v9; +pasync = &data_v9->ckpt_rec.push_async; vckpt_rec = &data_v9->ckpt_rec; ckpt_rec_size = sizeof(data_v9->ckpt_rec); edp_function_reg = edp_ed_reg_rec_v6; @@ -1699,6 +1701,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG *cbk_arg) { initialize_client_rec_ptr = &data_v8->ckpt_rec.initialize_client; stream_open_rec_ptr = &data_v8->ckpt_rec.stream_open; vdata = data_v8; +pasync = &data_v8->ckpt_rec.push_async; vckpt_rec = &data_v8->ckpt_rec; ckpt_rec_size = sizeof(data_v8->ckpt_rec); edp_function_reg = edp_ed_reg_rec_v6; @@ -1806,7 +1809,8 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG *cbk_arg) { } /*End while, stream records */ rc = Cache::instance()->DecodeColdSync(&cbk_arg->info.decode.i_uba, header, - vdata, &vckpt_rec, ckpt_rec_size); + pasync, vdata, &vckpt_rec, + ckpt_rec_size); if (rc != NCSCC_RC_SUCCESS) { LOG_NO("DecodeColdSync failed"); goto done; ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-deve
Re: [devel] [PATCH 1/1] lgs: update time in queue every node is active [#3180]
Ack. However, the patch's slogan is a bit unclear to me. ;) Thanks, Vu On 4/24/20 3:31 PM, thang.d.nguyen wrote: When NFS is unavailable, client try to write to log. Lgs server will put it into the queue with the time. This info will check point with standby. Switch-over happens, and NFS is available again. New active will get the diffent time b/w current time and the time put into the queue. The timer on node can be different and it causes the coredump due to the current time less then time put into the queue. Once node is active, it must update the time put into the queue to make it consistent on local node. --- src/log/logd/lgs_cache.cc | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc index d6a282e48..e3583e97c 100644 --- a/src/log/logd/lgs_cache.cc +++ b/src/log/logd/lgs_cache.cc @@ -124,7 +124,11 @@ Cache::Data::Data(std::shared_ptr info, Cache::Data::Data(const CkptPushAsync* data) { TRACE_ENTER(); param_ = std::make_shared(data); - queue_at_ = data->queue_at; + // Don't inherit the queue_at_ from the active node, + // since the timer on both nodes may be different. + // Queue_at now is about when the element is actually + // put into the queue of each logsv instance. + queue_at_ = base::TimespecToNanos(base::ReadMonotonicClock()); seq_id_ = data->seq_id; log_record_ = strdup(data->log_record); size_ = strlen(log_record_); ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Ack Thanks, Vu On 4/13/20 9:48 AM, thien.m.huynh wrote: Replace a previous patch of this ticket due to regex is not yet fully supported by gcc 4.8. --- src/log/logd/lgs_filehdl.cc | 53 +++-- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc index 9f4e27979..1743c53fd 100644 --- a/src/log/logd/lgs_filehdl.cc +++ b/src/log/logd/lgs_filehdl.cc @@ -26,7 +26,6 @@ #include #include #include -#include #include "base/logtrace.h" #include "base/osaf_time.h" @@ -955,6 +954,19 @@ static int chr_cnt_b(char *str, char c, int lim) { return cnt; } +/** + * Check if all character is decimal digit + * @param data string to be checked + * @param size number of characters to check + * @return: true if all character is decimal digit + */ +static bool all_digits(const char *data, int size) { + for (int i = 0; i < size; i++) { +if (!isdigit(data[i])) return false; + } + return true; +} + /** * Filter function used by scandir. * Find a current log file if it exist @@ -965,13 +977,38 @@ static int chr_cnt_b(char *str, char c, int lim) { /* Filename prefix (no timestamps or extension */ static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { - const std::string pattern = - "^" + file_name_find_g + "_[0-9]{8}_[0-9]{6}.log$"; - const std::regex e{pattern}; - if (std::regex_match(finfo->d_name, e)) { -return 1; - } - return 0; + size_t name_len = strlen(file_name_find_g.c_str()); + size_t fixed_length = name_len + strlen("_mmdd_hhmmss.log"); + + if (strlen(finfo->d_name) != fixed_length) return 0; + + size_t day_length = strlen("mmdd"); + size_t time_length = strlen("hhmmss"); + int day_offset = 1 + name_len; + int time_offset = 1 + day_offset + day_length; + int extension_offset = time_offset + time_length; + + bool start_with_filename = + strncmp(finfo->d_name, file_name_find_g.c_str(), name_len) == 0; + if (start_with_filename == false) return 0; + + bool underscore_1 = finfo->d_name[day_offset - 1] == '_'; + if (underscore_1 == false) return 0; + + bool mmdd_format = all_digits(finfo->d_name + day_offset, day_length); + if (mmdd_format == false) return 0; + + bool underscore_2 = finfo->d_name[time_offset - 1] == '_'; + if (underscore_2 == false) return 0; + + bool hhmmss_format = all_digits(finfo->d_name + time_offset, time_length); + if (hhmmss_format == false) return 0; + + bool log_extension = + strncmp(finfo->d_name + extension_offset, ".log", strlen(".log")) == 0; + if (log_extension == false) return 0; + + return 1; } /** ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Ack. Regards, Vu On 4/7/20 4:33 PM, thien.m.huynh wrote: --- src/log/logd/lgs_filehdl.cc | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc index 0d7fb2b74..362df4c27 100644 --- a/src/log/logd/lgs_filehdl.cc +++ b/src/log/logd/lgs_filehdl.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include "base/logtrace.h" #include "base/osaf_time.h" @@ -964,13 +965,13 @@ static int chr_cnt_b(char *str, char c, int lim) { /* Filename prefix (no timestamps or extension */ static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { - int found = 0; - - if ((strstr(finfo->d_name, file_name_find_g.c_str()) != nullptr) && - (strstr(finfo->d_name, ".log") != nullptr)) -found = 1; - - return found; + const std::string pattern = + "^" + file_name_find_g + "_[0-9]{8}_[0-9]{6}.log$"; + const std::regex e{pattern}; + if (std::regex_match(finfo->d_name, e)) { +return true; + } + return false; } /** ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: log content is placed in a file of another stream [#3175]
Hi Thien, Given file_name_find_g = "testme". Is filter_logfile_name() expected to return true if finfo->d_name = "testme_20200407_143120_20200407_153120.log"? or finfo->d_name = "testme_2.log.log"? To me, it is better to use regular expression to assure that it only returns true if finfo->d_name matches the pattern: d_name starts with `file_name_find_g`, then follows by '_MMdd_HHmmss' time format and ends with '.log' extension|.|| E.g: |#include static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { const std::string pattern = "^" + file_name_find_g + "_[0-9]{8}_[0-9]{6}.log$"; const std::regex e{pattern}; if (std::regex_match(finfo->d_name, e)) { return true; } return false; } On 4/7/20 1:21 PM, thien.m.huynh wrote: --- src/log/logd/lgs_filehdl.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/log/logd/lgs_filehdl.cc b/src/log/logd/lgs_filehdl.cc index 0d7fb2b74..238259454 100644 --- a/src/log/logd/lgs_filehdl.cc +++ b/src/log/logd/lgs_filehdl.cc @@ -965,8 +965,10 @@ static int chr_cnt_b(char *str, char c, int lim) { static std::string file_name_find_g; static int filter_logfile_name(const struct dirent *finfo) { int found = 0; + int len = file_name_find_g.length(); - if ((strstr(finfo->d_name, file_name_find_g.c_str()) != nullptr) && + if ((strncmp(finfo->d_name, file_name_find_g.c_str(), len) == 0) && + isdigit(finfo->d_name[len + 1]) && (strstr(finfo->d_name, ".log") != nullptr)) found = 1; ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074]
Hi Thuan, Thanks. See my responses inline. Regards, Vu On 2/14/20 11:48 AM, Thuan Tran wrote: Hi Vu, Thanks for comments. Please check my replies inline. *Best Regards,* *ThuanTr*** *From:* Nguyen Minh Vu *Sent:* Thursday, February 13, 2020 5:50 PM *To:* Thuan Tran ; Minh Hon Chau ; Thang Duc Nguyen ; Gary Lee *Cc:* opensaf-devel@lists.sourceforge.net *Subject:* Re: [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074] Hi Thuan, Ack with comments inline. Regards, Vu On 2/12/20 5:36 PM, thuan.tran wrote: - Adapt MDS with this SNA implementation. --- src/base/Makefile.am | 6 +- src/base/sna.h | 136 +++ src/base/tests/sna_test.cc | 117 ++ src/mds/mds_tipc_fctrl_intf.cc | 2 +- src/mds/mds_tipc_fctrl_portid.cc | 17 ++-- src/mds/mds_tipc_fctrl_portid.h | 64 +-- 6 files changed, 267 insertions(+), 75 deletions(-) create mode 100644 src/base/sna.h create mode 100644 src/base/tests/sna_test.cc diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 025fb86a2..5082175cf 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -173,7 +173,8 @@ noinst_HEADERS += \ src/base/unix_client_socket.h \ src/base/unix_server_socket.h \ src/base/unix_socket.h \ - src/base/usrbuf.h + src/base/usrbuf.h \ + src/base/sna.h TESTS += bin/testleap bin/libbase_test bin/core_common_test @@ -237,7 +238,8 @@ bin_libbase_test_SOURCES = \ src/base/tests/time_compare_test.cc \ src/base/tests/time_convert_test.cc \ src/base/tests/time_subtract_test.cc \ - src/base/tests/unix_socket_test.cc + src/base/tests/unix_socket_test.cc \ + src/base/tests/sna_test.cc bin_libbase_test_LDADD = \ $(GTEST_DIR)/lib/libgtest.la \ diff --git a/src/base/sna.h b/src/base/sna.h new file mode 100644 index 0..fee6627bb --- /dev/null +++ b/src/base/sna.h @@ -0,0 +1,136 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + *http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Reference: Serial Number Arithmetic from RFC1982 + * + */ + +#ifndef BASE_SNA_H_ +#define BASE_SNA_H_ + +#include +#include + +#define SNA16_MAX 65536 // 2^16 +#define SNA16_SPACE 32768 // (2^16)/2 +#define SNA32_MAX 4294967296 // 2^32 +#define SNA32_SPACE 2147483648 // (2^32)/2 [Vu] can use: #define SNA16_MAX (1 << 16) #define SNA32_MAX (1 << 32) SPACE ones probably are not necessary. See my comment for space() method below. [Thuan] Is there any benefit with 1 << 16 or 32? I think define a clear value is better. [Vu] the benefit is you won't need to add the comment e.g. // 2^16 About space(), it is intended because I don’t want CPU calculate every time refer to it. + +template +class _sna { [Vu] How about `class SerialNumber {}` [Thuan] OK, I will change the class name. + private: [Vu] Declaration order should start with a |public:| section, followed by |protected:|, then |private:| [Thuan] OK, will change order. + T i; [Vu] Should use a descriptive name. e.g: T value_ {0}; [Thuan] OK, will update the name. + uint64_t max() { + if (typeid(T) == typeid(uint64_t)) { + return SNA32_MAX; + } + if (typeid(T) == typeid(uint32_t)) { + return SNA16_MAX; + } + throw std::out_of_range("Invalid type"); [Vu] OpenSAF code does not handle exception. Should use assertion instead. e.g: assert(0 && "Invalid data type"); [Thuan] I throw to do basic test, if assert() then cannot test the case. Btw, I think throw exception without try catch will also end with terminate? [Vu] I think so; and if you you do tests on Seq16, Seq32, you probably won't reach exceptions. + } + uint64_t space() { + if (typeid(T) == typeid(uint64_t)) { + return SNA32_SPACE; + } + if (typeid(T) == typeid(uint32_t)) { + return SNA16_SPACE; + } + throw std::out_of_range("Invalid type"); [Vu] uint64_t space() { return max()/2;} [T
Re: [devel] [PATCH 1/1] base: add serial number arithmetic (RFC1982) [#3074]
Hi Thuan, Ack with comments inline. Regards, Vu On 2/12/20 5:36 PM, thuan.tran wrote: - Adapt MDS with this SNA implementation. --- src/base/Makefile.am | 6 +- src/base/sna.h | 136 +++ src/base/tests/sna_test.cc | 117 ++ src/mds/mds_tipc_fctrl_intf.cc | 2 +- src/mds/mds_tipc_fctrl_portid.cc | 17 ++-- src/mds/mds_tipc_fctrl_portid.h | 64 +-- 6 files changed, 267 insertions(+), 75 deletions(-) create mode 100644 src/base/sna.h create mode 100644 src/base/tests/sna_test.cc diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 025fb86a2..5082175cf 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -173,7 +173,8 @@ noinst_HEADERS += \ src/base/unix_client_socket.h \ src/base/unix_server_socket.h \ src/base/unix_socket.h \ - src/base/usrbuf.h + src/base/usrbuf.h \ + src/base/sna.h TESTS += bin/testleap bin/libbase_test bin/core_common_test @@ -237,7 +238,8 @@ bin_libbase_test_SOURCES = \ src/base/tests/time_compare_test.cc \ src/base/tests/time_convert_test.cc \ src/base/tests/time_subtract_test.cc \ - src/base/tests/unix_socket_test.cc + src/base/tests/unix_socket_test.cc \ + src/base/tests/sna_test.cc bin_libbase_test_LDADD = \ $(GTEST_DIR)/lib/libgtest.la \ diff --git a/src/base/sna.h b/src/base/sna.h new file mode 100644 index 0..fee6627bb --- /dev/null +++ b/src/base/sna.h @@ -0,0 +1,136 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Reference: Serial Number Arithmetic from RFC1982 + * + */ + +#ifndef BASE_SNA_H_ +#define BASE_SNA_H_ + +#include +#include + +#define SNA16_MAX 65536 // 2^16 +#define SNA16_SPACE 32768 // (2^16)/2 +#define SNA32_MAX 4294967296 // 2^32 +#define SNA32_SPACE 2147483648 // (2^32)/2 [Vu] can use: #define SNA16_MAX (1 << 16) #define SNA32_MAX (1 << 32) SPACE ones probably are not necessary. See my comment for space() method below. + +template +class _sna { [Vu] How about `class SerialNumber {}` + private: [Vu] Declaration order should start with a |public:| section, followed by |protected:|, then |private:| + T i; [Vu] Should use a descriptive name. e.g: T value_ {0}; + uint64_t max() { +if (typeid(T) == typeid(uint64_t)) { + return SNA32_MAX; +} +if (typeid(T) == typeid(uint32_t)) { + return SNA16_MAX; +} +throw std::out_of_range("Invalid type"); [Vu] OpenSAF code does not handle exception. Should use assertion instead. e.g: assert(0 && "Invalid data type"); + } + uint64_t space() { +if (typeid(T) == typeid(uint64_t)) { + return SNA32_SPACE; +} +if (typeid(T) == typeid(uint32_t)) { + return SNA16_SPACE; +} +throw std::out_of_range("Invalid type"); [Vu] uint64_t space() { return max()/2;} + } + + public: + _sna(): i(0) {} + _sna(const _sna &t) { +i = t.i; + } + explicit _sna(const uint64_t &n) { +if ((n < 0) || (n > (max()-1))) + throw std::out_of_range("Invalid initial value"); [Vu] Use assert() instead of throwing exception. +i = n; + } + _sna& operator=(const _sna &t) { +// check for self-assignment +if (&t == this) + return *this; +i = t.i; +return *this; + } + T v() const { +return i; + } + _sna& operator+=(const uint64_t& n) { +if ((n < 0) || (n > (space() - 1))) + throw std::out_of_range("Invalid addition value"); +i = (i + n) % max(); +return *this; + } + friend _sna operator+(_sna m, const uint64_t& n) { [Vu] my suggestion: friend _sna operator+(const _sna& m, ...) { _sna s{m}; s += n; return s; } +m += n; +return m; + } + // prefix ++ + _sna& operator++() { +*this += 1; +return *this; + } + // postfix ++ + _sna operator++(int) { +_sna tmp(*this); +operator++(); +return tmp; + } + bool operator==(const _sna& rhs) { +return i == rhs.i; + } + bool operator==(const uint32_t val) { +return i == val; + } + bool operator!=(const _sna& rhs) { +return i != rhs.i; + } + bool operator<(const _sna& rhs) { +return (i < rhs.i && rhs.i - i < space()) || \ + (i > rhs.i && i - rhs.i > space()); + } + bool operator<=(const _sna& rhs) { +return *this == rhs || *this < rhs; + } + bool operator>(const _sna& rhs) { +re
Re: [devel] [PATCH 1/1] log: add default-value tag to saLogStreamFacilityId of SaLogStream class [#3150]
Ack Thanks, Vu On 2/10/20 5:48 PM, thien.m.huynh wrote: --- src/log/config/logsv_classes.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log/config/logsv_classes.xml b/src/log/config/logsv_classes.xml index 8aa8e69c2..b891b5903 100644 --- a/src/log/config/logsv_classes.xml +++ b/src/log/config/logsv_classes.xml @@ -83,6 +83,7 @@ SA_UINT32_T SA_RUNTIME SA_CACHED + 16 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 0/5] Review Request for build: fix errors from gcc 9.x [#3134]
Hi Alex, Ack from me. Thanks. Regards, Vu On 2/3/20 10:39 PM, Alex Jones wrote: Summary: build: fix errors from gcc 9.x [#3134] Review request for Ticket(s): 3134 Peer Reviewer(s): Tran Pull request to: Affected branch(es): develop Development branch: ticket-3134 Base revision: 876fbce762044d49da8edbd6bfcb059ee59e748e Personal repository: git://git.code.sf.net/u/trguitar/review Impacted area Impact y/n Docs n Build system y RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries n Samples n Tests n Other n NOTE: Patch(es) contain lines longer than 80 characers Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision 1c9c9c9aa23f95939597b0e29055c94c24e2815a Author: Alex Jones Date: Mon, 3 Feb 2020 10:32:17 -0500 build: fix compile errors with gcc 9.x [#3134] Rework fixes in NTF and SMF. revision 560b3243c3bcd821ca67839de8a4ee2825422966 Author: Alex Jones Date: Mon, 3 Feb 2020 10:32:17 -0500 build: fix compile errors from gcc-9.x [#3134] more issues revision 2ccf53568405ea69bb5a1faf1a1eae9644702ab4 Author: Alex Jones Date: Mon, 3 Feb 2020 10:32:17 -0500 build: fix gcc-9.x compiler problems [#3134] more fixes revision 2bec1a88c54b6de9a8d49f98f3d2c1d97cc537a4 Author: Alex Jones Date: Mon, 3 Feb 2020 10:32:17 -0500 build: fix errors from gcc 9.x [#3134] More compiler fixes revision 17a27e953d743bb712f0c377091ee14c2e659b25 Author: Alex Jones Date: Mon, 3 Feb 2020 10:32:17 -0500 build: fix errors from gcc 9.x [#3134] Mostly strncpy and strncat problems. Complete diffstat: -- src/base/daemon.c | 1 + src/ckpt/ckptd/cpd_imm.c | 4 +- src/ckpt/ckptnd/cpnd_res.c | 2 +- src/clm/clmd/clms_imm.cc | 2 +- src/dtm/dtmnd/dtm_intra_svc.cc | 2 +- src/evt/evtd/eds_ll.c | 4 +- src/imm/agent/imma_oi_api.cc | 3 +- src/imm/agent/imma_om_api.cc | 14 ++ src/imm/apitest/management/populate.c | 2 +- .../apitest/management/test_saImmOmClassCreate_2.c | 16 +++ src/imm/common/immpbe_dump.cc | 2 +- src/imm/immd/immd_amf.c | 2 +- src/imm/immloadd/imm_loader.cc | 10 ++-- src/imm/immloadd/imm_pbe_load.cc | 7 +-- src/imm/immnd/immnd_amf.c | 2 +- src/imm/immnd/immnd_evt.c | 8 ++-- src/imm/tools/imm_cfg.c | 2 +- src/imm/tools/imm_import.cc | 16 +++ src/lck/lckd/gld_imm.c | 2 +- src/log/agent/lga_agent.cc | 2 +- src/log/apitest/logtest.c | 2 +- src/log/apitest/tet_LogOiOps.c | 4 +- src/log/logd/lgs_dest.cc | 6 +-- src/log/logd/lgs_util.cc | 10 ++-- src/mds/mds_c_api.c | 4 +- src/msg/common/mqsv_common.c | 1 + src/msg/msgnd/mqnd_evt.c | 1 + src/msg/msgnd/mqnd_imm.c | 5 +- src/msg/msgnd/mqnd_proc.c | 1 + src/ntf/apitest/test_ntf_imcn.cc | 53 -- src/plm/apitest/test_saPlmReadinessTrack.c | 24 +- src/plm/plmcd/plmc_read_config.c | 22 - src/plm/plmcd/plmcd.c | 2 +- src/plm/plmd/plms_imm.c | 5 +- src/rde/rded/rde_rda.cc | 2 +- src/smf/smfd/SmfUtils.cc | 14 ++ src/smf/smfd/smfd_amf.cc | 2 +- 37 files changed, 137 insertions(+), 124 deletions(-) Testing Commands: - *** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES *** Testing, Expected Results: -- *** PASTE COMMAND OUTPUTS / TEST RESULTS *** Conditions of Submission: - *** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC *** Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 n n powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review
Re: [devel] [PATCH 1/5] build: fix errors from gcc 9.x [#3134]
Hi Alex, Ack with a minor comment below. Thanks. Regards, Vu On 2/3/20 10:39 PM, Alex Jones wrote: Mostly strncpy and strncat problems. --- src/base/daemon.c | 1 + src/ckpt/ckptd/cpd_imm.c | 4 ++-- src/ckpt/ckptnd/cpnd_res.c | 2 +- src/clm/clmd/clms_imm.cc | 2 +- src/dtm/dtmnd/dtm_intra_svc.cc | 2 +- src/evt/evtd/eds_ll.c | 4 ++-- src/imm/agent/imma_oi_api.cc | 3 +-- src/imm/agent/imma_om_api.cc | 14 --- src/imm/apitest/management/populate.c | 2 +- .../management/test_saImmOmClassCreate_2.c | 16 ++--- src/imm/immd/immd_amf.c | 2 +- src/imm/immloadd/imm_loader.cc | 10 src/imm/immnd/immnd_amf.c | 2 +- src/imm/immnd/immnd_evt.c | 8 +++ src/imm/tools/imm_cfg.c | 2 +- src/imm/tools/imm_import.cc | 16 + src/lck/lckd/gld_imm.c | 2 +- src/log/agent/lga_agent.cc | 2 +- src/log/apitest/logtest.c | 2 +- src/log/apitest/tet_LogOiOps.c | 4 ++-- src/log/logd/lgs_dest.cc | 6 ++--- src/log/logd/lgs_util.cc | 10 src/mds/mds_c_api.c | 4 ++-- src/msg/common/mqsv_common.c | 1 + src/msg/msgnd/mqnd_evt.c | 1 + src/msg/msgnd/mqnd_imm.c | 5 ++-- src/msg/msgnd/mqnd_proc.c | 1 + src/plm/apitest/test_saPlmReadinessTrack.c | 24 +-- src/plm/plmcd/plmc_read_config.c | 22 - src/plm/plmcd/plmcd.c | 2 +- src/plm/plmd/plms_imm.c | 5 ++-- src/rde/rded/rde_rda.cc | 2 +- src/smf/smfd/SmfUtils.cc | 14 --- src/smf/smfd/smfd_amf.cc | 2 +- 34 files changed, 95 insertions(+), 104 deletions(-) diff --git a/src/base/daemon.c b/src/base/daemon.c index e24eaaaf0..f8e284fa1 100644 --- a/src/base/daemon.c +++ b/src/base/daemon.c @@ -510,6 +510,7 @@ void daemonize(int argc, char *argv[]) void daemonize_as_user(const char *username, int argc, char *argv[]) { strncpy(__runas_username, username, sizeof(__runas_username)); + __runas_username[sizeof(__runas_username) - 1] = '\0'; daemonize(argc, argv); } diff --git a/src/ckpt/ckptd/cpd_imm.c b/src/ckpt/ckptd/cpd_imm.c index af5cc29ec..e2dee0c2b 100644 --- a/src/ckpt/ckptd/cpd_imm.c +++ b/src/ckpt/ckptd/cpd_imm.c @@ -138,8 +138,8 @@ cpd_saImmOiRtAttrUpdateCallback(SaImmOiHandleT immOiHandle, ckpt_name = strdup(object_name); } - TRACE_4("ckpt_name: %s", ckpt_name); - TRACE_4("node_name: %s", node_name); + TRACE_4("ckpt_name: %s", ckpt_name ? ckpt_name : "n/a"); + TRACE_4("node_name: %s", node_name ? node_name : "n/a"); cpd_ckpt_map_node_get(&cb->ckpt_map_tree, ckpt_name, &map_info); diff --git a/src/ckpt/ckptnd/cpnd_res.c b/src/ckpt/ckptnd/cpnd_res.c index 3d69f3f3f..3e97495a9 100644 --- a/src/ckpt/ckptnd/cpnd_res.c +++ b/src/ckpt/ckptnd/cpnd_res.c @@ -422,7 +422,7 @@ void *cpnd_restart_shm_create(NCS_OS_POSIX_SHM_REQ_INFO *cpnd_open_req, cpnd_open_req->info.open.i_flags = O_CREAT | O_RDWR; rc = ncs_os_posix_shm(cpnd_open_req); if (NCSCC_RC_FAILURE == rc) { - LOG_ER("cpnd open request fail for RDWR mode %s", buf); + LOG_ER("cpnd open request fail for RDWR mode %s", buffer); m_MMGR_FREE_CPND_DEFAULT(buffer); return NULL; } diff --git a/src/clm/clmd/clms_imm.cc b/src/clm/clmd/clms_imm.cc index 017607d74..46b045faa 100644 --- a/src/clm/clmd/clms_imm.cc +++ b/src/clm/clmd/clms_imm.cc @@ -227,7 +227,7 @@ CLMS_CLUSTER_NODE *clms_node_new(SaNameT *name, } else if (!strcmp(attr->attrName, "saClmNodeAddress")) { node->node_addr.length = (SaUint16T)strlen(*((char **)value)); strncpy((char *)node->node_addr.value, *((char **)value), - node->node_addr.length); + node->node_addr.length + 1); } else if (!strcmp(attr->attrName, "saClmNodeEE")) { SaNameT *name = (SaNameT *)value; size_t nameLen = osaf_extended_name_length(name); diff --git a/src/dtm/dtmnd/dtm_intra_svc.cc b/src/dtm/dtmnd/dtm_intra_svc.cc index 1affd65d3..cf38e4544 100644 --- a/src/dtm/dtmnd/dtm_intra_svc.cc +++ b/src/dtm/dtmnd/dtm_intra_svc.cc @@ -1523,7 +1523,7 @@ uint32_t dtm_intranode_process_node_up(NODE_ID node_id, char *node_name, uint8_t buffer[DTM_LIB_NODE_UP_MSG_SIZE_FULL]; node_up_msg.node_id = node_id; node_up_msg.i_addr_family = i_addr_family; - strncpy(node_up_msg.node_ip, node_ip, INET6_ADDRSTRLEN); + strncpy(node_up_msg.node_ip, node_ip, INET6_ADDRSTRLEN - 1); strncpy(node_up_msg.node_name, node_db_info->node_name, _POSIX_HOST_NAME_MAX); TRACE("DTM: node_ip:%s, node_id:%u i_addr_family:%d ", node_up_msg.node_ip, diff --git a/src/evt/evtd/eds_ll.c b/src/evt/evtd/eds_ll.c index 7db4839cd..6e559e963 100644 --- a/src/evt/evtd/eds_ll.c +++ b/src/evt/evtd/eds_ll.c @@ -1802,7 +1802,7 @@ uint32_t eds_channel_close(EDS_CB *cb, uint32_t reg_id, uint32_t chan_id, TRACE("use count is zero"); chan_name.length = strlen((char *)wp->cname); strncpy((char *)chan_name.value, (char *)wp->cname, - chan_name.length); + chan_name.length + 1); if ((wp->chan_attrib & CHANNEL_UNLINKED) || (true == forced)) { TRACE( "forced flag is set 'or' CHANNEL is marked as CHANNEL_UNLINKED"); @@ -1874,7 +1874,7 @@ uint32_t eds_channel_unlink(EDS_CB *cb, uint32_t chan_name_len, "Use count is zero, delete the and IMM object"); channel_name.length = strlen((char *)wp->cname
Re: [devel] [PATCH 1/3] log: make facility ID configurable [#3131]
Hi Thien, Ack with comments. Regards, Vu On 1/20/20 2:59 PM, thien.m.huynh wrote: Streaming log record is packaged in RFC5424 format and they all carry a fixed facility ID (16). Therefore, log record receiver i.e. rsyslogd is not able to filter log records on their facility id such as which ones are regular logs or which ones are security. The ticket is to have the facility ID configurable so that the receiver can differentiate among types of log records based on that number. Configure facility id via a new attribute `saLogStreamFacilityId` in class `SaLogStreamConfig`. The valid range is [0-23]. The default value is 16 to keep the streaming feature backward compatible. Always keep the value of attribute `saLogStreamFacilityId` in sync with the standby. If user deletes value of the attribute `saLogStreamFacilityId`, it will go back to the default value (16). --- .../test_saImmOiRtObjectCreate_2.c| 9 +- .../implementer/test_saImmOiRtObjectDelete.c | 4 +- .../test_saImmOiRtObjectUpdate_2.c| 4 +- src/log/Makefile.am | 2 + src/log/config/logsv_classes.xml | 14 + src/log/logd/lgs_cache.cc | 16 +- src/log/logd/lgs_dest.cc | 1 + src/log/logd/lgs_dest.h | 6 +- src/log/logd/lgs_evt.cc | 8 +- src/log/logd/lgs_imm.cc | 83 +++- src/log/logd/lgs_mbcsv.cc | 370 +++--- src/log/logd/lgs_mbcsv.h | 33 +- src/log/logd/lgs_mbcsv_cache.cc | 15 +- src/log/logd/lgs_mbcsv_v2.cc | 2 +- src/log/logd/lgs_mbcsv_v3.cc | 2 +- src/log/logd/lgs_mbcsv_v5.cc | 4 +- src/log/logd/lgs_mbcsv_v9.cc | 242 src/log/logd/lgs_mbcsv_v9.h | 67 src/log/logd/lgs_recov.cc | 10 + src/log/logd/lgs_stream.cc| 13 +- src/log/logd/lgs_stream.h | 1 + src/log/logd/lgs_unixsock_dest.cc | 5 +- 22 files changed, 718 insertions(+), 193 deletions(-) create mode 100644 src/log/logd/lgs_mbcsv_v9.cc create mode 100644 src/log/logd/lgs_mbcsv_v9.h diff --git a/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c b/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c index df260a9d5..709332954 100644 --- a/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c +++ b/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c @@ -59,9 +59,11 @@ static SaImmAttrValuesT_2 v10 = {"saLogStreamSeverityFilter", SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static SaImmAttrValuesT_2 v11 = {"saLogStreamCreationTimestamp", SA_IMM_ATTR_SATIMET, 1, (void **)lint1Values}; +static SaImmAttrValuesT_2 v12 = {"saLogStreamFacilityId", + SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static const SaImmAttrValuesT_2 *attrValues[] = { -&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; +&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, NULL}; static const SaImmClassNameT className = "SaLogStream"; void saImmOiRtObjectCreate_2_01(void) @@ -188,8 +190,9 @@ void saImmOiRtObjectCreate_2_07(void) // const SaNameT *nameValues27[] = {&rdnObj27}; const SaImmAttrValuesT_2 v27 = {"safLgStr", SA_IMM_ATTR_SASTRINGT, 1, (void **)nameValues27}; - const SaImmAttrValuesT_2 *attrValues27[] = { - &v1, &v27, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; + const SaImmAttrValuesT_2 *attrValues27[] = {&v1, &v27, &v3, &v4, &v5, + &v6, &v7, &v8, &v9, &v10, + &v11, &v12, NULL}; SaNameT tmpName; diff --git a/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c b/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c index 986ec88aa..341d5144a 100644 --- a/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c +++ b/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c @@ -59,9 +59,11 @@ static SaImmAttrValuesT_2 v10 = {"saLogStreamSeverityFilter", SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static SaImmAttrValuesT_2 v11 = {"saLogStreamCreationTimestamp", SA_IMM_ATTR_SATIMET, 1, (void **)lint1Values}; +static SaImmAttrValuesT_2 v12 = {"saLogStreamFacilityId", + SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static const SaImmAttrValuesT_2 *attrValues[] = { -&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; +&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, NULL}; static const SaImmClassNameT className = "SaLogStream
Re: [devel] [PATCH 2/3] log: Add test cases of make facility id configurable [#3131]
Hi Thien, Ack with comments inline. Regards, Vu On 1/20/20 2:59 PM, thien.m.huynh wrote: Adding 05 new test case into a new testsuite 22 --- src/log/Makefile.am | 7 +- src/log/apitest/log_server.cc | 43 +++ src/log/apitest/log_server.h | 35 +++ .../apitest/tet_saLogStreamConfigFacilityId.c | 273 ++ src/log/tests/lgs_dest_test.cc| 3 + 5 files changed, 359 insertions(+), 2 deletions(-) create mode 100644 src/log/apitest/log_server.cc create mode 100644 src/log/apitest/log_server.h create mode 100644 src/log/apitest/tet_saLogStreamConfigFacilityId.c diff --git a/src/log/Makefile.am b/src/log/Makefile.am index a2a98baec..2dad3cfb1 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -190,7 +190,8 @@ bin_PROGRAMS += bin/logtest bin/saflogtest bin/logtestfr noinst_HEADERS += \ src/log/apitest/logtest.h \ src/log/apitest/logutil.h \ - src/log/apitest/imm_tstutil.h + src/log/apitest/imm_tstutil.h \ + src/log/apitest/log_server.h bin_logtest_CFLAGS = $(AM_CFLAGS) -Wformat=1 @@ -227,7 +228,9 @@ bin_logtest_SOURCES = \ src/log/apitest/tet_Log_clm.c \ src/log/apitest/tet_cfg_destination.c \ src/log/apitest/tet_multiple_thread.c \ - src/log/apitest/tet_saLogWriteLogAsync_cache.c + src/log/apitest/tet_saLogWriteLogAsync_cache.c \ + src/log/apitest/tet_saLogStreamConfigFacilityId.c \ + src/log/apitest/log_server.cc bin_logtest_LDADD = \ lib/libapitest.la \ diff --git a/src/log/apitest/log_server.cc b/src/log/apitest/log_server.cc new file mode 100644 index 0..25f7894e0 --- /dev/null +++ b/src/log/apitest/log_server.cc @@ -0,0 +1,43 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Author(s): Ericsson AB + * + */ + +#include "log/apitest/log_server.h" +#include "base/unix_server_socket.h" + +static base::UnixServerSocket* server = nullptr; + +void StartUnixServer() { + server = new base::UnixServerSocket("/tmp/test.sock", + base::UnixSocket::kBlocking, 0777); + server->fd(); +} + +bool FindPRI(const char* pri_field, const char* msg) { + char buf[1024]; + bool found = false; + do { +memset(buf, 0, sizeof(buf)); +size_t len = server->Recv(buf, 1024); +buf[len] = '\0'; +if (!strncmp(buf, pri_field, strlen(pri_field))) { + found = true; +} + } while (!strstr(buf, msg)); + return found; +} [Vu] The while loop may be blocked forever. Should add a mechanism to break the loop based on number of retries. + +void StopUnixServer() { delete server; } diff --git a/src/log/apitest/log_server.h b/src/log/apitest/log_server.h new file mode 100644 index 0..e36f885fa --- /dev/null +++ b/src/log/apitest/log_server.h @@ -0,0 +1,35 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Author(s): Ericsson AB + * + */ + +#ifndef LOG_APITEST_LOG_SERVER_H_ +#define LOG_APITEST_LOG_SERVER_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void StartUnixServer(); +bool FindPRI(const char* pri_field, const char* msg); +void StopUnixServer(); + +#ifdef __cplusplus +} // closing brace for extern "C" +#endif + +#endif // LOG_APITEST_LOG_SERVER_H_ diff --git a/src/log/apitest/tet_saLogStreamConfigFacilityId.c b/src/log/apitest/tet_saLogStreamConfigFacilityId.c new file mode 100644 index 0..276642d69 --- /dev/null +++ b/src/log/apitest/tet_saLogStreamConfigFacilityId.c @@ -0,0 +1,273 @@ +/* -*- OpenSAF -*- + * + * Copyright Ericsson AB 2020 - All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * unde
Re: [devel] [PATCH 1/1] imm: imm_adm tool accepts NOT_EXIST [#3141]
Hi Thang, Ack. Thanks, Vu On 1/15/20 12:46 PM, thang.d.nguyen wrote: In case 2 adm ops invoked on the same object with the same adm owner name. These adm op used imm_adm tool. Adm op 1 is interuptted. IMM will delete adm owne name when hard finalize of adm op 1. Then adm op 2 done, imm_adm tool needs accept err NOT_EXIST when release admin owner release. --- src/imm/tools/imm_admin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/imm/tools/imm_admin.c b/src/imm/tools/imm_admin.c index 4c658d3dd..283ee71c9 100644 --- a/src/imm/tools/imm_admin.c +++ b/src/imm/tools/imm_admin.c @@ -557,7 +557,8 @@ int main(int argc, char *argv[]) if (releaseAdmo) { error = immutil_saImmOmAdminOwnerRelease( ownerHandle, objectNames, SA_IMM_ONE); - if (error != SA_AIS_OK) { + if (error != SA_AIS_OK && + error != SA_AIS_ERR_NOT_EXIST) { fprintf( stderr, "error - saImmOmAdminOwnerRelease FAILED: %s\n", ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/3] log: make facility ID configurable [#3131]
Hi Thien, See my comments inline. Regards, Vu On 1/16/20 11:08 AM, thien.m.huynh wrote: Streaming log record is packaged in RFC5424 format and they all carry a fixed facility ID (16). Therefore, log record receiver i.e. rsyslogd is not able to filter log records on their facility id such as which ones are regular logs or which ones are security. The ticket to have the facility ID configurable so that the receiver can [Vu] the ticket is to have ... differentiate among types of log records based on that number. Configure facility id via a new attribute `saLogStreamFacilityId` in class `SaLogStreamConfig`. The valid range is [0-23]. The default value is 16 to keep the streaming feature backward compatible. Always keep the value of attribute `saLogStreamFacilityId` in sync with the standby. If user deletes value of the attribute `saLogStreamFacilityId`, it will go back to the default value (16). --- src/log/Makefile.am | 2 + src/log/config/logsv_classes.xml | 14 ++ src/log/logd/lgs_cache.cc | 1 + src/log/logd/lgs_dest.cc | 1 + src/log/logd/lgs_dest.h | 6 +- src/log/logd/lgs_imm.cc | 74 - src/log/logd/lgs_mbcsv.cc | 187 +-- src/log/logd/lgs_mbcsv.h | 5 +- src/log/logd/lgs_mbcsv_v9.cc | 243 ++ src/log/logd/lgs_mbcsv_v9.h | 67 src/log/logd/lgs_recov.cc | 10 ++ src/log/logd/lgs_stream.cc| 9 ++ src/log/logd/lgs_stream.h | 1 + src/log/logd/lgs_unixsock_dest.cc | 7 +- 14 files changed, 610 insertions(+), 17 deletions(-) create mode 100644 src/log/logd/lgs_mbcsv_v9.cc create mode 100644 src/log/logd/lgs_mbcsv_v9.h diff --git a/src/log/Makefile.am b/src/log/Makefile.am index 767e25369..a2a98baec 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -87,6 +87,7 @@ noinst_HEADERS += \ src/log/logd/lgs_mbcsv_v3.h \ src/log/logd/lgs_mbcsv_v5.h \ src/log/logd/lgs_mbcsv_v6.h \ + src/log/logd/lgs_mbcsv_v9.h \ src/log/logd/lgs_oi_admin.h \ src/log/logd/lgs_recov.h \ src/log/logd/lgs_stream.h \ @@ -151,6 +152,7 @@ bin_osaflogd_SOURCES = \ src/log/logd/lgs_mbcsv_v3.cc \ src/log/logd/lgs_mbcsv_v5.cc \ src/log/logd/lgs_mbcsv_v6.cc \ + src/log/logd/lgs_mbcsv_v9.cc \ src/log/logd/lgs_mds.cc \ src/log/logd/lgs_oi_admin.cc \ src/log/logd/lgs_recov.cc \ diff --git a/src/log/config/logsv_classes.xml b/src/log/config/logsv_classes.xml index 084e8915d..8aa8e69c2 100644 --- a/src/log/config/logsv_classes.xml +++ b/src/log/config/logsv_classes.xml @@ -78,6 +78,12 @@ SA_UINT64_T SA_RUNTIME + + saLogStreamFacilityId + SA_UINT32_T + SA_RUNTIME + SA_CACHED + SA_CONFIG @@ -171,6 +177,14 @@ SA_UINT64_T SA_RUNTIME + + saLogStreamFacilityId + SA_UINT32_T + SA_CONFIG + SA_WRITABLE + SA_STRONG_DEFAULT + 16 +
Re: [devel] [PATCH 2/3] log: Add test cases of make facility id configurable [#3131]
Hi Thien, See my comments inline. Regards, Vu On 1/16/20 11:08 AM, thien.m.huynh wrote: Adding 05 new test case into a new testsuite 22 --- .../test_saImmOiRtObjectCreate_2.c| 9 +- .../implementer/test_saImmOiRtObjectDelete.c | 4 +- .../test_saImmOiRtObjectUpdate_2.c| 4 +- src/log/Makefile.am | 7 +- src/log/apitest/log_server.cc | 47 +++ src/log/apitest/log_server.h | 35 +++ .../apitest/tet_saLogStreamConfigFacilityId.c | 279 ++ src/log/tests/lgs_dest_test.cc| 3 + 8 files changed, 381 insertions(+), 7 deletions(-) create mode 100644 src/log/apitest/log_server.cc create mode 100644 src/log/apitest/log_server.h create mode 100644 src/log/apitest/tet_saLogStreamConfigFacilityId.c diff --git a/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c b/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c index df260a9d5..709332954 100644 --- a/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c +++ b/src/imm/apitest/implementer/test_saImmOiRtObjectCreate_2.c @@ -59,9 +59,11 @@ static SaImmAttrValuesT_2 v10 = {"saLogStreamSeverityFilter", SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static SaImmAttrValuesT_2 v11 = {"saLogStreamCreationTimestamp", SA_IMM_ATTR_SATIMET, 1, (void **)lint1Values}; +static SaImmAttrValuesT_2 v12 = {"saLogStreamFacilityId", + SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static const SaImmAttrValuesT_2 *attrValues[] = { -&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; +&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, NULL}; static const SaImmClassNameT className = "SaLogStream"; void saImmOiRtObjectCreate_2_01(void) @@ -188,8 +190,9 @@ void saImmOiRtObjectCreate_2_07(void) // const SaNameT *nameValues27[] = {&rdnObj27}; const SaImmAttrValuesT_2 v27 = {"safLgStr", SA_IMM_ATTR_SASTRINGT, 1, (void **)nameValues27}; - const SaImmAttrValuesT_2 *attrValues27[] = { - &v1, &v27, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; + const SaImmAttrValuesT_2 *attrValues27[] = {&v1, &v27, &v3, &v4, &v5, + &v6, &v7, &v8, &v9, &v10, + &v11, &v12, NULL}; SaNameT tmpName; diff --git a/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c b/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c index 986ec88aa..341d5144a 100644 --- a/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c +++ b/src/imm/apitest/implementer/test_saImmOiRtObjectDelete.c @@ -59,9 +59,11 @@ static SaImmAttrValuesT_2 v10 = {"saLogStreamSeverityFilter", SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static SaImmAttrValuesT_2 v11 = {"saLogStreamCreationTimestamp", SA_IMM_ATTR_SATIMET, 1, (void **)lint1Values}; +static SaImmAttrValuesT_2 v12 = {"saLogStreamFacilityId", + SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static const SaImmAttrValuesT_2 *attrValues[] = { -&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; +&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, NULL}; static const SaImmClassNameT className = "SaLogStream"; void saImmOiRtObjectDelete_01(void) diff --git a/src/imm/apitest/implementer/test_saImmOiRtObjectUpdate_2.c b/src/imm/apitest/implementer/test_saImmOiRtObjectUpdate_2.c index 1a551fd9f..2f2b941dc 100644 --- a/src/imm/apitest/implementer/test_saImmOiRtObjectUpdate_2.c +++ b/src/imm/apitest/implementer/test_saImmOiRtObjectUpdate_2.c @@ -55,9 +55,11 @@ static SaImmAttrValuesT_2 v10 = {"saLogStreamSeverityFilter", SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static SaImmAttrValuesT_2 v11 = {"saLogStreamCreationTimestamp", SA_IMM_ATTR_SATIMET, 1, (void **)lint1Values}; +static SaImmAttrValuesT_2 v12 = {"saLogStreamFacilityId", + SA_IMM_ATTR_SAUINT32T, 1, (void **)int1Values}; static const SaImmAttrValuesT_2 *attrValues[] = { -&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, NULL}; +&v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, NULL}; static const SaImmClassNameT className = "SaLogStream"; diff --git a/src/log/Makefile.am b/src/log/Makefile.am index a2a98baec..2dad3cfb1 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -190,7 +190,8 @@ bin_PROGRAMS += bin/logtest bin/saflogtest bin/logtestfr noinst_HEADERS += \ src/log/apitest/logtest.h \ src/log/apitest/logutil.h \ - src/log/apitest/imm_tstutil.h + src/log/apitest/imm_tstutil.h \ + src/log/apitest/lo
Re: [devel] [PATCH 3/3] log: update README file for make facility id configurable [#3131]
Hi Thien, Ack this patch with a comment. Regards, Vu On 1/16/20 11:08 AM, thien.m.huynh wrote: --- src/log/README | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/log/README b/src/log/README index 0d01e65d5..1d5469d6c 100644 --- a/src/log/README +++ b/src/log/README @@ -806,4 +806,21 @@ To test this feature, a GCC flag is added during compile time to simulate, SIMULATE_NFS_UNRESPONSE, the case the underlying file system is unresponsive, and it only takes effect when the cache size is given to a non-zero value. With that, the I/O thread will sleep *16 seconds* every 02 write requests. The flag -is disabled in default. \ No newline at end of file +is disabled in default. + + +5. Make facility ID configurable (#3131) +-- +Streaming log record is packaged in RFC5424 format and they all carry a fixed +facility ID (16). Therefore, log record receiver i.e. rsyslogd is not able to +filter log records on their facility id such as which ones are regular logs +or which ones are security. + +The ticket to have the facility ID configurable so that the receiver can +differentiate among types of log records based on that number. + +Configure facility id via a new attribute `saLogStreamFacilityId` in class +`SaLogStreamConfig`. The valid range is [0-23]. The default value is 16 to keep +the streaming feature backward compatible. Always keep the value of attribute +`saLogStreamFacilityId` in sync with the standby. If user deletes value of the +attribute `saLogStreamFacilityId`, it will go back to the default value (16). [Vu] I would suggest to replace the two paragraphs above with this one: To address the limitation above, a new configurable attribute `saLogStreamFacilityId` is added to `SaLogStreamConfig` class; the attribute represents the facility of every streaming log record. It has 16 (local use 0) in default and a valid value is within the range [0 - 23]. When deleting the value of that attribute, it will go back to the default value (16). ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 0/4] Review Request for fix compile errors from gcc-9.x [#3134]
Hi Alex, I've just tried to run tests with your patches, but got many failures. Most come from SMF/NTF tests. We will review your patches and come with comments later. Regards, Vu On 1/14/20 1:34 AM, Jones, Alex wrote: Summary: build: fix errors from gcc 9.x [#3134] Review request for Ticket(s): 3134 Peer Reviewer(s): *** LIST THE TECH REVIEWER(S) / MAINTAINER(S) HERE *** Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-3134 Base revision: 740100f2ebfb5458a8052dea29b5583b3dc8df5a Personal repository: git://git.code.sf.net/u/trguitar/review Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries n Samples n Tests n Other n NOTE: Patch(es) contain lines longer than 80 characers Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision 2058aa9385459406ab900296b68457ca6901dd22 Author:Alex Jones Date:Mon, 13 Jan 2020 13:15:40 -0500 build: fix compile errors from gcc-9.x [#3134] more issues revision d59ecf823b0b4ffa4ddf94f29d813bc6b48cef0e Author:Alex Jones Date:Mon, 13 Jan 2020 13:15:40 -0500 build: fix gcc-9.x compiler problems [#3134] more fixes revision b4bd95b5335341b57347b19dc9ae3f8b9d8b76ce Author:Alex Jones Date:Mon, 13 Jan 2020 13:15:40 -0500 build: fix errors from gcc 9.x [#3134] More compiler fixes revision d2e71dd1f444b2353b8ea23937b4183a9169603f Author:Alex Jones Date:Mon, 13 Jan 2020 13:15:40 -0500 build: fix errors from gcc 9.x [#3134] Mostly strncpy and strncat problems. Complete diffstat: -- src/base/daemon.c | 1 + src/ckpt/ckptd/cpd_imm.c | 4 +- src/ckpt/ckptnd/cpnd_res.c | 2 +- src/clm/clmd/clms_imm.cc | 2 +- src/dtm/dtmnd/dtm_intra_svc.cc | 2 +- src/evt/evtd/eds_ll.c | 4 +- src/imm/agent/imma_oi_api.cc | 3 +- src/imm/agent/imma_om_api.cc | 14 ++ src/imm/apitest/management/populate.c | 2 +- .../apitest/management/test_saImmOmClassCreate_2.c | 16 +++ src/imm/common/immpbe_dump.cc | 2 +- src/imm/immd/immd_amf.c | 2 +- src/imm/immloadd/imm_loader.cc | 10 ++-- src/imm/immloadd/imm_pbe_load.cc | 7 +-- src/imm/immnd/immnd_amf.c | 2 +- src/imm/immnd/immnd_evt.c | 8 ++-- src/imm/tools/imm_cfg.c | 2 +- src/imm/tools/imm_import.cc | 16 +++ src/lck/lckd/gld_imm.c | 2 +- src/log/agent/lga_agent.cc | 2 +- src/log/apitest/logtest.c | 2 +- src/log/apitest/tet_LogOiOps.c | 4 +- src/log/logd/lgs_dest.cc | 6 +-- src/log/logd/lgs_util.cc | 10 ++-- src/mds/mds_c_api.c | 4 +- src/msg/common/mqsv_common.c | 1 + src/msg/msgnd/mqnd_evt.c | 1 + src/msg/msgnd/mqnd_imm.c | 5 +- src/msg/msgnd/mqnd_proc.c | 1 + src/ntf/apitest/test_ntf_imcn.cc | 53 -- src/plm/apitest/test_saPlmReadinessTrack.c | 24 +- src/plm/plmcd/plmc_read_config.c | 22 - src/plm/plmcd/plmcd.c | 2 +- src/plm/plmd/plms_imm.c | 5 +- src/rde/rded/rde_rda.cc | 2 +- src/smf/smfd/SmfUtils.cc | 14 ++ src/smf/smfd/smfd_amf.cc | 2 +- 37 files changed, 137 insertions(+), 124 deletions(-) Testing Commands: - 1) compile code with gcc-9.x 2) run apitests from all the subsystems Testing, Expected Results: -- 1) all code compiles 2) all tests succeed Conditions of Submission: - Acks from developers Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbag
Re: [devel] [PATCH 1/1] dtm: rotate all logtraces on demand [#3133]
Hi Phuc, Ack. Thanks, Vu On 1/7/20 3:11 PM, phuc.h.chau wrote: Adding a new option '--all' that means to rotate all existing logtrace if given along with the '--rotate' option. This patch also corrects wrong indentation in osaflog.cc file. --- src/base/log_writer.h | 1 + src/dtm/README | 15 +- src/dtm/tools/osaflog.cc| 44 +++-- src/dtm/transport/log_server.cc | 12 ++- src/dtm/transport/log_server.h | 8 ++-- 5 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/base/log_writer.h b/src/base/log_writer.h index ab2bf32..abd6d47 100644 --- a/src/base/log_writer.h +++ b/src/base/log_writer.h @@ -47,6 +47,7 @@ class LogWriter { void Flush(); void RotateLog(); void SetLogFile(const std::string& log_file) { log_file_ = log_file; } + size_t file_size() const { return current_file_size_; } private: constexpr static const size_t kBufferSize = 128 * size_t{1024}; diff --git a/src/dtm/README b/src/dtm/README index 430ff19..ff73af7 100644 --- a/src/dtm/README +++ b/src/dtm/README @@ -190,6 +190,9 @@ Options: --delete Delete the specified LOGSTREAM(s) by removing allocated resources in the log server. Does not delete log files from disk. +--rotate Rotate the specified LOGSTREAM(s). +--all Rotate all LOGSTREAM(s). + This option only works with '--rotate'. --max-file-size=SIZE Set the maximum size of the log file to SIZE bytes. The log file will be rotated when it exceeds this size. Suffixes k, M and @@ -197,4 +200,14 @@ Options: gigabytes. --max-backups=NUM Set the maximum number of backup files to retain during log rotation to NUM. - +--extract-trace + If a process produces a core dump file has + THREAD_TRACE_BUFFER enabled, this option + reads the to extract the trace + strings in all threads and writes them to + the file. +--max-idle=NUMSet the maximum number of idle time to NUM" + minutes. If a stream has not been used for + the given time, the stream will be closed. + Given zero (default) to max-idle to disable + this functionality. diff --git a/src/dtm/tools/osaflog.cc b/src/dtm/tools/osaflog.cc index f6fa168..b1fb461 100644 --- a/src/dtm/tools/osaflog.cc +++ b/src/dtm/tools/osaflog.cc @@ -55,6 +55,7 @@ uint64_t Random64Bits(uint64_t seed); bool PrettyPrint(const std::string& log_stream); bool Delete(const std::string& log_stream); bool Rotate(const std::string& log_stream); +bool RotateAll(); std::list OpenLogFiles(const std::string& log_stream); std::string PathName(const std::string& log_stream, int suffix); uint64_t GetInode(int fd); @@ -72,6 +73,7 @@ int main(int argc, char** argv) { {"print", no_argument, nullptr, 'p'}, {"delete", no_argument, nullptr, 'd'}, {"rotate", no_argument, nullptr, 'r'}, + {"all", no_argument, nullptr, 'a'}, {"extract-trace", required_argument, 0, 'e'}, {"max-idle", required_argument, 0, 'i'}, {0, 0, 0, 0}}; @@ -93,6 +95,7 @@ int main(int argc, char** argv) { bool pretty_print_set = false; bool delete_set = false; bool rotate_set = false; + bool rotate_all = false; bool max_file_size_set = false; bool max_backups_set = false; bool max_idle_set = false; @@ -105,7 +108,7 @@ int main(int argc, char** argv) { exit(EXIT_FAILURE); } - while ((option = getopt_long(argc, argv, "m:b:p:f:e:i:r", + while ((option = getopt_long(argc, argv, "m:b:p:f:e:i:ra", long_options, &long_index)) != -1) { switch (option) { case 'p': @@ -121,6 +124,9 @@ int main(int argc, char** argv) { case 'r': rotate_set = true; break; + case 'a': +rotate_all = true; +break; case 'm': max_file_size = base::StrToUint64(optarg, &max_file_size_set); @@ -175,15 +181,15 @@ int main(int argc, char** argv) { pretty_print_set = true; flush_set = true; } - - if ((argc <= optind && (pretty_print_set || delete_set || rotate_set)) || - (pretty_print_set && delete_set)) { - PrintUsage(argv[0]); - exit(EXIT_FAILURE); - } - + if ((argc <= optind && (pretty_print_set || delete_set)) || + (pretty_print_set && delete_set) || + (rotate_all && !rotate_set) || + (argc == optind && ro
Re: [devel] [PATCH 1/1] log: fix segmentation fault in log agent [#3137]
Hi Minh, Thanks for your comments. All cases you mentioned should be protected by the mutex as well. I will send out the updated patch soon. Regards, Vu On 1/6/20 11:08 AM, Minh Hon Chau wrote: Hi Vu, Don't you need to protect the list in ~LogClient()? And in NotifyClientAboutLostInvocations(), does it need to protect before 'read' in the 'for' loop? Otherwise it's ack from me. Thanks Minh On 6/1/20 2:15 pm, Vu Minh Nguyen wrote: log agent did not protect the resource `unacked_invocations_ list` from accessing by multiple threads, so caused segmentation fault. This patch introduces a mutex in order to synchronize the access to that common resource. --- src/log/agent/lga_client.h | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h index f5fa6faa4..c999d148e 100644 --- a/src/log/agent/lga_client.h +++ b/src/log/agent/lga_client.h @@ -174,11 +174,15 @@ class LogClient { // get acknowledgement from it. void KeepTrack(SaInvocationT inv, uint32_t ack_flags) { if (ack_flags != SA_LOG_RECORD_WRITE_ACK) return; + base::Lock scope_lock{mutex_unacked_list_}; unacked_invocations_.push_back(inv); } // Got an acknowledgment, so remove from the track list. - void RemoveTrack(SaInvocationT inv) { unacked_invocations_.remove(inv); } + void RemoveTrack(SaInvocationT inv) { + base::Lock scope_lock{mutex_unacked_list_}; + unacked_invocations_.remove(inv); + } void NotifyClientAboutLostInvocations() { for (const auto& i : unacked_invocations_) { @@ -196,6 +200,8 @@ class LogClient { SendMsgToMbx(msg, MDS_SEND_PRIORITY_HIGH); } + + base::Lock scope_lock{mutex_unacked_list_}; unacked_invocations_.clear(); } @@ -290,6 +296,10 @@ class LogClient { // If cluster goes to headless, log agent will inform to log client with // SA_AIS_ERR_TRY_AGAIN code for these invocations. std::list unacked_invocations_{}; + + // To protect the `unacked_invocations_` list. + base::Mutex mutex_unacked_list_{}; + // LOG handle (derived from hdl-mngr) SaLogHandleT handle_; ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] dtm: rotate all logtraces on demand [#3133]
Hi Phuc, See my comments inline. Regards, Vu On 1/4/20 2:53 PM, phuc.h.chau wrote: Adding a new option '--all' that means to rotate all existing logtrace if given along with the '--rotate' option. --- src/dtm/tools/osaflog.cc| 37 - src/dtm/transport/log_server.cc | 11 +++ src/dtm/transport/log_server.h | 1 + 3 files changed, 40 insertions(+), 9 deletions(-) mode change 100644 => 100755 src/dtm/tools/osaflog.cc mode change 100644 => 100755 src/dtm/transport/log_server.cc mode change 100644 => 100755 src/dtm/transport/log_server.h diff --git a/src/dtm/tools/osaflog.cc b/src/dtm/tools/osaflog.cc old mode 100644 new mode 100755 index f6fa168..0efc989 --- a/src/dtm/tools/osaflog.cc +++ b/src/dtm/tools/osaflog.cc @@ -55,6 +55,7 @@ uint64_t Random64Bits(uint64_t seed); bool PrettyPrint(const std::string& log_stream); bool Delete(const std::string& log_stream); bool Rotate(const std::string& log_stream); +bool RotateAll(); std::list OpenLogFiles(const std::string& log_stream); std::string PathName(const std::string& log_stream, int suffix); uint64_t GetInode(int fd); @@ -72,6 +73,7 @@ int main(int argc, char** argv) { {"print", no_argument, nullptr, 'p'}, {"delete", no_argument, nullptr, 'd'}, {"rotate", no_argument, nullptr, 'r'}, + {"all", no_argument, nullptr, 'a'}, {"extract-trace", required_argument, 0, 'e'}, {"max-idle", required_argument, 0, 'i'}, {0, 0, 0, 0}}; @@ -86,6 +88,7 @@ int main(int argc, char** argv) { bool print_result = true; bool delete_result = true; bool rotate_result = true; + bool rotateall_result = true; bool max_file_size_result = true; bool number_of_backups_result = true; bool max_idle_result = true; @@ -93,6 +96,7 @@ int main(int argc, char** argv) { bool pretty_print_set = false; bool delete_set = false; bool rotate_set = false; + bool rotate_all = false; bool max_file_size_set = false; bool max_backups_set = false; bool max_idle_set = false; @@ -105,7 +109,7 @@ int main(int argc, char** argv) { exit(EXIT_FAILURE); } - while ((option = getopt_long(argc, argv, "m:b:p:f:e:i:r", + while ((option = getopt_long(argc, argv, "m:b:p:f:e:i:r:a", [Vu] optstring should be 'm:b:p:f:e:i:ra' as 'rotate' option requires no argument. long_options, &long_index)) != -1) { switch (option) { case 'p': @@ -121,6 +125,9 @@ int main(int argc, char** argv) { case 'r': rotate_set = true; break; + case 'a': +rotate_all = true; +break; case 'm': max_file_size = base::StrToUint64(optarg, &max_file_size_set); @@ -176,8 +183,10 @@ int main(int argc, char** argv) { flush_set = true; } - if ((argc <= optind && (pretty_print_set || delete_set || rotate_set)) || - (pretty_print_set && delete_set)) { + if ((argc <= optind && (pretty_print_set || delete_set)) || + (pretty_print_set && delete_set) || + (rotate_all && !rotate_set) || + (argc == optind && rotate_set && !rotate_all)) { PrintUsage(argv[0]); exit(EXIT_FAILURE); } @@ -195,10 +204,12 @@ int main(int argc, char** argv) { delete_result = Delete(argv[optind++]); } } - if (rotate_set == true) { -while (rotate_result && optind < argc) { - rotate_result = Rotate(argv[optind++]); -} + if (rotate_all == true && rotate_set == true) { + rotateall_result = RotateAll(); + } else { +while (rotate_result && optind < argc) { + rotate_result = Rotate(argv[optind++]); +} } [Vu] Wrong indentation. Each level should have 02 spaces. if (max_backups_set == true) { number_of_backups_result = NoOfBackupFiles(max_backups); @@ -209,8 +220,10 @@ int main(int argc, char** argv) { if (max_idle_set == true) { max_idle_result = SetMaxIdleTime(max_idle); } - if (flush_result && print_result && max_file_size_result && rotate_result && - delete_result && number_of_backups_result && max_idle_result) + + if (flush_result && print_result && max_file_size_result && + rotate_result && delete_result && number_of_backups_result && + max_idle_result && rotateall_result) exit(EXIT_SUCCESS); exit(EXIT_FAILURE); } @@ -237,6 +250,8 @@ void PrintUsage(const char* program_name) { " removing allocated resources in the log\n" " server. Does not delete log files from disk.\n" "--rotate Rotate the specified LOGSTREAM(s).\n" + "--all Rotate all existing LOGSTREAM(s)\n
Re: [devel] [PATCH 1/5] log: improve the resilience of log service [#3116]
Thanks Gary. I will update the code as your comments. Thanks, Vu On 12/24/19 6:02 AM, Gary Lee wrote: Hi Vu Very, very minor comments with [GL]. Thanks Gary -Original Message- From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] Sent: Thursday, 28 November 2019 7:24 PM To: lennart.l...@ericsson.com; Gary Lee ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen Subject: [PATCH 1/5] log: improve the resilience of log service [#3116] In order to improve resilience of OpenSAF LOG service when underlying file system is unresponsive, a queue is introduced to hold async write request up to an configurable time that is around 15 - 30 seconds. The readiness of the I/O thread will periodically check, and if it turns to ready state, the front element will go first. Returns SA_AIS_ERR_TRY_AGAIN to client if the element stays in the queue longer than the setting time. The queue capacity and the resilient time are configurable via the attributes: `logMaxPendingWriteRequests` and `logResilienceTimeout`. In default, this feature is disabled to keep log server backward compatible. --- src/log/Makefile.am | 21 +- src/log/config/logsv_classes.xml | 43 ++- src/log/logd/lgs_cache.cc| 469 +++ src/log/logd/lgs_cache.h | 287 +++ src/log/logd/lgs_config.cc | 78 - src/log/logd/lgs_config.h| 10 +- src/log/logd/lgs_evt.cc | 161 +++ src/log/logd/lgs_evt.h | 10 + src/log/logd/lgs_file.cc | 8 +- src/log/logd/lgs_filehdl.cc | 58 ++-- src/log/logd/lgs_imm.cc | 40 ++- src/log/logd/lgs_main.cc | 24 +- src/log/logd/lgs_mbcsv.cc| 447 +++-- src/log/logd/lgs_mbcsv.h | 19 +- src/log/logd/lgs_mbcsv_cache.cc | 372 src/log/logd/lgs_mbcsv_cache.h | 110 src/log/logd/lgs_mbcsv_v1.cc | 1 + src/log/logd/lgs_mbcsv_v2.cc | 2 + 18 files changed, 1889 insertions(+), 271 deletions(-) create mode 100644 src/log/logd/lgs_cache.cc create mode 100644 src/log/logd/lgs_cache.h create mode 100644 src/log/logd/lgs_mbcsv_cache.cc create mode 100644 src/log/logd/lgs_mbcsv_cache.h diff --git a/src/log/Makefile.am b/src/log/Makefile.am index f63a4a053..3367ef4f6 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -95,7 +95,9 @@ noinst_HEADERS += \ src/log/logd/lgs_nildest.h \ src/log/logd/lgs_unixsock_dest.h \ src/log/logd/lgs_common.h \ - src/log/logd/lgs_amf.h + src/log/logd/lgs_amf.h \ + src/log/logd/lgs_cache.h \ + src/log/logd/lgs_mbcsv_cache.h bin_PROGRAMS += bin/saflogger @@ -123,6 +125,15 @@ bin_osaflogd_CPPFLAGS = \ -DSA_EXTENDED_NAME_SOURCE \ $(AM_CPPFLAGS) +# Enable this flag to simulate the case that file system is unresponsive +# during write log record. Mainly for testing the following enhancement: +# log: improve the resilience of log service [#3116]. +# When enabled, log handle thread will be suspended 17 seconds every 02 write +# requests and only take affect if the `logMaxPendingWriteRequests` is set +# to an non-zero value. +bin_osaflogd_CPPFLAGS += -DSIMULATE_NFS_UNRESPONSE + + bin_osaflogd_SOURCES = \ src/log/logd/lgs_amf.cc \ src/log/logd/lgs_clm.cc \ @@ -147,7 +158,9 @@ bin_osaflogd_SOURCES = \ src/log/logd/lgs_util.cc \ src/log/logd/lgs_dest.cc \ src/log/logd/lgs_nildest.cc \ - src/log/logd/lgs_unixsock_dest.cc + src/log/logd/lgs_unixsock_dest.cc \ + src/log/logd/lgs_cache.cc \ + src/log/logd/lgs_mbcsv_cache.cc bin_osaflogd_LDADD = \ lib/libosaf_common.la \ @@ -183,6 +196,10 @@ bin_logtest_CPPFLAGS = \ -DSA_EXTENDED_NAME_SOURCE \ $(AM_CPPFLAGS) +# Enable this flag to add test cases for following enhancement: +# log: improve the resilience of log service [#3116]. +bin_logtest_CPPFLAGS += -DSIMULATE_NFS_UNRESPONSE + bin_logtest_SOURCES = \ src/log/apitest/logtest.c \ src/log/apitest/logutil.c \ diff --git a/src/log/config/logsv_classes.xml b/src/log/config/logsv_classes.xml index 9359823ff..084e8915d 100644 --- a/src/log/config/logsv_classes.xml +++ b/src/log/config/logsv_classes.xml @@ -195,7 +195,7 @@ to ensure that default global values in the implementation are also changed acco SA_UINT32_T SA_CONFIG SA_WRITABLE -1024 + 1024 logStreamFileFormat @@ -208,42 +208,42 @@ to ensure that default global values in the implementation are also changed acco SA_UINT32_T SA_CONFIG SA_WRITABLE -0 + 0 lo
Re: [devel] [PATCH 4/5] log: update README file for improvement of log resilience [#3116]
Thanks Gary. I will update the README as your comments. Thanks, Vu On 12/24/19 6:11 AM, Gary Lee wrote: Hi Vu Very minor comments with [GL]. Gary -Original Message- From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] Sent: Thursday, 28 November 2019 7:25 PM To: lennart.l...@ericsson.com; Gary Lee ; Minh Hon Chau Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen Subject: [PATCH 4/5] log: update README file for improvement of log resilience [#3116] --- src/log/README | 38 ++ 1 file changed, 38 insertions(+) diff --git a/src/log/README b/src/log/README index b83d472e4..ab96a8157 100644 --- a/src/log/README +++ b/src/log/README @@ -764,3 +764,41 @@ on AMF role is unnecessary delay the CLM state of a Node (CLM state will available as soon as CLM started), so LGS is a taking AVD Up event as trigger to do CLM initialize. + +4. Improve the resilience of OpenSAF LOG service (#3116) +- +When the file system is unresponsive, log client gets try-again from +write callback very shortly after I/O timeout reaches the setting; the [GL] "reaches the setting" sounds confusing. What setting? +value of I/O timeout is configurable via the attribute logFileIoTimeout +within this valid range [500ms – 5000ms]. This is legacy behavior. + +This ticket improves the resilience of LOG service, so that log service +can cache async write requests up to an configurable time that is [GL] a configurable +around 15-30 seconds before returning status to log client via write async callback. + +The cache size is configurable via a new attribute `logMaxPendingWriteRequests`. +Default value is zero (0) - means this feature is disabled. The valid +range is [current queue size - 1000]. To know what is the current size +of the queue, fetching the value of pure runtime attribute [GL] To find the current size of the queue, fetch the ... +`logCurrentPendingWriteRequests` of `OpenSafLogCurrentConfig` class. +When the cache size reaches the limit, all coming requests will get +acknowledgement right away with SA_AIS_ERR_TRY_AGAIN. + +The resilient timeout can also be configurable via a new attribute +`logResilienceTimeout`. The valid range is [15-30] seconds. When a +pending write async can be dropped and removed from the queue in cases: +a) Stays in the queue longer than the given resilient timeout. +b) The targeting stream has been closed. + +The queue is always kept in sync with standby. + +Besides, log agent has a light list keeping track all invocations which +not yet get acknowledgements from log server. If cluster goes to +headless; in other words, log server is disappeared and all cached data +has been lost, log agent (library) will notify all lost invocations to +log client via write async callback with SA_AIS_ERR_TRY_AGAIN error code. + +To test this feature, a gcc flag is added during compile time to +simulate the case the underlying file system is unresponsive, and it +only takes affect when the cache size is given to an non-zero value. [G][ it only takes effect when the cache size is set to a non-zero value +With that, the I/O thread will sleep *16 seconds* every 02 write requests. \ No newline at end of file -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 0/5] Review Request for log: improve the resilience of log service [#3116]
Hi all, Any comment from this patch series? I will push the patches by this Wed if no more comments. Thanks, Vu On 11/28/19 3:24 PM, Vu Minh Nguyen wrote: Summary: log: improve the resilience of log service [#3116] Review request for Ticket(s): 3116 Peer Reviewer(s): Lennart, Gary, Minh Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-3116 Base revision: 8e07c19aed63c249f4e7fa8470270d2de1a56046 Personal repository: git://git.code.sf.net/u/winhvu/review Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesy OpenSAF servicesn Core libraries n Samples n Tests n Other n NOTE: Patch(es) contain lines longer than 80 characers Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision 2643f8f829bb7ba638193df19cc6f20f86acb497 Author: Vu Minh Nguyen Date: Thu, 28 Nov 2019 15:19:58 +0700 log: add test cases of improving the log resilience [#3116] Adding 08 new test cases into 02 suites: 1) Suite 20 with 07 test cases, including: - Test changing queue size & resilient timeout; - Test if a write async is dropped if its timeout setting is overdue, also verify if log server has kept the request in proper time. - Test if getting write callback right away if the cache is full. - Test if the cache is fully and correctly synced with standby. 2) Suite 21 with one test case: Test if LOG agent notifies all lost invocation to log client. As the suite 21 requires manual interaction, it is put into 'extended' tests. Only run with option '-e'. revision 2a714a95f9f714684840047ae1e02d7c11bca32f Author: Vu Minh Nguyen Date: Thu, 28 Nov 2019 15:19:58 +0700 log: update README file for improvement of log resilience [#3116] revision a103db0f56db32a1abe6f43eda7f06d48b54d469 Author: Vu Minh Nguyen Date: Thu, 28 Nov 2019 15:19:58 +0700 saflogger: make timeout waiting for getting acknowledgment configurable [#3116] Introducing a new option `-t second` or `--timeout=second` to let user input his desired timeout of waiting for write async acknowledgment. Default timeout is 20 seconds to keep saflogger backward compatible. revision 54d46abd5c433ee884294ada6b277b04042f444f Author: Vu Minh Nguyen Date: Thu, 28 Nov 2019 15:19:58 +0700 log: notify all lost log records when cluster goes to headless [#3116] This change introduces a light list keeping all invocations that not yet get the acknowledgement from log server. If the server is disappeared in case of headless, log agent will notify all lost invocations to log client with error code SA_AIS_ERR_TRY_AGAIN. revision 6260044621a2ecfad70bd5eeece3e811dc032188 Author: Vu Minh Nguyen Date: Thu, 28 Nov 2019 15:19:58 +0700 log: improve the resilience of log service [#3116] In order to improve resilience of OpenSAF LOG service when underlying file system is unresponsive, a queue is introduced to hold async write request up to an configurable time that is around 15 - 30 seconds. The readiness of the I/O thread will periodically check, and if it turns to ready state, the front element will go first. Returns SA_AIS_ERR_TRY_AGAIN to client if the element stays in the queue longer than the setting time. The queue capacity and the resilient time are configurable via the attributes: `logMaxPendingWriteRequests` and `logResilienceTimeout`. In default, this feature is disabled to keep log server backward compatible. Added Files: src/log/apitest/tet_saLogWriteLogAsync_cache.c src/log/logd/lgs_cache.cc src/log/logd/lgs_cache.h src/log/logd/lgs_mbcsv_cache.cc src/log/logd/lgs_mbcsv_cache.h Complete diffstat: -- src/log/Makefile.am| 24 +- src/log/README | 38 ++ src/log/agent/lga_agent.cc | 2 + src/log/agent/lga_client.cc| 8 +- src/log/agent/lga_client.h | 34 ++ src/log/apitest/logtest.c | 7 + src/log/apitest/logtest.h | 7 +- src/log/apitest/logutil.c | 14 +- src/log/apitest/tet_log_runtime_cfgobj.c | 2 +- src/log/apitest/tet_saLogWriteLogAsync_cache.c | 648 + src/log/config/logsv_classes.xml | 43 +- src/log/logd/lgs_cache.cc | 469 ++ src/log/logd/lgs_cache.h | 287 +++ src/log/logd/lgs_config.cc | 78 ++- src/log/logd/lgs_config.h | 10 +- src/log/logd/lgs_evt.cc| 161 ++ src/log/l
Re: [devel] [PATCH 1/1] amfd: Fix the data types of attributes inconsistency in get_config() [#3128]
Hi Phuc, Ack. Thanks, Vu On 12/16/19 2:59 PM, phuc.h.chau wrote: In Amfd, for Configuration::get_config(), object osafAmfDelayNodeFailoverTimeout and osafAmfDelayNodeFailoverNodeWaitTimeout are time_t, but the method uses uint32_t to hold the values of those attributes it leads to the stack memory corrupted --- src/amf/amfd/config.cc | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) mode change 100644 => 100755 src/amf/amfd/config.cc diff --git a/src/amf/amfd/config.cc b/src/amf/amfd/config.cc old mode 100644 new mode 100755 index af72840..375f050 --- a/src/amf/amfd/config.cc +++ b/src/amf/amfd/config.cc @@ -43,20 +43,20 @@ static void ccb_apply_modify_hdlr(struct CcbUtilOperationData *opdata) { configuration->restrict_auto_repair(enabled); } else if (!strcmp(attr_mod->modAttr.attrName, "osafAmfDelayNodeFailoverTimeout")) { - uint32_t delay = 0; // default to 0 if attribute is blank + time_t delay = 0; // default to 0 if attribute is blank if (attr_mod->modType != SA_IMM_ATTR_VALUES_DELETE && attr_mod->modAttr.attrValues != nullptr) { -delay = (*((SaUint32T *)attr_mod->modAttr.attrValues[0])); +delay = (*((time_t *)attr_mod->modAttr.attrValues[0])); } avd_cb->node_failover_delay = delay; TRACE("osafAmfDelayNodeFailoverTimeout changed to '%llu'", avd_cb->node_failover_delay); } else if (!strcmp(attr_mod->modAttr.attrName, "osafAmfDelayNodeFailoverNodeWaitTimeout")) { - uint32_t delay = kDefaultNodeWaitTime; + time_t delay = kDefaultNodeWaitTime; if (attr_mod->modType != SA_IMM_ATTR_VALUES_DELETE && attr_mod->modAttr.attrValues != nullptr) { -delay = (*((SaUint32T *)attr_mod->modAttr.attrValues[0])); +delay = (*((time_t *)attr_mod->modAttr.attrValues[0])); } avd_cb->node_failover_node_wait = delay; TRACE("osafAmfDelayNodeFailoverNodeWaitTimeout changed to '%llu'", @@ -166,18 +166,19 @@ SaAisErrorT Configuration::get_config(void) { (SaImmAttrValuesT_2 ***)&attributes) == SA_AIS_OK) { uint32_t value; +time_t time_value; TRACE("reading configuration '%s'", osaf_extended_name_borrow(&dn)); if (immutil_getAttr("osafAmfRestrictAutoRepairEnable", attributes, 0, &value) == SA_AIS_OK) { configuration->restrict_auto_repair(static_cast(value)); } if (immutil_getAttr("osafAmfDelayNodeFailoverTimeout", attributes, 0, -&value) == SA_AIS_OK) { - avd_cb->node_failover_delay = value; +&time_value) == SA_AIS_OK) { + avd_cb->node_failover_delay = time_value; } if (immutil_getAttr("osafAmfDelayNodeFailoverNodeWaitTimeout", attributes, 0, -&value) == SA_AIS_OK) { - avd_cb->node_failover_node_wait = value; +&time_value) == SA_AIS_OK) { + avd_cb->node_failover_node_wait = time_value; } } ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 5/5] log: add test cases of improving the log resilience [#3116]
Hi Thuan, See my responses inline. Regards, Vu On 12/9/19 5:35 PM, Tran Thuan wrote: Hi Vu, Some comments from me: - Remove xid in test code. OK - Use AIS_EVALUATE() and SYS_EVALUATE() that you defined, but you still direct use test_validate()/rc_validate() The macros are used to validate the given pre-condition. If the condition does not meet, the test will get failed and discontinue. These are used for different purposes. - polling_thread() is create thread and wait it done, why not directly call the function? OK. Good point. - In case of reboot SCs for headless, can we use system() command with following? Then no need manual interaction. ssh SC-1 "rdegetrole |grep ACTIVE && reboot" ssh SC-1 "rdegetrole |grep STANDBY && reboot" ssh does not work on UML containers. Best Regards, ThuanTr -Original Message- From: Vu Minh Nguyen Sent: Thursday, November 28, 2019 3:25 PM To: lennart.l...@ericsson.com; gary@dektech.com.au; minh.c...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 5/5] log: add test cases of improving the log resilience [#3116] Adding 08 new test cases into 02 suites: 1) Suite 20 with 07 test cases, including: - Test changing queue size & resilient timeout; - Test if a write async is dropped if its timeout setting is overdue, also verify if log server has kept the request in proper time. - Test if getting write callback right away if the cache is full. - Test if the cache is fully and correctly synced with standby. 2) Suite 21 with one test case: Test if LOG agent notifies all lost invocation to log client. As the suite 21 requires manual interaction, it is put into 'extended' tests. Only run with option '-e'. --- src/log/Makefile.am | 3 +- src/log/apitest/logtest.c | 7 + src/log/apitest/logtest.h | 7 +- src/log/apitest/logutil.c | 14 +- src/log/apitest/tet_log_runtime_cfgobj.c | 2 +- .../apitest/tet_saLogWriteLogAsync_cache.c| 648 ++ 6 files changed, 667 insertions(+), 14 deletions(-) create mode 100644 src/log/apitest/tet_saLogWriteLogAsync_cache.c diff --git a/src/log/Makefile.am b/src/log/Makefile.am index 3367ef4f6..3ec03c097 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -224,7 +224,8 @@ bin_logtest_SOURCES = \ src/log/apitest/tet_log_longDN.c \ src/log/apitest/tet_Log_clm.c \ src/log/apitest/tet_cfg_destination.c \ - src/log/apitest/tet_multiple_thread.c + src/log/apitest/tet_multiple_thread.c \ + src/log/apitest/tet_saLogWriteLogAsync_cache.c bin_logtest_LDADD = \ lib/libapitest.la \ diff --git a/src/log/apitest/logtest.c b/src/log/apitest/logtest.c index aabd1e578..149d27d93 100644 --- a/src/log/apitest/logtest.c +++ b/src/log/apitest/logtest.c @@ -96,6 +96,7 @@ SaLogCallbacksT logCallbacks = {NULL, NULL, NULL}; SaInvocationT invocation = 0; SaSelectionObjectT selectionObject; char log_root_path[PATH_MAX]; +SaLogAckFlagsT ack_flags = 0; void init_logrootpath(void) { @@ -465,6 +466,9 @@ int main(int argc, char **argv) add_suite_14(); add_suite_15(); add_suite_16(); +#ifdef SIMULATE_NFS_UNRESPONSE + add_suite_21(); +#endif test_list(); exit(0); case 'e': @@ -493,6 +497,9 @@ int main(int argc, char **argv) add_suite_14(); add_suite_15(); add_suite_16(); +#ifdef SIMULATE_NFS_UNRESPONSE + add_suite_21(); +#endif break; case 'v': if (silent_flg == true) { diff --git a/src/log/apitest/logtest.h b/src/log/apitest/logtest.h index 68f9df608..e04492086 100644 --- a/src/log/apitest/logtest.h +++ b/src/log/apitest/logtest.h @@ -76,7 +76,7 @@ extern SaSelectionObjectT selectionObject; extern SaNameT logSvcUsrName; extern SaLogRecordT genLogRecord; extern char log_root_path[]; - +extern SaLogAckFlagsT ack_flags; const static SaVersionT kLogVersion = {'A', 0x02, 0x03}; const static SaVersionT kImmVersion = {'A', 02, 11}; @@ -105,6 +105,11 @@ void add_suite_12(void); void add_suite_14(); void add_suite_15(); void add_suite_16(); + +#ifdef SIMULATE_NFS_UNRESPONSE +void add_suite_21(); +#endif + int get_active_sc(void); int get_attr_value(SaNameT *inObjName, char *inAttr, void *outValue); diff --git a/src/log/apitest/logutil.c b/src/log/apitest/logutil.c index 59d255515..d3e0c6297 100644 --- a/src/log/apitest/logutil.c +++ b/src/log/apitest/logutil.c @@ -52,15 +52,7 @@ void cond_check(void) int systemCall(const char *command) { int rc = system(command); - if (rc == -1) { - fprintf(stderr, "system() retuned -1 Failed \n"); - } else { -
Re: [devel] [PATCH 1/5] log: improve the resilience of log service [#3116]
Hi Thuan, See my responses inline. Regards, Vu On 12/9/19 5:32 PM, Tran Thuan wrote: Hi Vu, Some comments from me: - I think need remove xid name in code. OK - CleanOverdueData() should loop to clean all overdue records stead of just one overdue record. No. It should only serve one element each time to avoid blocking the main thread. - In PeriodicCheck, don't need check is_iothread_ready() before Flush() because it is checked inside Flush() Ok. I will remove the check in `PeriodicCheck`. - Flush() mean write all records, but actually just try to write one log record, I think should rename it. Ok. Will rename it to 'FlushFrontElement`. Best Regards, ThuanTr -Original Message- From: Vu Minh Nguyen Sent: Thursday, November 28, 2019 3:24 PM To: lennart.l...@ericsson.com; gary@dektech.com.au; minh.c...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net Subject: [devel] [PATCH 1/5] log: improve the resilience of log service [#3116] In order to improve resilience of OpenSAF LOG service when underlying file system is unresponsive, a queue is introduced to hold async write request up to an configurable time that is around 15 - 30 seconds. The readiness of the I/O thread will periodically check, and if it turns to ready state, the front element will go first. Returns SA_AIS_ERR_TRY_AGAIN to client if the element stays in the queue longer than the setting time. The queue capacity and the resilient time are configurable via the attributes: `logMaxPendingWriteRequests` and `logResilienceTimeout`. In default, this feature is disabled to keep log server backward compatible. --- src/log/Makefile.am | 21 +- src/log/config/logsv_classes.xml | 43 ++- src/log/logd/lgs_cache.cc| 469 +++ src/log/logd/lgs_cache.h | 287 +++ src/log/logd/lgs_config.cc | 78 - src/log/logd/lgs_config.h| 10 +- src/log/logd/lgs_evt.cc | 161 +++ src/log/logd/lgs_evt.h | 10 + src/log/logd/lgs_file.cc | 8 +- src/log/logd/lgs_filehdl.cc | 58 ++-- src/log/logd/lgs_imm.cc | 40 ++- src/log/logd/lgs_main.cc | 24 +- src/log/logd/lgs_mbcsv.cc| 447 +++-- src/log/logd/lgs_mbcsv.h | 19 +- src/log/logd/lgs_mbcsv_cache.cc | 372 src/log/logd/lgs_mbcsv_cache.h | 110 src/log/logd/lgs_mbcsv_v1.cc | 1 + src/log/logd/lgs_mbcsv_v2.cc | 2 + 18 files changed, 1889 insertions(+), 271 deletions(-) create mode 100644 src/log/logd/lgs_cache.cc create mode 100644 src/log/logd/lgs_cache.h create mode 100644 src/log/logd/lgs_mbcsv_cache.cc create mode 100644 src/log/logd/lgs_mbcsv_cache.h diff --git a/src/log/Makefile.am b/src/log/Makefile.am index f63a4a053..3367ef4f6 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -95,7 +95,9 @@ noinst_HEADERS += \ src/log/logd/lgs_nildest.h \ src/log/logd/lgs_unixsock_dest.h \ src/log/logd/lgs_common.h \ - src/log/logd/lgs_amf.h + src/log/logd/lgs_amf.h \ + src/log/logd/lgs_cache.h \ + src/log/logd/lgs_mbcsv_cache.h bin_PROGRAMS += bin/saflogger @@ -123,6 +125,15 @@ bin_osaflogd_CPPFLAGS = \ -DSA_EXTENDED_NAME_SOURCE \ $(AM_CPPFLAGS) +# Enable this flag to simulate the case that file system is unresponsive +# during write log record. Mainly for testing the following enhancement: +# log: improve the resilience of log service [#3116]. +# When enabled, log handle thread will be suspended 17 seconds every 02 write +# requests and only take affect if the `logMaxPendingWriteRequests` is set +# to an non-zero value. +bin_osaflogd_CPPFLAGS += -DSIMULATE_NFS_UNRESPONSE + + bin_osaflogd_SOURCES = \ src/log/logd/lgs_amf.cc \ src/log/logd/lgs_clm.cc \ @@ -147,7 +158,9 @@ bin_osaflogd_SOURCES = \ src/log/logd/lgs_util.cc \ src/log/logd/lgs_dest.cc \ src/log/logd/lgs_nildest.cc \ - src/log/logd/lgs_unixsock_dest.cc + src/log/logd/lgs_unixsock_dest.cc \ + src/log/logd/lgs_cache.cc \ + src/log/logd/lgs_mbcsv_cache.cc bin_osaflogd_LDADD = \ lib/libosaf_common.la \ @@ -183,6 +196,10 @@ bin_logtest_CPPFLAGS = \ -DSA_EXTENDED_NAME_SOURCE \ $(AM_CPPFLAGS) +# Enable this flag to add test cases for following enhancement: +# log: improve the resilience of log service [#3116]. +bin_logtest_CPPFLAGS += -DSIMULATE_NFS_UNRESPONSE + bin_logtest_SOURCES = \ src/log/apitest/logtest.c \ src/log/apitest/logutil.c \ diff --git a/src/log/config/logsv_classes.xml b/src/log/config/logsv_classes.xml index 9359823ff..084e8915d 100644 --- a/src/log/config/logsv_classes.xml +++ b/src/log/config/logsv_classes.xml @@ -195,7 +195,7 @@ to ensure that default global values in the implementation are also changed acco SA_UINT32_T
Re: [devel] [PATCH 0/1] Review Request for mds: not waste 1.5s in waiting dead Adest to send RSP [#3102] V2 (updated)
Hi Minh, I have no comment on this patch. Thanks. Regards, Vu On 12/5/19 11:29 AM, Minh Hon Chau wrote: Hi Thuan One minor comment, we could separate this commit into one for code change, one for test case. @Vu, you have any comments? Thanks Minh On 27/11/19 1:21 pm, thuan.tran wrote: Summary: mds: not waste 1.5s in waiting dead Adest to send RSP [#3102] Review request for Ticket(s): 3102 Peer Reviewer(s): Minh, Vu, Thang, Gary Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-3102 Base revision: b61bee5c8accd79e573ef726d40b945afc7c7b3e Personal repository: git://git.code.sf.net/u/thuantr/review Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries y Samples n Tests y Other n NOTE: Patch(es) contain lines longer than 80 characers Comments (indicate scope for each "y" above): - N/A revision f4f5ab3efe19bdd11c5cb43e4f4d48af79656737 Author: thuan.tran Date: Tue, 26 Nov 2019 15:58:34 +0700 mds: not waste 1.5s in waiting dead Adest to send RSP [#3102] - When sending response message to Adest which is not exist (crash/terminate), current MDS try to wait for 1.5 seconds before conclude no route to send RSP. - Here are scenarios may waste 1.5s waiting: SVCs DOWN (dead adest or vdest role change) -> get SNDRSP -> send RSP (wait 1.5s) get SNDRSP -> SVCs DOWN (dead adest or vdest role change) -> send RSP (wait 1.5s) This long wait time cause trouble for higher layer services, e.g: ntf, imm, etc... where there are many agents send initialize request (use message SNDRSP type) - Solution: create adest list, a timer start when last SVC of adest DOWN. When sending RSP to this adest, the wait time will reduce to only 10ms. Notice that following origin behavior is kept: No any SVC UP before -> get SNDRSP -> send RSP (wait 1.5s) - New TC tet_send_response_tp_13() is created to verify this scenario. Complete diffstat: -- src/mds/apitest/mdstipc.h | 1 + src/mds/apitest/mdstipc_api.c | 107 ++ src/mds/apitest/mdstipc_conf.c | 1 - src/mds/mds_c_api.c | 199 + src/mds/mds_c_sndrcv.c | 38 +--- src/mds/mds_core.h | 30 ++- src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c | 24 - src/mds/mds_main.c | 4 + 9 files changed, 350 insertions(+), 56 deletions(-) Testing Commands: - N/A Testing, Expected Results: -- N/A Conditions of Submission: - ACK by reviewers Arch Built Started Linux distro --- mips n n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multi
Re: [devel] [PATCH 2/2] mds: Avoid message reallocation [#3089]
Hi Minh, Ack. Thanks, Vu On 11/28/19 6:54 PM, Minh Chau wrote: The patch avoids message reallocation if the message is in retransmission queue --- src/mds/mds_dt_tipc.c| 68 +++- src/mds/mds_tipc_fctrl_intf.cc | 6 ++-- src/mds/mds_tipc_fctrl_intf.h| 4 +-- src/mds/mds_tipc_fctrl_msg.cc| 2 +- src/mds/mds_tipc_fctrl_portid.cc | 9 ++ 5 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index 722076f..3d4f468 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -120,7 +120,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req); /* Tipc actual send, can be made as Macro even*/ static uint32_t mdtm_sendto(uint8_t *buffer, uint16_t buff_len, - struct tipc_portid tipc_id); + struct tipc_portid tipc_id, uint8_t *is_queued); static uint32_t mdtm_mcast_sendto(void *buffer, size_t size, const MDTM_SEND_REQ *req); @@ -2643,7 +2643,8 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) if (req->snd_type == MDS_SENDTYPE_ACK || req->snd_type == MDS_SENDTYPE_RACK) { uint8_t len = mds_and_mdtm_hdr_len; - uint8_t buffer_ack[len]; + uint8_t *buffer_ack = calloc(1, len); + uint8_t is_queued = 0; /* Add mds_hdr */ if (mdtm_add_mds_hdr(buffer_ack, req) @@ -2657,18 +2658,24 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) &fctrl_seq_num) == NCSCC_RC_FAILURE){ m_MDS_LOG_ERR("FCTRL: Failed to send message" " len :%d", len); + free(buffer_ack); return NCSCC_RC_FAILURE; } /* Add frag_hdr */ if (mdtm_add_frag_hdr(buffer_ack, len, frag_seq_num, 0, fctrl_seq_num) != NCSCC_RC_SUCCESS) { + free(buffer_ack); return NCSCC_RC_FAILURE; } m_MDS_LOG_DBG("MDTM:Sending message with Service" " Seqno=%d, TO Dest_Tipc_id=<0x%08x:%u> ", req->svc_seq_num, tipc_id.node, tipc_id.ref); - return mdtm_sendto(buffer_ack, len, tipc_id); + status = mdtm_sendto(buffer_ack, len, tipc_id, + &is_queued); + if (is_queued == 0) + free(buffer_ack); + return status; } if (req->msg.encoding == MDS_ENC_TYPE_FLAT) { @@ -2730,6 +2737,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) } else { uint8_t *p8; uint8_t *body = NULL; + uint8_t is_queued = 0; body = calloc(1, len + mds_and_mdtm_hdr_len); @@ -2806,8 +2814,8 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) req->dest_svc_id); return NCSCC_RC_FAILURE; } - if (mdtm_mcast_sendto(body, len, req) - != NCSCC_RC_SUCCESS) { + status = mdtm_mcast_sendto(body, len, req); + if (status != NCSCC_RC_SUCCESS) { m_MDS_LOG_ERR("MDTM: Failed to" " send Multicast" " message Data lenght=%d" @@ -2819,24 +2827,20 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) get_svc_names(req->dest_svc_id), req->dest_svc_id, strerror(errno)); - m_MMGR_FREE_BUFR_LIST(usrbuf); - free(body); - return NCSCC_RC_FAILURE; } } else { - if (mdtm_sendto(body, len, tipc_id) - != NCSCC_RC_SUCCESS) { + status = mdtm_sendto(body, len, + tipc_id, &is_queued); + if (
Re: [devel] [PATCH 1/1] mds: Fix mds flow control keep all messages in queue [#3123]
Hi Thuan, Ack with comments inline. Regards, Vu On 11/27/19 6:33 PM, thuan.tran wrote: When overflow happens, mds with flow control enabled may keep all messages in queue if it fails to send a message when receiving Nack or ChunkAck since no more trigger come after that. MDS flow control should retry to send message in this scenario. --- src/mds/mds_tipc_fctrl_portid.cc | 39 +++- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc index 316e1ba75..8081e8bd4 100644 --- a/src/mds/mds_tipc_fctrl_portid.cc +++ b/src/mds/mds_tipc_fctrl_portid.cc @@ -17,6 +17,7 @@ #include "mds/mds_tipc_fctrl_portid.h" #include "base/ncssysf_def.h" +#include "base/osaf_time.h" #include "mds/mds_dt.h" #include "mds/mds_log.h" @@ -149,23 +150,23 @@ void TipcPortId::FlushData() { uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) { struct sockaddr_tipc server_addr; - ssize_t send_len = 0; - uint32_t rc = NCSCC_RC_SUCCESS; - memset(&server_addr, 0, sizeof(server_addr)); server_addr.family = AF_TIPC; server_addr.addrtype = TIPC_ADDR_ID; server_addr.addr.id = id_; - send_len = sendto(bsrsock_, data, length, 0, -(struct sockaddr *)&server_addr, sizeof(server_addr)); - - if (send_len == length) { -rc = NCSCC_RC_SUCCESS; - } else { -m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno)); -rc = NCSCC_RC_FAILURE; + int retry = 5; + while (retry >= 0) { +ssize_t send_len = sendto(bsrsock_, data, length, 0, + (struct sockaddr *)&server_addr, sizeof(server_addr)); + [Vu] Any case the sendto just sends a part of data? if so, the retry if any should not start from the beginning of data. the below code shows what i meant: ssize_t byte_sent = 0; while (retry--) { ssize_t send_len = sendto(bsrsock_, data + byte_sent, length - byte_sent, 0, (struct sockaddr *)&server_addr, sizeof(server_addr)); if (send_lenn == -1) { // error handling here if (errno == EINTR) continue; // error, can't continue. should log something here? return NCSCC_RC_FAILURE; // or assert? } // number of bytes sent byte_sent += send_data; if (byte_sent >= length) { return NCSCC_RC_SUCCESS; } // retry but do we need to sleep here? osaf_nanosleep(&kTenMilliseconds); } +if (send_len == length) { + return NCSCC_RC_SUCCESS; +} else if (retry-- > 0) { + osaf_nanosleep(&kTenMilliseconds); +} } - return rc; + m_MDS_LOG_ERR("FCTRL: sendto() failed, Error[%s]", strerror(errno)); + return NCSCC_RC_FAILURE; } uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length, @@ -440,13 +441,14 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t chksize) { // try to send a few pending msg DataMessage* msg = nullptr; uint16_t send_msg_cnt = 0; -while (send_msg_cnt++ < chunk_size_) { +while (send_msg_cnt < chunk_size_) { // find the lowest sequence unsent yet msg = sndqueue_.FirstUnsent(); if (msg == nullptr) { break; } else { if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) { +send_msg_cnt++; msg->is_sent_ = true; m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], " "SndQData[fseq:%u, len:%u], " @@ -455,7 +457,9 @@ void TipcPortId::ReceiveChunkAck(uint16_t fseq, uint16_t chksize) { msg->header_.fseq_, msg->header_.msg_len_, sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_); } else { -break; +// If not retry, all messages are kept in queue +// and no more trigger to send messages +continue; [Vu] If send is constantly failed, this loop has no way to exit? } } } @@ -508,9 +512,12 @@ void TipcPortId::ReceiveNack(uint32_t mseq, uint16_t mfrag, DataMessage* msg = sndqueue_.Find(Seq16(fseq)); if (msg != nullptr) { // Resend the msg found -if (Send(msg->msg_data_, msg->header_.msg_len_) == NCSCC_RC_SUCCESS) { - msg->is_sent_ = true; +while (Send(msg->msg_data_, msg->header_.msg_len_) != NCSCC_RC_SUCCESS) { + // If not retry, all messages are kept in queue + // and no more trigger to send messages + continue; [Vu] If send is constantly failed, this loop has no way to exit? } +msg->is_sent_ = true; m_MDS_LOG_NOTIFY("FCTRL: [me] --> [node:%x, ref:%u], " "RsndData[mseq:%u, mfrag:%u, fseq:%u], " "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] nid: fix unable to start UML cluster with tipc transport [#3122]
Hi Thuan, Thanks. Here is a new version. Please help to review this one. diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index a63c97046..4573389d5 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -221,19 +221,17 @@ function tipc_duplicate_node_detect () function tipc_configure () { echo "Inserting TIPC mdoule..." - - if ! test -f "$TIPC_MODULE" ; then - modprobe tipc + + # Prefer using modprobe to insmod as modprobe takes care of + # loading all dependencies if any. If any dependent module + # has not yet loaded, insmod will get failed. + if modprobe tipc ; then RM_TIPC_MODULE="modprobe -r tipc" - else - insmod "$TIPC_MODULE" + elif insmod "$TIPC_MODULE" ; then RM_TIPC_MODULE="rmmod $TIPC_MODULE" - fi - - ret_val=$? - if [ $ret_val -ne 0 ] ; then - logger -p user.err " TIPC Module could not be loaded " - exit 1 + else + logger -p user.err " TIPC Module could not be loaded " + exit 1 fi # max_nodes is not supported in TIPC 2.0 Regards, Vu On 11/25/19 2:30 PM, Tran Thuan wrote: Hi Vu, Sorry, I have comments inline. Best Regards, ThuanTr -Original Message- From: Tran Thuan Sent: Monday, November 25, 2019 2:27 PM To: 'Vu Minh Nguyen' ; 'thien.m.hu...@dektech.com.au' Cc: 'opensaf-devel@lists.sourceforge.net' Subject: RE: [PATCH 1/1] nid: fix unable to start UML cluster with tipc transport [#3122] Hi Vu, ACK from me (code review). Best Regards, ThuanTr -Original Message- From: Vu Minh Nguyen Sent: Monday, November 25, 2019 1:45 PM To: thuan.t...@dektech.com.au; thien.m.hu...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen Subject: [PATCH 1/1] nid: fix unable to start UML cluster with tipc transport [#3122] --- src/nid/configure_tipc.in | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index a63c97046..43ddb06e1 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -221,11 +221,13 @@ function tipc_duplicate_node_detect () function tipc_configure () { echo "Inserting TIPC mdoule..." - -if ! test -f "$TIPC_MODULE" ; then - modprobe tipc + +# Prefer using modprobe to insmod as modprobe takes care of +# loading all dependencies if any. If any dependent module +# has not yet loaded, insmod will get failed. +if modprobe tipc ; then [Thuan] ret_val=$? RM_TIPC_MODULE="modprobe -r tipc" -else +else insmod "$TIPC_MODULE" [Thuan] ret_val=$? RM_TIPC_MODULE="rmmod $TIPC_MODULE" fi ret_val=$? [Thuan] Remove ret_val=$? here if [ $ret_val -ne 0 ] ; then logger -p user.err " TIPC Module could not be loaded " exit 1 fi ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: Avoid message re-allocation [#3089]
Hi Minh, Ack with comments inline. Regards, Vu On 11/25/19 1:12 PM, Minh Chau wrote: The patch avoids message reallocation if enable MDS_TIPC_FCTRL_ENABLED --- src/mds/mds_dt_tipc.c| 27 --- src/mds/mds_tipc_fctrl_msg.cc| 2 +- src/mds/mds_tipc_fctrl_portid.cc | 9 +++-- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index fdf0da7..aa8d5c2 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -2644,7 +2644,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) if (req->snd_type == MDS_SENDTYPE_ACK || req->snd_type == MDS_SENDTYPE_RACK) { uint8_t len = sum_mds_hdr_plus_mdtm_hdr_plus_len; - uint8_t buffer_ack[len]; + uint8_t* buffer_ack = calloc(1, len); [Vu] Below this allocation, there are several error handlings, but not free memory before returning. Is that expected? /* Add mds_hdr */ if (NCSCC_RC_SUCCESS != @@ -2667,7 +2667,11 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) m_MDS_LOG_DBG( "MDTM:Sending message with Service Seqno=%d, TO Dest_Tipc_id=<0x%08x:%u> ", req->svc_seq_num, tipc_id.node, tipc_id.ref); - return mdtm_sendto(buffer_ack, len, tipc_id); + status = mdtm_sendto(buffer_ack, len, tipc_id); + if (gl_mds_pro_ver != MDS_PROT_FCTRL) { + free(buffer_ack); + } [Vu] Above allocation does not stick with `MDS_PROT_FCTRL` check, so if the above condition check gets failure, the allocated memory is leaked? + return status; } if (MDS_ENC_TYPE_FLAT == req->msg.encoding) { @@ -2815,6 +2819,8 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) free(body); return NCSCC_RC_FAILURE; } + m_MMGR_FREE_BUFR_LIST(usrbuf); + free(body); } else { if (NCSCC_RC_SUCCESS != mdtm_sendto(body, len, tipc_id)) { @@ -2824,9 +2830,12 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) free(body); return NCSCC_RC_FAILURE; } + if (gl_mds_pro_ver != MDS_PROT_FCTRL) { + m_MMGR_FREE_BUFR_LIST(usrbuf); + free(body); + } } - m_MMGR_FREE_BUFR_LIST(usrbuf); - free(body); + return NCSCC_RC_SUCCESS; } } break; @@ -2909,7 +2918,9 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) mds_free_direct_buff( req->msg.data.buff_info.buff); } - free(body); + if (gl_mds_pro_ver != MDS_PROT_FCTRL) { + free(body); + } return NCSCC_RC_SUCCESS; } break; @@ -3059,21 +3070,23 @@ uint32_t mdtm_frag_and_send(MDTM_SEND_REQ *req, uint32_t seq_num, get_svc_names(req->src_svc_id), req->src_svc_id, get_svc_names(req->dest_svc_id), req->dest_svc_id); ret = mdtm_mcast_sendto(body, len_buf, req); + free(body); } else { m_MDS_LOG_DBG( "MDTM:Sending message with Service Seqno=%d, Fragment Seqnum=%d, frag_num=%d, TO Dest_Tipc_id=<0x%08x:%u>", req->svc_seq_num, seq_num, frag_val, id.node, id.ref); ret = mdtm_sendto(body, len_buf, id); + if (gl_mds_pro_ver != MDS_PROT_FCTRL) { + free(body); + } } if (ret != NCSCC_RC_SUCCESS) { // Failed to send a fragmented msg, stop sending m_MMGR_FREE_BUFR_LIST(usrbuf); - free(body); break; }
Re: [devel] [PATCH 1/1] imm: Fix coding issues identified by codechecker [#3115]
Hi Thuan, Ack with a minor comment. Regards, On 11/4/19 2:57 PM, thuan.tran wrote: --- src/imm/agent/imma_db.cc | 2 +- src/imm/immnd/immnd_main.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/imm/agent/imma_db.cc b/src/imm/agent/imma_db.cc index 071edbe74..80637e55f 100644 --- a/src/imm/agent/imma_db.cc +++ b/src/imm/agent/imma_db.cc @@ -621,7 +621,7 @@ int imma_oi_ccb_record_note_callback(IMMA_CLIENT_NODE *cl_node, rs = 1; } } - if (callback) { + if (tmp && callback) { if (callback->type == IMMA_CALLBACK_OI_CCB_CREATE && !(tmp->adminOwner)) { SaImmAttrValuesT_2 **attributes = (SaImmAttrValuesT_2 **)callback->attrValsForCreateUc; diff --git a/src/imm/immnd/immnd_main.c b/src/imm/immnd/immnd_main.c index 5280f0599..62c7b2478 100644 --- a/src/imm/immnd/immnd_main.c +++ b/src/imm/immnd/immnd_main.c @@ -489,6 +489,7 @@ int main(int argc, char *argv[]) fds[FD_CLM_INIT].fd = immnd_cb->clm_init_sel_obj.rmv_obj; fds[FD_CLM_INIT].events = POLLIN; + osaf_clock_gettime(CLOCK_MONOTONIC, &start_time); [Vu] Is it better to use the one provided by base as below? struct timespec start_time = base::ReadMonotonicClock() while (1) { /* Watch out for performance bug. Possibly change from event-count to recalculated timer. */ ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] log: Fix coding issues identified by codechecker [#3113]
Hi Thuan, Ack with a minor comment. Regards, Vu On 11/4/19 2:17 PM, thuan.tran wrote: --- src/log/logd/lgs_mbcsv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log/logd/lgs_mbcsv.cc b/src/log/logd/lgs_mbcsv.cc index cd3d70009..ebc659ea1 100644 --- a/src/log/logd/lgs_mbcsv.cc +++ b/src/log/logd/lgs_mbcsv.cc @@ -1931,7 +1931,7 @@ static uint32_t ckpt_proc_log_write(lgs_cb_t *cb, void *data) { /* If configured for split file system log records shall be written also if * we are standby. */ - if (lgs_is_split_file_system()) { + if (lgs_is_split_file_system() && (logRecord != NULL)) { [Vu] Prefer using nullptr to NULL size_t rec_len = strlen(logRecord); stream->act_last_close_timestamp = c_file_close_time_stamp; ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] nid: Change the path of TIPC_MODULE [#3110]
Hi Thien, Ack with comments. Regards, Vu On 11/7/19 2:25 PM, thien.m.huynh wrote: --- src/nid/configure_tipc.in | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index 73dd1cb..218de65 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -21,7 +21,11 @@ . $pkgsysconfdir/nid.conf MANAGE_TIPC=${OPENSAF_MANAGE_TIPC:-"yes"} -TIPC_MODULE=/lib/modules/$(uname -r)/kernel/net/tipc.ko +TIPC_MODULE=$(modinfo tipc -n 2> /dev/null) [Vu] Put option `-n` before tipc. +ret_val=$? +if [ $ret_val -ne 0 ] ; then +TIPC_MODULE=/lib/modules/$(uname -r)/kernel/net/tipc/tipc.ko [Vu] Add double quotes around the path +fi [Vu] Or you can use this below technique and remove above if +TIPC_MODULE=$(modinfo tipc -n 2> /dev/null) || TIPC_MODULE="/lib/modules/$(uname -r)/kernel/net/tipc/tipc.ko" CHASSIS_ID_FILE=$pkgsysconfdir/chassis_id SLOT_ID_FILE=$pkgsysconfdir/slot_id SUBSLOT_ID_FILE=$pkgsysconfdir/subslot_id ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: Unset flow control env var [#3109]
Hi Minh, Ack with minor comments. Regards, Vu On 10/31/19 11:55 AM, Minh Chau wrote: Patch unsets MDS_TIPC_FCTRL_ENABLED, MDS_TIPC_FCTRL_ACKTIMEOUT, and MDS_TIPC_FCTRL_ACKSIZE to prevent child process inheritance. --- src/mds/mds_dt_tipc.c | 39 +-- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index e7a7b48..096e4ca 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -167,6 +167,8 @@ uint32_t mdtm_global_frag_num; const unsigned int MAX_RECV_THRESHOLD = 30; uint8_t gl_mds_pro_ver = MDS_PROT | MDS_VERSION; +int gl_mds_fctrl_acksize = -1; +int gl_mds_fctrl_ackto = -1; [Vu] Should we declare these ones as static variables if they are only used in this file ? static bool get_tipc_port_id(int sock, struct tipc_portid* port_id) { struct sockaddr_tipc addr; @@ -347,32 +349,49 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) if ((ptr = getenv("MDS_TIPC_FCTRL_ENABLED")) != NULL) { if (atoi(ptr) == 1) { gl_mds_pro_ver = MDS_PROT_FCTRL; - int ackto = -1; - int acksize = -1; if ((ptr = getenv("MDS_TIPC_FCTRL_ACKTIMEOUT")) != NULL) { - ackto = atoi(ptr); - if (ackto == 0) { + gl_mds_fctrl_ackto = atoi(ptr); + if (gl_mds_fctrl_ackto == 0) { syslog(LOG_ERR, "MDTM:TIPC Invalid " "MDS_TIPC_FCTRL_ACKTIMEOUT, using default value"); - ackto = -1; + gl_mds_fctrl_ackto = -1; } } if ((ptr = getenv("MDS_TIPC_FCTRL_ACKSIZE")) != NULL) { - acksize = atoi(ptr); - if (acksize == 0) { + gl_mds_fctrl_acksize = atoi(ptr); + if (gl_mds_fctrl_acksize == 0) { syslog(LOG_ERR, "MDTM:TIPC Invalid " "MDS_TIPC_FCTRL_ACKSIZE, using default value"); - acksize = -1; + gl_mds_fctrl_acksize = -1; } } - mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, optval, - ackto, acksize, tipc_mcast_enabled); + /* unset the env var to prevent child process inheritance */ + if (unsetenv("MDS_TIPC_FCTRL_ENABLED") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ENABLED"); + } + if (gl_mds_fctrl_ackto != -1 && + unsetenv("MDS_TIPC_FCTRL_ACKTIMEOUT") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ACKTIMEOUT"); + } + if (gl_mds_fctrl_acksize != -1 && + unsetenv("MDS_TIPC_FCTRL_ACKSIZE") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ACKSIZE"); + } } else { + gl_mds_pro_ver = MDS_PROT | MDS_VERSION; [Vu] This line may be not necessary as the default value of gl_mds_pro_ver is `MDS_PROT | MDS_VERSION`. syslog(LOG_ERR, "MDTM:TIPC Invalid value of" "MDS_TIPC_FCTRL_ENABLED"); } } + if (gl_mds_pro_ver == MDS_PROT_FCTRL) { + mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, optval, + gl_mds_fctrl_ackto, gl_mds_fctrl_acksize, tipc_mcast_enabled); + } + /* Create a task to receive the events and data */ if (mdtm_create_rcv_task(tipc_cb.hdle_mdtm) != NCSCC_RC_SUCCESS) { syslog(LOG_ERR, ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: Unset flow control env var [#3109]
Hi Minh, Ack with one question. What happens if user does following sequence: 1) Init service handle --> Have this env variable set, then unset later on. 2) Finalize the handle 3) Init service handle --> I am not sure if previous unset has any affects to tipc flow control from this point e.g. has tipc flow control been disabled from previous unset? Regards, Vu On 10/31/19 5:32 AM, Minh Chau wrote: Patch unsets MDS_TIPC_FCTRL_ENABLED, MDS_TIPC_FCTRL_ACKTIMEOUT, and MDS_TIPC_FCTRL_ACKSIZE to prevent child process inheritance. --- src/mds/mds_dt_tipc.c | 13 + 1 file changed, 13 insertions(+) diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index e7a7b48..12b275d 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -367,6 +367,19 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) } mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, optval, ackto, acksize, tipc_mcast_enabled); + /* unset the env var to prevent child process inheritance */ + if (unsetenv("MDS_TIPC_FCTRL_ENABLED") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ENABLED"); + } + if (ackto != -1 && unsetenv("MDS_TIPC_FCTRL_ACKTIMEOUT") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ACKTIMEOUT"); + } + if (acksize != -1 && unsetenv("MDS_TIPC_FCTRL_ACKSIZE") != 0) { + syslog(LOG_ERR, + "MDTM:TIPC Failed to unset MDS_TIPC_FCTRL_ACKSIZE"); + } } else { syslog(LOG_ERR, "MDTM:TIPC Invalid value of" "MDS_TIPC_FCTRL_ENABLED"); ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] nid: Remove the absolute path of tipc command [#3105]
Hi Thien, Ack with comments. Regards, Vu On 10/23/19 9:42 AM, thien.m.huynh wrote: --- scripts/tipc-config | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/tipc-config b/scripts/tipc-config index 34eb9a5..b7c6cef 100755 --- a/scripts/tipc-config +++ b/scripts/tipc-config @@ -36,10 +36,12 @@ EOF exit 1 fi +tipc=$(which tipc 2> /dev/null) + while [ $# -gt 0 ]; do case "$1" in -addr) - addr=$(/sbin/tipc node get address) + addr=$($tipc node get address) [Vu] Use double quotes around $tipc to avoid the case that having space(s) in the tipc path. So, use addr=$("$tipc" node get address). This comment is also applicable for below changes. hex_pattern="^[0-9a-fA-F]+$" if [[ $addr =~ $hex_pattern ]]; then dec_addr=$((16#$addr)) @@ -53,36 +55,36 @@ while [ $# -gt 0 ]; do echo "node address: $addr" ;; -a=*) - /sbin/tipc node set address "$(echo "$1" | cut -d= -f2)" + $tipc node set address "$(echo "$1" | cut -d= -f2)" ;; -b) echo "Bearers:" - /sbin/tipc bearer list + $tipc bearer list ;; -bd=*) - /sbin/tipc bearer disable media eth device "$(echo "$1" | cut -d: -f2)" + $tipc bearer disable media eth device "$(echo "$1" | cut -d: -f2)" ;; -be=*) - /sbin/tipc bearer enable media eth device "$(echo "$1" | cut -d: -f2)" + $tipc bearer enable media eth device "$(echo "$1" | cut -d: -f2)" ;; -l) echo "Links:" - /sbin/tipc link list + $tipc link list ;; -lt=*) tolerance=$(echo "$1" | cut -d= -f2) link=$(echo "$tolerance" | cut -d/ -f1) value=$(echo "$tolerance" | cut -d/ -f2) - /sbin/tipc link set tolerance "$value" link "$link" + $tipc link set tolerance "$value" link "$link" ;; -netid) - echo "current network id: $(/sbin/tipc node get netid)" + echo "current network id: $($tipc node get netid)" ;; -netid=*) - /sbin/tipc node set netid "$(echo "$1" | cut -d= -f2)" + $tipc node set netid "$(echo "$1" | cut -d= -f2)" ;; -nt) - /sbin/tipc nametable show + $tipc nametable show ;; *) echo "$0: unrecognized option '$1'" 1>&2 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] mds: not waste 1.5s in waiting Adest already down to send response message [#3102]
Hi Thuan, I have additional comments below. Regards, Vu On 10/22/19 7:14 AM, thuan.tran wrote: - When sending response message which Adest not exist (already down) current MDS try to wait for 1.5 seconds before conclude no route to send response message. - There are 2 scenarios may have: UP -> DOWN -> receive SNDRSP -> response timeout after 1.5s UP -> receive SNDRSP -> DOWN -> response timeout after 1.5s - With this change, MDS will not waste for 1.5s which can cause trouble for higher layer services, e.g: ntf, imm, etc... --- src/mds/mds_c_api.c | 70 - src/mds/mds_c_sndrcv.c | 52 -- src/mds/mds_core.h | 25 +-- src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c | 22 - 5 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index 132555b8e..5dd30c536 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -1900,6 +1900,32 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /*** Validation for SCOPE **/ + if (adest != m_MDS_GET_ADEST) { + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (!adest_info) { + /* Add adest to adest list */ + adest_info = m_MMGR_ALLOC_ADEST_INFO; + memset(adest_info, 0, sizeof(MDS_ADEST_INFO)); + adest_info->adest = adest; + adest_info->node.key_info = + (uint8_t *)&adest_info->adest; + adest_info->svc_cnt = 1; + adest_info->tmr_start = false; + ncs_patricia_tree_add( + &gl_mds_mcm_cb->adest_list, + (NCS_PATRICIA_NODE *)adest_info); + m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64 + " svc_cnt=%u", adest, adest_info->svc_cnt); + } else { + adest_info->svc_cnt++; + m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64 + " svc_cnt=%u", adest, adest_info->svc_cnt); + } + } + status = mds_get_subtn_res_tbl_by_adest(local_svc_hdl, svc_id, vdest_id, adest, &log_subtn_result_info); @@ -3571,6 +3597,24 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /* Discard : Getting down before getting up */ } else { /* Entry exist in subscription result table */ + /* If adest exist and no sndrsp, start a timer */ + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (adest_info) { + adest_info->svc_cnt--; + if (adest_info->svc_cnt == 0 && + adest_info->sndrsp_cnt == 0) { + m_MDS_LOG_INFO("MCM:API: Adest=%" PRIu64 + " down timer start", adest); + if (adest_info->tmr_start == false) { + adest_info->tmr_start = true; + start_mds_down_tmr(adest, svc_id); + } + } + } + if (vdest_id == m_VDEST_ID_FOR_ADEST_ENTRY) { status = mds_subtn_res_tbl_del( local_svc_hdl, svc_id, vdest_id, adest, @@ -4956,6 +5000,17 @@ uint32_t mds_mcm_init(void) return NCSCC_RC_FAILURE; } + /* ADEST TREE */ + memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS)); + pat_tree_params.key_size = sizeof(MDS_DEST); + if (NCSCC_RC_SUCCESS != + ncs_patricia_tree_init(&gl_mds_mcm_cb->adest_list, + &pat_tree_params)) { + m_MDS_LOG_ERR( + "MCM:API: patricia_tree_init: adest :failure, L mds_mcm_init"); + return NCSCC_RC_FAILURE; + } + /* SERVICE TREE */ memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS)); pat_tree_params.key_size = sizeof(MDS_SVC_HDL); @@ -4966,7 +5021,12 @@ uint32_t mds_mcm_init(void) if (NCSCC_RC_SUCCESS != ncs_patricia_tree_destroy(&gl_mds_mcm_cb->vdest_list)) { m_MDS_LOG_ERR( - "MCM:API: patricia_tree_destroy: service :failure, L mds_mcm_init"); +
Re: [devel] [PATCH 1/1] mds: not waste 1.5s in waiting Adest already down to send response message [#3102]
Hi Thuan, I have few comments below. Regards, Vu On 10/22/19 7:14 AM, thuan.tran wrote: - When sending response message which Adest not exist (already down) current MDS try to wait for 1.5 seconds before conclude no route to send response message. - There are 2 scenarios may have: UP -> DOWN -> receive SNDRSP -> response timeout after 1.5s UP -> receive SNDRSP -> DOWN -> response timeout after 1.5s - With this change, MDS will not waste for 1.5s which can cause trouble for higher layer services, e.g: ntf, imm, etc... --- src/mds/mds_c_api.c | 70 - src/mds/mds_c_sndrcv.c | 52 -- src/mds/mds_core.h | 25 +-- src/mds/mds_dt2c.h | 2 +- src/mds/mds_dt_common.c | 22 - 5 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index 132555b8e..5dd30c536 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -1900,6 +1900,32 @@ uint32_t mds_mcm_svc_up(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /*** Validation for SCOPE **/ + if (adest != m_MDS_GET_ADEST) { + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (!adest_info) { + /* Add adest to adest list */ + adest_info = m_MMGR_ALLOC_ADEST_INFO; + memset(adest_info, 0, sizeof(MDS_ADEST_INFO)); + adest_info->adest = adest; + adest_info->node.key_info = + (uint8_t *)&adest_info->adest; + adest_info->svc_cnt = 1; + adest_info->tmr_start = false; + ncs_patricia_tree_add( + &gl_mds_mcm_cb->adest_list, + (NCS_PATRICIA_NODE *)adest_info); + m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64 + " svc_cnt=%u", adest, adest_info->svc_cnt); + } else { + adest_info->svc_cnt++; + m_MDS_LOG_DBG("MCM:API: Adest=%" PRIu64 + " svc_cnt=%u", adest, adest_info->svc_cnt); + } + } [Vu] This new database, adest_list, is shared b/w internal osaf_mds thread and mds's user thread, hence should be protected by mutex. + status = mds_get_subtn_res_tbl_by_adest(local_svc_hdl, svc_id, vdest_id, adest, &log_subtn_result_info); @@ -3571,6 +3597,24 @@ uint32_t mds_mcm_svc_down(PW_ENV_ID pwe_id, MDS_SVC_ID svc_id, V_DEST_RL role, /* Discard : Getting down before getting up */ } else { /* Entry exist in subscription result table */ + /* If adest exist and no sndrsp, start a timer */ + MDS_ADEST_INFO *adest_info = + (MDS_ADEST_INFO *)ncs_patricia_tree_get( + &gl_mds_mcm_cb->adest_list, + (uint8_t *)&adest); + if (adest_info) { + adest_info->svc_cnt--; + if (adest_info->svc_cnt == 0 && + adest_info->sndrsp_cnt == 0) { + m_MDS_LOG_INFO("MCM:API: Adest=%" PRIu64 + " down timer start", adest); + if (adest_info->tmr_start == false) { + adest_info->tmr_start = true; + start_mds_down_tmr(adest, svc_id); [Vu] Seems mds_down tmr is started twice. The first start is at the beginning of the function. But what is the reason to start the mds down timer here? What if we won't start the tmr? /* potentially clean up the process info database */ MDS_PROCESS_INFO *info = mds_process_info_get(adest, svc_id); if (info != NULL) { /* Process info exist, delay the cleanup with a timeout to avoid * race */ start_mds_down_tmr(adest, svc_id); } + } + } + } + if (vdest_id == m_VDEST_ID_FOR_ADEST_ENTRY) { status = mds_subtn_res_tbl_del( local_svc_hdl, svc_id, vdest_id, adest, @@ -4956,6 +5000,17 @@ uint32_t mds_mcm_init(void) return NCSCC_RC_FAILURE; } + /* ADEST TREE */ + memset(&pat_tree_params, 0, sizeof(NCS_PATRICIA_PARAMS)); + pat_tree_params.key_size = sizeof(MDS_DEST); + if (NCSCC_RC_SUCCESS != + ncs_patricia_tree_init(&gl_mds_mcm_cb->adest_list, + &pat_tree_params)) { + m_MDS_LOG_ERR( +
Re: [devel] [PATCH 1/1] dtm: rotate logtraces on demand [#3086]
Hi, Any comments on this patch? I will push it by this week if no comments. Regards, Vu On 10/4/19 5:30 PM, Vu Minh Nguyen wrote: Introducing a new option '--rotate' to rotate given logtrace stream(s). This patch also cleans the code of LogServer::ExecuteCommand(). --- src/base/log_writer.h | 2 +- src/dtm/tools/osaflog.cc| 25 ++- src/dtm/transport/log_server.cc | 125 +--- src/dtm/transport/log_server.h | 11 ++- 4 files changed, 115 insertions(+), 48 deletions(-) diff --git a/src/base/log_writer.h b/src/base/log_writer.h index 0a03e9253..ab2bf32ae 100644 --- a/src/base/log_writer.h +++ b/src/base/log_writer.h @@ -45,13 +45,13 @@ class LogWriter { void Write(size_t size); void Write(const char* bytes, size_t size); void Flush(); + void RotateLog(); void SetLogFile(const std::string& log_file) { log_file_ = log_file; } private: constexpr static const size_t kBufferSize = 128 * size_t{1024}; void Open(); void Close(); - void RotateLog(); std::string log_file(size_t backup) const; diff --git a/src/dtm/tools/osaflog.cc b/src/dtm/tools/osaflog.cc index 4e0956aa2..f6fa16801 100644 --- a/src/dtm/tools/osaflog.cc +++ b/src/dtm/tools/osaflog.cc @@ -54,6 +54,7 @@ base::UnixServerSocket* CreateSocket(); uint64_t Random64Bits(uint64_t seed); bool PrettyPrint(const std::string& log_stream); bool Delete(const std::string& log_stream); +bool Rotate(const std::string& log_stream); std::list OpenLogFiles(const std::string& log_stream); std::string PathName(const std::string& log_stream, int suffix); uint64_t GetInode(int fd); @@ -70,6 +71,7 @@ int main(int argc, char** argv) { {"flush", no_argument, 0, 'f'}, {"print", no_argument, nullptr, 'p'}, {"delete", no_argument, nullptr, 'd'}, + {"rotate", no_argument, nullptr, 'r'}, {"extract-trace", required_argument, 0, 'e'}, {"max-idle", required_argument, 0, 'i'}, {0, 0, 0, 0}}; @@ -83,12 +85,14 @@ int main(int argc, char** argv) { bool flush_result = true; bool print_result = true; bool delete_result = true; + bool rotate_result = true; bool max_file_size_result = true; bool number_of_backups_result = true; bool max_idle_result = true; bool flush_set = false; bool pretty_print_set = false; bool delete_set = false; + bool rotate_set = false; bool max_file_size_set = false; bool max_backups_set = false; bool max_idle_set = false; @@ -101,7 +105,7 @@ int main(int argc, char** argv) { exit(EXIT_FAILURE); } - while ((option = getopt_long(argc, argv, "m:b:p:f:e:", + while ((option = getopt_long(argc, argv, "m:b:p:f:e:i:r", long_options, &long_index)) != -1) { switch (option) { case 'p': @@ -114,6 +118,9 @@ int main(int argc, char** argv) { case 'f': flush_set = true; break; + case 'r': +rotate_set = true; +break; case 'm': max_file_size = base::StrToUint64(optarg, &max_file_size_set); @@ -164,12 +171,12 @@ int main(int argc, char** argv) { if (thread_trace) exit(ExtractTrace(input_core, output_trace)); - if (argc > optind && !pretty_print_set && !delete_set) { + if (argc > optind && !pretty_print_set && !delete_set && !rotate_set) { pretty_print_set = true; flush_set = true; } - if ((argc <= optind && (pretty_print_set || delete_set)) || + if ((argc <= optind && (pretty_print_set || delete_set || rotate_set)) || (pretty_print_set && delete_set)) { PrintUsage(argv[0]); exit(EXIT_FAILURE); @@ -188,6 +195,11 @@ int main(int argc, char** argv) { delete_result = Delete(argv[optind++]); } } + if (rotate_set == true) { +while (rotate_result && optind < argc) { + rotate_result = Rotate(argv[optind++]); +} + } if (max_backups_set == true) { number_of_backups_result = NoOfBackupFiles(max_backups); } @@ -197,7 +209,7 @@ int main(int argc, char** argv) { if (max_idle_set == true) { max_idle_result = SetMaxIdleTime(max_idle); } - if (flush_result && print_result && max_file_size_result && + if (flush_result && print_result && max_file_size_result && rotate_result && delete_result && number_of_backups_result && max_idle_result) exit(EXIT_SUCCESS); exit(EXIT_FAILURE); @@ -224,6 +236,7 @@ void PrintUsage(const char* program_name) { "--delete Delete the specified LOGSTREAM(s) by\n" " removing allocated resources in the log\n" " server. Does not delete log files from disk.\n" +
Re: [devel] [PATCH 1/1] ntfd: Do not send response to client if client down [#3084]
Hi Thien, I have some comments below. I see this enhancement does not bring much value to NTF as it deals with a very rare case - process is terminated before saNtfInitialize() returns. In reality, if NTF server is getting overloaded by such process, there must be an error in that process. @Minh: how about your opinion? is this ticket valid? Anyway, here are my comments: 1) Only C source files, ntfs_mds.c & ntfs_evt.c, access the new added list `ntfa_down_list_head`, why put new added methods in the C++ file and add C wrapper functions for them? It should be more clean if you move these functions into a new files e.g: ntfs_client_down.{h,c}. 2) C++ method name should start with a capital letter (refer to C++ google coding rule) 3) Naming methods that represent adding a down client to list, and removing from the list should pair/opposite with each other e.g. Open vs Close, Add vs Remove, not mark vs remove 4) The list is accessing from 02 different threads, mds and main thread, therefore must use mutex to prevent race conditions. 5) Should have a check to ensure *not* adding the down client into the list if that client has successfully initialized. Regards, Vu On 10/9/19 9:36 AM, thien.m.huynh wrote: Ntfd will not send response to a client when client already down. This will avoid timeout when ntfd send via mds. --- src/ntf/ntfd/NtfAdmin.cc | 93 src/ntf/ntfd/NtfAdmin.h | 3 ++ src/ntf/ntfd/ntfs_cb.h | 6 src/ntf/ntfd/ntfs_com.h | 3 ++ src/ntf/ntfd/ntfs_evt.c | 1 + src/ntf/ntfd/ntfs_mds.c | 9 - 6 files changed, 114 insertions(+), 1 deletion(-) diff --git a/src/ntf/ntfd/NtfAdmin.cc b/src/ntf/ntfd/NtfAdmin.cc index 8bbee69..641171b 100644 --- a/src/ntf/ntfd/NtfAdmin.cc +++ b/src/ntf/ntfd/NtfAdmin.cc @@ -560,6 +560,85 @@ void NtfAdmin::SearchAndSetClientsDownFlag(MDS_DEST mds_dest) { } /** + * @brief Add mds_dest tag into ntfa down list + * @param mds_dest + */ +void NtfAdmin::markAgentDown(MDS_DEST mds_dest) { + TRACE_ENTER(); + NTFA_DOWN_LIST *ntfa_down_rec = NULL; + if ((ntfa_down_rec = reinterpret_cast( + malloc(sizeof(NTFA_DOWN_LIST == NULL) { +LOG_ER("memory allocation for the NTFA_DOWN_LIST failed"); +return; + } + memset(ntfa_down_rec, 0, sizeof(NTFA_DOWN_LIST)); + ntfa_down_rec->mds_dest = mds_dest; + ntfa_down_rec->next = NULL; + + if (ntfs_cb->ntfa_down_list_head == NULL) { +ntfs_cb->ntfa_down_list_head = ntfa_down_rec; + } else { +NTFA_DOWN_LIST *p = ntfs_cb->ntfa_down_list_head; +while (p->next != NULL) { + p = p->next; +} +p->next = ntfa_down_rec; + } + TRACE_1("Added MDS dest: %" PRIx64, ntfa_down_rec->mds_dest); + TRACE_LEAVE(); +} + +/** + * @brief Find and remove agent from ntfa down list + * @param mds_dest + */ +void NtfAdmin::removeAgentFromDownList(MDS_DEST mds_dest) { + NTFA_DOWN_LIST *ntfa_down_rec = ntfs_cb->ntfa_down_list_head; + NTFA_DOWN_LIST *prev = NULL; + TRACE_ENTER(); + while (ntfa_down_rec != NULL) { +if (mds_dest == ntfa_down_rec->mds_dest) { + if (ntfa_down_rec == ntfs_cb->ntfa_down_list_head) { +if (ntfa_down_rec->next == NULL) { + ntfs_cb->ntfa_down_list_head = NULL; +} else { + ntfs_cb->ntfa_down_list_head = ntfa_down_rec->next; +} + } else if (prev) { +prev->next = ntfa_down_rec->next; + } + TRACE("Deleted MDS dest: %" PRIx64, ntfa_down_rec->mds_dest); + free(ntfa_down_rec); + ntfa_down_rec = NULL; + break; +} +prev = ntfa_down_rec; +ntfa_down_rec = ntfa_down_rec->next; + } + TRACE_LEAVE(); +} + +/** + * @brief Check if agent exists in down list + * @param mds_dest + * @return true/false + */ +bool NtfAdmin::isInNtfaDownList(MDS_DEST mds_dest) { + bool found = false; + NTFA_DOWN_LIST *ntfa_down_rec = ntfs_cb->ntfa_down_list_head; + TRACE_ENTER(); + while (ntfa_down_rec != NULL) { +if (mds_dest == ntfa_down_rec->mds_dest) { + found = true; + break; +} +ntfa_down_rec = ntfa_down_rec->next; + } + TRACE_LEAVE(); + return found; +} + +/** * The node object where the client who had the subscription is notified * so it can delete the appropriate subscription and filter object. * @@ -1300,6 +1379,20 @@ uint32_t send_clm_node_status_change(SaClmClusterChangesT cluster_change, cluster_change, node_id)); } +void removeAgentFromDownList(MDS_DEST mds_dest) { + osafassert(NtfAdmin::theNtfAdmin != NULL); + NtfAdmin::theNtfAdmin->removeAgentFromDownList(mds_dest); +} + +bool isInNtfaDownList(MDS_DEST mds_dest) { + return (NtfAdmin::theNtfAdmin->isInNtfaDownList(mds_dest)); +} + +void markAgentDown(MDS_DEST mds_dest) { + osafassert(NtfAdmin::theNtfAdmin != NULL); + NtfAdmin::theNtfAdmin->markAgentDown(mds_dest); +} + /** * @brief Checks CLM membership status of a client. * A0101 clients are always CLM member. diff --git a/src/ntf/ntf
Re: [devel] [PATCH 0/2] Review Request for mds: Add Nack message for MDS_TIPC_FCTRL_ENABLED [#3095] V2
Hi Minh, Ack from me. Thanks. Regards, Vu On 10/4/19 12:20 PM, Minh Chau wrote: Summary: mds: Add Nack message for MDS_TIPC_FCTRL_ENABLED [#3095] V2 Review request for Ticket(s): 3095 Peer Reviewer(s): Hans, Vu, Gary, Thuan Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-3095 Base revision: 05064a1cfd0aeaf824dce7602d535654b3457e30 Personal repository: git://git.code.sf.net/u/minh-chau/review Impacted area Impact y/n Docsn Build systemn RPM/packaging n Configuration files n Startup scripts n SAF servicesn OpenSAF servicesn Core libraries y Samples n Tests n Other n Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision cbbeab8f2299620aa3eb9b0e29710a2b159b5a45 Author: Minh Chau Date: Fri, 4 Oct 2019 12:59:27 +1000 mds: Improve error log for MDS_TIPC_FCTRL_ENABLED [#3095] This commit as part of #3095 updates the error string with pattern "FCTRL:*Error[*]", in order to help grep-ing the error in mds debug log. revision cc666586717fa82df70471748d8766e8fe901460 Author: Minh Chau Date: Fri, 4 Oct 2019 12:59:16 +1000 mds: Add Nack message for MDS_TIPC_FCTRL_ENABLED [#3095] In the scenario of recovery from split-brain, where both active director services may suffer mds message loss due to lost-contact tipc link. If MDS_TIPC_FCTRL_ENABLED is set, the out-of-order message will be dropped, and there is no mechanism to trigger the retransmission from receiver side at this moment (the retransmission is only triggered from sender as result of TIPC_ERR_OVERLOAD). In reception of disordered message, the receiver can send not-acknowledgement to notify the sender for retransmission. Therefore, the sender can trigger retransmisison in the same way as receiving TIPC_ERR_OVERLOAD. This patch adds Nack message for retransmission of disordered message detected from receiver side, and adds a missing call to portid_map_mutex.unlock() in process_all_events(). Complete diffstat: -- src/mds/mds_c_api.c | 2 +- src/mds/mds_dt_common.c | 2 +- src/mds/mds_tipc_fctrl_intf.cc | 72 +--- src/mds/mds_tipc_fctrl_msg.cc| 35 ++- src/mds/mds_tipc_fctrl_msg.h | 22 src/mds/mds_tipc_fctrl_portid.cc | 42 --- src/mds/mds_tipc_fctrl_portid.h | 3 +- 7 files changed, 143 insertions(+), 35 deletions(-) Testing Commands: - *** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES *** Testing, Expected Results: -- *** PASTE COMMAND OUTPUTS / TEST RESULTS *** Conditions of Submission: - *** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC *** Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 n n powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. ___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; Instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree
Re: [devel] [PATCH 1/1] dtm: close unused log streams [#2642]
Hi Minh, I put that note in the usage of max-idle option. See below: + "--max-idle=NUM Set the maximum number of idle time to NUM\n" + " minutes. If a stream has not been used for\n" + " NUM minutes, the stream will be closed.\n" + " The default value is zero (disable the\n" + " clean-up job)\n", Regards, Vu On 10/1/19 5:38 PM, Minh Hon Chau wrote: Hi Vu, Ok, then the value '0' needs to be written somewhere (README?) for this special purpose I guess, to avoid a confusion later on. Thanks Minh On 1/10/19 8:27 pm, Nguyen Minh Vu wrote: Hi Minh, Thanks for your comment. When passing zero to max-idle, the server will disable 'close unused log streams' functionality. It may be useful when user has previously set max-idle to a specific value, and want to disable it later. If the range starts from 1, there is no chance to disable it. Regards, Vu On 10/1/19 5:17 PM, Minh Hon Chau wrote: Hi Vu, ack for minor comment. The range of --max-idle, I think, should be starting from 1, as the log_server ignores the tv_sec=0. From user's perspective, if allowing max-idle=0, the meaning seems that the stream must be constantly writing traces, or the stream will be deleted. Thanks Minh On 24/9/19 12:57 pm, Vu Minh Nguyen wrote: Providing a new option '--max-idle' to configure the maximum idle time of logtrace streams. If a stream has not been used for such time, logtrace server will close the stream from its database. This patch also corrects wrong indentation in osaflog.cc file. --- src/dtm/Makefile | 2 +- src/dtm/common/osaflog_protocol.h | 2 + src/dtm/tools/Makefile | 18 src/dtm/tools/osaflog.cc | 132 ++ src/dtm/transport/log_server.cc | 57 - src/dtm/transport/log_server.h | 7 +- src/dtm/transport/transportd.conf | 6 ++ 7 files changed, 168 insertions(+), 56 deletions(-) create mode 100644 src/dtm/tools/Makefile diff --git a/src/dtm/Makefile b/src/dtm/Makefile index 533b0f273..fb0221075 100644 --- a/src/dtm/Makefile +++ b/src/dtm/Makefile @@ -15,7 +15,7 @@ # all: - $(MAKE) -C ../.. bin/osafdtmd bin/osaftransportd + $(MAKE) -C ../.. bin/osafdtmd bin/osaftransportd bin/osaflog check: $(MAKE) -C ../.. bin/transport_test diff --git a/src/dtm/common/osaflog_protocol.h b/src/dtm/common/osaflog_protocol.h index 61e9f6f39..d35e5f345 100644 --- a/src/dtm/common/osaflog_protocol.h +++ b/src/dtm/common/osaflog_protocol.h @@ -27,6 +27,8 @@ namespace Osaflog { static constexpr const char* kServerSocketPath = PKGLOCALSTATEDIR "/osaf_log.sock"; +static constexpr const uint64_t kOneDayInMinute = 24*60; + struct __attribute__((__packed__)) ClientAddressConstantPrefix { sa_family_t family = AF_UNIX; char abstract = '\0'; diff --git a/src/dtm/tools/Makefile b/src/dtm/tools/Makefile new file mode 100644 index 0..8c48b70a5 --- /dev/null +++ b/src/dtm/tools/Makefile @@ -0,0 +1,18 @@ +# -*- OpenSAF -*- +# +# (C) Copyright 2019 The OpenSAF Foundation +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed +# under the GNU Lesser General Public License Version 2.1, February 1999. +# The complete license can be accessed from the following location: +# http://opensource.org/licenses/lgpl-license.php +# See the Copying file included with the OpenSAF distribution for full +# licensing terms. +# +# Author(s): Ericsson AB +# + +all: + $(MAKE) -C ../../.. bin/osaflog diff --git a/src/dtm/tools/osaflog.cc b/src/dtm/tools/osaflog.cc index 64be253e9..abbf0b164 100644 --- a/src/dtm/tools/osaflog.cc +++ b/src/dtm/tools/osaflog.cc @@ -47,6 +47,7 @@ namespace { void PrintUsage(const char* program_name); bool SendCommand(const std::string& command); bool MaxTraceFileSize(uint64_t max_file_size); +bool SetMaxIdleTime(uint64_t max_idle); bool NoOfBackupFiles(uint64_t number_of_backups); bool Flush(); base::UnixServerSocket* CreateSocket(); @@ -70,10 +71,12 @@ int main(int argc, char** argv) { {"print", no_argument, nullptr, 'p'}, {"delete", no_argument, nullptr, 'd'}, {"extract-trace", required_argument, 0, 'e'}, + {"max-idle", required_argument, 0, 'i'}, {0, 0, 0, 0}}; uint64_t max_file_size = 0; uint64_t max_backups = 0; + uint64_t max
Re: [devel] [PATCH 1/1] mds: Add Nack message for MDS_TIPC_FCTRL_ENABLED [#3095]
Hi Minh, Ack with minor comments. Thanks. Regards, Vu On 10/1/19 12:49 PM, Minh Chau wrote: In the scenario of recovery from split-brain, where both active director services may suffer mds message loss due to lost-contact tipc link. If MDS_TIPC_FCTRL_ENABLED is set, the out-of-order message will be dropped, and there is no mechanism to trigger the retransmission from receiver side at this moment (the retransmission is only triggered from sender as result of TIPC_ERR_OVERLOAD). In reception of disordered message, the receiver can send not-acknowledgement to notify the sender for retransmission. Therefore, the sender can trigger retransmisison in the same way as receiving TIPC_ERR_OVERLOAD. This patch adds Nack message for retransmission of disordered message detected from receiver side. --- src/mds/mds_c_api.c | 2 +- src/mds/mds_dt_common.c | 2 +- src/mds/mds_tipc_fctrl_intf.cc | 19 ++- src/mds/mds_tipc_fctrl_msg.cc| 33 + src/mds/mds_tipc_fctrl_msg.h | 22 ++ src/mds/mds_tipc_fctrl_portid.cc | 18 +- src/mds/mds_tipc_fctrl_portid.h | 1 + 7 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/mds/mds_c_api.c b/src/mds/mds_c_api.c index c41c8dd..132555b 100644 --- a/src/mds/mds_c_api.c +++ b/src/mds/mds_c_api.c @@ -4196,7 +4196,7 @@ void mds_mcm_msg_loss(MDS_SVC_HDL local_svc_hdl, MDS_DEST rem_adest, /* Check whether the msg loss is enabled or not */ if (true != local_svc_info->i_msg_loss_indication) { - m_MDS_LOG_INFO(" MSG loss not enbaled mds_mcm_msg_loss\n"); + m_MDS_LOG_NOTIFY("MSG loss is not enabled mds_mcm_msg_loss\n"); return; } diff --git a/src/mds/mds_dt_common.c b/src/mds/mds_dt_common.c index 66652af..de13883 100644 --- a/src/mds/mds_dt_common.c +++ b/src/mds/mds_dt_common.c @@ -972,7 +972,7 @@ uint32_t mds_tmr_mailbox_processing(void) .vdest_id); break; case MDS_REASSEMBLY_TMR: - m_MDS_LOG_DBG( + m_MDS_LOG_ERR( "MDTM: Tmr Mailbox Processing:Reassemble Tmr Hdl=0x%08x", mbx_evt_info->info.tmr_info_hdl); mdtm_process_reassem_timer_event( diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index 2366672..65f1849 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -38,6 +38,7 @@ using mds::Timer; using mds::DataMessage; using mds::ChunkAck; using mds::HeaderMessage; +using mds::Nack; namespace { // flow control enabled/disabled @@ -142,7 +143,8 @@ uint32_t process_flow_event(const Event& evt) { if (evt.type_ == Event::Type::kEvtSendChunkAck) { portid->SendChunkAck(evt.fseq_, evt.svc_id_, evt.chunk_size_); } -if (evt.type_ == Event::Type::kEvtDropData) { +if (evt.type_ == Event::Type::kEvtDropData || +evt.type_ == Event::Type::kEvtRcvNack) { portid->ReceiveNack(evt.mseq_, evt.mfrag_, evt.fseq_); } @@ -464,6 +466,21 @@ uint32_t mds_tipc_fctrl_rcv_data(uint8_t *buffer, uint16_t len, // skip this data msg return NCSCC_RC_FAILURE; } + if (header.msg_type_ == Nack::kNackMsgType) { +// receive nack message +Nack nack; +nack.Decode(buffer); +// send to the event thread +if (m_NCS_IPC_SEND(&mbx_events, +new Event(Event::Type::kEvtRcvNack, id, nack.svc_id_, +header.mseq_, header.mfrag_, nack.nacked_fseq_), +NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { + m_MDS_LOG_ERR("FCTRL: Failed to send msg to mbx_events\n"); +} +// return NCSCC_RC_FAILURE, so the tipc receiving thread (legacy) will +// skip this data msg +return NCSCC_RC_FAILURE; + } } else { // receive data message DataMessage data; diff --git a/src/mds/mds_tipc_fctrl_msg.cc b/src/mds/mds_tipc_fctrl_msg.cc index 064d977..f85568c 100644 --- a/src/mds/mds_tipc_fctrl_msg.cc +++ b/src/mds/mds_tipc_fctrl_msg.cc @@ -139,4 +139,37 @@ void ChunkAck::Decode(uint8_t *msg) { chunk_size_ = ncs_decode_16bit(&ptr); } + +Nack::Nack(uint16_t svc_id, uint16_t fseq): +svc_id_(svc_id), nacked_fseq_(fseq) { + msg_type_ = kNackMsgType; +} + +void Nack::Encode(uint8_t *msg) { + uint8_t *ptr; + // encode protocol identifier + ptr = &msg[Nack::FieldIndex::kProtocolIdentifier]; + ncs_encode_32bit(&ptr, MDS_PROT_FCTRL_ID); + // encode message type + ptr = &msg[Nack::FieldIndex::kFlowControlMessageType]; + ncs_encode_8bit(&ptr, kNackMsgType); + // encode service id + ptr = &msg[Nack::FieldIndex::kServiceId]; + ncs_encode_16bit(&ptr, svc_id_); + // encode flow contr
Re: [devel] [PATCH 1/1] dtm: close unused log streams [#2642]
Hi Minh, Thanks for your comment. When passing zero to max-idle, the server will disable 'close unused log streams' functionality. It may be useful when user has previously set max-idle to a specific value, and want to disable it later. If the range starts from 1, there is no chance to disable it. Regards, Vu On 10/1/19 5:17 PM, Minh Hon Chau wrote: Hi Vu, ack for minor comment. The range of --max-idle, I think, should be starting from 1, as the log_server ignores the tv_sec=0. From user's perspective, if allowing max-idle=0, the meaning seems that the stream must be constantly writing traces, or the stream will be deleted. Thanks Minh On 24/9/19 12:57 pm, Vu Minh Nguyen wrote: Providing a new option '--max-idle' to configure the maximum idle time of logtrace streams. If a stream has not been used for such time, logtrace server will close the stream from its database. This patch also corrects wrong indentation in osaflog.cc file. --- src/dtm/Makefile | 2 +- src/dtm/common/osaflog_protocol.h | 2 + src/dtm/tools/Makefile | 18 src/dtm/tools/osaflog.cc | 132 ++ src/dtm/transport/log_server.cc | 57 - src/dtm/transport/log_server.h | 7 +- src/dtm/transport/transportd.conf | 6 ++ 7 files changed, 168 insertions(+), 56 deletions(-) create mode 100644 src/dtm/tools/Makefile diff --git a/src/dtm/Makefile b/src/dtm/Makefile index 533b0f273..fb0221075 100644 --- a/src/dtm/Makefile +++ b/src/dtm/Makefile @@ -15,7 +15,7 @@ # all: - $(MAKE) -C ../.. bin/osafdtmd bin/osaftransportd + $(MAKE) -C ../.. bin/osafdtmd bin/osaftransportd bin/osaflog check: $(MAKE) -C ../.. bin/transport_test diff --git a/src/dtm/common/osaflog_protocol.h b/src/dtm/common/osaflog_protocol.h index 61e9f6f39..d35e5f345 100644 --- a/src/dtm/common/osaflog_protocol.h +++ b/src/dtm/common/osaflog_protocol.h @@ -27,6 +27,8 @@ namespace Osaflog { static constexpr const char* kServerSocketPath = PKGLOCALSTATEDIR "/osaf_log.sock"; +static constexpr const uint64_t kOneDayInMinute = 24*60; + struct __attribute__((__packed__)) ClientAddressConstantPrefix { sa_family_t family = AF_UNIX; char abstract = '\0'; diff --git a/src/dtm/tools/Makefile b/src/dtm/tools/Makefile new file mode 100644 index 0..8c48b70a5 --- /dev/null +++ b/src/dtm/tools/Makefile @@ -0,0 +1,18 @@ +# -*- OpenSAF -*- +# +# (C) Copyright 2019 The OpenSAF Foundation +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed +# under the GNU Lesser General Public License Version 2.1, February 1999. +# The complete license can be accessed from the following location: +# http://opensource.org/licenses/lgpl-license.php +# See the Copying file included with the OpenSAF distribution for full +# licensing terms. +# +# Author(s): Ericsson AB +# + +all: + $(MAKE) -C ../../.. bin/osaflog diff --git a/src/dtm/tools/osaflog.cc b/src/dtm/tools/osaflog.cc index 64be253e9..abbf0b164 100644 --- a/src/dtm/tools/osaflog.cc +++ b/src/dtm/tools/osaflog.cc @@ -47,6 +47,7 @@ namespace { void PrintUsage(const char* program_name); bool SendCommand(const std::string& command); bool MaxTraceFileSize(uint64_t max_file_size); +bool SetMaxIdleTime(uint64_t max_idle); bool NoOfBackupFiles(uint64_t number_of_backups); bool Flush(); base::UnixServerSocket* CreateSocket(); @@ -70,10 +71,12 @@ int main(int argc, char** argv) { {"print", no_argument, nullptr, 'p'}, {"delete", no_argument, nullptr, 'd'}, {"extract-trace", required_argument, 0, 'e'}, + {"max-idle", required_argument, 0, 'i'}, {0, 0, 0, 0}}; uint64_t max_file_size = 0; uint64_t max_backups = 0; + uint64_t max_idle = 0; int option = 0; int long_index = 0; @@ -82,71 +85,81 @@ int main(int argc, char** argv) { bool delete_result = true; bool max_file_size_result = true; bool number_of_backups_result = true; + bool max_idle_result = true; bool flush_set = false; bool pretty_print_set = false; bool delete_set = false; bool max_file_size_set = false; bool max_backups_set = false; + bool max_idle_set = false; bool thread_trace = false; std::string input_core = ""; std::string output_trace = ""; if (argc == 1) { - PrintUsage(argv[0]); - exit(EXIT_FAILURE); + PrintUsage(argv[0]); + exit(EXIT_FAILURE); } while ((option = getopt_long(argc, argv, "m:b:p:f:e:", - long_options, &long_index)) != -1) { - switch (option) { - case 'p': - pretty_print_set = true; -
Re: [devel] [PATCH 1/1] dtm: correct no_of_log_streams_ when deleting a stream [#3085]
Hi Khanh, Ack from me. I will push this patch tomorrow on your behalf if no more comment from reviewers. Regards, Vu On 9/27/19 4:58 PM, khanh.h.dang wrote: If a stream is deleted, free the allocated memory for that stream and decrease the no_of_log_streams_. --- src/dtm/transport/log_server.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dtm/transport/log_server.cc b/src/dtm/transport/log_server.cc index 43fa86a..2cff9e5 100644 --- a/src/dtm/transport/log_server.cc +++ b/src/dtm/transport/log_server.cc @@ -251,6 +251,8 @@ std::string LogServer::ExecuteCommand(const std::string& command, if (current_stream_ == stream) { current_stream_ = log_streams_.begin()->second; } +delete stream; +--no_of_log_streams_; return std::string{"!delete " + argument}; } else if (command == "?flush") { for (const auto& s : log_streams_) { ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] ntfd: Do not send response to client if client down [#3084]
Hi Thien, I have a few questions & comments below: 1) Use tab, not spaces to indent C source. 2) What if client down event comes to NTFS while it is handling the NTFSV_INITIALIZE_REQ msg type? 3) When will a down client be removed from the database client_down_list_head if the client has successfully done initialization ? Regards, Vu On 9/24/19 6:43 PM, thien.m.huynh wrote: Ntfd will not send response to a client when client already down. This will avoid timeout when ntfd send via mds. --- src/ntf/ntfd/NtfAdmin.cc | 26 src/ntf/ntfd/NtfAdmin.h | 1 + src/ntf/ntfd/ntfs_cb.h | 9 src/ntf/ntfd/ntfs_com.h | 1 + src/ntf/ntfd/ntfs_evt.c | 53 +++- 5 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/ntf/ntfd/NtfAdmin.cc b/src/ntf/ntfd/NtfAdmin.cc index 8bbee69..c7754b5 100644 --- a/src/ntf/ntfd/NtfAdmin.cc +++ b/src/ntf/ntfd/NtfAdmin.cc @@ -517,6 +517,27 @@ void NtfAdmin::clientRemoveMDS(MDS_DEST mds_dest) { } /** + * @brief Check clients are exists in clientMap + * @param mds_dest + * @return true/false. + */ +bool NtfAdmin::isClientExisted(MDS_DEST mds_dest) { + TRACE_ENTER2("mdsDest: %" PRIx64, mds_dest); + bool found = false; + auto it = clientMap.begin(); + while (it != clientMap.end()) { +NtfClient *client = it->second; +++it; +if (client->getMdsDest() == mds_dest) { + found = true; + break; +} + } + TRACE_LEAVE(); + return found; +} + +/** * Remove the clients that belong to the ntfa down at standby node. * * @param mds_dest @@ -1177,6 +1198,11 @@ void ClientsDownRemoved(MDS_DEST mds_dest) { NtfAdmin::theNtfAdmin->ClientsDownRemoved(mds_dest); } +bool isClientExisted(MDS_DEST mds_dest) { + osafassert(NtfAdmin::theNtfAdmin != NULL); + return (NtfAdmin::theNtfAdmin->isClientExisted(mds_dest)); +} + void SearchAndSetClientsDownFlag(MDS_DEST mds_dest) { osafassert(NtfAdmin::theNtfAdmin != NULL); NtfAdmin::theNtfAdmin->SearchAndSetClientsDownFlag(mds_dest); diff --git a/src/ntf/ntfd/NtfAdmin.h b/src/ntf/ntfd/NtfAdmin.h index 4808ca9..ff4b178 100644 --- a/src/ntf/ntfd/NtfAdmin.h +++ b/src/ntf/ntfd/NtfAdmin.h @@ -65,6 +65,7 @@ class NtfAdmin { void notificationLoggedConfirmed(SaNtfIdentifierT notificationId); void clientRemoved(unsigned int clientId); void clientRemoveMDS(MDS_DEST mds_dest); + bool isClientExisted(MDS_DEST mds_dest); void ClientsDownRemoved(MDS_DEST mds_dest); void SearchAndSetClientsDownFlag(MDS_DEST mds_dest); void subscriptionRemoved(unsigned int clientId, diff --git a/src/ntf/ntfd/ntfs_cb.h b/src/ntf/ntfd/ntfs_cb.h index 96eedc1..3b1d715 100644 --- a/src/ntf/ntfd/ntfs_cb.h +++ b/src/ntf/ntfd/ntfs_cb.h @@ -38,6 +38,11 @@ typedef struct { MDS_DEST mds_dest; } ntf_client_t; +typedef struct client_down_list { + MDS_DEST mds_dest; + struct client_down_list *next; +} CLIENT_DOWN_LIST; + typedef struct ntfs_cb { SYSF_MBX mbx; /* NTFS's mailbox */ MDS_HDL mds_hdl;/* PWE Handle for interacting with NTFAs */ @@ -71,6 +76,10 @@ typedef struct ntfs_cb { NCS_SEL_OBJ usr2_sel_obj; /* Selection object for CLM initialization.*/ uint16_t peer_mbcsv_version; /*Remeber peer NTFS MBCSV version.*/ bool clm_initialized;// For CLM init status; + CLIENT_DOWN_LIST + *client_down_list_head; /* client down reccords - fix for not respond to + client if client already downs*/ + bool client_finalized; } ntfs_cb_t; extern uint32_t ntfs_cb_init(ntfs_cb_t *); diff --git a/src/ntf/ntfd/ntfs_com.h b/src/ntf/ntfd/ntfs_com.h index b9e37da..884eae5 100644 --- a/src/ntf/ntfd/ntfs_com.h +++ b/src/ntf/ntfd/ntfs_com.h @@ -77,6 +77,7 @@ void notificationSentConfirmed(unsigned int clientId, void notificationLoggedConfirmed(SaNtfIdentifierT notificationId); void clientRemoved(unsigned int clientId); void clientRemoveMDS(MDS_DEST mds_dest); +bool isClientExisted(MDS_DEST mds_dest); void ClientsDownRemoved(MDS_DEST mds_dest); void SearchAndSetClientsDownFlag(MDS_DEST mds_dest); void subscriptionRemoved(unsigned int clientId, diff --git a/src/ntf/ntfd/ntfs_evt.c b/src/ntf/ntfd/ntfs_evt.c index 19b2f60..7b44cb2 100644 --- a/src/ntf/ntfd/ntfs_evt.c +++ b/src/ntf/ntfd/ntfs_evt.c @@ -108,8 +108,34 @@ static uint32_t proc_ntfa_updn_mds_msg(ntfsv_ntfs_evt_t *evt) if (ntfs_cb->ha_state == SA_AMF_HA_STANDBY) { ClientsDownRemoved(evt->fr_dest); } else { +if (ntfs_cb->client_finalized == false && +!isClientExisted(evt->fr_dest)) { +CLIENT_DOWN_LIST *client_down_rec = NULL; + +if ((client_down_rec = (CLIENT_DOWN_LIST *)malloc( + sizeof(CLIENT_DOWN_LIST))) == NULL) { +LOG_ER("memory allocation for the CLIENT_DOWN_LIST failed"); +
Re: [devel] [PATCH 0/9] Review Request for mds: Add solution for TIPC buffer overflow [#1960]
Thanks Minh. I have no more comments. Regards, Vu On 9/23/19 7:48 AM, Minh Hon Chau wrote: Hi all, Below is the patch #10 that updates most of comments, it applies on top of current patch #9. This patch #10 does not use the shared_ptr and base:Mutex as comments given by Gary and Vu, the reason is that it will cause a similar problem reported in #2860 (user call exit() without properly doing mds shutdown), unless those variables are allocated on the heap. I would like to push the #1960 patches today if we don't have any more comments. There are some other incremental improvements/fixes that will be addressed in other tickets. Thanks Minh --- src/mds/README | 2 +- src/mds/mds_dt_tipc.c | 28 - src/mds/mds_tipc_fctrl_intf.cc | 67 ++-- src/mds/mds_tipc_fctrl_intf.h | 2 +- src/mds/mds_tipc_fctrl_msg.cc | 44 +- src/mds/mds_tipc_fctrl_msg.h | 22 +++-- src/mds/mds_tipc_fctrl_portid.cc | 46 --- 7 files changed, 137 insertions(+), 74 deletions(-) diff --git a/src/mds/README b/src/mds/README index 1b94632..0819bdc 100644 --- a/src/mds/README +++ b/src/mds/README @@ -182,7 +182,7 @@ TIPC portid state machine and its transition kDisabled, // no flow control support at this state kStartup, // a newly published portid starts at this state -kTxProb, // txprob timer is running to confirm if the flow control is supported +kTxProb, // tx probation timer is running to confirm if the flow control is supported kEnabled // flow control support is confirmed, data flow is controlled kRcvBuffOverflow // anticipating (or experienced) the receiver's buffer overflow diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index 1b6c3f8..e7a7b48 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -247,6 +247,7 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) if (!get_tipc_port_id(tipc_cb.BSRsock, &port_id)) { close(tipc_cb.Dsock); close(tipc_cb.BSRsock); + *mds_tipc_ref = 0; return NCSCC_RC_FAILURE; } *mds_tipc_ref = port_id.ref; @@ -330,7 +331,7 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) } /* Get tipc socket receive buffer size */ - int optval; + int optval = 0; socklen_t optlen = sizeof(optval); if (getsockopt(tipc_cb.BSRsock, SOL_SOCKET, SO_RCVBUF, &optval, &optlen) != 0) { @@ -350,12 +351,25 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) int acksize = -1; if ((ptr = getenv("MDS_TIPC_FCTRL_ACKTIMEOUT")) != NULL) { ackto = atoi(ptr); + if (ackto == 0) { + syslog(LOG_ERR, "MDTM:TIPC Invalid " + "MDS_TIPC_FCTRL_ACKTIMEOUT, using default value"); + ackto = -1; + } } if ((ptr = getenv("MDS_TIPC_FCTRL_ACKSIZE")) != NULL) { acksize = atoi(ptr); + if (acksize == 0) { + syslog(LOG_ERR, "MDTM:TIPC Invalid " + "MDS_TIPC_FCTRL_ACKSIZE, using default value"); + acksize = -1; + } } - mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, (uint64_t)optval, + mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, optval, ackto, acksize, tipc_mcast_enabled); + } else { + syslog(LOG_ERR, "MDTM:TIPC Invalid value of" + "MDS_TIPC_FCTRL_ENABLED"); } } @@ -366,6 +380,7 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) close(tipc_cb.Dsock); close(tipc_cb.BSRsock); m_NCS_IPC_RELEASE(&tipc_cb.tmr_mbx, NULL); + mds_tipc_fctrl_shutdown(); return NCSCC_RC_FAILURE; } @@ -2528,7 +2543,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) */ uint32_t status = 0; uint32_t sum_mds_hdr_plus_mdtm_hdr_plus_len; - uint16_t fctrl_seq_num = 0; + uint16_t fctrl_seq_num = 0; int version = req->msg_arch_word & 0x7; if (version > 1) { sum_mds_hdr_plus_mdtm_hdr_plus_len = @@ -2618,7 +2633,7 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) return NCSCC_RC_FAILURE; } /* if sndqueue is capable, then obtain the current sending seq */ - if (mds_tipc_fctrl_sndqueue_capable(tipc_id, len, &fctrl_seq_num) + if (mds_tipc_fctrl_sndqueue_capable(tipc_id, &fctrl_seq_num) == NCSCC_RC_FAILURE){ m_MDS_LOG_ERR("FCTRL: Failed to send message len :%d", len); return NCSCC_RC_FAILURE; @@ -2717,10 +2732,10 @@ uint32_t mds_mdtm_send_tipc(MDTM_SEND_REQ *req) } /* if sndqueue is capable,
Re: [devel] [PATCH 8/9] mds: Apply serial number arithmetic for sequence counter [#1960]
Hi Minh, I have a minor comment below. Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: This patch applies the serial number arithmetic for the flow control sequence number, referenced to RFC1982. This is only temporary patch, a proper one could be made in /base with template for others type, e.g uint32. Then mds reuses it from /base. --- src/mds/mds_tipc_fctrl_portid.cc | 53 +-- src/mds/mds_tipc_fctrl_portid.h | 77 2 files changed, 97 insertions(+), 33 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc index e762290..365d72f 100644 --- a/src/mds/mds_tipc_fctrl_portid.cc +++ b/src/mds/mds_tipc_fctrl_portid.cc @@ -66,12 +66,12 @@ DataMessage* MessageQueue::Find(uint32_t mseq, uint16_t mfrag) { return nullptr; } -uint64_t MessageQueue::Erase(uint16_t fseq_from, uint16_t fseq_to) { +uint64_t MessageQueue::Erase(Seq16 fseq_from, Seq16 fseq_to) { uint64_t msg_len = 0; for (auto it = queue_.begin(); it != queue_.end();) { DataMessage *m = *it; -if (fseq_from <= m->header_.fseq_ && -m->header_.fseq_ <= fseq_to) { +if (fseq_from <= Seq16(m->header_.fseq_) && +Seq16(m->header_.fseq_) <= fseq_to) { msg_len += m->header_.msg_len_; it = queue_.erase(it); delete m; @@ -92,10 +92,10 @@ DataMessage* MessageQueue::FirstUnsent() { return nullptr; } -void MessageQueue::MarkUnsentFrom(uint16_t fseq) { +void MessageQueue::MarkUnsentFrom(Seq16 fseq) { for (auto it = queue_.begin(); it != queue_.end(); ++it) { DataMessage *m = *it; -if (m->header_.fseq_ >= fseq) m->is_sent_ = false; +if (Seq16(m->header_.fseq_) >= fseq) m->is_sent_ = false; } } @@ -140,7 +140,7 @@ void TipcPortId::FlushData() { "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", id_.node, id_.ref, msg->header_.mseq_, msg->header_.mfrag_, msg->header_.fseq_, - sndwnd_.acked_, sndwnd_.send_, sndwnd_.nacked_space_); + sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_); } } while (msg != nullptr); } @@ -185,7 +185,7 @@ uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length, "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", id_.node, id_.ref, msg->header_.mseq_, msg->header_.mfrag_, msg->header_.fseq_, length, -sndwnd_.acked_, sndwnd_.send_, sndwnd_.nacked_space_); +sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_); } else { ++sndwnd_.send_; m_MDS_LOG_DBG("FCTRL: [me] --> [node:%x, ref:%u], " @@ -193,7 +193,7 @@ uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length, "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", id_.node, id_.ref, msg->header_.mseq_, msg->header_.mfrag_, msg->header_.fseq_, length, -sndwnd_.acked_, sndwnd_.send_, sndwnd_.nacked_space_); +sndwnd_.acked_.v(), sndwnd_.send_.v(), sndwnd_.nacked_space_); } return rc; } @@ -248,13 +248,13 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, uint16_t mfrag, txprob_cnt_, (uint8_t)state_); } // update receiver sequence window - if (rcvwnd_.acked_ < fseq && rcvwnd_.rcv_ + 1 == fseq) { + if (rcvwnd_.acked_ < Seq16(fseq) && rcvwnd_.rcv_ + Seq16(1) == Seq16(fseq)) { m_MDS_LOG_DBG("FCTRL: [me] <-- [node:%x, ref:%u], " "RcvData[mseq:%u, mfrag:%u, fseq:%u], " "rcvwnd[acked:%u, rcv:%u, nacked:%" PRIu64 "]", id_.node, id_.ref, mseq, mfrag, fseq, -rcvwnd_.acked_, rcvwnd_.rcv_, rcvwnd_.nacked_space_); +rcvwnd_.acked_.v(), rcvwnd_.rcv_.v(), rcvwnd_.nacked_space_); ++rcvwnd_.rcv_; if (rcvwnd_.rcv_ - rcvwnd_.acked_ >= chunk_size_) { @@ -279,7 +279,7 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, uint16_t mfrag, // It is not used for now, so ignore it. // check for transmission error -if (rcvwnd_.rcv_ + 1 < fseq) { +if (rcvwnd_.rcv_ + Seq16(1) < Seq16(fseq)) { if (rcvwnd_.rcv_ == 0 && rcvwnd_.acked_ == 0) { // peer does not realize that this portid reset m_MDS_LOG_ERR("FCTRL: [me] <-- [node:%x, ref:%u], " @@ -288,7 +288,7 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, uint16_t mfrag, "Warning[portid reset]", id_.node, id_.ref, mseq, mfrag, fseq, -rcvwnd_.acked_, rcvwnd_.rcv_, rcvwnd_.nacked_space_); +rcvwnd_.acked_.v(), rcvwnd_.rcv_.v(), rcvwnd_.nacked_space_); rcvwnd_.rcv_ = fseq; } else { @@ -300,10 +300,10 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, uint16_t mfrag, "Error[msg loss]", id_.node, id_.ref, mseq, mfrag, fseq, -rcvwnd_.acked_, rcvwnd_.rcv_, rcvwnd_.nacked_space_); +rcvwnd_.acked_.v(), rcvwnd_.rcv_.v(), rcvwnd_.nacked_space_); } }
Re: [devel] [PATCH 1/9] mds: Add README for solution of TIPC buffer overflow at MDS [#1960]
Hi Minh, I have just finished my review to your MDS patches, and I have a question: With 2N services, suppose the active is having TIPC overloaded issue; it will do some memory allocations, and probably starting a timer there too. Then, what happens if that active service is changed to the standby role? Shall allocated memory/timer be freed up and is there any impact on the subsequent messages sent to the new active? Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: --- src/mds/README | 221 + 1 file changed, 221 insertions(+) create mode 100644 src/mds/README diff --git a/src/mds/README b/src/mds/README new file mode 100644 index 000..1b94632 --- /dev/null +++ b/src/mds/README @@ -0,0 +1,221 @@ +/* -*- OpenSAF -*- + * + * (C) Copyright 2019 The OpenSAF Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed + * under the GNU Lesser General Public License Version 2.1, February 1999. + * The complete license can be accessed from the following location: + * http://opensource.org/licenses/lgpl-license.php + * See the Copying file included with the OpenSAF distribution for full + * licensing terms. + * + * Author(s): Ericsson AB + * + */ +Background +== +If OpenSAF configures TIPC as transport, the MDS library today will use +TIPC SOCK_RDM socket for message distribution in the cluster. The SOCK_RDM +datagram socket possibly encounters buffer overflow at receiver ends which +has been documented in tipc.io[1]. A temporary solution for this buffer +overflow issue is that the socket buffer size can be increased to a larger +number. However, if the cluster continues either scaling out or adding more +components, the system will be under dimensioned, thus the TIPC buffer +overflow can occur again. + +MDS's solution for TIPC buffer overflow +=== +If MDS disables TIPC_DEST_DROPPABLE, TIPC will return the ancillary message +when the original message is failed to deliver. By this event, if the message +has been saved in queue, MDS at sender sides can search and retransmit this +message to the receivers. +Once the messages in the sender's queue has been delivered successfully, MDS +needs to remove them. MDS introduces its internal ACK message as an +acknowledgment from receivers so that the senders can remove the messages +out of the queue. +Also, as such situation of buffer overflow at receivers, the retransmission may +not succeed or even become worse at receiver ends (the more retransmission, +the more overflow to occur). MDS imitates the sliding window in TCP[2] to +control the flow of data message towards the receivers. + +Legacy MDS data message, new (data + ACK) MDS message, and upgradability + +Below is the MDS legacy message format that has been used till OpenSAF 5.19.07 + +oct 0 message length +oct 1 +-- +oct 2 sequence number: incremented for every message sent out to all destined +... tipc portid. +oct 5 +-- +oct 6 fragment number: a message with same sequence number can be fragmented, +oct 7 identified by this fragment number. +-- +oct 8 length check: cross check with message length(oct0,1), NOT USED. +oct 9 +-- +oct 10 protocol version: (MDS_PROT:0xA0 | MDS_VERSION:0x08) = 0xA8, NOT USED +-- +oct 11 mds length: length of mds header and mds data, starting from oct13 +oct 12 +-- +oct 13 mds header and data +... +-- + +The current sequence number/fragment number are being used in MDS for all +messages sent to all discovered tipc portid(s), meaning that every message is sent +to any tipc portid, the sequence/fragment number is increased. The flow control +needs its own sequence number sliding between two tipc porid(s) so that receivers +can detect message drop due to buffer overload. Therefore, the oct8 and oct9 are +now reused as flow control sequence number. The oct10, protocol version, has new +value of 0xB8. The format of new data message as below: + +oct 0 same +... +oct 7 +-- +oct 8 flow control sequence number +oct 9 +-- +oct 10 protocol version: (MDS_PROT_TIPC_FCTRL:0xB0 | MDS_VERSION:0x08) = 0xB8 +-- +oct 11 same +... +-- + +The ACK message is introduced to acknowledge one data message or a chunk of +accumulative data message. The ACK message format: + +oct 0 message length +
Re: [devel] [PATCH 6/9] mds: Implement kRcvBuffOverflow state [#1960]
Hi Minh, I has few comments below. Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: This patch implements the kRcvBuffOverflow state machine as described in README file. --- src/mds/mds_tipc_fctrl_intf.cc | 6 +- src/mds/mds_tipc_fctrl_msg.h | 1 + src/mds/mds_tipc_fctrl_portid.cc | 137 ++- src/mds/mds_tipc_fctrl_portid.h | 5 +- 4 files changed, 131 insertions(+), 18 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index c2d0922..397114e 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -285,14 +285,16 @@ uint32_t mds_tipc_fctrl_trysend(const uint8_t *buffer, uint16_t len, rc = NCSCC_RC_FAILURE; } else { if (portid->state_ != TipcPortId::State::kDisabled) { - portid->Queue(buffer, len); + bool sendable = portid->ReceiveCapable(len); + portid->Queue(buffer, len, sendable); // start txprob timer for the first msg sent out // do not start for other states - if (portid->state_ == TipcPortId::State::kStartup) { + if (sendable && portid->state_ == TipcPortId::State::kStartup) { txprob_timer.Start(kBaseTimerInt, tmr_exp_cbk); m_MDS_LOG_DBG("FCTRL: Start txprob"); portid->state_ = TipcPortId::State::kTxProb; } + if (sendable == false) rc = NCSCC_RC_FAILURE; } } diff --git a/src/mds/mds_tipc_fctrl_msg.h b/src/mds/mds_tipc_fctrl_msg.h index 69f8048..e6b9662 100644 --- a/src/mds/mds_tipc_fctrl_msg.h +++ b/src/mds/mds_tipc_fctrl_msg.h @@ -110,6 +110,7 @@ class DataMessage: public BaseMessage { uint8_t* msg_data_{nullptr}; uint8_t snd_type_{0}; + bool is_sent_{true}; DataMessage() {} virtual ~DataMessage(); void Decode(uint8_t *msg) override; diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc index 84ecee9..e762290 100644 --- a/src/mds/mds_tipc_fctrl_portid.cc +++ b/src/mds/mds_tipc_fctrl_portid.cc @@ -82,6 +82,23 @@ uint64_t MessageQueue::Erase(uint16_t fseq_from, uint16_t fseq_to) { return msg_len; } +DataMessage* MessageQueue::FirstUnsent() { + for (auto it = queue_.begin(); it != queue_.end(); ++it) { [Vu] Use the shorter version `for (const auto& it : queue_) +DataMessage *m = *it; +if (m->is_sent_ == false) { + return m; +} + } + return nullptr; +} + +void MessageQueue::MarkUnsentFrom(uint16_t fseq) { + for (auto it = queue_.begin(); it != queue_.end(); ++it) { [Vu] as above comment +DataMessage *m = *it; +if (m->header_.fseq_ >= fseq) m->is_sent_ = false; + } +} + void MessageQueue::Clear() { while (queue_.empty() == false) { DataMessage* msg = queue_.front(); @@ -99,7 +116,8 @@ TipcPortId::TipcPortId(struct tipc_portid id, int sock, uint16_t chksize, TipcPortId::~TipcPortId() { // Fake a TmrChunkAck event to ack all received messages ReceiveTmrChunkAck(); - // clear all msg in sndqueue_ + // flush all unsent msg in sndqueue_ + FlushData(); sndqueue_.Clear(); [Vu] If sndqueue_.Clear() must be called every time calling `FlushData`, should move `Clear()` into FlushData() ? } @@ -109,6 +127,24 @@ uint64_t TipcPortId::GetUniqueId(struct tipc_portid id) { return uid; } +void TipcPortId::FlushData() { + DataMessage* msg = nullptr; + do { +// find the lowest sequence unsent yet +msg = sndqueue_.FirstUnsent(); +if (msg != nullptr) { + Send(msg->msg_data_, msg->header_.msg_len_); + msg->is_sent_ = true; + m_MDS_LOG_DBG("FCTRL: [me] --> [node:%x, ref:%u], " + "FlushData[mseq:%u, mfrag:%u, fseq:%u], " + "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", + id_.node, id_.ref, + msg->header_.mseq_, msg->header_.mfrag_, msg->header_.fseq_, + sndwnd_.acked_, sndwnd_.send_, sndwnd_.nacked_space_); +} + } while (msg != nullptr); +} + uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) { struct sockaddr_tipc server_addr; ssize_t send_len = 0; @@ -130,29 +166,49 @@ uint32_t TipcPortId::Send(uint8_t* data, uint16_t length) { return rc; } -uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length) { +uint32_t TipcPortId::Queue(const uint8_t* data, uint16_t length, +bool is_sent) { uint32_t rc = NCSCC_RC_SUCCESS; DataMessage *msg = new DataMessage; msg->header_.Decode(const_cast(data)); msg->Decode(const_cast(data)); msg->msg_data_ = new uint8_t[length]; + msg->is_sent_ = is_sent; memcpy(msg->msg_data_, data, length); sndqueue_.Queue(msg); - ++sndwnd_.send_; - sndwnd_.nacked_space_ += length; - m_MDS_LOG_DBG("FCTRL: [me] --> [node:%x, ref:%u], " - "SndData[mseq:%u, mfrag:%u, fseq:%u, len:%u], " - "sndwnd[acked:%u, send:%u, nacked:%" PRIu64 "]", - id_.node, id_.ref, - msg->header_.mseq_, msg->header_.mfrag_, msg->header_.fseq_, length, - sndwnd_.acked_, sndwnd_.send_, sn
Re: [devel] [PATCH 3/9] mds: Add implementation for TIPC buffer overflow solution [#1960]
Hi Minh, See my responses to your comments below, started with [Vu2]. Regards, Vu On 9/16/19 1:06 PM, Minh Hon Chau wrote: Hi Vu, Several comments with [M] too :). Thanks Minh On 16/9/19 2:24 pm, Nguyen Minh Vu wrote: Hi Minh, I have several comments below, started with [Vu]. Regards, Vu On 8/14/19 1:01 PM, Minh Chau wrote: This is a collaborative patch of two participants: - Tran Thuan - Minh Chau Main changes: - Add mds_tipc_fctrl_intf.h, mds_tipc_fctrl_intf.cc: These two files introduce new functions which are called in mds_dt_tipc.c if the flow control is enabled - Add mds_tipc_fctrl_portid.h, mds_tipc_fctrl_portid.cc: These files implements the tipc portid instance, which supports the sliding window, mds msg queue - Add mds_tipc_fctrl_msg.h, mds_tipc_fctrl_msg.cc: These files define the event and messages which are used for this solution. --- src/mds/Makefile.am | 10 +- src/mds/mds_dt.h | 8 +- src/mds/mds_dt_tipc.c | 188 +--- src/mds/mds_tipc_fctrl_intf.cc | 376 +++ src/mds/mds_tipc_fctrl_intf.h | 47 + src/mds/mds_tipc_fctrl_msg.cc | 142 +++ src/mds/mds_tipc_fctrl_msg.h | 129 ++ src/mds/mds_tipc_fctrl_portid.cc | 261 +++ src/mds/mds_tipc_fctrl_portid.h | 87 + 9 files changed, 1184 insertions(+), 64 deletions(-) create mode 100644 src/mds/mds_tipc_fctrl_intf.cc create mode 100644 src/mds/mds_tipc_fctrl_intf.h create mode 100644 src/mds/mds_tipc_fctrl_msg.cc create mode 100644 src/mds/mds_tipc_fctrl_msg.h create mode 100644 src/mds/mds_tipc_fctrl_portid.cc create mode 100644 src/mds/mds_tipc_fctrl_portid.h diff --git a/src/mds/Makefile.am b/src/mds/Makefile.am index 2d7b652..d849e8f 100644 --- a/src/mds/Makefile.am +++ b/src/mds/Makefile.am @@ -48,10 +48,16 @@ lib_libopensaf_core_la_SOURCES += \ if ENABLE_TIPC_TRANSPORT noinst_HEADERS += src/mds/mds_dt_tipc.h \ src/mds/mds_tipc_recvq_stats.h \ - src/mds/mds_tipc_recvq_stats_impl.h + src/mds/mds_tipc_recvq_stats_impl.h \ + src/mds/mds_tipc_fctrl_intf.h \ + src/mds/mds_tipc_fctrl_portid.h \ + src/mds/mds_tipc_fctrl_msg.h lib_libopensaf_core_la_SOURCES += src/mds/mds_dt_tipc.c \ src/mds/mds_tipc_recvq_stats.cc \ - src/mds/mds_tipc_recvq_stats_impl.cc + src/mds/mds_tipc_recvq_stats_impl.cc \ + src/mds/mds_tipc_fctrl_intf.cc \ + src/mds/mds_tipc_fctrl_portid.cc \ + src/mds/mds_tipc_fctrl_msg.cc endif if ENABLE_TESTS diff --git a/src/mds/mds_dt.h b/src/mds/mds_dt.h index b645bb4..d9e8633 100644 --- a/src/mds/mds_dt.h +++ b/src/mds/mds_dt.h @@ -162,7 +162,7 @@ uint32_t mdtm_del_from_ref_tbl(MDS_SUBTN_REF_VAL ref); uint32_t mds_tmr_mailbox_processing(void); uint32_t mdtm_get_from_ref_tbl(MDS_SUBTN_REF_VAL ref, MDS_SVC_HDL *svc_hdl); uint32_t mdtm_add_frag_hdr(uint8_t *buf_ptr, uint16_t len, uint32_t seq_num, - uint16_t frag_byte); + uint16_t frag_byte, uint16_t fctrl_seq_num); uint32_t mdtm_free_reassem_msg_mem(MDS_ENCODED_MSG *msg); uint32_t mdtm_process_recv_data(uint8_t *buf, uint16_t len, uint64_t tipc_id, uint32_t *buff_dump); @@ -240,9 +240,13 @@ bool mdtm_mailbox_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT msg); #define MDS_PROT 0xA0 #define MDS_VERSION 0x08 -#define MDS_PROT_VER_MASK (MDS_PROT | MDS_VERSION) +#define MDS_PROT_VER_MASK 0xFC #define MDTM_PRI_MASK 0x3 +/* MDS protocol/version for flow control */ +#define MDS_PROT_FCTRL (0xB0 | MDS_VERSION) +#define MDS_PROT_FCTRL_ID 0x00AC13F5 + /* Added for the subscription changes */ #define MDS_NCS_CHASSIS_ID (m_NCS_GET_NODE_ID & 0x00ff) #define MDS_TIPC_COMMON_ID 0x01001000 diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index 86b52bb..fef1c50 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -47,6 +47,7 @@ #include "mds_dt_tipc.h" #include "mds_dt_tcp_disc.h" #include "mds_core.h" +#include "mds_tipc_fctrl_intf.h" #include "mds_tipc_recvq_stats.h" #include "base/osaf_utility.h" #include "base/osaf_poll.h" @@ -165,20 +166,22 @@ NCS_PATRICIA_TREE mdtm_reassembly_list; uint32_t mdtm_global_frag_num; const unsigned int MAX_RECV_THRESHOLD = 30; +uint8_t gl_mds_pro_ver = MDS_PROT | MDS_VERSION; -static bool get_tipc_port_id(int sock, uint32_t* port_id) { +static bool get_tipc_port_id(int sock, struct tipc_portid* port_id) { struct sockaddr_tipc addr; socklen_t sz = sizeof(addr); memset(&addr, 0, sizeof(addr)); - *port_id = 0; + port_id->node = 0; + port_id->ref = 0; if (0 > getsockname(sock, (struct sockaddr *)&addr, &sz)) { syslog(LOG_ERR, "MDTM:TIPC Failed to get socket name, err: %s", strerror(
Re: [devel] [PATCH 5/9] mds: Add state machine for tipc portid instance [#1960]
Hi Minh, I has few comments below. Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: This patch adds state machine to support tx probation timer. --- src/mds/mds_tipc_fctrl_intf.cc | 47 +++-- src/mds/mds_tipc_fctrl_msg.h | 1 + src/mds/mds_tipc_fctrl_portid.cc | 109 +++ src/mds/mds_tipc_fctrl_portid.h | 22 4 files changed, 176 insertions(+), 3 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index bd0a8f6..c2d0922 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -34,6 +34,7 @@ using mds::Event; using mds::TipcPortId; +using mds::Timer; using mds::DataMessage; using mds::ChunkAck; using mds::HeaderMessage; @@ -65,6 +66,11 @@ uint64_t sock_buf_size = 0; std::map portid_map; std::mutex portid_map_mutex; +// probation timer event to enable flow control at receivers +const int64_t kBaseTimerInt = 200; // in centisecond +const uint8_t kTxProbMaxRetries = 10; +Timer txprob_timer(Event::Type::kEvtTmrTxProb); + // chunk ack parameters // todo: The chunk ack timeout and chunk ack size should be configurable int kChunkAckTimeout = 1000; // in miliseconds @@ -76,13 +82,37 @@ TipcPortId* portid_lookup(struct tipc_portid id) { return portid_map[uid]; } +void tmr_exp_cbk(void* uarg) { + Timer* timer = reinterpret_cast(uarg); + if (timer != nullptr) { +timer->is_active_ = false; +// send to fctrl thread +if (m_NCS_IPC_SEND(&mbx_events, new Event(timer->type_), +NCS_IPC_PRIORITY_HIGH) != NCSCC_RC_SUCCESS) { + m_MDS_LOG_ERR("FCTRL: Failed to send msg to mbx_events\n"); +} + } +} + void process_timer_event(const Event evt) { + bool txprob_restart = false; for (auto i : portid_map) { TipcPortId* portid = i.second; + +if (evt.type_ == Event::Type::kEvtTmrTxProb) { + if (portid->ReceiveTmrTxProb(kTxProbMaxRetries) == true) { +txprob_restart = true; + } +} + if (evt.type_ == Event::Type::kEvtTmrChunkAck) { portid->ReceiveTmrChunkAck(); } } + if (txprob_restart) { +txprob_timer.Start(kBaseTimerInt, tmr_exp_cbk); +m_MDS_LOG_DBG("FCTRL: Restart txprob"); + } } uint32_t process_flow_event(const Event evt) { @@ -231,8 +261,10 @@ uint32_t mds_tipc_fctrl_sndqueue_capable(struct tipc_portid id, uint16_t len, id.node, id.ref, __LINE__); rc = NCSCC_RC_FAILURE; } else { -// assign the sequence number of the outgoing message -*next_seq = portid->GetCurrentSeq(); +if (portid->state_ != TipcPortId::State::kDisabled) { + // assign the sequence number of the outgoing message + *next_seq = portid->GetCurrentSeq(); +} } portid_map_mutex.unlock(); @@ -252,7 +284,16 @@ uint32_t mds_tipc_fctrl_trysend(const uint8_t *buffer, uint16_t len, id.node, id.ref, __LINE__); rc = NCSCC_RC_FAILURE; } else { -portid->Queue(buffer, len); +if (portid->state_ != TipcPortId::State::kDisabled) { + portid->Queue(buffer, len); + // start txprob timer for the first msg sent out + // do not start for other states + if (portid->state_ == TipcPortId::State::kStartup) { +txprob_timer.Start(kBaseTimerInt, tmr_exp_cbk); +m_MDS_LOG_DBG("FCTRL: Start txprob"); +portid->state_ = TipcPortId::State::kTxProb; + } +} } portid_map_mutex.unlock(); diff --git a/src/mds/mds_tipc_fctrl_msg.h b/src/mds/mds_tipc_fctrl_msg.h index 8e6a874..69f8048 100644 --- a/src/mds/mds_tipc_fctrl_msg.h +++ b/src/mds/mds_tipc_fctrl_msg.h @@ -45,6 +45,7 @@ class Event { kEvtDropData, // event reported from tipc that a message is not // delivered kEvtTmrAll, +kEvtTmrTxProb,// event that tx probation timer expired for once kEvtTmrChunkAck, // event to send the chunk ack }; NCS_IPC_MSG next_{0}; diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc index 64115d5..84ecee9 100644 --- a/src/mds/mds_tipc_fctrl_portid.cc +++ b/src/mds/mds_tipc_fctrl_portid.cc @@ -23,6 +23,35 @@ namespace mds { +Timer::Timer(Event::Type type) { + tmr_id_ = nullptr; + type_ = type; + is_active_ = false; +} + +Timer::~Timer() { [Vu] Is it required to stop the timer here if it still in active? +} + +void Timer::Start(int64_t period, void (*tmr_exp_func)(void*)) { + // timer will not start if it's already started + // period is in centiseconds + if (is_active_ == false) { +if (tmr_id_ == nullptr) { + tmr_id_ = ncs_tmr_alloc(nullptr, 0); +} +tmr_id_ = ncs_tmr_start(tmr_id_, period, tmr_exp_func, this, +nullptr, 0); +is_active_ = true; + } +} + +void Timer::Stop() { [Vu] This method is not called from anywhere. Is there any case the timer should be stopped before the timer gets expired? + if (is_active_ == true) { +ncs_tmr_stop(tm
Re: [devel] [PATCH 4/9] mds: Add timeout for ack message [#1960]
Hi Minh, I have minor comments below. Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: If the ack size is configured greater than 1, there should be a timeout at receiver ends to send the ack message back to senders. The ack message timeout utilizes the poll timeout in flow control thread to make mds lightweight (in contrast to additional timer threads). --- src/mds/mds_tipc_fctrl_intf.cc | 33 ++--- src/mds/mds_tipc_fctrl_msg.h | 6 ++ src/mds/mds_tipc_fctrl_portid.cc | 15 +++ src/mds/mds_tipc_fctrl_portid.h | 1 + 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index 91b9107..bd0a8f6 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -66,7 +66,8 @@ std::map portid_map; std::mutex portid_map_mutex; // chunk ack parameters -// todo: The chunk ack size should be configurable +// todo: The chunk ack timeout and chunk ack size should be configurable +int kChunkAckTimeout = 1000; // in miliseconds uint16_t kChunkAckSize = 3; TipcPortId* portid_lookup(struct tipc_portid id) { @@ -75,6 +76,15 @@ TipcPortId* portid_lookup(struct tipc_portid id) { return portid_map[uid]; } +void process_timer_event(const Event evt) { + for (auto i : portid_map) { +TipcPortId* portid = i.second; +if (evt.type_ == Event::Type::kEvtTmrChunkAck) { + portid->ReceiveTmrChunkAck(); +} + } +} + uint32_t process_flow_event(const Event evt) { uint32_t rc = NCSCC_RC_SUCCESS; TipcPortId *portid = portid_lookup(evt.id_); @@ -110,7 +120,7 @@ uint32_t process_flow_event(const Event evt) { uint32_t process_all_events(void) { enum { FD_FCTRL = 0, NUM_FDS }; - int poll_tmo = MDTM_TIPC_POLL_TIMEOUT; + int poll_tmo = kChunkAckTimeout; while (true) { int pollres; struct pollfd pfd[NUM_FDS] = {{0}}; @@ -135,11 +145,24 @@ uint32_t process_all_events(void) { if (evt == nullptr) continue; portid_map_mutex.lock(); -process_flow_event(*evt); + +if (evt->IsTimerEvent()) { + process_timer_event(*evt); +} +if (evt->IsFlowEvent()) { + process_flow_event(*evt); +} + [Vu] Should log something here if the event is none of above? delete evt; portid_map_mutex.unlock(); } } +// timeout, scan all portid and send ack msgs +if (pollres == 0) { + portid_map_mutex.lock(); + process_timer_event(Event(Event::Type::kEvtTmrChunkAck)); + portid_map_mutex.unlock(); +} } /* while */ return NCSCC_RC_SUCCESS; } @@ -368,6 +391,10 @@ uint32_t mds_tipc_fctrl_rcv_data(uint8_t *buffer, uint16_t len, portid_map_mutex.lock(); uint32_t rc = process_flow_event(Event(Event::Type::kEvtRcvData, id, data.svc_id_, header.mseq_, header.mfrag_, header.fseq_)); + if (rc == NCSCC_RC_CONTINUE) { +process_timer_event(Event(Event::Type::kEvtTmrChunkAck)); [Vu] Missed to unlock the mutex here +rc = NCSCC_RC_SUCCESS; + } portid_map_mutex.unlock(); return rc; } diff --git a/src/mds/mds_tipc_fctrl_msg.h b/src/mds/mds_tipc_fctrl_msg.h index 677f256..8e6a874 100644 --- a/src/mds/mds_tipc_fctrl_msg.h +++ b/src/mds/mds_tipc_fctrl_msg.h @@ -44,6 +44,8 @@ class Event { // selective data msgs (not supported) kEvtDropData, // event reported from tipc that a message is not // delivered +kEvtTmrAll, +kEvtTmrChunkAck, // event to send the chunk ack }; NCS_IPC_MSG next_{0}; Type type_; @@ -68,6 +70,10 @@ class Event { fseq_(f_seg_num), chunk_size_(chunk_size) { type_ = type; } + bool IsTimerEvent() { return (type_ > Type::kEvtTmrAll); } + bool IsFlowEvent() { +return (Type::kEvtDataFlowAll < type_ && type_ < Type::kEvtTmrAll); + } [Vu] Consider making these ones to be constant methods if they do not change any of their attribute values. }; class BaseMessage { diff --git a/src/mds/mds_tipc_fctrl_portid.cc b/src/mds/mds_tipc_fctrl_portid.cc index 24d13ee..64115d5 100644 --- a/src/mds/mds_tipc_fctrl_portid.cc +++ b/src/mds/mds_tipc_fctrl_portid.cc @@ -67,6 +67,8 @@ TipcPortId::TipcPortId(struct tipc_portid id, int sock, uint16_t chksize, } TipcPortId::~TipcPortId() { + // Fake a TmrChunkAck event to ack all received messages + ReceiveTmrChunkAck(); // clear all msg in sndqueue_ sndqueue_.Clear(); } @@ -156,6 +158,7 @@ uint32_t TipcPortId::ReceiveData(uint32_t mseq, uint16_t mfrag, // send ack for @chunk_size_ msgs starting from fseq SendChunkAck(fseq, svc_id, chunk_size_); rcvwnd_.acked_ = rcvwnd_.rcv_; + rc = NCSCC_RC_CONTINUE; } } else { // todo: update rcvwnd_.nacked_space_. @@ -258,4 +261,16 @@ void TipcPortId::ReceiveNack(uint32_t mseq,
Re: [devel] [PATCH 3/9] mds: Add implementation for TIPC buffer overflow solution [#1960]
Hi Minh, I have several comments below, started with [Vu]. Regards, Vu On 8/14/19 1:01 PM, Minh Chau wrote: This is a collaborative patch of two participants: - Tran Thuan - Minh Chau Main changes: - Add mds_tipc_fctrl_intf.h, mds_tipc_fctrl_intf.cc: These two files introduce new functions which are called in mds_dt_tipc.c if the flow control is enabled - Add mds_tipc_fctrl_portid.h, mds_tipc_fctrl_portid.cc: These files implements the tipc portid instance, which supports the sliding window, mds msg queue - Add mds_tipc_fctrl_msg.h, mds_tipc_fctrl_msg.cc: These files define the event and messages which are used for this solution. --- src/mds/Makefile.am | 10 +- src/mds/mds_dt.h | 8 +- src/mds/mds_dt_tipc.c| 188 +--- src/mds/mds_tipc_fctrl_intf.cc | 376 +++ src/mds/mds_tipc_fctrl_intf.h| 47 + src/mds/mds_tipc_fctrl_msg.cc| 142 +++ src/mds/mds_tipc_fctrl_msg.h | 129 ++ src/mds/mds_tipc_fctrl_portid.cc | 261 +++ src/mds/mds_tipc_fctrl_portid.h | 87 + 9 files changed, 1184 insertions(+), 64 deletions(-) create mode 100644 src/mds/mds_tipc_fctrl_intf.cc create mode 100644 src/mds/mds_tipc_fctrl_intf.h create mode 100644 src/mds/mds_tipc_fctrl_msg.cc create mode 100644 src/mds/mds_tipc_fctrl_msg.h create mode 100644 src/mds/mds_tipc_fctrl_portid.cc create mode 100644 src/mds/mds_tipc_fctrl_portid.h diff --git a/src/mds/Makefile.am b/src/mds/Makefile.am index 2d7b652..d849e8f 100644 --- a/src/mds/Makefile.am +++ b/src/mds/Makefile.am @@ -48,10 +48,16 @@ lib_libopensaf_core_la_SOURCES += \ if ENABLE_TIPC_TRANSPORT noinst_HEADERS += src/mds/mds_dt_tipc.h \ src/mds/mds_tipc_recvq_stats.h \ - src/mds/mds_tipc_recvq_stats_impl.h + src/mds/mds_tipc_recvq_stats_impl.h \ + src/mds/mds_tipc_fctrl_intf.h \ + src/mds/mds_tipc_fctrl_portid.h \ + src/mds/mds_tipc_fctrl_msg.h lib_libopensaf_core_la_SOURCES += src/mds/mds_dt_tipc.c \ src/mds/mds_tipc_recvq_stats.cc \ - src/mds/mds_tipc_recvq_stats_impl.cc + src/mds/mds_tipc_recvq_stats_impl.cc \ + src/mds/mds_tipc_fctrl_intf.cc \ + src/mds/mds_tipc_fctrl_portid.cc \ + src/mds/mds_tipc_fctrl_msg.cc endif if ENABLE_TESTS diff --git a/src/mds/mds_dt.h b/src/mds/mds_dt.h index b645bb4..d9e8633 100644 --- a/src/mds/mds_dt.h +++ b/src/mds/mds_dt.h @@ -162,7 +162,7 @@ uint32_t mdtm_del_from_ref_tbl(MDS_SUBTN_REF_VAL ref); uint32_t mds_tmr_mailbox_processing(void); uint32_t mdtm_get_from_ref_tbl(MDS_SUBTN_REF_VAL ref, MDS_SVC_HDL *svc_hdl); uint32_t mdtm_add_frag_hdr(uint8_t *buf_ptr, uint16_t len, uint32_t seq_num, - uint16_t frag_byte); + uint16_t frag_byte, uint16_t fctrl_seq_num); uint32_t mdtm_free_reassem_msg_mem(MDS_ENCODED_MSG *msg); uint32_t mdtm_process_recv_data(uint8_t *buf, uint16_t len, uint64_t tipc_id, uint32_t *buff_dump); @@ -240,9 +240,13 @@ bool mdtm_mailbox_mbx_cleanup(NCSCONTEXT arg, NCSCONTEXT msg); #define MDS_PROT 0xA0 #define MDS_VERSION 0x08 -#define MDS_PROT_VER_MASK (MDS_PROT | MDS_VERSION) +#define MDS_PROT_VER_MASK 0xFC #define MDTM_PRI_MASK 0x3 +/* MDS protocol/version for flow control */ +#define MDS_PROT_FCTRL (0xB0 | MDS_VERSION) +#define MDS_PROT_FCTRL_ID 0x00AC13F5 + /* Added for the subscription changes */ #define MDS_NCS_CHASSIS_ID (m_NCS_GET_NODE_ID & 0x00ff) #define MDS_TIPC_COMMON_ID 0x01001000 diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index 86b52bb..fef1c50 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -47,6 +47,7 @@ #include "mds_dt_tipc.h" #include "mds_dt_tcp_disc.h" #include "mds_core.h" +#include "mds_tipc_fctrl_intf.h" #include "mds_tipc_recvq_stats.h" #include "base/osaf_utility.h" #include "base/osaf_poll.h" @@ -165,20 +166,22 @@ NCS_PATRICIA_TREE mdtm_reassembly_list; uint32_t mdtm_global_frag_num; const unsigned int MAX_RECV_THRESHOLD = 30; +uint8_t gl_mds_pro_ver = MDS_PROT | MDS_VERSION; -static bool get_tipc_port_id(int sock, uint32_t* port_id) { +static bool get_tipc_port_id(int sock, struct tipc_portid* port_id) { struct sockaddr_tipc addr; socklen_t sz = sizeof(addr); memset(&addr, 0, sizeof(addr)); - *port_id = 0; + port_id->node = 0; + port_id->ref = 0; if (0 > getsockname(sock, (struct sockaddr *)&addr, &sz)) { syslog(LOG_ERR, "MDTM:TIPC Failed to get socket name, err: %s", strerror(errno)); return false; } - *port_id = addr.addr.id.ref; + *port_id = addr.addr.id; return true; } @@ -240,12 +243,13 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) } /* Code for getting the self tipc ra
Re: [devel] [PATCH 7/9] mds: Add configurable parameters [#1960]
Hi Minh, I have minor comments below. Regards, Vu On 8/14/19 1:38 PM, Minh Chau wrote: This patch makes the solution of TIPC buffer overflow configurable, as well as the ack timeout/ack size. For example: The service config file can export the following environment variables export MDS_TIPC_FCTRL_ENABLED=1 export MDS_TIPC_FCTRL_ACKTIMEOUT=1000 export MDS_TIPC_FCTRL_ACKSIZE=1 If MDS_TIPC_FCTRL_ACKTIMEOUT, MDS_TIPC_FCTRL_ACKSIZE are not specified, the default values are used. --- src/mds/mds_dt_tipc.c | 19 --- src/mds/mds_tipc_fctrl_intf.cc | 25 +++-- src/mds/mds_tipc_fctrl_intf.h | 3 ++- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/mds/mds_dt_tipc.c b/src/mds/mds_dt_tipc.c index fef1c50..1b6c3f8 100644 --- a/src/mds/mds_dt_tipc.c +++ b/src/mds/mds_dt_tipc.c @@ -342,9 +342,22 @@ uint32_t mdtm_tipc_init(NODE_ID nodeid, uint32_t *mds_tipc_ref) } /* Create flow control tasks if enabled*/ - gl_mds_pro_ver = MDS_PROT_FCTRL; - mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, - (uint64_t)optval, tipc_mcast_enabled); + char* ptr; + if ((ptr = getenv("MDS_TIPC_FCTRL_ENABLED")) != NULL) { + if (atoi(ptr) == 1) { + gl_mds_pro_ver = MDS_PROT_FCTRL; + int ackto = -1; + int acksize = -1; + if ((ptr = getenv("MDS_TIPC_FCTRL_ACKTIMEOUT")) != NULL) { + ackto = atoi(ptr); + } + if ((ptr = getenv("MDS_TIPC_FCTRL_ACKSIZE")) != NULL) { + acksize = atoi(ptr); + } [Vu] Do we have valid range of these environment variables? What if they mistakenly set them to empty values? e.g: export MDS_TIPC_FCTRL_ACKTIMEOUT="" + mds_tipc_fctrl_initialize(tipc_cb.BSRsock, port_id, (uint64_t)optval, + ackto, acksize, tipc_mcast_enabled); + } + } /* Create a task to receive the events and data */ if (mdtm_create_rcv_task(tipc_cb.hdle_mdtm) != NCSCC_RC_SUCCESS) { diff --git a/src/mds/mds_tipc_fctrl_intf.cc b/src/mds/mds_tipc_fctrl_intf.cc index 397114e..8949937 100644 --- a/src/mds/mds_tipc_fctrl_intf.cc +++ b/src/mds/mds_tipc_fctrl_intf.cc @@ -40,6 +40,9 @@ using mds::ChunkAck; using mds::HeaderMessage; namespace { +// flow control enabled/disabled +bool is_fctrl_enabled = false; + // multicast/broadcast enabled // todo: to be removed if flow control support it bool is_mcast_enabled = true; @@ -225,7 +228,8 @@ uint32_t create_ncs_task(void *task_hdl) { } // end local namespace uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct tipc_portid id, -uint64_t rcv_buf_size, bool mcast_enabled) { +uint64_t rcv_buf_size, int32_t ackto, int32_t acksize, +bool mcast_enabled) { if (create_ncs_task(&p_task_hdl) != NCSCC_RC_SUCCESS) { m_MDS_LOG_ERR("FCTRL: Start of the Created Task-failed:\n"); @@ -234,8 +238,10 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct tipc_portid id, data_sock_fd = dgramsock; snd_rcv_portid = id; sock_buf_size = rcv_buf_size; + is_fctrl_enabled = true; is_mcast_enabled = mcast_enabled; - + if (ackto != -1) kChunkAckTimeout = ackto; + if (acksize != -1) kChunkAckSize = acksize; m_MDS_LOG_NOTIFY("FCTRL: Initialize [node:%x, ref:%u]", id.node, id.ref); @@ -243,6 +249,7 @@ uint32_t mds_tipc_fctrl_initialize(int dgramsock, struct tipc_portid id, } uint32_t mds_tipc_fctrl_shutdown(void) { + if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; if (ncs_task_release(p_task_hdl) != NCSCC_RC_SUCCESS) { m_MDS_LOG_ERR("FCTRL: Stop of the Created Task-failed:\n"); } @@ -251,6 +258,8 @@ uint32_t mds_tipc_fctrl_shutdown(void) { uint32_t mds_tipc_fctrl_sndqueue_capable(struct tipc_portid id, uint16_t len, uint16_t* next_seq) { + if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; + uint32_t rc = NCSCC_RC_SUCCESS; portid_map_mutex.lock(); [Vu] We has a common class base::Lock that can help to unlock automatically when it goes out the scope. Should we make portid_map_mutex to be an Lock object? @@ -274,6 +283,8 @@ uint32_t mds_tipc_fctrl_sndqueue_capable(struct tipc_portid id, uint16_t len, uint32_t mds_tipc_fctrl_trysend(const uint8_t *buffer, uint16_t len, struct tipc_portid id) { + if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; + uint32_t rc = NCSCC_RC_SUCCESS; portid_map_mutex.lock(); @@ -304,6 +315,8 @@ uint32_t mds_tipc_fctrl_trysend(const uint8_t *buffer, uint16_t len, } uint32_t mds_tipc_fctrl_portid_up(struct tipc_portid id, uint32_t type) { + if (is_fctrl_enabled == false) return NCSCC_RC_SUCCESS; + MDS_SVC_ID svc_id = (uint16_t)(type & MDS_EVENT_MASK_FOR_SVCID); portid
Re: [devel] [PATCH 1/1] nid: Flush internal log messages before stopping OpenSAF [#3079]
Ack with minor comments. Thanks, Vu On 9/9/19 11:34 AM, thien.m.huynh wrote: --- src/nid/opensafd.in | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nid/opensafd.in b/src/nid/opensafd.in index f85cf5b..3f5b6ff 100644 --- a/src/nid/opensafd.in +++ b/src/nid/opensafd.in @@ -291,6 +291,7 @@ stop() { return 1 fi + $bindir/osaflog --flush [Vu] Move the 'flush' to the beginning of the stop(), otherwise it could be missed in case the function returns earlier. logger -t $osafprog "Stopping OpenSAF Services" amfpid=`pidofproc -p $amfnd_pid $amfnd_bin` echo -n "Stopping OpenSAF Services: " ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: Check error code before access to buffer V2 [#3075]
Ack. Thanks, Vu On 9/4/19 9:42 AM, thien.m.huynh wrote: --- src/imm/tools/imm_dumper.cc | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/imm/tools/imm_dumper.cc b/src/imm/tools/imm_dumper.cc index 68071f1..908bb94 100644 --- a/src/imm/tools/imm_dumper.cc +++ b/src/imm/tools/imm_dumper.cc @@ -403,8 +403,14 @@ static std::map cacheRDNs( std::list::iterator it = classNamesList.begin(); while (it != classNamesList.end()) { -saImmOmClassDescriptionGet_2(immHandle, (char*)it->c_str(), &classCategory, - &attrs); +SaAisErrorT errorCode = saImmOmClassDescriptionGet_2( +immHandle, (char*)it->c_str(), &classCategory, &attrs); + +if (errorCode != SA_AIS_OK) { + std::cerr << "Failed to get the description of " << *it +<< " class: " << errorCode << " - exiting!" << std::endl; + exit(EXIT_FAILURE); +} for (SaImmAttrDefinitionT_2** p = attrs; *p != NULL; p++) { if ((*p)->attrFlags & SA_IMM_ATTR_RDN) { ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: Check error code before access to buffer [#3075]
Hi Thien, See my comments inline. Regards, Vu On 9/3/19 2:23 PM, thien.m.huynh wrote: --- src/imm/tools/imm_dumper.cc | 26 +++--- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/imm/tools/imm_dumper.cc b/src/imm/tools/imm_dumper.cc index 68071f1..a8b45a5 100644 --- a/src/imm/tools/imm_dumper.cc +++ b/src/imm/tools/imm_dumper.cc @@ -392,6 +392,7 @@ static std::map cacheRDNs( std::list classNamesList; SaImmClassCategoryT classCategory; SaImmAttrDefinitionT_2** attrs; + SaAisErrorT errorCode; [Vu] Move this into the below 'while' loop. std::map classRDNMap; if (selectedClassList.empty()) { @@ -403,19 +404,22 @@ static std::map cacheRDNs( std::list::iterator it = classNamesList.begin(); while (it != classNamesList.end()) { -saImmOmClassDescriptionGet_2(immHandle, (char*)it->c_str(), &classCategory, - &attrs); - -for (SaImmAttrDefinitionT_2** p = attrs; *p != NULL; p++) { - if ((*p)->attrFlags & SA_IMM_ATTR_RDN) { -classRDNMap[*it] = std::string((*p)->attrName); -break; +errorCode = saImmOmClassDescriptionGet_2(immHandle, (char*)it->c_str(), + &classCategory, &attrs); +if (errorCode != SA_AIS_OK) { + std::cerr << "Failed to get the description for " << it->c_str() +<< " class - exiting, " << errorCode << std::endl; + exit(1); [Vu] - Narrow down the scope of above `errorCode`. - Use '*it' instead of 'it->c_str()' - Use `exit(EXIT_FAILURE)` instead of `exit(1) - Remove 'else' statement. e.g: SaAisCode error_code = saImmOmClassDescriptionGet_(...); if (error_code != SA_AIS_OK) { std::cerr << "Failed to get the description of " << *it << " class: " << errorCode << " - exiting!" << std.endl; exit(EXIT_FAILURE); } +} else { + for (SaImmAttrDefinitionT_2** p = attrs; *p != NULL; p++) { +if ((*p)->attrFlags & SA_IMM_ATTR_RDN) { + classRDNMap[*it] = std::string((*p)->attrName); + break; +} } + /* Avoid memory leaking */ + saImmOmClassDescriptionMemoryFree_2(immHandle, attrs); } - -/* Avoid memory leaking */ -saImmOmClassDescriptionMemoryFree_2(immHandle, attrs); - ++it; } ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] smf: Fix c-linkage and string op errors V3 [#3065]
Hi Thanh, Ack (code review only) Regards, Vu On 8/26/19 12:50 PM, Thanh Nguyen wrote: - Fix faults in C linkage in file src/smf/smfd/SmfUtils.h. - Fix fault in string concatenation in file src/smf/smfd/SmfUpgradeCampaign.cc --- src/smf/smfd/SmfUpgradeCampaign.cc | 3 ++- src/smf/smfd/SmfUtils.h| 7 --- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/smf/smfd/SmfUpgradeCampaign.cc b/src/smf/smfd/SmfUpgradeCampaign.cc index 3c50bf7..4a1591a 100644 --- a/src/smf/smfd/SmfUpgradeCampaign.cc +++ b/src/smf/smfd/SmfUpgradeCampaign.cc @@ -930,7 +930,8 @@ void SmfUpgradeCampaign::continueExec() { if (o_result == true) { LOG_NO("The campaign have been restarted to many times"); int cnt = smfd_cb->smfCampMaxRestart; - std::string error = "To many campaign restarts, max " + cnt; + std::string error = "To many campaign restarts, max " + + std::to_string(cnt); SmfCampaignThread::instance()->campaign()->setError(error); changeState(SmfCampStateExecFailed::instance()); TRACE_LEAVE(); diff --git a/src/smf/smfd/SmfUtils.h b/src/smf/smfd/SmfUtils.h index 894e3c9..83ce6ec 100644 --- a/src/smf/smfd/SmfUtils.h +++ b/src/smf/smfd/SmfUtils.h @@ -51,10 +51,6 @@ class SmfRollbackCcb; * DATA DECLARATIONS * */ -#ifdef __cplusplus -extern "C" { -#endif - extern bool smf_stringToImmType(char* i_type, SaImmValueTypeT& o_type); extern const char* smf_immTypeToString(SaImmValueTypeT i_type); extern SaImmAttrModificationTypeT smf_stringToImmAttrModType(char* i_type); @@ -74,9 +70,6 @@ extern const std::string smfStateToString(const uint32_t& i_stateId, extern bool compare_du_part(unitNameAndState& first, unitNameAndState& second); extern bool unique_du_part(unitNameAndState& first, unitNameAndState& second); -#ifdef __cplusplus -} -#endif extern bool waitForNodeDestination(const std::string& i_node, SmfndNodeDest* o_nodeDest); extern bool getNodeDestination(const std::string& i_node, ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] smf: Fix c-linkage errors [#3065]
+ Add Lennart to reviewers list Those methods that are wrapped inside 'extern "C"' (/smf/smfd/SmfUtils.h) seem to be called from C++ source files only. So, perhaps the simple fix for that is moving them out of 'extern "C"'. Regards, Vu On 8/20/19 8:12 AM, Thanh Nguyen wrote: Faults in C linkage for PBE area is fixed. --- src/smf/smfd/SmfUpgradeCampaign.cc | 3 ++- src/smf/smfd/SmfUtils.cc | 14 ++ src/smf/smfd/SmfUtils.h| 10 -- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/smf/smfd/SmfUpgradeCampaign.cc b/src/smf/smfd/SmfUpgradeCampaign.cc index 3c50bf7..4a1591a 100644 --- a/src/smf/smfd/SmfUpgradeCampaign.cc +++ b/src/smf/smfd/SmfUpgradeCampaign.cc @@ -930,7 +930,8 @@ void SmfUpgradeCampaign::continueExec() { if (o_result == true) { LOG_NO("The campaign have been restarted to many times"); int cnt = smfd_cb->smfCampMaxRestart; - std::string error = "To many campaign restarts, max " + cnt; + std::string error = "To many campaign restarts, max " + + std::to_string(cnt); SmfCampaignThread::instance()->campaign()->setError(error); changeState(SmfCampStateExecFailed::instance()); TRACE_LEAVE(); diff --git a/src/smf/smfd/SmfUtils.cc b/src/smf/smfd/SmfUtils.cc index 882a3e6..0abb4b1 100644 --- a/src/smf/smfd/SmfUtils.cc +++ b/src/smf/smfd/SmfUtils.cc @@ -1091,6 +1091,13 @@ std::string smf_valueToString(SaImmAttrValueT value, SaImmValueTypeT type) { return ost.str(); } +char* smf_valueToString(char* buffer, SaImmAttrValueT value, +SaImmValueTypeT type) { + std::string tmp = smf_valueToString(value, type); + memcpy(buffer, tmp.c_str(), tmp.length()); + return buffer; +} + // -- // smf_opStringToInt() // -- @@ -1342,6 +1349,13 @@ const std::string smfStateToString(const uint32_t &i_stateId, } } +char* smfStateToString(char* buffer, const uint32_t &i_stateId, + const uint32_t &i_state) { + std::string tmp = smfStateToString(i_stateId, i_state); + memcpy(buffer, tmp.c_str(), tmp.length()); + return buffer; +} + bool compare_du_part(unitNameAndState &first, unitNameAndState &second) { unsigned int i = 0; while ((i < first.name.length()) && (i < second.name.length())) { diff --git a/src/smf/smfd/SmfUtils.h b/src/smf/smfd/SmfUtils.h index 894e3c9..394c000 100644 --- a/src/smf/smfd/SmfUtils.h +++ b/src/smf/smfd/SmfUtils.h @@ -42,6 +42,12 @@ class SmfImmOperation; class SmfRollbackCcb; +extern std::string smf_valueToString(SaImmAttrValueT value, + SaImmValueTypeT type); +extern const std::string smfStateToString(const uint32_t& i_stateId, + const uint32_t& i_state); + + /* * TYPE DEFINITIONS * @@ -62,14 +68,14 @@ extern bool smf_stringsToValues(SaImmAttrValuesT_2* i_attribute, std::list& i_values); extern bool smf_stringToValue(SaImmValueTypeT i_type, SaImmAttrValueT* i_value, const char* i_str); -extern std::string smf_valueToString(SaImmAttrValueT value, +extern char* smf_valueToString(char* buffer, SaImmAttrValueT value, SaImmValueTypeT type); extern int smf_opStringToInt(const char* i_str); extern int smf_system(std::string i_cmd); extern void updateSaflog(const std::string& i_dn, const uint32_t& i_stateId, const uint32_t& i_newState, const uint32_t& i_oldState); -extern const std::string smfStateToString(const uint32_t& i_stateId, +extern char* smfStateToString(char* buffer, const uint32_t& i_stateId, const uint32_t& i_state); extern bool compare_du_part(unitNameAndState& first, unitNameAndState& second); extern bool unique_du_part(unitNameAndState& first, unitNameAndState& second); ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] imm: Fix Object ID and Class ID type inconsistency [#2770]
Hi Thanh, Ack. Thanks, Vu On 09/08/2019 07:52, Thanh Nguyen wrote: In IMM, for PBE, IMM uses the mix type of unsigned int and signed int for Class ID and Object ID. These data are stored in sqlite3 data base as 32-bit integers. Cast is used when reading data back from sqlite3 data base. Applying Google C++ Style Guide, Class ID and Object ID are now signed int. One minor code change in the affected area is implemented to fix Static Code Checker error. --- src/imm/common/immpbe_dump.cc| 44 src/imm/common/immpbe_dump.h | 10 - src/imm/immpbed/immpbe.h | 2 +- src/imm/immpbed/immpbe_daemon.cc | 18 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/imm/common/immpbe_dump.cc b/src/imm/common/immpbe_dump.cc index e6b3cc5..ee8950d 100644 --- a/src/imm/common/immpbe_dump.cc +++ b/src/imm/common/immpbe_dump.cc @@ -150,8 +150,8 @@ static const char *preparedSql[] = { static sqlite3_stmt *preparedStmt[SQL_STMT_SIZE] = {NULL}; struct ObjectInfo { - unsigned obj_id; - unsigned class_id; + int obj_id; + int class_id; char *dn; }; @@ -569,8 +569,8 @@ void pbeAtomicSwitchFile(const char *filePath, std::string localTmpFilename) { // Reverse object DN and use the reverse DN as key in sReverseDnMap // which is used mainly to collect child objects when we perform cascade delete. static void reverseAndInsertDn(const std::string &dn, - unsigned obj_id, - unsigned class_id) { + int obj_id, + int class_id) { ObjectInfo *info = new ObjectInfo(); osafassert(info); @@ -596,8 +596,8 @@ static bool prepareLocalData(sqlite3 *dbHandle) { sqlite3_stmt *stmt = nullptr; int rc; bool ret = false; - unsigned obj_id; - unsigned class_id; + int obj_id; + int class_id; char *class_name; std::string dn; int count = 0; @@ -611,7 +611,7 @@ static bool prepareLocalData(sqlite3 *dbHandle) { } while((rc = sqlite3_step(stmt)) == SQLITE_ROW) { -class_id = static_cast(sqlite3_column_int(stmt, 0)); +class_id = sqlite3_column_int(stmt, 0); class_name = const_cast(reinterpret_cast (sqlite3_column_text(stmt, 1))); @@ -707,7 +707,7 @@ static bool deleteObjectList(sqlite3 *dbHandle, sqlite3_stmt *stmt = nullptr; int rc = 0; bool ret = false; - unsigned object_id; + int object_id; TRACE_ENTER(); @@ -1225,7 +1225,7 @@ void pbeCleanTmpFiles(std::string localTmpFilename) { } ClassInfo *classToPBE(std::string classNameString, SaImmHandleT immHandle, - void *db_handle, unsigned int class_id) { + void *db_handle, int class_id) { SaImmClassCategoryT classCategory; SaImmAttrDefinitionT_2 **attrDefinitions; SaAisErrorT errorCode; @@ -1563,7 +1563,7 @@ static ClassInfo *verifyClassPBE(std::string classNameString, void *db_handle, bool *badfile) { sqlite3 *dbHandle = (sqlite3 *)db_handle; int rc = 0; - unsigned int class_id = 0; + int class_id = 0; ClassInfo *classInfo = NULL; sqlite3_stmt *stmt; @@ -2479,9 +2479,9 @@ bailout: } int dumpInstancesOfClassToPBE(SaImmHandleT immHandle, ClassMap *classIdMap, - std::string className, unsigned int *objIdCount, + std::string className, int *objIdCount, void *db_handle) { - unsigned int obj_count = 0; + int obj_count = 0; SaImmSearchHandleT searchHandle; SaAisErrorT errorCode; SaNameT objectName; @@ -2591,11 +2591,11 @@ bailout: } bool objectToPBE(std::string objectNameString, const SaImmAttrValuesT_2 **attrs, - ClassMap *classIdMap, void *db_handle, unsigned int object_id, + ClassMap *classIdMap, void *db_handle, int object_id, SaImmClassNameT className, SaUint64T ccb_id) { std::string valueString; std::string classNameString; - unsigned int class_id = 0; + int class_id = 0; ClassInfo *classInfo = NULL; int rc = 0; sqlite3 *dbHandle = (sqlite3 *)db_handle; @@ -2752,7 +2752,7 @@ bool dumpClassesToPbe(SaImmHandleT immHandle, ClassMap *classIdMap, std::list classNameList; std::list::iterator it; int rc = 0; - unsigned int class_id = 0; + int class_id = 0; char *execErr = NULL; sqlite3 *dbHandle = (sqlite3 *)db_handle; TRACE_ENTER(); @@ -2847,13 +2847,13 @@ int verifyPbeState(SaImmHandleT immHandle, ClassMap *classIdMap, } if (nrows != 1) { -LOG_ER("Expected 1 row got %u rows (line: %u)", nrows, __LINE__); +LOG_ER("Expected 1 row got %d rows (line: %u)", nrows, __LINE__); badfile = true; goto bailout; } - obj_count = strtoul(result[ncols], NULL, 0); - TRACE("verifPbeS
Re: [devel] [PATCH 1/1] scripts: use tipc instead of obsolute tipc-config in opensaf_reboot [#3066]
Hi, I will push this patch on this Thursday if there is no more comment. Regards, Vu On 09/08/2019 13:06, Nguyen Minh Vu wrote: Hi Thang, It is my intention to use an unique interface that is the wrapper script `tipc-config` for all calls to tipc/tipc-config command. With that, if having any changes from tipc, we only need to change our own script without impacting other places. Regards, Vu On 09/08/2019 10:17, Thang Nguyen wrote: Hi Vu, I have a comment in line. B.R /Thang -Original Message- From: Vu Minh Nguyen Sent: Friday, August 9, 2019 9:47 AM To: anders.wid...@ericsson.com; gary@dektech.com.au; thang.d.ngu...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen Subject: [PATCH 1/1] scripts: use tipc instead of obsolute tipc-config in opensaf_reboot [#3066] --- scripts/opensaf_reboot | 11 ++- src/nid/configure_tipc.in | 1 - tools/cluster_sim_uml/build_uml | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot index 9623c39a8..be635a442 100644 --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -128,6 +128,15 @@ temp_node_id=`cat "$NODE_ID_FILE"` temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` +tipc=$(which tipc 2> /dev/null) +tipc_config=$(which tipc-config 2> /dev/null) + +if [ -x "${tipc}" ]; then + tipc_config="${pkglibdir}"/tipc-config +fi + +unset tipc + [THANG]: I think no need. # If no argument is provided, forcing node reboot immediately without log # flushing, process terminating, disk un-mounting. # If clm cluster reboot requested argument one and two are set but not used, @@ -149,7 +158,7 @@ else # Isolate the node if [ "$MDS_TRANSPORT" = "TIPC" ]; then - tipc-config -bd eth:$TIPC_ETH_IF + ${tipc_config} -bd=eth:$TIPC_ETH_IF [THANG]: Can add like this ? --- + tipc=$(which tipc 2> /dev/null) + if [ -x "${tipc}" ]; then + tipc bearer disable media eth device $TIPC_ETH_IF + else + tipc-config -bd eth:$TIPC_ETH_IF + fi --- else $icmd pkill -STOP osafdtmd fi diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index b13a685f3..73dd1cbe0 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -69,7 +69,6 @@ if [ "$#" -lt "1" ] || [ "$#" -gt "4" ] exit 1 fi -# Make sure tipc-config is available, either in path or in default location tipc=$(which tipc 2> /dev/null) tipc_config=$(which tipc-config 2> /dev/null) diff --git a/tools/cluster_sim_uml/build_uml b/tools/cluster_sim_uml/build_uml index 8e48bb5a5..edbe01c5d 100755 --- a/tools/cluster_sim_uml/build_uml +++ b/tools/cluster_sim_uml/build_uml @@ -225,11 +225,11 @@ cmd_create_rootfs() install -m 755 $archive/scripts/*.rc etc/init.d cp $scripts/profile etc - cp $scripts/reboot $scripts/shutdown $opensaf_home/scripts/tipc-config usr/sbin + cp $scripts/reboot $scripts/shutdown usr/sbin mkdir -p root/www/cgi-bin cp $scripts/rshd root/www/cgi-bin cp $scripts/rsh $scripts/rcp $scripts/sudo usr/bin - chmod +x usr/sbin/shutdown usr/sbin/tipc-config root/www/cgi-bin/rshd usr/bin/rsh usr/bin/rcp usr/bin/sudo + chmod +x usr/sbin/shutdown root/www/cgi-bin/rshd usr/bin/rsh + usr/bin/rcp usr/bin/sudo echo "Copy some needed extra programs (bash, ...)" install /bin/bash usr/bin -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] scripts: use tipc instead of obsolute tipc-config in opensaf_reboot [#3066]
Hi Thang, It is my intention to use an unique interface that is the wrapper script `tipc-config` for all calls to tipc/tipc-config command. With that, if having any changes from tipc, we only need to change our own script without impacting other places. Regards, Vu On 09/08/2019 10:17, Thang Nguyen wrote: Hi Vu, I have a comment in line. B.R /Thang -Original Message- From: Vu Minh Nguyen Sent: Friday, August 9, 2019 9:47 AM To: anders.wid...@ericsson.com; gary@dektech.com.au; thang.d.ngu...@dektech.com.au Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen Subject: [PATCH 1/1] scripts: use tipc instead of obsolute tipc-config in opensaf_reboot [#3066] --- scripts/opensaf_reboot | 11 ++- src/nid/configure_tipc.in | 1 - tools/cluster_sim_uml/build_uml | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot index 9623c39a8..be635a442 100644 --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -128,6 +128,15 @@ temp_node_id=`cat "$NODE_ID_FILE"` temp_node_id=`echo "$temp_node_id" |sed -e 's:^0[bBxX]::'| sed -e 's:^:0x:'` self_node_id=`printf "%d" $temp_node_id` +tipc=$(which tipc 2> /dev/null) +tipc_config=$(which tipc-config 2> /dev/null) + +if [ -x "${tipc}" ]; then + tipc_config="${pkglibdir}"/tipc-config +fi + +unset tipc + [THANG]: I think no need. # If no argument is provided, forcing node reboot immediately without log # flushing, process terminating, disk un-mounting. # If clm cluster reboot requested argument one and two are set but not used, @@ -149,7 +158,7 @@ else # Isolate the node if [ "$MDS_TRANSPORT" = "TIPC" ]; then - tipc-config -bd eth:$TIPC_ETH_IF + ${tipc_config} -bd=eth:$TIPC_ETH_IF [THANG]: Can add like this ? --- + tipc=$(which tipc 2> /dev/null) + if [ -x "${tipc}" ]; then + tipc bearer disable media eth device $TIPC_ETH_IF + else + tipc-config -bd eth:$TIPC_ETH_IF + fi --- else $icmd pkill -STOP osafdtmd fi diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index b13a685f3..73dd1cbe0 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -69,7 +69,6 @@ if [ "$#" -lt "1" ] || [ "$#" -gt "4" ] exit 1 fi -# Make sure tipc-config is available, either in path or in default location tipc=$(which tipc 2> /dev/null) tipc_config=$(which tipc-config 2> /dev/null) diff --git a/tools/cluster_sim_uml/build_uml b/tools/cluster_sim_uml/build_uml index 8e48bb5a5..edbe01c5d 100755 --- a/tools/cluster_sim_uml/build_uml +++ b/tools/cluster_sim_uml/build_uml @@ -225,11 +225,11 @@ cmd_create_rootfs() install -m 755 $archive/scripts/*.rc etc/init.d cp $scripts/profile etc -cp $scripts/reboot $scripts/shutdown $opensaf_home/scripts/tipc-config usr/sbin +cp $scripts/reboot $scripts/shutdown usr/sbin mkdir -p root/www/cgi-bin cp $scripts/rshd root/www/cgi-bin cp $scripts/rsh $scripts/rcp $scripts/sudo usr/bin -chmod +x usr/sbin/shutdown usr/sbin/tipc-config root/www/cgi-bin/rshd usr/bin/rsh usr/bin/rcp usr/bin/sudo +chmod +x usr/sbin/shutdown root/www/cgi-bin/rshd usr/bin/rsh + usr/bin/rcp usr/bin/sudo echo "Copy some needed extra programs (bash, ...)" install /bin/bash usr/bin -- 2.17.1 ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] nid: use the tipc command instead of tipc-config [#2104]
Hi, Any comments on this? I will push the patch this Wednesday if getting no comments. Regards, Vu On 01/08/2019 09:53, Vu Minh Nguyen wrote: The tipc-config command is obsolete and no longer being maintained. We should switch to using the "tipc" command instead --- Makefile.am | 3 ++- opensaf.spec.in | 1 + .../archive/scripts => scripts}/tipc-config | 15 -- src/nid/configure_tipc.in | 16 ++- src/nid/opensafd.in | 20 +++ tools/cluster_sim_uml/build_uml | 2 +- 6 files changed, 35 insertions(+), 22 deletions(-) rename {tools/cluster_sim_uml/archive/scripts => scripts}/tipc-config (83%) diff --git a/Makefile.am b/Makefile.am index b3d6553c1..6d86ec180 100644 --- a/Makefile.am +++ b/Makefile.am @@ -159,7 +159,8 @@ dist_osaf_execbin_SCRIPTS += \ $(top_srcdir)/scripts/opensaf_reboot \ $(top_srcdir)/scripts/opensaf_sc_active \ $(top_srcdir)/scripts/opensaf_scale_out \ - $(top_srcdir)/scripts/plm_scale_out + $(top_srcdir)/scripts/plm_scale_out \ + $(top_srcdir)/scripts/tipc-config include $(top_srcdir)/src/ais/Makefile.am include $(top_srcdir)/src/base/Makefile.am diff --git a/opensaf.spec.in b/opensaf.spec.in index 0effd59cd..37be5de6d 100644 --- a/opensaf.spec.in +++ b/opensaf.spec.in @@ -950,6 +950,7 @@ fi %{_pkglibdir}/plm_scale_out %{_pkglibdir}/opensaf_sc_active %{_pkglibdir}/configure_tipc +%{_pkglibdir}/tipc-config %files amf-libs diff --git a/tools/cluster_sim_uml/archive/scripts/tipc-config b/scripts/tipc-config similarity index 83% rename from tools/cluster_sim_uml/archive/scripts/tipc-config rename to scripts/tipc-config index f9fd47937..34eb9a539 100755 --- a/tools/cluster_sim_uml/archive/scripts/tipc-config +++ b/scripts/tipc-config @@ -1,4 +1,4 @@ -#!/bin/ash +#!/bin/bash # # -*- OpenSAF -*- # @@ -39,7 +39,18 @@ fi while [ $# -gt 0 ]; do case "$1" in -addr) - echo "node address: $(/sbin/tipc node get address)" + addr=$(/sbin/tipc node get address) + hex_pattern="^[0-9a-fA-F]+$" + if [[ $addr =~ $hex_pattern ]]; then + dec_addr=$((16#$addr)) + # the algorithm is based on /usr/include/linux/tipc.h + # to form tipc node address into 'Z.C.N' format. + tipc_zone=$((dec_addr >> 24)) + tipc_cluster=$(((dec_addr >> 12) & 0xfff)) + tipc_node=$((dec_addr & 0xfff)) + addr="<$tipc_zone.$tipc_cluster.$tipc_node>" + fi + echo "node address: $addr" ;; -a=*) /sbin/tipc node set address "$(echo "$1" | cut -d= -f2)" diff --git a/src/nid/configure_tipc.in b/src/nid/configure_tipc.in index 33621a0ef..5d0bf6efb 100644 --- a/src/nid/configure_tipc.in +++ b/src/nid/configure_tipc.in @@ -78,12 +78,13 @@ if ! [ -x "${tipc}" ] && ! [ -x "${tipc_config}" ]; then exit 1 fi +# Prefer using `tipc` over the obsoleted `tipc-config` +if [ -x "${tipc}" ]; then +tipc_config="${pkglibdir}"/tipc-config +fi + if [ "$MANAGE_TIPC" != "yes" ] && ! [ -s "$pkglocalstatedir/node_id" ]; then -if [ -x "${tipc}" ]; then - addr=$(tipc node get address | cut -d'<' -f2 | cut -d'>' -f1) -else - addr=$(tipc-config -addr | cut -d'<' -f2 | cut -d'>' -f1) -fi + addr=$(${tipc-config} -addr | cut -d'<' -f2 | cut -d'>' -f1) addr=$(echo "$addr" | cut -d. -f3) CHASSIS_ID=2 SLOT_ID=$((addr & 255)) @@ -98,11 +99,6 @@ fi ETH_NAME=$2 TIPC_NETID=$3 -if ! [ -x "${tipc_config}" ]; then -echo "error: tipc-config is not available" -exit 1 -fi - # Get the Chassis Id and Slot Id from @sysconfdir@/@PACKAGE_NAME@/chassis_id and @sysconfdir@/@PACKAGE_NAME@/slot_id if ! test -f "$CHASSIS_ID_FILE"; then echo "$CHASSIS_ID_FILE doesnt exists, exiting " diff --git a/src/nid/opensafd.in b/src/nid/opensafd.in index 94888039a..f85cf5b0c 100644 --- a/src/nid/opensafd.in +++ b/src/nid/opensafd.in @@ -50,7 +50,7 @@ osafcshash=@INTERNAL_VERSION_ID@ unload_tipc() { # Unload TIPC if already loaded - if [ $MANAGE_TIPC = "yes" ] && grep tipc /proc/modules >/dev/null 2>&1; then + if [ "$MANAGE_TIPC" = "yes" ] && grep tipc /proc/modules >/dev/null 2>&1; then modprobe -r tipc >/dev/null 2>&1 if [ $? -eq 1 ]; then logger -t $osafprog "warning: TIPC module unloading failed" @@ -59,13 +59,17 @@ unload_tipc() { } check_tipc() { - # Exit if tipc-config is not installed - if [ "$MANAGE_TIPC" = "yes" ] && [ ! -x /sbin/tipc-config ]; then - which tipc-config >/dev/null 2>&1 - if [ $? -eq 1 ] ; then - logger -s -t