Hi Thien, As I understand it, push_async is vckpt_rec. I wonder whether the idea below would solve the issue?
diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc index e3583e97c..27e33702d 100644 --- a/src/log/logd/lgs_cache.cc +++ b/src/log/logd/lgs_cache.cc @@ -344,14 +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, uint32_t num_rec = header->num_ckpt_records; int rc = NCSCC_RC_SUCCESS; EDU_ERR ederror; - lgsv_ckpt_msg_v8_t msg_v8; - auto data = &msg_v8.ckpt_rec.push_async; - CkptPushAsync* cache_data; while (num_rec) { - cache_data = data; rc = m_NCS_EDU_EXEC(&lgs_cb->edu_hdl, EncodeDecodePushAsync, uba, EDP_OP_TYPE_DEC, - &cache_data, &ederror); + vckpt_rec, &ederror); if (rc != NCSCC_RC_SUCCESS) { m_NCS_EDU_PRINT_ERROR_STRING(ederror); return rc; Best Regards, Thuan ________________________________ From: Thien Minh Huynh <thien.m.hu...@dektech.com.au> Sent: Monday, May 11, 2020 2:51 PM To: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>; Thuan Tran <thuan.t...@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net <opensaf-devel@lists.sourceforge.net> Subject: RE: [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183] Hi Vu, Thanks for your comments. Best Regards, ThienHuynh -----Original Message----- From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> Sent: Monday, May 11, 2020 2:30 PM To: Thien Minh Huynh <thien.m.hu...@dektech.com.au>; Thuan Tran <thuan.t...@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] lgs: fix data CkptPushAsync equals with encode cold sync [#3183] Ack with minor comments. On 5/11/20 2:11 PM, thien.m.huynh wrote: > When NFS is unavailable, client try to write to log. Lgs server will > put it into the queue with the time. At this time, standby node > startup and cold sync. Cause of coredump due to duplicate data > (CkptPushAsync) to put queue is NULL. > The fix is adding a parametar CkptPushAsync into DecodeColdSync to get > data for ckpt_proc_push_async more correctly. [Vu] The description is unclear to me. 
Here is my suggestion: The standby logsv is crashed during cold sync if having pending write requests in the queue. That happens because the CkptPushAsync data for decoding is referring to wrong data. The fix is to map the CkptPushAsync to the right memory. [Vu] The slogan should be updated too. > --- > src/log/logd/lgs_cache.cc | 10 +++------- > src/log/logd/lgs_cache.h | 4 ++-- > src/log/logd/lgs_mbcsv.cc | 6 +++++- > 3 files changed, 10 insertions(+), 10 deletions(-) > > diff --git a/src/log/logd/lgs_cache.cc b/src/log/logd/lgs_cache.cc > index e3583e97c..ca25e681c 100644 > --- a/src/log/logd/lgs_cache.cc > +++ b/src/log/logd/lgs_cache.cc > @@ -324,8 +324,8 @@ int Cache::EncodeColdSync(NCS_UBAID* uba) const { > } > > int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, > - void* vdata, void** vckpt_rec, > - size_t ckpt_rec_size) const { > + CkptPushAsync* pasync, void* vdata, > + void** vckpt_rec, size_t ckpt_rec_size) > + const { > TRACE_ENTER(); > assert(is_active() == false && "This instance does not run with standby > role"); > if (lgs_is_peer_v8() == false) return NCSCC_RC_SUCCESS; @@ -344,14 > +344,10 @@ int Cache::DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* > header, > uint32_t num_rec = header->num_ckpt_records; > int rc = NCSCC_RC_SUCCESS; > EDU_ERR ederror; > - lgsv_ckpt_msg_v8_t msg_v8; > - auto data = &msg_v8.ckpt_rec.push_async; > - CkptPushAsync* cache_data; > while (num_rec) { > - cache_data = data; > rc = m_NCS_EDU_EXEC(&lgs_cb->edu_hdl, EncodeDecodePushAsync, > uba, EDP_OP_TYPE_DEC, > - &cache_data, &ederror); > + &pasync, &ederror); > if (rc != NCSCC_RC_SUCCESS) { > m_NCS_EDU_PRINT_ERROR_STRING(ederror); > return rc; > diff --git a/src/log/logd/lgs_cache.h b/src/log/logd/lgs_cache.h index > a5d6181fb..98ea6791b 100644 > --- a/src/log/logd/lgs_cache.h > +++ b/src/log/logd/lgs_cache.h > @@ -251,8 +251,8 @@ class Cache { > int EncodeColdSync(NCS_UBAID* uba) const; > // Decode the queue on stanby side. 
> int DecodeColdSync(NCS_UBAID* uba, lgsv_ckpt_header_t* header, > - void* vdata, void** vckpt_rec, > - size_t ckpt_rec_size) const; > + CkptPushAsync* pasync, void* vdata, > + void** vckpt_rec, size_t ckpt_rec_size) const; > > private: > // Private constructor to not allow to instantiate this object > directly, diff --git a/src/log/logd/lgs_mbcsv.cc > b/src/log/logd/lgs_mbcsv.cc index 6ec004f0a..7d097fc28 100644 > --- a/src/log/logd/lgs_mbcsv.cc > +++ b/src/log/logd/lgs_mbcsv.cc > @@ -1677,6 +1677,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, > NCS_MBCSV_CB_ARG *cbk_arg) { > size_t ckpt_rec_size; > void *vdata; > EDU_PROG_HANDLER edp_function_reg = edp_ed_reg_rec; > + CkptPushAsync *pasync{nullptr}; > > TRACE_ENTER(); > /* > @@ -1690,6 +1691,7 @@ static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, > NCS_MBCSV_CB_ARG *cbk_arg) { > initialize_client_rec_ptr = &data_v9->ckpt_rec.initialize_client; > stream_open_rec_ptr = &data_v9->ckpt_rec.stream_open; > vdata = data_v9; > + pasync = &data_v9->ckpt_rec.push_async; > vckpt_rec = &data_v9->ckpt_rec; > ckpt_rec_size = sizeof(data_v9->ckpt_rec); > edp_function_reg = edp_ed_reg_rec_v6; @@ -1699,6 +1701,7 @@ > static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG > *cbk_arg) { > initialize_client_rec_ptr = &data_v8->ckpt_rec.initialize_client; > stream_open_rec_ptr = &data_v8->ckpt_rec.stream_open; > vdata = data_v8; > + pasync = &data_v8->ckpt_rec.push_async; > vckpt_rec = &data_v8->ckpt_rec; > ckpt_rec_size = sizeof(data_v8->ckpt_rec); > edp_function_reg = edp_ed_reg_rec_v6; @@ -1806,7 +1809,8 @@ > static uint32_t ckpt_decode_cold_sync(lgs_cb_t *cb, NCS_MBCSV_CB_ARG > *cbk_arg) { > } /*End while, stream records */ > > rc = Cache::instance()->DecodeColdSync(&cbk_arg->info.decode.i_uba, > header, > - vdata, &vckpt_rec, ckpt_rec_size); > + pasync, vdata, &vckpt_rec, > + ckpt_rec_size); > if (rc != NCSCC_RC_SUCCESS) { > LOG_NO("DecodeColdSync failed"); > goto done; 
_______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel