Hi Vu, Ack, with comments. See also attached diff with comments and some changes.
In general the code has become rather messy. Too much logic is distributed and handled "inline" in code which purpose is to handle other things and these latest fixes adds to that. This makes it very hard to review and maintain the code. Very big risk for mistakes, many places affected by a fix etc. If this continue the code will degenerate and become impossible to maintain with reasonable cost. Thanks Lennart > -----Original Message----- > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> > Sent: den 6 juni 2018 10:27 > To: Canh Van Truong <canh.v.tru...@dektech.com.au>; Lennart Lund > <lennart.l...@ericsson.com> > Cc: opensaf-devel@lists.sourceforge.net > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > [#2870] > > Hi Canh, > > I forgot that patch. Just reviewed it. Thanks. > > Anyway, I will push the previous version for this ticket. > > Regards, Vu > > > -----Original Message----- > > From: Canh Van Truong <canh.v.tru...@dektech.com.au> > > Sent: Tuesday, June 5, 2018 6:20 PM > > To: 'Vu Minh Nguyen' <vu.m.ngu...@dektech.com.au>; > > lennart.l...@ericsson.com > > Cc: opensaf-devel@lists.sourceforge.net > > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > > [#2870] > > > > Hi a.Vu, > > > > Looking at patch in ticket #2670, we can use ref_counter to decide to > remove > > client in case recovery fail. That way is simple. > > > > Regards > > Canh > > > > -----Original Message----- > > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> > > Sent: Tuesday, June 5, 2018 5:54 PM > > To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au > > Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen > > <vu.m.ngu...@dektech.com.au> > > Subject: [PATCH 1/1] log: fix restore ref counter for deleted stream > [#2870] > > > > In the methods LogAgent::saLogStreamClose() and > > LogAgent::saLogWriteLogAsync(), > > the client is deleted if failed to recover; however, the pointer to the > log > > stream of this client has not been reset. Therefore, when the destrustor > of > > ScopeData runs, the reference counter could be restored on deleted log > > stream. > > > > Besides, there are possibilities of race condition b/w deleting the > > failed-to-recovery client and using that deleted client in other thread. > > > > This patch introduces a new attribute to LogClient. When the client fails > > to recover, that client is not removed at that time but is done at next > call > > of log handle initialize or finalize if the attribute is true. > > --- > > src/log/agent/lga_agent.cc | 87 > > ++++++++++++++++++++++++++++++++++++++------- > > src/log/agent/lga_agent.h | 5 +++ > > src/log/agent/lga_client.cc | 2 +- > > src/log/agent/lga_client.h | 7 ++++ > > 4 files changed, 87 insertions(+), 14 deletions(-) > > > > diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc > > index f33b5dc..831edfa 100644 > > --- a/src/log/agent/lga_agent.cc > > +++ b/src/log/agent/lga_agent.cc > > @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() { > > } > > } > > > > +void LogAgent::RemoveAllDeadLogClients() { > > + TRACE_ENTER(); > > + ScopeLock scopeLock(mutex_); > > + auto it = client_list_.begin(); > > + while (it != client_list_.end()) { > > + if ((*it)->is_died() == true) { > > + client_list_.erase(it); > > + delete *it; > > + it = client_list_.begin(); > > + continue; > > + } > > + it++; > > + } > > +} > > + > > // Add one client @client to the list @client_list_ > > // This method should be called in @saLogInitialize() context > > void LogAgent::AddLogClient(LogClient* client) { > > @@ -314,6 +329,14 @@ SaAisErrorT > > LogAgent::saLogInitialize(SaLogHandleT* > > logHandle, > > > > TRACE_ENTER(); > > > > + if (true) { > > + ScopeLock critical_section(get_delete_obj_sync_mutex_); > > + if (has_dead_clients == true) { > > + RemoveAllDeadLogClients(); > > + has_dead_clients = false; > > + } > > + } > > + > > // Verify parameters (log handle and that version is given) > > if ((logHandle == nullptr) || (version == nullptr)) { > > TRACE("version or handle FAILED"); > > @@ -453,6 +476,11 @@ SaAisErrorT LogAgent::saLogSelectionObjectGet( > > return ais_rc; > > } > > > > + if (client->is_died() == true) { > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > + return ais_rc; > > + } > > + > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > // @client is being deleted. Don't touch @this client > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > @@ -504,6 +532,11 @@ SaAisErrorT > > LogAgent::saLogDispatch(SaLogHandleT > > logHandle, > > return ais_rc; > > } > > > > + if (client->is_died() == true) { > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > + return ais_rc; > > + } > > + > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > // @client is being deleted. DO NOT touch this @client > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > @@ -584,6 +617,11 @@ SaAisErrorT LogAgent::saLogFinalize(SaLogHandleT > > logHandle) { > > if (true) { > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > > > + if (has_dead_clients == true) { > > + RemoveAllDeadLogClients(); > > + has_dead_clients = false; > > + } > > + > > // Get log client from the global list > > client = SearchClientByHandle(logHandle); > > if (client == nullptr) { > > @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > > return ais_rc; > > } > > > > + if (client->is_died() == true) { > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > + return ais_rc; > > + } > > + > > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > > // @client is being deleted. DO NOT touch this @client > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > > } > > > > // This client is failed to do recover in Recovery thread. > > - // Remove this client from the database. > > if (client->is_recovery_failed() == true) { > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > - RemoveLogClient(&client); > > + // Mark this client died, and it will be removed from client db > > later. > > + client->died(); > > + client = nullptr; > > + has_dead_clients = true; > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > return ais_rc; > > } > > @@ -1160,6 +1205,11 @@ SaAisErrorT > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > > return ais_rc; > > } > > > > + if (client->is_died() == true) { > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > + return ais_rc; > > + } > > + > > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > > // @client is being deleted. DO NOT touch this @client > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > @@ -1263,10 +1313,13 @@ SaAisErrorT > > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > > } > > > > // This client is failed to do recover in Recovery thread. > > - // Remove this client from the database. > > if (client->is_recovery_failed() == true) { > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > - RemoveLogClient(&client); > > + // Mark this client died, and it will be removed from client db > > later. > > + client->died(); > > + client = nullptr; > > + stream = nullptr; > > + has_dead_clients = true; > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > return ais_rc; > > } > > @@ -1321,13 +1374,6 @@ SaAisErrorT > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > return ais_rc; > > } > > > > - if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) > { > > - // @stream is being used somewhere (>0), DO NOT delete this > @stream. > > - // or @stream is being deleted on other thread (=-1) > > - ais_rc = SA_AIS_ERR_TRY_AGAIN; > > - return ais_rc; > > - } > > - > > // Retrieve @LogClient obj based on @client_handle > > client = SearchClientByHandle(stream->GetMyClientHandle()); > > if (client == nullptr) { > > @@ -1336,6 +1382,18 @@ SaAisErrorT > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > return ais_rc; > > } > > > > + if (client->is_died() == true) { > > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > > + return ais_rc; > > + } > > + > > + if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) > { > > + // @stream is being used somewhere (>0), DO NOT delete this > @stream. > > + // or @stream is being deleted on other thread (=-1) > > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > > + return ais_rc; > > + } > > + > > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > > ais_rc = SA_AIS_ERR_TRY_AGAIN; > > return ais_rc; > > @@ -1384,10 +1442,13 @@ SaAisErrorT > > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > > } > > > > // This client is failed to do recover in Recovery thread. > > - // Remove this client from the database. > > if (client->is_recovery_failed() == true) { > > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > - RemoveLogClient(&client); > > + // Mark this client died, and it will be removed from client db > > later. > > + client->died(); > > + client = nullptr; > > + stream = nullptr; > > + has_dead_clients = true; > > ais_rc = SA_AIS_ERR_BAD_HANDLE; > > return ais_rc; > > } > > diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h > > index 9f1c370..817a357 100644 > > --- a/src/log/agent/lga_agent.h > > +++ b/src/log/agent/lga_agent.h > > @@ -159,6 +159,8 @@ class LogAgent { > > void EnterCriticalSection(); > > void LeaveCriticalSection(); > > > > + void RemoveAllDeadLogClients(); > > + > > private: > > // Not allow to create @LogAgent object, except the singleton object > > @me_. > > LogAgent(); > > @@ -272,6 +274,9 @@ class LogAgent { > > // Hold list of current log clients > > std::vector<LogClient*> client_list_; > > > > + // true if there is any failed-to-recover client > > + bool has_dead_clients = false; > > + > > // LGS LGA sync params > > NCS_SEL_OBJ lgs_sync_sel_; > > > > diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc > > index 386c849..3c3f893 100644 > > --- a/src/log/agent/lga_client.cc > > +++ b/src/log/agent/lga_client.cc > > @@ -32,7 +32,7 @@ > > // LogClient > > > > > //-------------------------------------------------------------------------- > > ---- > > LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id, SaVersionT > > ver) > > - : client_id_{id}, ref_counter_object_{} { > > + : client_id_{id}, ref_counter_object_{}, died_{false} { > > TRACE_ENTER(); > > // Reset registered callback info > > memset(&callbacks_, 0, sizeof(callbacks_)); > > diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h > > index 05349be..08137b7 100644 > > --- a/src/log/agent/lga_client.h > > +++ b/src/log/agent/lga_client.h > > @@ -173,6 +173,10 @@ class LogClient { > > // while the recovery thread is going at RecoveryState::kRecovery2. > > bool is_recovery_done() const; > > > > + // Invoke when the client is not recovered successfully. > > + void died() { died_ = true; } > > + bool is_died() const { return died_; } > > + > > private: > > // Search for @LogStreamInfo object from @stream_list_ based on > > @stream_id > > LogStreamInfo* SearchLogStreamInfoById(uint32_t); > > @@ -257,6 +261,9 @@ class LogClient { > > // LOG handle (derived from hdl-mngr) > > SaLogHandleT handle_; > > > > + // true if this client is not successfully recovered. > > + bool died_; > > + > > DELETE_COPY_AND_MOVE_OPERATORS(LogClient); > > }; > > > > -- > > 1.9.1 >
log_2870_lennart_comments.diff
Description: log_2870_lennart_comments.diff
------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel