Hi Canh, I forgot that patch. Just reviewed it. Thanks.
Anyway, I will push the previous version for this ticket. Regards, Vu > -----Original Message----- > From: Canh Van Truong <canh.v.tru...@dektech.com.au> > Sent: Tuesday, June 5, 2018 6:20 PM > To: 'Vu Minh Nguyen' <vu.m.ngu...@dektech.com.au>; > lennart.l...@ericsson.com > Cc: opensaf-devel@lists.sourceforge.net > Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream > [#2870] > > Hi a.Vu, > > Looking at patch in ticket #2670, we can use ref_counter to decide to remove > client in case recovery fail. That way is simple. > > Regards > Canh > > -----Original Message----- > From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au> > Sent: Tuesday, June 5, 2018 5:54 PM > To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au > Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen > <vu.m.ngu...@dektech.com.au> > Subject: [PATCH 1/1] log: fix restore ref counter for deleted stream [#2870] > > In the methods LogAgent::saLogStreamClose() and > LogAgent::saLogWriteLogAsync(), > the client is deleted if failed to recover; however, the pointer to the log > stream of this client has not been reset. Therefore, when the destrustor of > ScopeData runs, the reference counter could be restored on deleted log > stream. > > Besides, there are possibilities of race condition b/w deleting the > failed-to-recovery client and using that deleted client in other thread. > > This patch introduces a new attribute to LogClient. When the client fails > to recover, that client is not removed at that time but is done at next call > of log handle initialize or finalize if the attribute is true. > --- > src/log/agent/lga_agent.cc | 87 > ++++++++++++++++++++++++++++++++++++++------- > src/log/agent/lga_agent.h | 5 +++ > src/log/agent/lga_client.cc | 2 +- > src/log/agent/lga_client.h | 7 ++++ > 4 files changed, 87 insertions(+), 14 deletions(-) > > diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc > index f33b5dc..831edfa 100644 > --- a/src/log/agent/lga_agent.cc > +++ b/src/log/agent/lga_agent.cc > @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() { > } > } > > +void LogAgent::RemoveAllDeadLogClients() { > + TRACE_ENTER(); > + ScopeLock scopeLock(mutex_); > + auto it = client_list_.begin(); > + while (it != client_list_.end()) { > + if ((*it)->is_died() == true) { > + client_list_.erase(it); > + delete *it; > + it = client_list_.begin(); > + continue; > + } > + it++; > + } > +} > + > // Add one client @client to the list @client_list_ > // This method should be called in @saLogInitialize() context > void LogAgent::AddLogClient(LogClient* client) { > @@ -314,6 +329,14 @@ SaAisErrorT > LogAgent::saLogInitialize(SaLogHandleT* > logHandle, > > TRACE_ENTER(); > > + if (true) { > + ScopeLock critical_section(get_delete_obj_sync_mutex_); > + if (has_dead_clients == true) { > + RemoveAllDeadLogClients(); > + has_dead_clients = false; > + } > + } > + > // Verify parameters (log handle and that version is given) > if ((logHandle == nullptr) || (version == nullptr)) { > TRACE("version or handle FAILED"); > @@ -453,6 +476,11 @@ SaAisErrorT LogAgent::saLogSelectionObjectGet( > return ais_rc; > } > > + if (client->is_died() == true) { > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > + return ais_rc; > + } > + > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > // @client is being deleted. Don't touch @this client > ais_rc = SA_AIS_ERR_TRY_AGAIN; > @@ -504,6 +532,11 @@ SaAisErrorT > LogAgent::saLogDispatch(SaLogHandleT > logHandle, > return ais_rc; > } > > + if (client->is_died() == true) { > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > + return ais_rc; > + } > + > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > // @client is being deleted. DO NOT touch this @client > ais_rc = SA_AIS_ERR_TRY_AGAIN; > @@ -584,6 +617,11 @@ SaAisErrorT LogAgent::saLogFinalize(SaLogHandleT > logHandle) { > if (true) { > ScopeLock critical_section(get_delete_obj_sync_mutex_); > > + if (has_dead_clients == true) { > + RemoveAllDeadLogClients(); > + has_dead_clients = false; > + } > + > // Get log client from the global list > client = SearchClientByHandle(logHandle); > if (client == nullptr) { > @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > return ais_rc; > } > > + if (client->is_died() == true) { > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > + return ais_rc; > + } > + > if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { > // @client is being deleted. DO NOT touch this @client > ais_rc = SA_AIS_ERR_TRY_AGAIN; > @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( > } > > // This client is failed to do recover in Recovery thread. > - // Remove this client from the database. > if (client->is_recovery_failed() == true) { > ScopeLock critical_section(get_delete_obj_sync_mutex_); > - RemoveLogClient(&client); > + // Mark this client died, and it will be removed from client db > later. > + client->died(); > + client = nullptr; > + has_dead_clients = true; > ais_rc = SA_AIS_ERR_BAD_HANDLE; > return ais_rc; > } > @@ -1160,6 +1205,11 @@ SaAisErrorT > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > return ais_rc; > } > > + if (client->is_died() == true) { > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > + return ais_rc; > + } > + > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > // @client is being deleted. DO NOT touch this @client > ais_rc = SA_AIS_ERR_TRY_AGAIN; > @@ -1263,10 +1313,13 @@ SaAisErrorT > LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, > } > > // This client is failed to do recover in Recovery thread. > - // Remove this client from the database. > if (client->is_recovery_failed() == true) { > ScopeLock critical_section(get_delete_obj_sync_mutex_); > - RemoveLogClient(&client); > + // Mark this client died, and it will be removed from client db > later. > + client->died(); > + client = nullptr; > + stream = nullptr; > + has_dead_clients = true; > ais_rc = SA_AIS_ERR_BAD_HANDLE; > return ais_rc; > } > @@ -1321,13 +1374,6 @@ SaAisErrorT > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > return ais_rc; > } > > - if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) { > - // @stream is being used somewhere (>0), DO NOT delete this @stream. > - // or @stream is being deleted on other thread (=-1) > - ais_rc = SA_AIS_ERR_TRY_AGAIN; > - return ais_rc; > - } > - > // Retrieve @LogClient obj based on @client_handle > client = SearchClientByHandle(stream->GetMyClientHandle()); > if (client == nullptr) { > @@ -1336,6 +1382,18 @@ SaAisErrorT > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > return ais_rc; > } > > + if (client->is_died() == true) { > + ais_rc = SA_AIS_ERR_BAD_HANDLE; > + return ais_rc; > + } > + > + if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) { > + // @stream is being used somewhere (>0), DO NOT delete this @stream. > + // or @stream is being deleted on other thread (=-1) > + ais_rc = SA_AIS_ERR_TRY_AGAIN; > + return ais_rc; > + } > + > if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { > ais_rc = SA_AIS_ERR_TRY_AGAIN; > return ais_rc; > @@ -1384,10 +1442,13 @@ SaAisErrorT > LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { > } > > // This client is failed to do recover in Recovery thread. > - // Remove this client from the database. > if (client->is_recovery_failed() == true) { > ScopeLock critical_section(get_delete_obj_sync_mutex_); > - RemoveLogClient(&client); > + // Mark this client died, and it will be removed from client db > later. > + client->died(); > + client = nullptr; > + stream = nullptr; > + has_dead_clients = true; > ais_rc = SA_AIS_ERR_BAD_HANDLE; > return ais_rc; > } > diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h > index 9f1c370..817a357 100644 > --- a/src/log/agent/lga_agent.h > +++ b/src/log/agent/lga_agent.h > @@ -159,6 +159,8 @@ class LogAgent { > void EnterCriticalSection(); > void LeaveCriticalSection(); > > + void RemoveAllDeadLogClients(); > + > private: > // Not allow to create @LogAgent object, except the singleton object > @me_. > LogAgent(); > @@ -272,6 +274,9 @@ class LogAgent { > // Hold list of current log clients > std::vector<LogClient*> client_list_; > > + // true if there is any failed-to-recover client > + bool has_dead_clients = false; > + > // LGS LGA sync params > NCS_SEL_OBJ lgs_sync_sel_; > > diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc > index 386c849..3c3f893 100644 > --- a/src/log/agent/lga_client.cc > +++ b/src/log/agent/lga_client.cc > @@ -32,7 +32,7 @@ > // LogClient > > //-------------------------------------------------------------------------- > ---- > LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id, SaVersionT > ver) > - : client_id_{id}, ref_counter_object_{} { > + : client_id_{id}, ref_counter_object_{}, died_{false} { > TRACE_ENTER(); > // Reset registered callback info > memset(&callbacks_, 0, sizeof(callbacks_)); > diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h > index 05349be..08137b7 100644 > --- a/src/log/agent/lga_client.h > +++ b/src/log/agent/lga_client.h > @@ -173,6 +173,10 @@ class LogClient { > // while the recovery thread is going at RecoveryState::kRecovery2. > bool is_recovery_done() const; > > + // Invoke when the client is not recovered successfully. > + void died() { died_ = true; } > + bool is_died() const { return died_; } > + > private: > // Search for @LogStreamInfo object from @stream_list_ based on > @stream_id > LogStreamInfo* SearchLogStreamInfoById(uint32_t); > @@ -257,6 +261,9 @@ class LogClient { > // LOG handle (derived from hdl-mngr) > SaLogHandleT handle_; > > + // true if this client is not successfully recovered. > + bool died_; > + > DELETE_COPY_AND_MOVE_OPERATORS(LogClient); > }; > > -- > 1.9.1 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel