In the methods LogAgent::saLogStreamClose() and LogAgent::saLogWriteLogAsync(), the client is deleted if failed to recover; however, the pointer to the log stream of this client has not been reset. Therefore, when the destrustor of ScopeData runs, the reference counter could be restored on deleted log stream.
Besides, there are possibilities of race condition b/w deleting the failed-to-recovery client and using that deleted client in other thread. This patch introduces a new attribute to LogClient. When the client fails to recover, that client is not removed at that time but is done at next call of log handle initialize or finalize if the attribute is true. --- src/log/agent/lga_agent.cc | 87 ++++++++++++++++++++++++++++++++++++++------- src/log/agent/lga_agent.h | 5 +++ src/log/agent/lga_client.cc | 2 +- src/log/agent/lga_client.h | 7 ++++ 4 files changed, 87 insertions(+), 14 deletions(-) diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc index f33b5dc..831edfa 100644 --- a/src/log/agent/lga_agent.cc +++ b/src/log/agent/lga_agent.cc @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() { } } +void LogAgent::RemoveAllDeadLogClients() { + TRACE_ENTER(); + ScopeLock scopeLock(mutex_); + auto it = client_list_.begin(); + while (it != client_list_.end()) { + if ((*it)->is_died() == true) { + client_list_.erase(it); + delete *it; + it = client_list_.begin(); + continue; + } + it++; + } +} + // Add one client @client to the list @client_list_ // This method should be called in @saLogInitialize() context void LogAgent::AddLogClient(LogClient* client) { @@ -314,6 +329,14 @@ SaAisErrorT LogAgent::saLogInitialize(SaLogHandleT* logHandle, TRACE_ENTER(); + if (true) { + ScopeLock critical_section(get_delete_obj_sync_mutex_); + if (has_dead_clients == true) { + RemoveAllDeadLogClients(); + has_dead_clients = false; + } + } + // Verify parameters (log handle and that version is given) if ((logHandle == nullptr) || (version == nullptr)) { TRACE("version or handle FAILED"); @@ -453,6 +476,11 @@ SaAisErrorT LogAgent::saLogSelectionObjectGet( return ais_rc; } + if (client->is_died() == true) { + ais_rc = SA_AIS_ERR_BAD_HANDLE; + return ais_rc; + } + if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { // @client is being deleted. Don't touch @this client ais_rc = SA_AIS_ERR_TRY_AGAIN; @@ -504,6 +532,11 @@ SaAisErrorT LogAgent::saLogDispatch(SaLogHandleT logHandle, return ais_rc; } + if (client->is_died() == true) { + ais_rc = SA_AIS_ERR_BAD_HANDLE; + return ais_rc; + } + if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { // @client is being deleted. DO NOT touch this @client ais_rc = SA_AIS_ERR_TRY_AGAIN; @@ -584,6 +617,11 @@ SaAisErrorT LogAgent::saLogFinalize(SaLogHandleT logHandle) { if (true) { ScopeLock critical_section(get_delete_obj_sync_mutex_); + if (has_dead_clients == true) { + RemoveAllDeadLogClients(); + has_dead_clients = false; + } + // Get log client from the global list client = SearchClientByHandle(logHandle); if (client == nullptr) { @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( return ais_rc; } + if (client->is_died() == true) { + ais_rc = SA_AIS_ERR_BAD_HANDLE; + return ais_rc; + } + if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) { // @client is being deleted. DO NOT touch this @client ais_rc = SA_AIS_ERR_TRY_AGAIN; @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2( } // This client is failed to do recover in Recovery thread. - // Remove this client from the database. if (client->is_recovery_failed() == true) { ScopeLock critical_section(get_delete_obj_sync_mutex_); - RemoveLogClient(&client); + // Mark this client died, and it will be removed from client db later. + client->died(); + client = nullptr; + has_dead_clients = true; ais_rc = SA_AIS_ERR_BAD_HANDLE; return ais_rc; } @@ -1160,6 +1205,11 @@ SaAisErrorT LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, return ais_rc; } + if (client->is_died() == true) { + ais_rc = SA_AIS_ERR_BAD_HANDLE; + return ais_rc; + } + if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { // @client is being deleted. DO NOT touch this @client ais_rc = SA_AIS_ERR_TRY_AGAIN; @@ -1263,10 +1313,13 @@ SaAisErrorT LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle, } // This client is failed to do recover in Recovery thread. - // Remove this client from the database. if (client->is_recovery_failed() == true) { ScopeLock critical_section(get_delete_obj_sync_mutex_); - RemoveLogClient(&client); + // Mark this client died, and it will be removed from client db later. + client->died(); + client = nullptr; + stream = nullptr; + has_dead_clients = true; ais_rc = SA_AIS_ERR_BAD_HANDLE; return ais_rc; } @@ -1321,13 +1374,6 @@ SaAisErrorT LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { return ais_rc; } - if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) { - // @stream is being used somewhere (>0), DO NOT delete this @stream. - // or @stream is being deleted on other thread (=-1) - ais_rc = SA_AIS_ERR_TRY_AGAIN; - return ais_rc; - } - // Retrieve @LogClient obj based on @client_handle client = SearchClientByHandle(stream->GetMyClientHandle()); if (client == nullptr) { @@ -1336,6 +1382,18 @@ SaAisErrorT LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { return ais_rc; } + if (client->is_died() == true) { + ais_rc = SA_AIS_ERR_BAD_HANDLE; + return ais_rc; + } + + if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) { + // @stream is being used somewhere (>0), DO NOT delete this @stream. + // or @stream is being deleted on other thread (=-1) + ais_rc = SA_AIS_ERR_TRY_AGAIN; + return ais_rc; + } + if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) { ais_rc = SA_AIS_ERR_TRY_AGAIN; return ais_rc; @@ -1384,10 +1442,13 @@ SaAisErrorT LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) { } // This client is failed to do recover in Recovery thread. - // Remove this client from the database. if (client->is_recovery_failed() == true) { ScopeLock critical_section(get_delete_obj_sync_mutex_); - RemoveLogClient(&client); + // Mark this client died, and it will be removed from client db later. + client->died(); + client = nullptr; + stream = nullptr; + has_dead_clients = true; ais_rc = SA_AIS_ERR_BAD_HANDLE; return ais_rc; } diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h index 9f1c370..817a357 100644 --- a/src/log/agent/lga_agent.h +++ b/src/log/agent/lga_agent.h @@ -159,6 +159,8 @@ class LogAgent { void EnterCriticalSection(); void LeaveCriticalSection(); + void RemoveAllDeadLogClients(); + private: // Not allow to create @LogAgent object, except the singleton object @me_. LogAgent(); @@ -272,6 +274,9 @@ class LogAgent { // Hold list of current log clients std::vector<LogClient*> client_list_; + // true if there is any failed-to-recover client + bool has_dead_clients = false; + // LGS LGA sync params NCS_SEL_OBJ lgs_sync_sel_; diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc index 386c849..3c3f893 100644 --- a/src/log/agent/lga_client.cc +++ b/src/log/agent/lga_client.cc @@ -32,7 +32,7 @@ // LogClient //------------------------------------------------------------------------------ LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id, SaVersionT ver) - : client_id_{id}, ref_counter_object_{} { + : client_id_{id}, ref_counter_object_{}, died_{false} { TRACE_ENTER(); // Reset registered callback info memset(&callbacks_, 0, sizeof(callbacks_)); diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h index 05349be..08137b7 100644 --- a/src/log/agent/lga_client.h +++ b/src/log/agent/lga_client.h @@ -173,6 +173,10 @@ class LogClient { // while the recovery thread is going at RecoveryState::kRecovery2. bool is_recovery_done() const; + // Invoke when the client is not recovered successfully. + void died() { died_ = true; } + bool is_died() const { return died_; } + private: // Search for @LogStreamInfo object from @stream_list_ based on @stream_id LogStreamInfo* SearchLogStreamInfoById(uint32_t); @@ -257,6 +261,9 @@ class LogClient { // LOG handle (derived from hdl-mngr) SaLogHandleT handle_; + // true if this client is not successfully recovered. + bool died_; + DELETE_COPY_AND_MOVE_OPERATORS(LogClient); }; -- 1.9.1 ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel