Hi Canh,

I forgot that patch. Just reviewed it. Thanks.

Anyway, I will push the previous version for this ticket.

Regards, Vu

> -----Original Message-----
> From: Canh Van Truong <canh.v.tru...@dektech.com.au>
> Sent: Tuesday, June 5, 2018 6:20 PM
> To: 'Vu Minh Nguyen' <vu.m.ngu...@dektech.com.au>;
> lennart.l...@ericsson.com
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: RE: [PATCH 1/1] log: fix restore ref counter for deleted stream
> [#2870]
> 
> Hi a.Vu,
> 
> Looking at patch in ticket #2670, we can use ref_counter to decide to
remove
> client in case recovery fail. That way is simple.
> 
> Regards
> Canh
> 
> -----Original Message-----
> From: Vu Minh Nguyen <vu.m.ngu...@dektech.com.au>
> Sent: Tuesday, June 5, 2018 5:54 PM
> To: lennart.l...@ericsson.com; canh.v.tru...@dektech.com.au
> Cc: opensaf-devel@lists.sourceforge.net; Vu Minh Nguyen
> <vu.m.ngu...@dektech.com.au>
> Subject: [PATCH 1/1] log: fix restore ref counter for deleted stream
[#2870]
> 
> In the methods LogAgent::saLogStreamClose() and
> LogAgent::saLogWriteLogAsync(),
> the client is deleted if failed to recover; however, the pointer to the
log
> stream of this client has not been reset. Therefore, when the destrustor
of
> ScopeData runs, the reference counter could be restored on deleted log
> stream.
> 
> Besides, there are possibilities of race condition b/w deleting the
> failed-to-recovery client and using that deleted client in other thread.
> 
> This patch introduces a new attribute to LogClient. When the client fails
> to recover, that client is not removed at that time but is done at next
call
> of log handle initialize or finalize if the attribute is true.
> ---
>  src/log/agent/lga_agent.cc  | 87
> ++++++++++++++++++++++++++++++++++++++-------
>  src/log/agent/lga_agent.h   |  5 +++
>  src/log/agent/lga_client.cc |  2 +-
>  src/log/agent/lga_client.h  |  7 ++++
>  4 files changed, 87 insertions(+), 14 deletions(-)
> 
> diff --git a/src/log/agent/lga_agent.cc b/src/log/agent/lga_agent.cc
> index f33b5dc..831edfa 100644
> --- a/src/log/agent/lga_agent.cc
> +++ b/src/log/agent/lga_agent.cc
> @@ -255,6 +255,21 @@ void LogAgent::RemoveAllLogClients() {
>    }
>  }
> 
> +void LogAgent::RemoveAllDeadLogClients() {
> +  TRACE_ENTER();
> +  ScopeLock scopeLock(mutex_);
> +  auto it = client_list_.begin();
> +  while (it != client_list_.end()) {
> +    if ((*it)->is_died() == true) {
> +      client_list_.erase(it);
> +      delete *it;
> +      it = client_list_.begin();
> +      continue;
> +    }
> +    it++;
> +  }
> +}
> +
>  // Add one client @client to the list @client_list_
>  // This method should be called in @saLogInitialize() context
>  void LogAgent::AddLogClient(LogClient* client) {
> @@ -314,6 +329,14 @@ SaAisErrorT
> LogAgent::saLogInitialize(SaLogHandleT*
> logHandle,
> 
>    TRACE_ENTER();
> 
> +  if (true) {
> +    ScopeLock critical_section(get_delete_obj_sync_mutex_);
> +    if (has_dead_clients == true) {
> +      RemoveAllDeadLogClients();
> +      has_dead_clients = false;
> +    }
> +  }
> +
>    // Verify parameters (log handle and that version is given)
>    if ((logHandle == nullptr) || (version == nullptr)) {
>      TRACE("version or handle FAILED");
> @@ -453,6 +476,11 @@ SaAisErrorT LogAgent::saLogSelectionObjectGet(
>        return ais_rc;
>      }
> 
> +    if (client->is_died() == true) {
> +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> +      return ais_rc;
> +    }
> +
>      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) {
>        // @client is being deleted. Don't touch @this client
>        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> @@ -504,6 +532,11 @@ SaAisErrorT
> LogAgent::saLogDispatch(SaLogHandleT
> logHandle,
>        return ais_rc;
>      }
> 
> +    if (client->is_died() == true) {
> +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> +      return ais_rc;
> +    }
> +
>      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) {
>        // @client is being deleted. DO NOT touch this @client
>        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> @@ -584,6 +617,11 @@ SaAisErrorT LogAgent::saLogFinalize(SaLogHandleT
> logHandle) {
>    if (true) {
>      ScopeLock critical_section(get_delete_obj_sync_mutex_);
> 
> +    if (has_dead_clients == true) {
> +      RemoveAllDeadLogClients();
> +      has_dead_clients = false;
> +    }
> +
>      // Get log client from the global list
>      client = SearchClientByHandle(logHandle);
>      if (client == nullptr) {
> @@ -852,6 +890,11 @@ SaAisErrorT LogAgent::saLogStreamOpen_2(
>        return ais_rc;
>      }
> 
> +    if (client->is_died() == true) {
> +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> +      return ais_rc;
> +    }
> +
>      if (client->FetchAndIncreaseRefCounter(__func__, &updated) == -1) {
>        // @client is being deleted. DO NOT touch this @client
>        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> @@ -892,10 +935,12 @@ SaAisErrorT LogAgent::saLogStreamOpen_2(
>      }
> 
>      // This client is failed to do recover in Recovery thread.
> -    // Remove this client from the database.
>      if (client->is_recovery_failed() == true) {
>        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> -      RemoveLogClient(&client);
> +      // Mark this client died, and it will be removed from client db
> later.
> +      client->died();
> +      client = nullptr;
> +      has_dead_clients = true;
>        ais_rc = SA_AIS_ERR_BAD_HANDLE;
>        return ais_rc;
>      }
> @@ -1160,6 +1205,11 @@ SaAisErrorT
> LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle,
>        return ais_rc;
>      }
> 
> +    if (client->is_died() == true) {
> +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> +      return ais_rc;
> +    }
> +
>      if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) {
>        // @client is being deleted. DO NOT touch this @client
>        ais_rc = SA_AIS_ERR_TRY_AGAIN;
> @@ -1263,10 +1313,13 @@ SaAisErrorT
> LogAgent::saLogWriteLogAsync(SaLogStreamHandleT logStreamHandle,
>      }
> 
>      // This client is failed to do recover in Recovery thread.
> -    // Remove this client from the database.
>      if (client->is_recovery_failed() == true) {
>        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> -      RemoveLogClient(&client);
> +      // Mark this client died, and it will be removed from client db
> later.
> +      client->died();
> +      client = nullptr;
> +      stream = nullptr;
> +      has_dead_clients = true;
>        ais_rc = SA_AIS_ERR_BAD_HANDLE;
>        return ais_rc;
>      }
> @@ -1321,13 +1374,6 @@ SaAisErrorT
> LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
>        return ais_rc;
>      }
> 
> -    if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) {
> -      // @stream is being used somewhere (>0), DO NOT delete this
@stream.
> -      // or @stream is being deleted on other thread (=-1)
> -      ais_rc = SA_AIS_ERR_TRY_AGAIN;
> -      return ais_rc;
> -    }
> -
>      // Retrieve @LogClient obj based on @client_handle
>      client = SearchClientByHandle(stream->GetMyClientHandle());
>      if (client == nullptr) {
> @@ -1336,6 +1382,18 @@ SaAisErrorT
> LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
>        return ais_rc;
>      }
> 
> +    if (client->is_died() == true) {
> +      ais_rc = SA_AIS_ERR_BAD_HANDLE;
> +      return ais_rc;
> +    }
> +
> +    if (stream->FetchAndDecreaseRefCounter(__func__, &sUpdated) != 0) {
> +      // @stream is being used somewhere (>0), DO NOT delete this
@stream.
> +      // or @stream is being deleted on other thread (=-1)
> +      ais_rc = SA_AIS_ERR_TRY_AGAIN;
> +      return ais_rc;
> +    }
> +
>      if (client->FetchAndIncreaseRefCounter(__func__, &cUpdated) == -1) {
>        ais_rc = SA_AIS_ERR_TRY_AGAIN;
>        return ais_rc;
> @@ -1384,10 +1442,13 @@ SaAisErrorT
> LogAgent::saLogStreamClose(SaLogStreamHandleT logStreamHandle) {
>      }
> 
>      // This client is failed to do recover in Recovery thread.
> -    // Remove this client from the database.
>      if (client->is_recovery_failed() == true) {
>        ScopeLock critical_section(get_delete_obj_sync_mutex_);
> -      RemoveLogClient(&client);
> +      // Mark this client died, and it will be removed from client db
> later.
> +      client->died();
> +      client = nullptr;
> +      stream = nullptr;
> +      has_dead_clients = true;
>        ais_rc = SA_AIS_ERR_BAD_HANDLE;
>        return ais_rc;
>      }
> diff --git a/src/log/agent/lga_agent.h b/src/log/agent/lga_agent.h
> index 9f1c370..817a357 100644
> --- a/src/log/agent/lga_agent.h
> +++ b/src/log/agent/lga_agent.h
> @@ -159,6 +159,8 @@ class LogAgent {
>    void EnterCriticalSection();
>    void LeaveCriticalSection();
> 
> +  void RemoveAllDeadLogClients();
> +
>   private:
>    // Not allow to create @LogAgent object, except the singleton object
> @me_.
>    LogAgent();
> @@ -272,6 +274,9 @@ class LogAgent {
>    // Hold list of current log clients
>    std::vector<LogClient*> client_list_;
> 
> +  // true if there is any failed-to-recover client
> +  bool has_dead_clients = false;
> +
>    // LGS LGA sync params
>    NCS_SEL_OBJ lgs_sync_sel_;
> 
> diff --git a/src/log/agent/lga_client.cc b/src/log/agent/lga_client.cc
> index 386c849..3c3f893 100644
> --- a/src/log/agent/lga_client.cc
> +++ b/src/log/agent/lga_client.cc
> @@ -32,7 +32,7 @@
>  // LogClient
> 
>
//--------------------------------------------------------------------------
> ----
>  LogClient::LogClient(const SaLogCallbacksT* cb, uint32_t id, SaVersionT
> ver)
> -    : client_id_{id}, ref_counter_object_{} {
> +    : client_id_{id}, ref_counter_object_{}, died_{false} {
>    TRACE_ENTER();
>    // Reset registered callback info
>    memset(&callbacks_, 0, sizeof(callbacks_));
> diff --git a/src/log/agent/lga_client.h b/src/log/agent/lga_client.h
> index 05349be..08137b7 100644
> --- a/src/log/agent/lga_client.h
> +++ b/src/log/agent/lga_client.h
> @@ -173,6 +173,10 @@ class LogClient {
>    // while the recovery thread is going at RecoveryState::kRecovery2.
>    bool is_recovery_done() const;
> 
> +  // Invoke when the client is not recovered successfully.
> +  void died() { died_ = true; }
> +  bool is_died() const { return died_; }
> +
>   private:
>    // Search for @LogStreamInfo object from @stream_list_ based on
> @stream_id
>    LogStreamInfo* SearchLogStreamInfoById(uint32_t);
> @@ -257,6 +261,9 @@ class LogClient {
>    // LOG handle (derived from hdl-mngr)
>    SaLogHandleT handle_;
> 
> +  // true if this client is not successfully recovered.
> +  bool died_;
> +
>    DELETE_COPY_AND_MOVE_OPERATORS(LogClient);
>  };
> 
> --
> 1.9.1



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to