Hi Vu Ack with comments
See my comments inline [Lennart]: Thanks Lennart > -----Original Message----- > From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au] > Sent: den 23 mars 2016 08:16 > To: mathi.naic...@oracle.com; Lennart Lund > Cc: opensaf-devel@lists.sourceforge.net > Subject: [PATCH 1 of 1] log: miss mutex protection for common resource in > log agent [#1705] > > osaf/libs/agents/saf/lga/lga_state.c | 41 > +++++++++++++++++++++++++++++++---- > osaf/libs/agents/saf/lga/lga_state.h | 1 + > osaf/libs/agents/saf/lga/lga_util.c | 30 +++++++++++-------------- > 3 files changed, 50 insertions(+), 22 deletions(-) > > > There was an race condition between client thread and recovery thread > accessing to common resource `client_list`. > > Add mutex protection. > > diff --git a/osaf/libs/agents/saf/lga/lga_state.c > b/osaf/libs/agents/saf/lga/lga_state.c > --- a/osaf/libs/agents/saf/lga/lga_state.c > +++ b/osaf/libs/agents/saf/lga/lga_state.c > @@ -355,8 +355,11 @@ static void *recovery2_thread(void *dumm > /* We have been signaled to exit > */ > goto done; > } > + > /* Recover clients one at a time */ > rc = recover_one_client(p_client); > + if (p_client == NULL) goto done; > + [Lennart] Why is this needed? This check is done in "while (p_client != NULL) { " and p_client cannot become NULL until "p_client = p_client->next; " is done as the last line in the while loop. > TRACE("\t Client %d is recovered", p_client- > >lgs_client_id); > if (rc == -1) { > TRACE("%s recover_one_client > Fail Deleting cllient (id %d)", > @@ -593,6 +596,15 @@ int recover_one_client(lga_client_hdl_re > > TRACE_ENTER(); > > + if (p_client == NULL) { > + /** > + * Probably the client_list has been deleted by > client thread > + * in context calling saLogFinalize(). So, give up > recovering. > + */ > + rc = -1; > + goto done; > + } > + > /* This function may be called both in the recovery thread and > in the > * client thread. A possible scenario is that the recovery 2 > thread > * times out and calls this function in recovery mode 2 while > there > @@ -658,13 +670,32 @@ uint32_t delete_one_client( > lga_client_hdl_rec_t *rm_node > ) > { > - TRACE_ENTER2(); > - uint32_t ncs_rc; > + return lga_hdl_rec_del(list_head, rm_node); > +} > > +/** > + * Free the memory allocated for one client handle with mutext protection. > + * > + * @param p_client_hdl > + * @return void > + */ > +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl) > +{ > + lga_client_hdl_rec_t *client_hdl; > + > + /** > + * To prevent race condition b/w client thread and recovery > thread > + * accessing to common resource `client_list`. [#1705] > + */ > osaf_mutex_lock_ordie(&lga_recov_mutex); > - ncs_rc = lga_hdl_rec_del(list_head, rm_node); > + > + client_hdl = *p_client_hdl; > + if (client_hdl == NULL) goto done; > + > + free(client_hdl); > + client_hdl = NULL; > + > +done: > osaf_mutex_unlock_ordie(&lga_recov_mutex); > > - TRACE_LEAVE(); > - return ncs_rc; > } > diff --git a/osaf/libs/agents/saf/lga/lga_state.h > b/osaf/libs/agents/saf/lga/lga_state.h > --- a/osaf/libs/agents/saf/lga/lga_state.h > +++ b/osaf/libs/agents/saf/lga/lga_state.h > @@ -32,6 +32,7 @@ uint32_t delete_one_client( > lga_client_hdl_rec_t **list_head, > lga_client_hdl_rec_t *rm_node > ); > +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl); > > #ifdef __cplusplus > } > diff --git a/osaf/libs/agents/saf/lga/lga_util.c > b/osaf/libs/agents/saf/lga/lga_util.c > --- a/osaf/libs/agents/saf/lga/lga_util.c > +++ b/osaf/libs/agents/saf/lga/lga_util.c > @@ -19,6 +19,7 @@ > #include <syslog.h> > #include "lga.h" > #include "osaf_poll.h" > +#include "lga_state.h" > > /* Variables used during startup/shutdown only */ > static pthread_mutex_t lga_lock = PTHREAD_MUTEX_INITIALIZER; > @@ -449,11 +450,9 @@ void lga_hdl_list_del(lga_client_hdl_rec > /** clean up the channel records for this lga-client > **/ > lga_log_stream_hdl_rec_list_del(&client_hdl- > >stream_list); > - /** remove the association with hdl-mngr > - **/ > - free(client_hdl); > - client_hdl = 0; > + free_client_hdl(&client_hdl); > } > + > TRACE_LEAVE(); > } > > @@ -536,38 +535,35 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_ > if (list_iter == rm_node) { > *list_head = rm_node->next; > > - /** detach & release the IPC > - **/ > + /* detach & release the IPC */ > m_NCS_IPC_DETACH(&rm_node->mbx, > lga_clear_mbx, NULL); > m_NCS_IPC_RELEASE(&rm_node->mbx, NULL); > > ncshm_give_hdl(rm_node->local_hdl); > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, > rm_node->local_hdl); > - /** Free the channel records off this hdl > - **/ > + > + /* Free the channel records off this hdl */ > lga_log_stream_hdl_rec_list_del(&rm_node- > >stream_list); > + free_client_hdl(&rm_node); > > - /** free the hdl rec > - **/ > - free(rm_node); > rc = NCSCC_RC_SUCCESS; > goto out; > - } else { /* find the rec */ > + } else { > + /* find the rec */ > while (NULL != list_iter) { > if (list_iter->next == rm_node) { > list_iter->next = > rm_node->next; > > - /** detach & release the IPC */ > + /* detach & release > the IPC */ > > m_NCS_IPC_DETACH(&rm_node->mbx, lga_clear_mbx, NULL); > > m_NCS_IPC_RELEASE(&rm_node->mbx, NULL); > > > ncshm_give_hdl(rm_node->local_hdl); > > ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node- > >local_hdl); > - /** Free the channel records off this lga_hdl */ > + > + /* Free the channel > records off this lga_hdl */ > > lga_log_stream_hdl_rec_list_del(&rm_node->stream_list); > - > - /** free the hdl rec */ > - free(rm_node); > + > free_client_hdl(&rm_node); > > rc = > NCSCC_RC_SUCCESS; > goto out; ------------------------------------------------------------------------------ Transform Data into Opportunity. Accelerate data analysis in your applications with Intel Data Analytics Acceleration Library. Click to learn more. http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel