Hi Vu,
/* Recover clients one at a time */
rc = recover_one_client(p_client);
+ if (p_client == NULL) goto done;
+
[Lennart] Why is this needed? This check is done in "while (p_client != NULL)
{ "
and p_client cannot become NULL until "p_client = p_client->next; " is done as
the last line in the while loop.
[Vu] It is my intention. I am afraid if the context is switched to
client thread
right after the recovery thread enters the while loop.
If it is the case, p_client will be NULL after done recover_one_client().
[Lennart] I don't really understand your explanation. What is happening in the
client thread when we are in recovery state 2 that can set p_client == NULL?
The client thread should not be able to do anything with the data involved when
recovering client in the recovery thread?
Regards
Lennart
> -----Original Message-----
> From: Vu Minh Nguyen [mailto:[email protected]]
> Sent: den 1 april 2016 14:14
> To: Lennart Lund
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH 1 of 1] log: miss mutex protection for common resource
> in log agent [#1705]
>
> Hi Lennart,
>
> Please see my comment inline.
>
> Regards, Vu
>
> Quoting Lennart Lund <[email protected]>:
>
> > Hi Vu
> >
> > Ack with comments
> >
> > See my comments inline [Lennart]:
> >
> > Thanks
> > Lennart
> >
> >> -----Original Message-----
> >> From: Vu Minh Nguyen [mailto:[email protected]]
> >> Sent: den 23 mars 2016 08:16
> >> To: [email protected]; Lennart Lund
> >> Cc: [email protected]
> >> Subject: [PATCH 1 of 1] log: miss mutex protection for common resource
> in
> >> log agent [#1705]
> >>
> >> osaf/libs/agents/saf/lga/lga_state.c | 41
> >> +++++++++++++++++++++++++++++++----
> >> osaf/libs/agents/saf/lga/lga_state.h | 1 +
> >> osaf/libs/agents/saf/lga/lga_util.c | 30 +++++++++++--------------
> >> 3 files changed, 50 insertions(+), 22 deletions(-)
> >>
> >>
> >> There was an race condition between client thread and recovery thread
> >> accessing to common resource `client_list`.
> >>
> >> Add mutex protection.
> >>
> >> diff --git a/osaf/libs/agents/saf/lga/lga_state.c
> >> b/osaf/libs/agents/saf/lga/lga_state.c
> >> --- a/osaf/libs/agents/saf/lga/lga_state.c
> >> +++ b/osaf/libs/agents/saf/lga/lga_state.c
> >> @@ -355,8 +355,11 @@ static void *recovery2_thread(void *dumm
> >> /* We have been signaled to exit
> >> */
> >> goto done;
> >> }
> >> +
> >> /* Recover clients one at a time */
> >> rc = recover_one_client(p_client);
> >> + if (p_client == NULL) goto done;
> >> +
> > [Lennart] Why is this needed? This check is done in "while (p_client
> > != NULL) { " and p_client cannot become NULL until "p_client =
> > p_client->next; " is done as the last line in the while loop.
> >
> [Vu] It is my intention. I am afraid if the context is switched to
> client thread
> right after the recovery thread enters the while loop.
> If it is the case, p_client will be NULL after done recover_one_client().
>
> >> TRACE("\t Client %d is recovered", p_client-
> >> >lgs_client_id);
> >> if (rc == -1) {
> >> TRACE("%s recover_one_client
> >> Fail Deleting cllient (id %d)",
> >> @@ -593,6 +596,15 @@ int recover_one_client(lga_client_hdl_re
> >>
> >> TRACE_ENTER();
> >>
> >> + if (p_client == NULL) {
> >> + /**
> >> + * Probably the client_list has been deleted by
> >> client thread
> >> + * in context calling saLogFinalize(). So, give up
> >> recovering.
> >> + */
> >> + rc = -1;
> >> + goto done;
> >> + }
> >> +
> >> /* This function may be called both in the recovery thread and
> >> in the
> >> * client thread. A possible scenario is that the recovery 2
> >> thread
> >> * times out and calls this function in recovery mode 2 while
> >> there
> >> @@ -658,13 +670,32 @@ uint32_t delete_one_client(
> >> lga_client_hdl_rec_t *rm_node
> >> )
> >> {
> >> - TRACE_ENTER2();
> >> - uint32_t ncs_rc;
> >> + return lga_hdl_rec_del(list_head, rm_node);
> >> +}
> >>
> >> +/**
> >> + * Free the memory allocated for one client handle with mutext
> protection.
> >> + *
> >> + * @param p_client_hdl
> >> + * @return void
> >> + */
> >> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
> >> +{
> >> + lga_client_hdl_rec_t *client_hdl;
> >> +
> >> + /**
> >> + * To prevent race condition b/w client thread and recovery
> >> thread
> >> + * accessing to common resource `client_list`. [#1705]
> >> + */
> >> osaf_mutex_lock_ordie(&lga_recov_mutex);
> >> - ncs_rc = lga_hdl_rec_del(list_head, rm_node);
> >> +
> >> + client_hdl = *p_client_hdl;
> >> + if (client_hdl == NULL) goto done;
> >> +
> >> + free(client_hdl);
> >> + client_hdl = NULL;
> >> +
> >> +done:
> >> osaf_mutex_unlock_ordie(&lga_recov_mutex);
> >>
> >> - TRACE_LEAVE();
> >> - return ncs_rc;
> >> }
> >> diff --git a/osaf/libs/agents/saf/lga/lga_state.h
> >> b/osaf/libs/agents/saf/lga/lga_state.h
> >> --- a/osaf/libs/agents/saf/lga/lga_state.h
> >> +++ b/osaf/libs/agents/saf/lga/lga_state.h
> >> @@ -32,6 +32,7 @@ uint32_t delete_one_client(
> >> lga_client_hdl_rec_t **list_head,
> >> lga_client_hdl_rec_t *rm_node
> >> );
> >> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl);
> >>
> >> #ifdef __cplusplus
> >> }
> >> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
> >> b/osaf/libs/agents/saf/lga/lga_util.c
> >> --- a/osaf/libs/agents/saf/lga/lga_util.c
> >> +++ b/osaf/libs/agents/saf/lga/lga_util.c
> >> @@ -19,6 +19,7 @@
> >> #include <syslog.h>
> >> #include "lga.h"
> >> #include "osaf_poll.h"
> >> +#include "lga_state.h"
> >>
> >> /* Variables used during startup/shutdown only */
> >> static pthread_mutex_t lga_lock = PTHREAD_MUTEX_INITIALIZER;
> >> @@ -449,11 +450,9 @@ void lga_hdl_list_del(lga_client_hdl_rec
> >> /** clean up the channel records for this lga-client
> >> **/
> >> lga_log_stream_hdl_rec_list_del(&client_hdl-
> >> >stream_list);
> >> - /** remove the association with hdl-mngr
> >> - **/
> >> - free(client_hdl);
> >> - client_hdl = 0;
> >> + free_client_hdl(&client_hdl);
> >> }
> >> +
> >> TRACE_LEAVE();
> >> }
> >>
> >> @@ -536,38 +535,35 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
> >> if (list_iter == rm_node) {
> >> *list_head = rm_node->next;
> >>
> >> - /** detach & release the IPC
> >> - **/
> >> + /* detach & release the IPC */
> >> m_NCS_IPC_DETACH(&rm_node->mbx,
> >> lga_clear_mbx, NULL);
> >> m_NCS_IPC_RELEASE(&rm_node->mbx, NULL);
> >>
> >> ncshm_give_hdl(rm_node->local_hdl);
> >> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
> >> rm_node->local_hdl);
> >> - /** Free the channel records off this hdl
> >> - **/
> >> +
> >> + /* Free the channel records off this hdl */
> >> lga_log_stream_hdl_rec_list_del(&rm_node-
> >> >stream_list);
> >> + free_client_hdl(&rm_node);
> >>
> >> - /** free the hdl rec
> >> - **/
> >> - free(rm_node);
> >> rc = NCSCC_RC_SUCCESS;
> >> goto out;
> >> - } else { /* find the rec */
> >> + } else {
> >> + /* find the rec */
> >> while (NULL != list_iter) {
> >> if (list_iter->next == rm_node) {
> >> list_iter->next =
> >> rm_node->next;
> >>
> >> - /** detach & release the IPC */
> >> + /* detach & release
> >> the IPC */
> >>
> >> m_NCS_IPC_DETACH(&rm_node->mbx, lga_clear_mbx,
> NULL);
> >>
> >> m_NCS_IPC_RELEASE(&rm_node->mbx, NULL);
> >>
> >>
> >> ncshm_give_hdl(rm_node->local_hdl);
> >>
> >> ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node-
> >> >local_hdl);
> >> - /** Free the channel records off this lga_hdl */
> >> +
> >> + /* Free the channel
> >> records off this lga_hdl */
> >>
> >> lga_log_stream_hdl_rec_list_del(&rm_node->stream_list);
> >> -
> >> - /** free the hdl rec */
> >> - free(rm_node);
> >> +
> >> free_client_hdl(&rm_node);
> >>
> >> rc =
> >> NCSCC_RC_SUCCESS;
> >> goto out;
>
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel