Hi Vu

Ack with comments

See my comments inline [Lennart]:

Thanks
Lennart

> -----Original Message-----
> From: Vu Minh Nguyen [mailto:vu.m.ngu...@dektech.com.au]
> Sent: den 23 mars 2016 08:16
> To: mathi.naic...@oracle.com; Lennart Lund
> Cc: opensaf-devel@lists.sourceforge.net
> Subject: [PATCH 1 of 1] log: miss mutex protection for common resource in
> log agent [#1705]
> 
>  osaf/libs/agents/saf/lga/lga_state.c |  41
> +++++++++++++++++++++++++++++++----
>  osaf/libs/agents/saf/lga/lga_state.h |   1 +
>  osaf/libs/agents/saf/lga/lga_util.c  |  30 +++++++++++--------------
>  3 files changed, 50 insertions(+), 22 deletions(-)
> 
> 
> There was an race condition between client thread and recovery thread
> accessing to common resource `client_list`.
> 
> Add mutex protection.
> 
> diff --git a/osaf/libs/agents/saf/lga/lga_state.c
> b/osaf/libs/agents/saf/lga/lga_state.c
> --- a/osaf/libs/agents/saf/lga/lga_state.c
> +++ b/osaf/libs/agents/saf/lga/lga_state.c
> @@ -355,8 +355,11 @@ static void *recovery2_thread(void *dumm
>                       /* We have been signaled to exit
> */
>                       goto done;
>               }
> +
>               /* Recover clients one at a time */
>               rc = recover_one_client(p_client);
> +             if (p_client == NULL) goto done;
> +
[Lennart] Why is this needed? This check is done in "while (p_client != NULL) { 
" and p_client cannot become NULL until "p_client = p_client->next; " is done 
as the last line in the while loop.

>               TRACE("\t Client %d is recovered", p_client-
> >lgs_client_id);
>               if (rc == -1) {
>                       TRACE("%s recover_one_client
> Fail Deleting cllient (id %d)",
> @@ -593,6 +596,15 @@ int recover_one_client(lga_client_hdl_re
> 
>       TRACE_ENTER();
> 
> +     if (p_client == NULL) {
> +             /**
> +              * Probably the client_list has been deleted by
> client thread
> +              * in context calling saLogFinalize(). So, give up
> recovering.
> +              */
> +             rc = -1;
> +             goto done;
> +     }
> +
>       /* This function may be called both in the recovery thread and
> in the
>        * client thread. A possible scenario is that the recovery 2
> thread
>        * times out and calls this function in recovery mode 2 while
> there
> @@ -658,13 +670,32 @@ uint32_t delete_one_client(
>       lga_client_hdl_rec_t *rm_node
>       )
>  {
> -     TRACE_ENTER2();
> -     uint32_t ncs_rc;
> +     return lga_hdl_rec_del(list_head, rm_node);
> +}
> 
> +/**
> + * Free the memory allocated for one client handle with mutext protection.
> + *
> + * @param p_client_hdl
> + * @return void
> + */
> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
> +{
> +     lga_client_hdl_rec_t *client_hdl;
> +
> +     /**
> +      * To prevent race condition b/w client thread and recovery
> thread
> +      * accessing to common resource `client_list`. [#1705]
> +      */
>       osaf_mutex_lock_ordie(&lga_recov_mutex);
> -     ncs_rc = lga_hdl_rec_del(list_head, rm_node);
> +
> +     client_hdl = *p_client_hdl;
> +     if (client_hdl == NULL) goto done;
> +
> +     free(client_hdl);
> +     client_hdl = NULL;
> +
> +done:
>       osaf_mutex_unlock_ordie(&lga_recov_mutex);
> 
> -     TRACE_LEAVE();
> -     return ncs_rc;
>  }
> diff --git a/osaf/libs/agents/saf/lga/lga_state.h
> b/osaf/libs/agents/saf/lga/lga_state.h
> --- a/osaf/libs/agents/saf/lga/lga_state.h
> +++ b/osaf/libs/agents/saf/lga/lga_state.h
> @@ -32,6 +32,7 @@ uint32_t delete_one_client(
>       lga_client_hdl_rec_t **list_head,
>       lga_client_hdl_rec_t *rm_node
>       );
> +void free_client_hdl(lga_client_hdl_rec_t **p_client_hdl);
> 
>  #ifdef       __cplusplus
>  }
> diff --git a/osaf/libs/agents/saf/lga/lga_util.c
> b/osaf/libs/agents/saf/lga/lga_util.c
> --- a/osaf/libs/agents/saf/lga/lga_util.c
> +++ b/osaf/libs/agents/saf/lga/lga_util.c
> @@ -19,6 +19,7 @@
>  #include <syslog.h>
>  #include "lga.h"
>  #include "osaf_poll.h"
> +#include "lga_state.h"
> 
>  /* Variables used during startup/shutdown only */
>  static pthread_mutex_t lga_lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -449,11 +450,9 @@ void lga_hdl_list_del(lga_client_hdl_rec
>       /** clean up the channel records for this lga-client
>           **/
>               lga_log_stream_hdl_rec_list_del(&client_hdl-
> >stream_list);
> -     /** remove the association with hdl-mngr
> -         **/
> -             free(client_hdl);
> -             client_hdl = 0;
> +             free_client_hdl(&client_hdl);
>       }
> +
>       TRACE_LEAVE();
>  }
> 
> @@ -536,38 +535,35 @@ uint32_t lga_hdl_rec_del(lga_client_hdl_
>       if (list_iter == rm_node) {
>               *list_head = rm_node->next;
> 
> -     /** detach & release the IPC
> -         **/
> +             /* detach & release the IPC */
>               m_NCS_IPC_DETACH(&rm_node->mbx,
> lga_clear_mbx, NULL);
>               m_NCS_IPC_RELEASE(&rm_node->mbx, NULL);
> 
>               ncshm_give_hdl(rm_node->local_hdl);
>               ncshm_destroy_hdl(NCS_SERVICE_ID_LGA,
> rm_node->local_hdl);
> -     /** Free the channel records off this hdl
> -         **/
> +
> +             /* Free the channel records off this hdl */
>               lga_log_stream_hdl_rec_list_del(&rm_node-
> >stream_list);
> +             free_client_hdl(&rm_node);
> 
> -     /** free the hdl rec
> -         **/
> -             free(rm_node);
>               rc = NCSCC_RC_SUCCESS;
>               goto out;
> -     } else {                /* find the rec */
> +     } else {
> +             /* find the rec */
>               while (NULL != list_iter) {
>                       if (list_iter->next == rm_node) {
>                               list_iter->next =
> rm_node->next;
> 
> -             /** detach & release the IPC */
> +                             /* detach & release
> the IPC */
> 
>       m_NCS_IPC_DETACH(&rm_node->mbx, lga_clear_mbx, NULL);
> 
>       m_NCS_IPC_RELEASE(&rm_node->mbx, NULL);
> 
> 
>       ncshm_give_hdl(rm_node->local_hdl);
> 
>       ncshm_destroy_hdl(NCS_SERVICE_ID_LGA, rm_node-
> >local_hdl);
> -             /** Free the channel records off this lga_hdl  */
> +
> +                             /* Free the channel
> records off this lga_hdl  */
> 
>       lga_log_stream_hdl_rec_list_del(&rm_node->stream_list);
> -
> -             /** free the hdl rec */
> -                             free(rm_node);
> +
>       free_client_hdl(&rm_node);
> 
>                               rc =
> NCSCC_RC_SUCCESS;
>                               goto out;

------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to