osaf/libs/agents/saf/lga/lga.h       |   1 +
 osaf/libs/agents/saf/lga/lga_api.c   |  12 ++++
 osaf/libs/agents/saf/lga/lga_mds.c   |  10 ++-
 osaf/libs/agents/saf/lga/lga_state.c |  87 ++++++++++++------------------------
 osaf/libs/agents/saf/lga/lga_state.h |   3 -
 osaf/libs/agents/saf/lga/lga_util.c  |  23 +++++++++
 6 files changed, 72 insertions(+), 64 deletions(-)


Remove deadlock when two mutexes conflict in MDS and log agent.
Make sure that cb_lock is not taken when calling any MDS API
Some cleaning of code e.g. removing "dead" code and moving functions from 
lga_state.c
that are not related to lga_state.c

diff --git a/osaf/libs/agents/saf/lga/lga.h b/osaf/libs/agents/saf/lga/lga.h
--- a/osaf/libs/agents/saf/lga/lga.h
+++ b/osaf/libs/agents/saf/lga/lga.h
@@ -122,6 +122,7 @@ typedef struct {
 
 /* lga_saf_api.c */
 extern lga_cb_t lga_cb;
+bool is_lgs_state(lgs_state_t state);
 
 /* lga_mds.c */
 extern uint32_t lga_mds_init(lga_cb_t *cb);
diff --git a/osaf/libs/agents/saf/lga/lga_api.c 
b/osaf/libs/agents/saf/lga/lga_api.c
--- a/osaf/libs/agents/saf/lga/lga_api.c
+++ b/osaf/libs/agents/saf/lga/lga_api.c
@@ -44,6 +44,18 @@ lga_cb_t lga_cb = {
        .lgs_state = LGS_START
 };
 
+bool is_lgs_state(lgs_state_t state)
+{
+       bool rc = false;
+
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
+       if (state == lga_cb.lgs_state)
+               rc = true;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
+
+       return rc;
+}
+
 static void populate_open_params(lgsv_stream_open_req_t *open_param,
                                 const SaNameT *logStreamName,
                                 lga_client_hdl_rec_t *hdl_rec,
diff --git a/osaf/libs/agents/saf/lga/lga_mds.c 
b/osaf/libs/agents/saf/lga/lga_mds.c
--- a/osaf/libs/agents/saf/lga/lga_mds.c
+++ b/osaf/libs/agents/saf/lga/lga_mds.c
@@ -16,6 +16,7 @@
  */
 
 #include <stdlib.h>
+#include <saf_error.h>
 #include "lga.h"
 #include "lga_state.h"
 
@@ -446,8 +447,9 @@ static uint32_t lga_lgs_msg_proc(lga_cb_
                        {
                                lga_client_hdl_rec_t *lga_hdl_rec;
 
-                               TRACE_2("LGSV_LGS_WRITE_LOG_CBK: inv = %d, 
error = %d",
-                                       (int)lgsv_msg->info.cbk_info.inv, 
(int)lgsv_msg->info.cbk_info.write_cbk.error);
+                               TRACE_2("LGSV_LGS_WRITE_LOG_CBK: inv_id = %d, 
cbk_error %s",
+                                       (int)lgsv_msg->info.cbk_info.inv,
+                                       
saf_error((int)lgsv_msg->info.cbk_info.write_cbk.error));
 
                        /** Create the chan hdl record here before 
                          ** queing this message onto the priority queue
@@ -590,6 +592,7 @@ static uint32_t lga_mds_rcv(struct ncsmd
        lgsv_msg_t *lgsv_msg = (lgsv_msg_t *)mds_cb_info->info.receive.i_msg;
        uint32_t rc;
 
+       /* TBD Probably incorrect usage of this mutex here */
        osaf_mutex_lock_ordie(&lga_cb.cb_lock);
 
        /* process the message */
@@ -930,7 +933,8 @@ static uint32_t lga_mds_dec(struct ncsmd
                        TRACE_2("LGSV_LGS_CBK_MSG");
                        switch (msg->info.cbk_info.type) {
                        case LGSV_WRITE_LOG_CALLBACK_IND:
-                               TRACE_2("decode writelog message");
+                               TRACE_2("decode writelog message, 
lgs_client_id=%d",
+                                       msg->info.cbk_info.lgs_client_id);
                                total_bytes += lga_dec_write_cbk_msg(uba, msg);
                                break;
                        default:
diff --git a/osaf/libs/agents/saf/lga/lga_state.c 
b/osaf/libs/agents/saf/lga/lga_state.c
--- a/osaf/libs/agents/saf/lga/lga_state.c
+++ b/osaf/libs/agents/saf/lga/lga_state.c
@@ -28,13 +28,6 @@
  * Common data
  */
 
-/* And critical sections. Clients APIs must not pass recovery 2 state check if
- * recovery 2 thread is started but state has not yet been changed by the
- * thread. Also the thread must not start recovery if an API is executing and
- * has passed the recovery 2 check
- */
-static pthread_mutex_t lga_recov2_lock = PTHREAD_MUTEX_INITIALIZER;
-
 /* Selection object for terminating recovery thread for state 2 (after timeout)
  * NOTE: Only for recovery2_thread
  */
@@ -228,7 +221,10 @@ static int initialize_one_client(lga_cli
 
        TRACE_ENTER();
 
-       if (p_client->initialized_flag == true) {
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
+       bool initialized_flag = p_client->initialized_flag;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
+       if (initialized_flag == true) {
                /* The client is already initialized */
                rc = 1;
                goto done;
@@ -242,11 +238,13 @@ static int initialize_one_client(lga_cli
        /* Restore the client Id with the one returned by the LGS and
         * set the initialized flag
         */
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
        p_client->lgs_client_id = client_id;
        p_client->initialized_flag = true;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
 
 done:
-       TRACE_LEAVE2("rc = %d", rc);
+       TRACE_LEAVE2("rc = %d, client_id = %d", rc, client_id);
        return rc;
 }
 
@@ -267,7 +265,10 @@ static int recover_one_stream(lga_log_st
 
        TRACE_ENTER();
 
-       if (p_stream->recovered_flag == true) {
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
+       bool recovered_flag = p_stream->recovered_flag;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
+       if (recovered_flag == true) {
                /* The stream is already recovered */
                rc = 1;
                goto done;
@@ -282,8 +283,10 @@ static int recover_one_stream(lga_log_st
        /* Restore the stream Id with the Id returned by the LGS and
         * set the recovered flag
         */
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
        p_stream->lgs_log_stream_id = stream_id;
        p_stream->recovered_flag = true;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
 
 done:
        TRACE_LEAVE2("rc = %d", rc);
@@ -588,11 +591,17 @@ int lga_recover_one_client(lga_client_hd
 
        TRACE_ENTER();
 
-       /* Synchronize b/w client/mds thread */
+       /* TBD Incorrect usage of the cb_lock mutex removed here. Could cause
+        * a deadlock together with the MDS internal gl_mds_library_mutex
+        * Check mutex handling in log agent in general
+        */
        osaf_mutex_lock_ordie(&lga_cb.cb_lock);
-       if (p_client->recovered_flag == true) {
+       bool recovered_flag = p_client->recovered_flag;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
+       if (recovered_flag == true) {
                /* Client is already recovered */
-               TRACE("\t Already recovered");
+               TRACE("\t Already recovered, cilent_id=%d",
+                       p_client->lgs_client_id);
                goto done;
        }
 
@@ -620,35 +629,20 @@ int lga_recover_one_client(lga_client_hd
                p_stream = p_stream->next;
        }
 
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
        p_client->recovered_flag = true;
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
 done:
-       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
        TRACE_LEAVE();
        return rc;
 }
 
-/**
- * Free the memory allocated for one client handle with mutext protection.
- *
- * @param p_client_hdl
- * @return void
+/* Clients APIs must not pass recovery 2 state check if
+ * recovery 2 thread is started but state has not yet been changed by the
+ * thread. Also the thread must not start recovery if an API is executing and
+ * has passed the recovery 2 check
  */
-void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
-{
-       lga_client_hdl_rec_t *client_hdl = NULL;
-
-       /* Synchronize b/w client & mds thread */
-       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
-
-       client_hdl = *p_client_hdl;
-       if (client_hdl == NULL) goto done;
-
-       free(client_hdl);
-       client_hdl = NULL;
-
-done:
-       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
-}
+static pthread_mutex_t lga_recov2_lock = PTHREAD_MUTEX_INITIALIZER;
 
 void lga_recovery2_lock(void)
 {
@@ -660,29 +654,6 @@ void lga_recovery2_unlock(void)
        osaf_mutex_unlock_ordie(&lga_recov2_lock);
 }
 
-//>
-// Handling lga_cb.lgs_state and lga_state in thread safe
-//<
-
-void set_lgs_state(lgs_state_t state)
-{
-       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
-       lga_cb.lgs_state = state;
-       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
-}
-
-bool is_lgs_state(lgs_state_t state)
-{
-       bool rc = false;
-
-       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
-       if (state == lga_cb.lgs_state)
-               rc = true;
-       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
-
-       return rc;
-}
-
 typedef struct {
        lga_state_t state;
        pthread_mutex_t lock;
diff --git a/osaf/libs/agents/saf/lga/lga_state.h 
b/osaf/libs/agents/saf/lga/lga_state.h
--- a/osaf/libs/agents/saf/lga/lga_state.h
+++ b/osaf/libs/agents/saf/lga/lga_state.h
@@ -30,11 +30,8 @@ void lga_serv_recov1state_set(void);
 int lga_recover_one_client(lga_client_hdl_rec_t *p_client);
 void lga_recovery2_lock(void);
 void lga_recovery2_unlock(void);
-void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl);
 
-void set_lgs_state(lgs_state_t state);
 void set_lga_state(lga_state_t state);
-bool is_lgs_state(lgs_state_t state);
 bool is_lga_state(lga_state_t state);
 
 
diff --git a/osaf/libs/agents/saf/lga/lga_util.c 
b/osaf/libs/agents/saf/lga/lga_util.c
--- a/osaf/libs/agents/saf/lga/lga_util.c
+++ b/osaf/libs/agents/saf/lga/lga_util.c
@@ -273,6 +273,29 @@ static uint32_t lga_hdl_cbk_dispatch_blo
 }
 
 /**
+ * Free the memory allocated for one client handle with mutex protection.
+ *
+ * @param p_client_hdl
+ * @return void
+ */
+static void lga_free_client_hdl(lga_client_hdl_rec_t **p_client_hdl)
+{
+       lga_client_hdl_rec_t *client_hdl = NULL;
+
+       /* Synchronize b/w client & mds thread */
+       osaf_mutex_lock_ordie(&lga_cb.cb_lock);
+
+       client_hdl = *p_client_hdl;
+       if (client_hdl == NULL) goto done;
+
+       free(client_hdl);
+       client_hdl = NULL;
+
+done:
+       osaf_mutex_unlock_ordie(&lga_cb.cb_lock);
+}
+
+/**
  * Initiate the agent when first used.
  * Start NCS service
  * Register with MDS

------------------------------------------------------------------------------
Find and fix application performance issues faster with Applications Manager
Applications Manager provides deep performance insights into multiple tiers of
your business applications. It resolves application problems quickly and
reduces your MTTR. Get your free trial!
https://ad.doubleclick.net/ddm/clk/302982198;130105516;z
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to