[Openais] [PATCH 2/2] cfg: Handle errors from totem_mcast

2011-07-28 Thread Jan Friesse
The totem_mcast function can return -1 if corosync is overloaded. Sadly,
in many calls of this function the error code was either not handled at
all, or handled by assert.

This commit changes the behaviour to either return CS_ERR_TRY_AGAIN or
pass the error code on to later layers to handle it.

Signed-off-by: Jan Friesse jfrie...@redhat.com
---
 services/cfg.c |   77 ++-
 1 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/services/cfg.c b/services/cfg.c
index b7aa63b..24f19f2 100644
--- a/services/cfg.c
+++ b/services/cfg.c
@@ -379,6 +379,7 @@ static int send_shutdown(void)
 {
struct req_exec_cfg_shutdown req_exec_cfg_shutdown;
struct iovec iovec;
+   int result;
 
ENTER();
req_exec_cfg_shutdown.header.size =
@@ -389,10 +390,10 @@ static int send_shutdown(void)
iovec.iov_base = (char *)req_exec_cfg_shutdown;
iovec.iov_len = sizeof (struct req_exec_cfg_shutdown);
 
-   assert (api-totem_mcast (iovec, 1, TOTEM_SAFE) == 0);
+   result = api-totem_mcast (iovec, 1, TOTEM_SAFE);
 
LEAVE();
-   return 0;
+   return (result);
 }
 
 static void send_test_shutdown(void *only_conn, void *exclude_conn, int status)
@@ -426,6 +427,9 @@ static void send_test_shutdown(void *only_conn, void 
*exclude_conn, int status)
 
 static void check_shutdown_status(void)
 {
+   int result;
+   cs_error_t error = CS_OK;
+
ENTER();
 
/*
@@ -448,9 +452,17 @@ static void check_shutdown_status(void)
shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) {
TRACE1(shutdown confirmed);
 
+   /*
+* Tell other nodes we are going down
+*/
+   result = send_shutdown();
+   if (result == -1) {
+   error = CS_ERR_TRY_AGAIN;
+   }
+
res_lib_cfg_tryshutdown.header.size = sizeof(struct 
res_lib_cfg_tryshutdown);
res_lib_cfg_tryshutdown.header.id = 
MESSAGE_RES_CFG_TRYSHUTDOWN;
-   res_lib_cfg_tryshutdown.header.error = CS_OK;
+   res_lib_cfg_tryshutdown.header.error = error;
 
/*
 * Tell originator that shutdown was confirmed
@@ -459,10 +471,6 @@ static void check_shutdown_status(void)

sizeof(res_lib_cfg_tryshutdown));
shutdown_con = NULL;
 
-   /*
-* Tell other nodes we are going down
-*/
-   send_shutdown();
 
}
else {
@@ -698,7 +706,9 @@ static void message_handler_req_lib_cfg_ringreenable (
const void *msg)
 {
struct req_exec_cfg_ringreenable req_exec_cfg_ringreenable;
+   struct res_lib_cfg_ringreenable res_lib_cfg_ringreenable;
struct iovec iovec;
+   int result;
 
ENTER();
req_exec_cfg_ringreenable.header.size =
@@ -711,7 +721,19 @@ static void message_handler_req_lib_cfg_ringreenable (
iovec.iov_base = (char *)req_exec_cfg_ringreenable;
iovec.iov_len = sizeof (struct req_exec_cfg_ringreenable);
 
-   assert (api-totem_mcast (iovec, 1, TOTEM_SAFE) == 0);
+   result = api-totem_mcast (iovec, 1, TOTEM_SAFE);
+
+   if (result == -1) {
+   res_lib_cfg_ringreenable.header.id = 
MESSAGE_RES_CFG_RINGREENABLE;
+   res_lib_cfg_ringreenable.header.size = sizeof (struct 
res_lib_cfg_ringreenable);
+   res_lib_cfg_ringreenable.header.error = CS_ERR_TRY_AGAIN;
+   api-ipc_response_send (
+   conn,
+   res_lib_cfg_ringreenable,
+   sizeof (struct res_lib_cfg_ringreenable));
+
+   api-ipc_refcnt_dec(conn);
+   }
 
LEAVE();
 }
@@ -836,6 +858,8 @@ static void message_handler_req_lib_cfg_killnode (
struct res_lib_cfg_killnode res_lib_cfg_killnode;
struct req_exec_cfg_killnode req_exec_cfg_killnode;
struct iovec iovec;
+   int result;
+   cs_error_t error = CS_OK;
 
ENTER();
req_exec_cfg_killnode.header.size =
@@ -848,11 +872,14 @@ static void message_handler_req_lib_cfg_killnode (
iovec.iov_base = (char *)req_exec_cfg_killnode;
iovec.iov_len = sizeof (struct req_exec_cfg_killnode);
 
-   (void)api-totem_mcast (iovec, 1, TOTEM_SAFE);
+   result = api-totem_mcast (iovec, 1, TOTEM_SAFE);
+   if (result == -1) {
+   error = CS_ERR_TRY_AGAIN;
+   }
 
res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode);
res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE;
-   res_lib_cfg_killnode.header.error = CS_OK;
+   res_lib_cfg_killnode.header.error = error;
 

[Openais] [PATCH 1/2] cpg: Handle errors from totem_mcast

2011-07-28 Thread Jan Friesse
The totem_mcast function can return -1 if corosync is overloaded. Sadly, in
many calls of this function the error code was either not handled at all, or
handled by assert.

This commit changes the behaviour to either return CS_ERR_TRY_AGAIN or pass
the error code on to later layers to handle it.

Signed-off-by: Jan Friesse jfrie...@redhat.com
---
 services/cpg.c |   31 ++-
 1 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/services/cpg.c b/services/cpg.c
index 6669fbd..18767bd 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -865,12 +865,19 @@ static void cpg_pd_finalize (struct cpg_pd *cpd)
 static int cpg_lib_exit_fn (void *conn)
 {
struct cpg_pd *cpd = (struct cpg_pd *)api-ipc_private_data_get (conn);
+   int result;
 
log_printf(LOGSYS_LEVEL_DEBUG, exit_fn for conn=%p\n, conn);
 
if (cpd-group_name.length  0) {
-   cpg_node_joinleave_send (cpd-pid, cpd-group_name,
+   result = cpg_node_joinleave_send (cpd-pid, cpd-group_name,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE, 
CONFCHG_CPG_REASON_PROCDOWN);
+   if (result == -1) {
+   /*
+* Call this function again later
+*/
+   return (result);
+   }
}
 
cpg_pd_finalize (cpd);
@@ -1289,6 +1296,7 @@ static void message_handler_req_lib_cpg_join (void *conn, 
const void *message)
struct res_lib_cpg_join res_lib_cpg_join;
cs_error_t error = CPG_OK;
struct list_head *iter;
+   int result;
 
/* Test, if we don't have same pid and group name joined */
for (iter = cpg_pd_list_head.next; iter != cpg_pd_list_head; iter = 
iter-next) {
@@ -1327,9 +1335,15 @@ static void message_handler_req_lib_cpg_join (void 
*conn, const void *message)
memcpy (cpd-group_name, req_lib_cpg_join-group_name,
sizeof (cpd-group_name));
 
-   cpg_node_joinleave_send (req_lib_cpg_join-pid,
+   result = cpg_node_joinleave_send (req_lib_cpg_join-pid,
req_lib_cpg_join-group_name,
MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN);
+
+   if (result == -1) {
+   error = CPG_ERR_TRY_AGAIN;
+   cpd-cpd_state = CPD_STATE_UNJOINED;
+   goto response_send;
+   }
break;
case CPD_STATE_LEAVE_STARTED:
error = CPG_ERR_BUSY;
@@ -1356,6 +1370,7 @@ static void message_handler_req_lib_cpg_leave (void 
*conn, const void *message)
cs_error_t error = CPG_OK;
struct req_lib_cpg_leave  *req_lib_cpg_leave = (struct 
req_lib_cpg_leave *)message;
struct cpg_pd *cpd = (struct cpg_pd *)api-ipc_private_data_get (conn);
+   int result;
 
log_printf(LOGSYS_LEVEL_DEBUG, got leave request on %p\n, conn);
 
@@ -1372,10 +1387,14 @@ static void message_handler_req_lib_cpg_leave (void 
*conn, const void *message)
case CPD_STATE_JOIN_COMPLETED:
error = CPG_OK;
cpd-cpd_state = CPD_STATE_LEAVE_STARTED;
-   cpg_node_joinleave_send (req_lib_cpg_leave-pid,
+   result = cpg_node_joinleave_send (req_lib_cpg_leave-pid,
req_lib_cpg_leave-group_name,
MESSAGE_REQ_EXEC_CPG_PROCLEAVE,
CONFCHG_CPG_REASON_LEAVE);
+   if (result == -1) {
+   error = CPG_ERR_TRY_AGAIN;
+   cpd-cpd_state = CPD_STATE_JOIN_COMPLETED;
+   }
break;
}
 
@@ -1458,8 +1477,10 @@ static void message_handler_req_lib_cpg_mcast (void 
*conn, const void *message)
req_exec_cpg_iovec[1].iov_base = (char 
*)req_lib_cpg_mcast-message;
req_exec_cpg_iovec[1].iov_len = msglen;
 
-   result = api-totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
-   assert(result == 0);
+   result = api-totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
+   if (result == -1) {
+   error = CPG_ERR_TRY_AGAIN;
+   }
}
 
res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast);
-- 
1.7.1

___
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais


Re: [Openais] Corosync Compatability

2011-07-28 Thread Andrew Beekhof
On Wed, Jul 27, 2011 at 1:28 PM,  manish.gu...@ionidea.com wrote:

    Thank you Steve,
    We are currently using corosync-1.2.1 and pacemaker 1.0.10.
    Can we use the same version of pacemaker with corosync-1.4?

I'd say it's likely.  Try on one node - you'll find out pretty quickly
if it's not going to work.



 On Tue, July 26, 2011 7:12 pm, Steven Dake wrote:
 On 07/26/2011 01:52 AM, manish.gu...@ionidea.com wrote:

 Hi,


 I am facing a problem with the redundant communication channel.
 I am using Corosync 1.2, in which auto failback of the redundant
 channel is not supported, but 1.4 provides support.

 Which versions of pacemaker is Corosync 1.4 compatible with?





 corosync 1.4 should work with all versions of pacemaker.  What version of
 pm are you using?

 Regards
 -steve


 ___
 Openais mailing list
 Openais@lists.linux-foundation.org
 https://lists.linux-foundation.org/mailman/listinfo/openais





 ___
 Openais mailing list
 Openais@lists.linux-foundation.org
 https://lists.linux-foundation.org/mailman/listinfo/openais

___
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais