The sched_setscheduler and pthread_setschedparam calls were inconsistent,
setting different priorities for their tasks.  This was resulting in
deadlocks in the pthread_spin_lock code on single-CPU systems.

Patch attached to fix this.

Also, as future work, these calls need proper configure.ac checking.  If
anyone takes on that work, both pthread_setschedparam and
sched_setscheduler must be present in the system libraries before any
scheduler priority changes are made (otherwise deadlock occurs).

regards
-steve
Index: exec/coroipcs.c
===================================================================
--- exec/coroipcs.c	(revision 1909)
+++ exec/coroipcs.c	(working copy)
@@ -259,6 +259,13 @@
 	struct res_overlay res_overlay;
 	int send_ok;
 
+	if (api->sched_priority != 0) {
+		struct sched_param sched_param;
+
+		sched_param.sched_priority = api->sched_priority;
+		res = pthread_setschedparam (conn_info->thread, SCHED_RR, &sched_param);
+	}
+
 	for (;;) {
 		sop.sem_num = 0;
 		sop.sem_op = -1;
Index: exec/coroipcs.h
===================================================================
--- exec/coroipcs.h	(revision 1909)
+++ exec/coroipcs.h	(working copy)
@@ -45,6 +45,7 @@
 
 struct coroipcs_init_state {
 	const char *socket_name;
+	int sched_priority;
 	void *(*malloc) (size_t size);
 	void (*free) (void *ptr);
         void (*log_printf) (
Index: exec/timer.c
===================================================================
--- exec/timer.c	(revision 1909)
+++ exec/timer.c	(working copy)
@@ -90,6 +90,8 @@
 
 static struct timerlist timers_timerlist;
 
+static int sched_priority = 0;
+
 static void (*timer_serialize_lock_fn) (void);
 
 static void (*timer_serialize_unlock_fn) (void);
@@ -107,11 +109,14 @@
 	unsigned long long timeout;
 
 #if ! defined(TS_CLASS) && (defined(COROSYNC_BSD) || defined(COROSYNC_LINUX) || defined(COROSYNC_SOLARIS))
-	struct sched_param sched_param;
 	int res;
 
-	sched_param.sched_priority = 2;
-	res = pthread_setschedparam (expiry_thread, SCHED_RR, &sched_param);
+	if (sched_priority != 0) {
+		struct sched_param sched_param;
+
+		sched_param.sched_priority = sched_priority;
+		res = pthread_setschedparam (expiry_thread, SCHED_RR, &sched_param);
+	}
 #endif
 
 	pthread_mutex_unlock (&timer_mutex);
@@ -148,12 +153,14 @@
 
 int corosync_timer_init (
         void (*serialize_lock_fn) (void),
-        void (*serialize_unlock_fn) (void))
+        void (*serialize_unlock_fn) (void),
+	int sched_priority_in)
 {
 	int res;
 
 	timer_serialize_lock_fn = serialize_lock_fn;
 	timer_serialize_unlock_fn = serialize_unlock_fn;
+	sched_priority = sched_priority_in;
 
 	timerlist_init (&timers_timerlist);
 
Index: exec/main.c
===================================================================
--- exec/main.c	(revision 1909)
+++ exec/main.c	(working copy)
@@ -93,6 +93,8 @@
 
 #define SERVER_BACKLOG 5
 
+static int sched_priority = 0;
+
 static unsigned int service_count = 32;
 
 #if defined(HAVE_PTHREAD_SPIN_LOCK)
@@ -364,16 +366,18 @@
 	struct sched_param sched_param;
 	int res;
 
-	res = sched_get_priority_max (SCHED_RR);
-	if (res != -1) {
-		sched_param.sched_priority = 1;//res;
+	sched_priority = sched_get_priority_max (SCHED_RR);
+	if (sched_priority != -1) {
+		sched_param.sched_priority = sched_priority;
 		res = sched_setscheduler (0, SCHED_RR, &sched_param);
 		if (res == -1) {
 			log_printf (LOG_LEVEL_WARNING, "Could not set SCHED_RR at priority %d: %s\n",
 				sched_param.sched_priority, strerror (errno));
 		}
-	} else
+	} else {
 		log_printf (LOG_LEVEL_WARNING, "Could not get maximum scheduler priority: %s\n", strerror (errno));
+		sched_priority = 0;
+	}
 #else
 	log_printf(LOG_LEVEL_WARNING, "Scheduler priority left to default value (no OS support)\n");
 #endif
@@ -727,7 +731,8 @@
 	
 	corosync_timer_init (
 		serialize_lock,
-		serialize_unlock);
+		serialize_unlock,
+		sched_priority);
 
 	log_printf (LOG_LEVEL_NOTICE, "Corosync Executive Service: started and ready to provide service.\n");
 
@@ -903,6 +908,8 @@
 
 	ipc_subsys_id = _logsys_subsys_create ("IPC", LOG_INFO);
 
+	ipc_init_state.sched_priority = sched_priority;
+
 	coroipcs_ipc_init (&ipc_init_state);
 
 	/*
Index: exec/timer.h
===================================================================
--- exec/timer.h	(revision 1909)
+++ exec/timer.h	(working copy)
@@ -39,7 +39,8 @@
 
 extern void corosync_timer_init (
         void (*serialize_lock) (void),
-        void (*serialize_unlock) (void));
+        void (*serialize_unlock) (void),
+	int sched_priority);
 
 extern int corosync_timer_add_duration (
 	unsigned long long nanoseconds_in_future,
Index: lib/coroipcc.c
===================================================================
--- lib/coroipcc.c	(revision 1909)
+++ lib/coroipcc.c	(working copy)
@@ -100,6 +100,10 @@
 }
 #endif 
 
+#ifndef MSG_NOSIGNAL
+#define MSG_NOSIGNAL 0
+#endif
+
 static int
 coroipcc_send (
 	int s,
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to