These patches fix problems that can crop up if a server is shutdown
while it is still trying to service I/O operations. The first one
depends on perf counter patches from
http://www.beowulf-underground.org/pipermail/pvfs2-developers/2005-December/001704.html,
but only because of a minor change to the perf counter finalize function.
pvfs2-server-shutdown-order.patch:
------------------------------------
This tweaks the order in which components are shut down when
pvfs2-server exits:
- shut down trove _after_ flows, because otherwise a pending flow may
still try to work on the trove interface
- move perf counter shutdown to later, so that other components do not
attempt to log information to it after it has been finalized. This may
not really cause a problem unless you are using the perf-counter api
changes from my previous patch, but it seems like a good idea regardless
job-shutdown-callback.patch:
------------------------------------
After the job interface is shutdown, there may still be callbacks
pending from the thread mgr that cannot easily be stopped. In some
cases this will cause a segfault if the server is active. This patch
fixes the problem by making the callback functions check to see if the
job interface is still initialized before performing any work.
-Phil
---------------------
PatchSet 339
Date: 2005/12/08 16:56:19
Author: pcarns
Branch: HEAD
Tag: (none)
Log:
bug fix: avoid race conditions when shutting down an active server by making
sure that old callback triggers are ignored
[artf26994]
Members:
src/io/job/job.c:1.6->1.7
Index: src/io/job/job.c
diff -u src/io/job/job.c:1.6 src/io/job/job.c:1.7
--- src/io/job/job.c:1.6 Wed Nov 2 07:46:11 2005
+++ src/io/job/job.c Thu Dec 8 09:56:19 2005
@@ -54,6 +54,9 @@
static gen_mutex_t dev_unexp_mutex = GEN_MUTEX_INITIALIZER;
static gen_mutex_t completion_mutex = GEN_MUTEX_INITIALIZER;
+static int initialized = 0;
+static gen_mutex_t initialized_mutex = GEN_MUTEX_INITIALIZER;
+
#ifdef __PVFS2_JOB_THREADED__
static pthread_cond_t completion_cond = PTHREAD_COND_INITIALIZER;
#endif /* __PVFS2_JOB_THREADED__ */
@@ -155,6 +158,10 @@
assert(ret == 0);
#endif
+ gen_mutex_lock(&initialized_mutex);
+ initialized = 1;
+ gen_mutex_unlock(&initialized_mutex);
+
return (0);
}
@@ -166,6 +173,10 @@
*/
int job_finalize(void)
{
+ gen_mutex_lock(&initialized_mutex);
+ initialized = 0;
+ gen_mutex_unlock(&initialized_mutex);
+
PINT_thread_mgr_bmi_stop();
#ifdef __PVFS2_CLIENT__
PINT_thread_mgr_dev_stop();
@@ -3831,6 +3842,15 @@
struct job_desc* tmp_desc = (struct job_desc*)data;
assert(tmp_desc);
+ gen_mutex_lock(&initialized_mutex);
+ if(initialized == 0)
+ {
+ /* The job interface has been shutdown. Silently ignore callback. */
+ return;
+ gen_mutex_unlock(&initialized_mutex);
+ }
+ gen_mutex_unlock(&initialized_mutex);
+
gen_mutex_lock(&completion_mutex);
if (tmp_desc->completed_flag == 0)
{
@@ -3866,6 +3886,15 @@
struct job_desc* tmp_desc = (struct job_desc*)data;
assert(tmp_desc);
+ gen_mutex_lock(&initialized_mutex);
+ if(initialized == 0)
+ {
+ /* The job interface has been shutdown. Silently ignore callback. */
+ return;
+ gen_mutex_unlock(&initialized_mutex);
+ }
+ gen_mutex_unlock(&initialized_mutex);
+
gen_mutex_lock(&completion_mutex);
if (tmp_desc->completed_flag == 0)
{
@@ -3899,6 +3928,15 @@
{
struct job_desc* tmp_desc = NULL;
+ gen_mutex_lock(&initialized_mutex);
+ if(initialized == 0)
+ {
+ /* The job interface has been shutdown. Silently ignore callback. */
+ return;
+ gen_mutex_unlock(&initialized_mutex);
+ }
+ gen_mutex_unlock(&initialized_mutex);
+
gen_mutex_lock(&bmi_unexp_mutex);
/* remove the operation from the pending bmi_unexp queue */
tmp_desc = job_desc_q_shownext(bmi_unexp_queue);
@@ -4297,6 +4335,15 @@
{
struct job_desc* tmp_desc = (struct job_desc*)flow_d->user_ptr;
+ gen_mutex_lock(&initialized_mutex);
+ if(initialized == 0)
+ {
+ /* The job interface has been shutdown. Silently ignore callback. */
+ return;
+ gen_mutex_unlock(&initialized_mutex);
+ }
+ gen_mutex_unlock(&initialized_mutex);
+
/* set job descriptor fields and put into completion queue */
gen_mutex_lock(&completion_mutex);
job_desc_q_add(completion_queue_array[tmp_desc->context_id],
---------------------
PatchSet 340
Date: 2005/12/09 19:48:47
Author: pcarns
Branch: HEAD
Tag: (none)
Log:
bug fix to previous commit, locks were in wrong place
[artf26994]
Members:
src/io/job/job.c:1.7->1.8
Index: src/io/job/job.c
diff -u src/io/job/job.c:1.7 src/io/job/job.c:1.8
--- src/io/job/job.c:1.7 Thu Dec 8 09:56:19 2005
+++ src/io/job/job.c Fri Dec 9 12:48:47 2005
@@ -3846,8 +3846,8 @@
if(initialized == 0)
{
/* The job interface has been shutdown. Silently ignore callback. */
- return;
gen_mutex_unlock(&initialized_mutex);
+ return;
}
gen_mutex_unlock(&initialized_mutex);
@@ -3890,8 +3890,8 @@
if(initialized == 0)
{
/* The job interface has been shutdown. Silently ignore callback. */
- return;
gen_mutex_unlock(&initialized_mutex);
+ return;
}
gen_mutex_unlock(&initialized_mutex);
@@ -3932,8 +3932,8 @@
if(initialized == 0)
{
/* The job interface has been shutdown. Silently ignore callback. */
- return;
gen_mutex_unlock(&initialized_mutex);
+ return;
}
gen_mutex_unlock(&initialized_mutex);
@@ -4339,8 +4339,8 @@
if(initialized == 0)
{
/* The job interface has been shutdown. Silently ignore callback. */
- return;
gen_mutex_unlock(&initialized_mutex);
+ return;
}
gen_mutex_unlock(&initialized_mutex);
diff -Naur pvfs2/src/server/pvfs2-server.c pvfs2-new/src/server/pvfs2-server.c
--- pvfs2/src/server/pvfs2-server.c 2005-11-11 22:31:09.000000000 +0100
+++ pvfs2-new/src/server/pvfs2-server.c 2005-12-09 19:46:30.000000000 +0100
@@ -1261,15 +1261,6 @@
"profiling interface [ stopped ]\n");
}
- if (status & SERVER_PERF_COUNTER_INIT)
- {
- gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting performance "
- "interface [ ... ]\n");
- PINT_perf_finalize();
- gossip_debug(GOSSIP_SERVER_DEBUG, "[-] performance "
- "interface [ stopped ]\n");
- }
-
if (status & SERVER_REQ_SCHED_INIT)
{
gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting request "
@@ -1302,15 +1293,6 @@
"mgr interface [ stopped ]\n");
}
- if (status & SERVER_TROVE_INIT)
- {
- gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting storage "
- "interface [ ... ]\n");
- trove_finalize();
- gossip_debug(GOSSIP_SERVER_DEBUG, "[-] storage "
- "interface [ stopped ]\n");
- }
-
if (status & SERVER_FLOW_INIT)
{
gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting flow "
@@ -1329,6 +1311,15 @@
"interface [ stopped ]\n");
}
+ if (status & SERVER_TROVE_INIT)
+ {
+ gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting storage "
+ "interface [ ... ]\n");
+ trove_finalize();
+ gossip_debug(GOSSIP_SERVER_DEBUG, "[-] storage "
+ "interface [ stopped ]\n");
+ }
+
if (status & SERVER_ENCODER_INIT)
{
gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting encoder "
@@ -1338,6 +1329,15 @@
"interface [ stopped ]\n");
}
+ if (status & SERVER_PERF_COUNTER_INIT)
+ {
+ gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting performance "
+ "interface [ ... ]\n");
+ PINT_perf_finalize(PINT_server_pc);
+ gossip_debug(GOSSIP_SERVER_DEBUG, "[-] performance "
+ "interface [ stopped ]\n");
+ }
+
if (status & SERVER_GOSSIP_INIT)
{
gossip_debug(GOSSIP_SERVER_DEBUG,
_______________________________________________
PVFS2-developers mailing list
PVFS2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers