The following reply was made to PR os-solaris/3749; it has been noted by GNATS.
From: "T. V. Raman" <[EMAIL PROTECTED]>
To: Marc Slemko <[EMAIL PROTECTED]>
Cc: [EMAIL PROTECTED], Apache bugs database <[EMAIL PROTECTED]>
Subject: Re: os-solaris/3749: Apparent resource leak +httpd processes that
refuse to die
Date: Thu, 11 Feb 1999 13:43:03 -0800 (PST)
Here is some more data on the problem with Solaris 2.6,
Apache 1.3.4, NFS and resource leaks.
For the following test, the NFS volumes in question are
mounted soft,intr.
The server is serving out many pages from NFS volumes.
After being up for a day I once again noticed many waiting
apache children.
The NFS volumes these children were trying to
access were up and accessible from other workstations on
the network. However, from the server in question, accesses
to those NFS volumes from a shell hung -- I suspect some
weird NFS locking bug.
Doing an apachectl graceful turned the status of those
waiting children from W to G --but nfs accesses were still
blocking.
Next, I did an apachectl restart -- and this still did not get
rid of the blocked children.
I then did apachectl stop --and all but one httpd process
went away.
The remaining httpd process (pid 5313 in the logs below)
refused to die.
Trying to restart apache now threw an "address already in
use" error.
kill -9 on the process returned silently.
truss on the process hung indefinitely.
I'm appending the output of
tracing the kill using truss.
Rebooting the workstation was the only way to fix this
problem.
Details on the hanging httpd child:
S nobody 5313 1 0 39 20 4656 7744 107b1268
# truss kill -9 5313
execve("/usr/bin/kill", 0xEFFFFEC8, 0xEFFFFED8) argc = 4
open("/usr/lib/libsocket.so.1", O_RDONLY) = 3
fstat(3, 0xEFFFFA58) = 0
mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED, 3, 0) = 0xEF7C0000
mmap(0x00000000, 106496, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xEF7A0000
munmap(0xEF7A8000, 57344) = 0
mmap(0xEF7B6000, 8185, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
3, 24576) = 0xEF7B6000
open("/dev/zero", O_RDONLY) = 4
mmap(0xEF7B8000, 388, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
4, 0) = 0xEF7B8000
close(3) = 0
open("/usr/lib/libnsl.so.1", O_RDONLY) = 3
fstat(3, 0xEFFFFA58) = 0
mmap(0xEF7C0000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED|MAP_FIXED, 3, 0) =
0xEF7C0000
mmap(0x00000000, 581632, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xEF700000
munmap(0xEF770000, 57344) = 0
mmap(0xEF77E000, 33756, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
3, 450560) = 0xEF77E000
mmap(0xEF788000, 16824, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
4, 0) = 0xEF788000
close(3) = 0
open("/usr/lib/libc.so.1", O_RDONLY) = 3
fstat(3, 0xEFFFFA58) = 0
mmap(0xEF7C0000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED|MAP_FIXED, 3, 0) =
0xEF7C0000
mmap(0x00000000, 696320, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xEF600000
munmap(0xEF694000, 57344) = 0
mmap(0xEF6A2000, 24432, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
3, 598016) = 0xEF6A2000
mmap(0xEF6A8000, 6784, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
4, 0) = 0xEF6A8000
close(3) = 0
open("/usr/lib/libdl.so.1", O_RDONLY) = 3
fstat(3, 0xEFFFFA58) = 0
mmap(0xEF7C0000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED|MAP_FIXED, 3, 0) =
0xEF7C0000
mmap(0x00000000, 8192, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 4, 0) =
0xEF6F0000
close(3) = 0
open("/usr/lib/libmp.so.2", O_RDONLY) = 3
fstat(3, 0xEFFFFA58) = 0
mmap(0x00000000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED, 3, 0) = 0xEF6E0000
mmap(0x00000000, 81920, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xEF6C0000
munmap(0xEF6C4000, 57344) = 0
mmap(0xEF6D2000, 3581, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED,
3, 8192) = 0xEF6D2000
close(3) = 0
open("/usr/platform/SUNW,Ultra-2/lib/libc_psr.so.1", O_RDONLY) = 3
fstat(3, 0xEFFFF870) = 0
mmap(0xEF6E0000, 8192, PROT_READ|PROT_EXEC, MAP_SHARED|MAP_FIXED, 3, 0) =
0xEF6E0000
mmap(0x00000000, 16384, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xEF5F0000
close(3) = 0
close(4) = 0
munmap(0xEF6E0000, 8192) = 0
getuid() = 0 [0]
getuid() = 0 [0]
getgid() = 1 [1]
getgid() = 1 [1]
time() = 918767729
brk(0x0004E818) = 0
brk(0x00050818) = 0
time() = 918767729
brk(0x00050818) = 0
brk(0x00052818) = 0
sigprocmask(SIG_SETMASK, 0xEFFFFCF8, 0x00000000) = 0
sigaction(SIGABRT, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGALRM, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGBUS, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGCLD, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGEMT, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGFPE, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGHUP, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGILL, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGINT, 0xEFFFFB78, 0xEFFFFBF8) = 0
sigaction(SIGABRT, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGPIPE, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGQUIT, 0xEFFFFB78, 0xEFFFFBF8) = 0
sigaction(SIGSYS, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGTERM, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGTRAP, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGUSR1, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGUSR2, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGXCPU, 0xEFFFFBD8, 0xEFFFFC58) = 0
sigaction(SIGXFSZ, 0xEFFFFB78, 0xEFFFFBF8) = 0
getpid() = 12435 [12434]
getpid() = 12435 [12434]
stat64("/", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/usr/spool/cron/atjobs", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/usr/spool/cron/atjobs", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
pipe() = 3 [4]
fork() = 12436
Received signal #18, SIGCLD [caught]
siginfo: SIGCLD CLD_EXITED pid=12436 status=0x0000
setcontext(0xEFFFE8A8)
sigaction(SIGCLD, 0xEFFFE9A0, 0xEFFFEA20) = 0
waitid(P_ALL, 0, 0xEFFFE9E0, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) = 0
waitid(P_ALL, 0, 0xEFFFE9E0, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFE9A0, 0xEFFFEA20) = 0
close(4) = 0
fcntl(3, F_GETFL, 0x00000000) = 2
fstat64(3, 0xEFFFEBD0) = 0
llseek(3, 0, SEEK_CUR) Err#29 ESPIPE
ioctl(3, TCGETS, 0x0004D424) Err#22 EINVAL
sigaction(SIGCLD, 0xEFFFE668, 0xEFFFE6E8) = 0
waitid(P_ALL, 0, 0xEFFFE6A8, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFE668, 0xEFFFE6E8) = 0
read(3, " / e x p o r t / l o c a".., 1024) = 25
sigaction(SIGCLD, 0xEFFFE668, 0xEFFFE6E8) = 0
waitid(P_ALL, 0, 0xEFFFE6A8, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFE668, 0xEFFFE6E8) = 0
read(3, 0xEFFFEEE8, 1024) = 0
sigaction(SIGCLD, 0xEFFFEC38, 0xEFFFECB8) = 0
sigaction(SIGCLD, 0xEFFFEC38, 0xEFFFECB8) = 0
close(3) = 0
brk(0x00052818) = 0
brk(0x00054818) = 0
stat64("/export/local/apache/bin", 0xEFFFFC10) = 0
stat64(".", 0xEFFFFB78) = 0
stat64("/usr/bin/kill", 0xEFFFFC10) = 0
open64("/usr/bin/kill", O_RDONLY) = 3
close(62) Err#9 EBADF
fcntl(3, F_DUPFD, 0x0000003E) = 62
close(3) = 0
fcntl(62, F_SETFD, 0x00000001) = 0
fcntl(62, F_GETFL, 0x00000000) = 8192
fstat64(62, 0xEFFFFAB0) = 0
llseek(62, 0, SEEK_CUR) = 0
ioctl(62, TCGETS, 0x0004D424) Err#25 ENOTTY
read(62, " # ! / b i n / k s h\n #".., 1024) = 131
pipe() = 3 [4]
fork() = 12437
Received signal #18, SIGCLD [caught]
siginfo: SIGCLD CLD_EXITED pid=12437 status=0x0000
setcontext(0xEFFFED18)
sigaction(SIGCLD, 0xEFFFEE10, 0xEFFFEE90) = 0
waitid(P_ALL, 0, 0xEFFFEE50, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) = 0
waitid(P_ALL, 0, 0xEFFFEE50, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFEE10, 0xEFFFEE90) = 0
close(4) = 0
fcntl(3, F_GETFL, 0x00000000) = 2
fstat64(3, 0xEFFFF040) = 0
llseek(3, 0, SEEK_CUR) Err#29 ESPIPE
ioctl(3, TCGETS, 0x0004D424) Err#22 EINVAL
sigaction(SIGCLD, 0xEFFFEAD8, 0xEFFFEB58) = 0
waitid(P_ALL, 0, 0xEFFFEB18, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFEAD8, 0xEFFFEB58) = 0
read(3, " k i l l\n", 1024) = 5
sigaction(SIGCLD, 0xEFFFEAD8, 0xEFFFEB58) = 0
waitid(P_ALL, 0, 0xEFFFEB18, WEXITED|WTRAPPED|WSTOPPED|WNOHANG) Err#10 ECHILD
sigaction(SIGCLD, 0xEFFFEAD8, 0xEFFFEB58) = 0
read(3, 0xEFFFF358, 1024) = 0
sigaction(SIGCLD, 0xEFFFF0A8, 0xEFFFF128) = 0
sigaction(SIGCLD, 0xEFFFF0A8, 0xEFFFF128) = 0
close(3) = 0
kill(5313, SIGKILL) = 0
read(62, 0xEF6A9664, 1024) = 0
_exit(0)
#
15:20:48 ? 0:01 /export/local/apache/bin/httpd
httpd: [Thu Feb 11 12:45:03 1999] [error] could not make child process 5313
exit, attempting to continue anyway
--
Best Regards,
--raman
Adobe Systems Tel: 1 408 536 3945 (W14-128)
Advanced Technology Group Fax: 1 408 537 4042
W14-128 345 Park Avenue Email: [EMAIL PROTECTED]
San Jose , CA 95110 -2704 Email: [EMAIL PROTECTED]
http://labrador.corp.adobe.com/~raman/ (Adobe Intranet)
http://cs.cornell.edu/home/raman/raman.html (Cornell)
----------------------------------------------------------------------
Disclaimer: The opinions expressed are my own and in no way should be taken
as representative of my employer, Adobe Systems Inc.
____________________________________________________________