I am seeing this issue on my web servers. It all started a couple of
weeks ago. I looked at my change logs and saw no changes that would
correlate with this happening.
Here is what happens: a child process reaches MaxRequestsPerChild (10000
in this case) and dies off, but the parent process still thinks it is
running. When I look at the server-status page, those processes are in a
"Sending Reply" state.

Here is a scoreboard from one of my servers:

Scoreboard: 
WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW_____________________W_WWW_________________....................................................................................................................................................................

Here is a small part of what I see on the server-status page:

0-0     21171   1/9999/9999     W       12.31   3081    15      0.0     76.34   
76.34
        10.241.48.117   www.blah.com    GET /js/s_code_remote.js HTTP/1.0
1-0     21172   1/9995/9995     W       12.65   2719    33      0.0     65.51   
65.51
        10.241.48.149   free.blah.com   GET /js/player.js HTTP/1.0
2-0     21173   1/9996/9996     W       12.37   1725    17      0.0     67.84   
67.84
        10.241.48.119   free.blah.com   GET /js/site_catalyst.js HTTP/1.0
3-0     21174   1/9996/9996     W       12.46   2718    3       0.0     66.14   
66.14
        10.241.48.118   free.blah.com   GET /js/cookies.js HTTP/1.0
4-0     21175   1/9997/9997     W       12.23   2711    95      0.7     70.41   
70.41
        10.241.48.147   free.blah.com   GET /playXML/track/13356268?r=0.33
HTTP/1.0
5-0     21176   1/9993/9993     W       12.59   2233    7       0.0     65.62   
65.62
        10.241.48.148   free.blah.com   GET /js/google_adsense.js HTTP/1.0
6-0     21177   1/10000/10000   W       12.41   2367    15      0.0     64.63   
64.63
        10.241.48.117   free.blah.com   GET /js/global.js HTTP/1.0
7-0     21178   1/9997/9997     W       12.49   2606    3       0.0     63.83   
63.83
        10.241.48.148   free.blah.com   GET /js/table_constructor.js HTTP/1.0
8-0     21179   1/9997/9997     W       12.32   2900    17      0.0     67.04   
67.04
        10.241.48.117   free.blah.com   GET /js/cookies.js HTTP/1.0


Below is some output I captured from a web server that stopped
responding to web requests altogether.

[ ~]# ps ax |grep httpd
21457 ?        Ss     0:01 /usr/local/www/bin/httpd
 1414 pts/0    S+     0:00 grep httpd

[ ~]# lsof -p 21457
COMMAND   PID USER   FD   TYPE    DEVICE    SIZE      NODE NAME
httpd   21457 root  cwd    DIR       8,1    4096         2 /
httpd   21457 root  rtd    DIR       8,1    4096         2 /
httpd   21457 root  txt    REG       8,3  501900       471 /usr1/www/bin/httpd
httpd   21457 root  mem    REG       0,0                 0 [vdso]
(stat: No such file or directory)
httpd   21457 root  mem    REG       8,1  126648     36049 /lib/ld-2.3.5.so
httpd   21457 root  mem    REG       8,1 1489572     36050 /lib/libc-2.3.5.so
httpd   21457 root  mem    REG       8,1   25476    106215
/usr/lib/libgdbm.so.2.0.0
httpd   21457 root  mem    REG       8,1   27660     36057
/lib/libcrypt-2.3.5.so
httpd   21457 root  mem    REG       8,1  196676     36058 /lib/libm-2.3.5.so
httpd   21457 root  mem    REG       8,1  125160    106805
/usr/lib/libexpat.so.0.5.0
httpd   21457 root  mem    REG       8,1   46552     31966
/lib/libnss_files-2.3.5.so
httpd   21457 root  DEL    REG       0,8            983040 /SYSV00000000
httpd   21457 root    0r   CHR       1,3              1326 /dev/null
httpd   21457 root    1w   CHR       1,3              1326 /dev/null
httpd   21457 root    2w  FIFO       0,6         460663664 pipe
httpd   21457 root    3r  FIFO       0,6         460663693 pipe
httpd   21457 root    4w  FIFO       0,6         460663664 pipe
httpd   21457 root    5w  FIFO       0,6         460663666 pipe
httpd   21457 root    6w  FIFO       0,6         460663668 pipe
httpd   21457 root    7w  FIFO       0,6         460663673 pipe
httpd   21457 root    8w  FIFO       0,6         460663674 pipe
httpd   21457 root    9w  FIFO       0,6         460663693 pipe
httpd   21457 root   10r  FIFO       0,6         460663696 pipe
httpd   21457 root   11w  FIFO       0,6         460663696 pipe
httpd   21457 root   12r  FIFO       0,6         460663699 pipe
httpd   21457 root   13w  FIFO       0,6         460663699 pipe
httpd   21457 root   14r  FIFO       0,6         460663702 pipe
httpd   21457 root   15u  IPv4 460663477               TCP *:http (LISTEN)
httpd   21457 root   16w  FIFO       0,6         460663702 pipe
httpd   21457 root   17r  FIFO       0,6         460663705 pipe
httpd   21457 root   18w  FIFO       0,6         460663705 pipe

[ ~]# strace -p 21457
Process 21457 attached - interrupt to quit
select(0, NULL, NULL, NULL, {0, 540000}) = 0 (Timeout)
time(NULL)                              = 1224806756
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806757
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806758
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806759
select(19, NULL, [9 11 13 16 18], NULL, {0, 0}) = 5 (out [9 11 13 16
18], left {0, 0})
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806760
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806761
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806762
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806763
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806764
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0})     = 0 (Timeout)
time(NULL)                              = 1224806765
waitpid(-1, 0xbfc31190, WNOHANG)        = 0
select(0, NULL, NULL, NULL, {1, 0} <unfinished ...>
Process 21457 detached

Here is the server info. It is FC4:

[ ~]# uname -a
Linux web10.sd  2.6.17-1.2142smp #1 SMP Thu Sep 14 15:27:50 PDT 2006
i686 i686 i386 GNU/Linux

[ ~]# httpd -V
Server version: Apache/1.3.34 (Unix)
Server built:   Aug 14 2006 16:11:23
Server's Module Magic Number: 19990320:18
Server compiled with....
 -D HAVE_MMAP
 -D HAVE_SHMGET
 -D USE_SHMGET_SCOREBOARD
 -D USE_MMAP_FILES
 -D HAVE_FCNTL_SERIALIZED_ACCEPT
 -D HAVE_SYSVSEM_SERIALIZED_ACCEPT
 -D SINGLE_LISTEN_UNSERIALIZED_ACCEPT
 -D DYNAMIC_MODULE_LIMIT=64
 -D HARD_SERVER_LIMIT=256
 -D HTTPD_ROOT="/usr/local/www"
 -D SUEXEC_BIN="/usr/local/www/bin/suexec"
 -D DEFAULT_PIDLOG="logs/httpd.pid"
 -D DEFAULT_SCOREBOARD="logs/httpd.scoreboard"
 -D DEFAULT_LOCKFILE="logs/httpd.lock"
 -D DEFAULT_ERRORLOG="logs/error_log"
 -D TYPES_CONFIG_FILE="conf/mime.types"
 -D SERVER_CONFIG_FILE="conf/httpd.conf"
 -D ACCESS_CONFIG_FILE="conf/access.conf"
 -D RESOURCE_CONFIG_FILE="conf/srm.conf"

Here is the top part of my httpd.conf:

#################################
### SECTION 1: Global Environment
#################################

ServerType              standalone
Port                    80
HostnameLookups         Off
User                    wwwadmin
Group                   wwwadmin


Listen                  "80"
ServerRoot              "/usr/local/www"
DocumentRoot            "/usr/local/www/htdocs"

LockFile                /var/lock/httpd.lock
PidFile                 logs/httpd.pid
ScoreBoardFile          logs/apache_runtime_status
Timeout                 120
ExtendedStatus          On
UseCanonicalName        On
ServerSignature         Off
ServerTokens            prod
UserDir                 disabled

AddDefaultCharset utf-8

###SERVER TUNING###

KeepAlive               Off
MaxKeepAliveRequests    100
KeepAliveTimeout        15
MinSpareServers         10
MaxSpareServers         20
StartServers            50
MaxClients              125
MaxRequestsPerChild     10000


If anyone else has seen this behavior before, I would appreciate some
help. Thanks.
---
Jason Cox

---------------------------------------------------------------------
The official User-To-User support forum of the Apache HTTP Server Project.
See <URL:http://httpd.apache.org/userslist.html> for more info.
To unsubscribe, e-mail: [EMAIL PROTECTED]
   "   from the digest: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to