------- You are receiving this mail because: ------- You are on the CC list for the bug.
http://bugs.exim.org/show_bug.cgi?id=789 Summary: Asleep Exim process Product: Exim Version: 4.69 Platform: x86 OS/Version: Linux Status: NEW Severity: bug Priority: critical Component: Delivery in general AssignedTo: [email protected] ReportedBy: [email protected] CC: [email protected] We have a large exim infrastructure running and very often we are watching some asleep exim processes. They are becoming a problem because the mail delivery is delayed even a week. Some useful information bellow... [r...@buzon ~]$ ps -aux | grep -i -E "exim4 -Mc|exim -Mc" USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 984 0.0 0.2 10340 2288 ? S 09:03 0:00 /usr/sbin/exim4 -Mc 1L89B2-0000Fl-NR [r...@buzon ~]$ strace -p 984 Process 984 attached - interrupt to quit time(NULL) = 1228379245 open("/etc/localtime", O_RDONLY) = 6 fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f1a000 read(6, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0\10"..., 4096) = 946 close(6) = 0 munmap(0xb7f1a000, 4096) = 0 write(4, "2008-12-04 09:27:25 francisco.lo"..., 494) = 494 time(NULL) = 1228379246 time(NULL) = 1228379246 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 socket(PF_FILE, SOCK_DGRAM, 0) = 6 fcntl64(6, F_SETFD, FD_CLOEXEC) = 0 connect(6, {sa_family=AF_FILE, path="/dev/log"}, 110) = 0 send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 544, MSG_NOSIGNAL) = 544 open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7 fcntl64(7, F_GETFD) = 0 fcntl64(7, F_SETFD, FD_CLOEXEC) = 0 fstat64(7, {st_mode=S_IFREG|0640, st_size=1489668, ...}) = 0 write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 514) = 514 time(NULL) = 1228379246 write(5, "0\201\240\2\1\3c\201\232\4#o=empleados,o=juntade"..., 163) = 163 poll([{fd=5, events=POLLIN|POLLPRI|POLLERR|POLLHUP, revents=POLLIN|POLLERR|POLLHUP}], 1, -1) = 1 read(5, 0x911fefb, 8) = -1 ECONNRESET (Connection reset by peer) time(NULL) = 1228379246 write(4, "2008-12-04 09:27:26 araceli.seba"..., 480) = 480 time(NULL) = 1228379246 time(NULL) = 1228379246 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 530, MSG_NOSIGNAL) = 530 time(NULL) = 1228379246 close(7) = 0 open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7 fcntl64(7, F_GETFD) = 0 fcntl64(7, F_SETFD, FD_CLOEXEC) = 0 fstat64(7, {st_mode=S_IFREG|0640, st_size=1490182, ...}) = 0 write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 500) = 500 time(NULL) = 1228379246 write(5, "0\201\235\2\1\4c\201\227\4#o=empleados,o=juntade"..., 160) = -1 EPIPE (Broken pipe) --- SIGPIPE (Broken pipe) @ 0 (0) --- time(NULL) = 1228379246 time(NULL) = 1228379246 write(4, "2008-12-04 09:27:26 manuelg.garc"..., 474) = 474 time(NULL) = 1228379246 time(NULL) = 1228379246 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 524, MSG_NOSIGNAL) = 524 time(NULL) = 1228379246 close(7) = 0 open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7 fcntl64(7, F_GETFD) = 0 fcntl64(7, F_SETFD, FD_CLOEXEC) = 0 fstat64(7, {st_mode=S_IFREG|0640, st_size=1490682, ...}) = 0 write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 494) = 494 write(5, "0\5\2\1\5B\0", 7) = -1 EPIPE (Broken pipe) --- SIGPIPE (Broken pipe) @ 0 (0) --- shutdown(5, 2 /* send and receive */) = -1 ENOTCONN (Transport endpoint is not connected) close(5) = 0 geteuid32() = 0 getegid32() = 102 setgid32(102) = 0 setuid32(102) = 0 rt_sigaction(SIGTERM, {SIG_IGN}, {SIG_DFL}, 8) = 0 time(NULL) = 1228379246 open("/colas/exim4/db/retry.lockfile", O_RDWR|O_LARGEFILE) = 5 alarm(60) = 0 fcntl64(5, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=0, len=0}, 0xbfef8e64) = 0 alarm(0) = 60 open("/proc/stat", O_RDONLY) = 8 fstat64(8, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f1a000 read(8, "cpu 4550350 20 769622 26617089 "..., 4096) = 777 read(8, "", 4096) = 0 close(8) = 0 munmap(0xb7f1a000, 4096) = 0 stat64("/colas/exim4/db/retry", {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0 open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 read(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 512) = 512 close(8) = 0 stat64("DB_CONFIG", 0xbfef892c) = -1 ENOENT (No such file or directory) open("DB_CONFIG", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory) stat64("/var/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0 gettimeofday({1228379246, 59517}, NULL) = 0 stat64("__db.002", 0xbfef89cc) = -1 ENOENT (No such file or directory) open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 fstat64(8, {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0 pread64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096, 0) = 4096 pread64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0&\0b\3\0\2\331"..., 4096, 598016) = 4096 time(NULL) = 1228379246 time(NULL) = 1228379246 time(NULL) = 1228379246 pwrite64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096, 0) = 4096 pwrite64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0(\0\224\2\0\2"..., 4096, 598016) = 4096 fdatasync(8) = 0 close(8) = 0 open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 fdatasync(8) = 0 close(8) = 0 close(5) = 0 time(NULL) = 1228379246 pipe([5, 8]) = 0 rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb7f09708) = 16951 close(5) = 0 fcntl64(8, F_GETFL) = 0x1 (flags O_WRONLY) fstat64(8, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f1a000 _llseek(8, 0, 0xbfef8ddc, SEEK_CUR) = -1 ESPIPE (Illegal seek) write(8, "Auto-Submitted: auto-replied\nFro"..., 1062) = 1062 close(8) = 0 munmap(0xb7f1a000, 4096) = 0 waitpid(16951, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0) = 16951 --- SIGCHLD (Child exited) @ 0 (0) --- rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0 open("/colas/exim4/input/2/hdr.984", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0640) = 5 fchown32(5, 102, 102) = 0 fchmod(5, 0640) = 0 fcntl64(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE) fstat64(5, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f1a000 _llseek(5, 0, [0], SEEK_CUR) = 0 write(5, "1L89B2-0000Fl-NR-H\nexim 102 102\n"..., 464) = 464 fstat64(5, {st_mode=S_IFREG|0640, st_size=464, ...}) = 0 write(5, "214P Received: from [10.240.229."..., 1617) = 1617 fsync(5) = 0 fstat64(5, {st_mode=S_IFREG|0640, st_size=2081, ...}) = 0 close(5) = 0 munmap(0xb7f1a000, 4096) = 0 rename("/colas/exim4/input/2/hdr.984", "/colas/exim4/input/2/1L89B2-0000Fl-NR-H") = 0 open("/colas/exim4/input/2/.", O_RDONLY|O_LARGEFILE) = 5 fsync(5) = 0 close(5) = 0 close(4) = 0 munmap(0xb7f1b000, 4096) = 0 unlink("/colas/exim4/input/2/1L89B2-0000Fl-NR-J") = -1 ENOENT (No such file or directory) close(3) = 0 exit_group(0) = ? Process 984 detached [r...@buzon ~]# ls -ltr 1L89B2-0000Fl-NR* -rw-r----- 1 exim exim 977 dic 4 09:03 1L89B2-0000Fl-NR-D -rw-r----- 1 exim exim 2081 dic 4 09:27 1L89B2-0000Fl-NR-H Another process: [r...@buzon ~]# ps -ef|grep exim4 root 24501 1 0 11:13 ? 00:00:00 /usr/sbin/exim4 -Mc 1LAOLr-0006ME-8o [r...@buzon ~]# lsof -p 24501 OMMAND PID USER FD TYPE DEVICE SIZE NODE NAME exim4 24501 root cwd DIR 9,2 4096 11555810 /colas/exim4 exim4 24501 root rtd DIR 9,2 4096 2 / exim4 24501 root txt REG 9,2 911795 6761799 /usr/sbin/exim-4.69-1 exim4 24501 root mem REG 9,2 75284 6766476 /usr/lib/libz.so.1.2.3 exim4 24501 root mem REG 9,2 121684 16369530 /lib/ld-2.5.so exim4 24501 root mem REG 9,2 1011024 16368031 /lib/libdb-4.3.so exim4 24501 root mem REG 9,2 1576952 16369531 /lib/libc-2.5.so exim4 24501 root mem REG 9,2 174508 6749880 /usr/lib/libgssapi_krb5.so.2.2 exim4 24501 root mem REG 9,2 101036 16369538 /lib/libnsl-2.5.so exim4 24501 root mem REG 9,2 1238928 16368033 /lib/libcrypto.so.0.9.8b exim4 24501 root mem REG 9,2 318839 6754918 /usr/lib/libspf2.so.2.0.0 exim4 24501 root mem REG 9,2 133764 6766384 /usr/lib/libpq.so.4.1 exim4 24501 root mem REG 9,2 99252 6750115 /usr/lib/libsasl2.so.2.0.22 exim4 24501 root mem REG 9,2 240444 6755749 /usr/lib/libldap-2.3.so.0.2.15 exim4 24501 root mem REG 9,2 30596 6767446 /usr/lib/libkrb5support.so.0.1 exim4 24501 root mem REG 9,2 16528 16368025 /lib/libdl-2.5.so exim4 24501 root mem REG 9,2 7720 16369553 /lib/libcom_err.so.2.1 exim4 24501 root mem REG 9,2 76392 16369537 /lib/libresolv-2.5.so exim4 24501 root mem REG 9,2 53824 6767378 /usr/lib/liblber-2.3.so.0.2.15 exim4 24501 root mem REG 9,2 1453200 7692961 /usr/lib/mysql/libmysqlclient.so.15.0.0 exim4 24501 root mem REG 9,2 46740 16368041 /lib/libnss_files-2.5.so exim4 24501 root mem REG 9,2 208344 16368018 /lib/libm-2.5.so exim4 24501 root mem REG 9,2 157196 6767448 /usr/lib/libk5crypto.so.3.0 exim4 24501 root mem REG 9,2 559532 6767449 /usr/lib/libkrb5.so.3.2 exim4 24501 root mem REG 9,2 125564 16368029 /lib/libpthread-2.5.so exim4 24501 root mem REG 9,2 280464 16368035 /lib/libssl.so.0.9.8b exim4 24501 root mem REG 9,2 1241272 6843928 /usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/libperl.so exim4 24501 root mem REG 9,2 15264 16369540 /lib/libutil-2.5.so exim4 24501 root mem REG 9,2 27836 16369539 /lib/libcrypt-2.5.so exim4 24501 root 0u CHR 1,3 1317 /dev/null exim4 24501 root 1u CHR 1,3 1317 /dev/null exim4 24501 root 2u CHR 1,3 1317 /dev/null exim4 24501 root 3uw REG 9,2 4010 11555953 /colas/exim4/input/r/1LAOLr-0006ME-8o-D exim4 24501 root 4w REG 9,2 199 11555961 /colas/exim4/msglog/r/1LAOLr-0006ME-8o exim4 24501 root 5u IPv4 14979272 TCP ************:60283->ldap:ldap (ESTABLISHED) [r...@buzon ~]# strace -p 24501 Process 24501 attached - interrupt to quit time(NULL) = 1228914090 open("/etc/localtime", O_RDONLY) = 6 fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7ff3000 read(6, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0\10"..., 4096) = 946 close(6) = 0 munmap(0xb7ff3000, 4096) = 0 write(4, "2008-12-10 14:01:30 carlos.casad"..., 480) = 480 time(NULL) = 1228914090 time(NULL) = 1228914090 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0 socket(PF_FILE, SOCK_DGRAM, 0) = 6 fcntl64(6, F_SETFD, FD_CLOEXEC) = 0 connect(6, {sa_family=AF_FILE, path="/dev/log"}, 110) = 0 send(6, "<22>Dec 10 14:01:30 exim[24501]:"..., 532, MSG_NOSIGNAL) = 532 open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7 fcntl64(7, F_GETFD) = 0 fcntl64(7, F_SETFD, FD_CLOEXEC) = 0 fstat64(7, {st_mode=S_IFREG|0640, st_size=2844313, ...}) = 0 write(7, "2008-12-10 14:01:30 1LAOLr-0006M"..., 500) = 500 write(5, "0\5\2\1\3B\0", 7) = 7 shutdown(5, 2 /* send and receive */) = 0 close(5) = 0 geteuid32() = 0 getegid32() = 102 setgid32(102) = 0 setuid32(102) = 0 rt_sigaction(SIGTERM, {SIG_IGN}, {SIG_DFL}, 8) = 0 time(NULL) = 1228914090 open("/colas/exim4/db/retry.lockfile", O_RDWR|O_LARGEFILE) = 5 alarm(60) = 0 fcntl64(5, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=0, len=0}, 0xbfc64bc4) = 0 alarm(0) = 60 open("/proc/stat", O_RDONLY) = 8 fstat64(8, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7ff3000 read(8, "cpu 11344351 160 2202934 119297"..., 4096) = 789 read(8, "", 4096) = 0 close(8) = 0 munmap(0xb7ff3000, 4096) = 0 stat64("/colas/exim4/db/retry", {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0 open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 read(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 512) = 512 close(8) = 0 stat64("DB_CONFIG", 0xbfc6468c) = -1 ENOENT (No such file or directory) open("DB_CONFIG", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory) stat64("/var/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0 gettimeofday({1228914090, 239287}, NULL) = 0 stat64("__db.002", 0xbfc6472c) = -1 ENOENT (No such file or directory) open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 fstat64(8, {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0 pread64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096, 0) = 4096 pread64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0(\0\305\2\0\2"..., 4096, 598016) = 4096 time(NULL) = 1228914090 pwrite64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096, 0) = 4096 pwrite64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0*\0\367\1\0\2"..., 4096, 598016) = 4096 fdatasync(8) = 0 close(8) = 0 open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8 fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 fdatasync(8) = 0 close(8) = 0 close(5) = 0 time(NULL) = 1228914090 pipe([5, 8]) = 0 rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb7fe2708) = 5839 close(5) = 0 fcntl64(8, F_GETFL) = 0x1 (flags O_WRONLY) fstat64(8, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7ff3000 _llseek(8, 0, 0xbfc64b3c, SEEK_CUR) = -1 ESPIPE (Illegal seek) write(8, "Auto-Submitted: auto-replied\nFro"..., 990) = 990 close(8) = 0 munmap(0xb7ff3000, 4096) = 0 waitpid(5839, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0) = 5839 --- SIGCHLD (Child exited) @ 0 (0) --- rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0 open("/colas/exim4/input/r/hdr.24501", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0640) = 5 fchown32(5, 102, 102) = 0 fchmod(5, 0640) = 0 fcntl64(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE) fstat64(5, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7ff3000 _llseek(5, 0, [0], SEEK_CUR) = 0 write(5, "1LAOLr-0006ME-8o-H\nexim 102 102\n"..., 403) = 403 fstat64(5, {st_mode=S_IFREG|0640, st_size=403, ...}) = 0 write(5, "250P Received: from [10.240.229."..., 1296) = 1296 fsync(5) = 0 fstat64(5, {st_mode=S_IFREG|0640, st_size=1699, ...}) = 0 close(5) = 0 munmap(0xb7ff3000, 4096) = 0 rename("/colas/exim4/input/r/hdr.24501", "/colas/exim4/input/r/1LAOLr-0006ME-8o-H") = 0 open("/colas/exim4/input/r/.", O_RDONLY|O_LARGEFILE) = 5 fsync(5) = 0 close(5) = 0 close(4) = 0 munmap(0xb7ff4000, 4096) = 0 unlink("/colas/exim4/input/r/1LAOLr-0006ME-8o-J") = -1 ENOENT (No such file or directory) close(3) = 0 exit_group(0) = ? Process 24501 detached Thanks. -- Configure bugmail: http://bugs.exim.org/userprefs.cgi?tab=email -- ## List details at http://lists.exim.org/mailman/listinfo/exim-dev Exim details at http://www.exim.org/ ##
