Hi Guys,
We're trying to run a pure 64-bit environment (the debian etch
amd64 port on both host and guests) but we're having serious
problems running our server applications on the guests. To my
untrained eyes, it looks like a process calls clone() and then
its child and itself get killed off by a SIGSEGV. I've included
some straces down below. Any insight or direction anyone can offer
would be much appreciated. A solution that doesn't involve changing
the host kernel wins full points! :-)
We tested this stuff using host:
* debian stock 2.6.18-3-amd64 kernel
and guests:
* 2.6.20 from kernel.org
* 2.6.18 with debian patches applied
Most of the testing was done using the default configuration
(ARCH=um make defconfig) but statically linked. We tried a few
other configurations as well but the problem remained.
We usually run UML instances inside chroot jails, but we've
also tested all this stuff in the wild with:
./vmlinux umid=tuff mem=160M ubda=fs.cow,fs.base \
ubdb=swapfile eth0=tuntap,ituff con=pts ssl=pts uml_dir=tmp
Startup output looks like:
Checking that ptrace can change system call numbers...OK
Checking syscall emulation patch for ptrace...missing
Checking for tmpfs mount on /dev/shm...OK
Checking PROT_EXEC mmap in /dev/shm/...OK
Checking for the skas3 patch in the host:
- /proc/mm...not found
- PTRACE_FAULTINFO...not found
- PTRACE_LDT...not found
UML running in SKAS0 mode
Checking that ptrace can change system call numbers...OK
Checking syscall emulation patch for ptrace...missing
The server software that we've been testing with (Asterisk and
Apache) are standard debian packages that seem to work fine
on the host platform. We also compiled Asterisk from its
original source on a guest and tried that as well. All this
stuff (configurations, packages, etc) works fine for us on x86.
I'm including three sample strace outputs:
(1) A trace taken from the host when sshd is the only server
running on the guest.
(2) A guest trace of Asterisk dying.
(3) A guest trace of Apache running. Unlike Asterisk, Apache
doesn't get killed. I reckon this is because it
registers a signal handler, but what do I know. :-)
I've got plenty more traces but the other ones tend to be
very verbose.
(1) Sample trace of the guest doing nothing much:
...
--- SIGALRM (Alarm clock) @ 0 (0) ---
setitimer(ITIMER_REAL, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={0, 10000}, it_value={0, 10000}}, NULL)
= 0
rt_sigprocmask(SIG_UNBLOCK, [USR1], [USR1 ALRM WINCH IO], 8) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
setitimer(ITIMER_REAL, {it_interval={0, 10000}, it_value={0, 10000}}, NULL) = 0
rt_sigreturn(0) = -1 EINTR (Interrupted system call)
nanosleep({10, 0}, 0) = ? ERESTART_RESTARTBLOCK (To be restarted)
--- SIGALRM (Alarm clock) @ 0 (0) ---
...
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(5627, [{WIFSTOPPED(s) && WSTOPSIG(s) == 133}], WSTOPPED, NULL) = 5627
ptrace(PTRACE_GETREGS, 5627, 0, 0x60e9f188) = 0
ptrace(PTRACE_GETFPREGS, 5627, 0, 0x60e9f260) = 0
ptrace(PTRACE_POKEUSER, 5627, 8*ORIG_RAX, 0x27) = 0
ptrace(PTRACE_SYSCALL, 5627, 0, SIG_0) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(5627, [{WIFSTOPPED(s) && WSTOPSIG(s) == 133}], WSTOPPED, NULL) = 5627
ptrace(PTRACE_SETREGS, 5627, 0, 0x60e9f188) = 0
ptrace(PTRACE_SETFPREGS, 5627, 0, 0x60e9f260) = 0
ptrace(PTRACE_SYSCALL, 5627, 0, SIG_0) = 0
...
(2) Sample trace of Asterisk's demise:
1785 clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x4001f640)
= 1786
1785 exit_group(0) = ?
1786 setsid() = 1786
1786 chdir("/") = 0
1786 open("/dev/null", O_RDWR) = 3
1786 fstat(3, {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 3), ...}) = 0
1786 dup2(3, 0) = 0
1786 dup2(3, 1) = 1
1786 dup2(3, 2) = 2
1786 close(3) = 0
1786 unlink("/var/run/asterisk/asterisk.pid") = 0
1786 open("/var/run/asterisk/asterisk.pid", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 3
1786 fstat(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
1786 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
= 0x40019000
1786 write(3, "1786\n", 5) = 5
1786 close(3) = 0
1786 munmap(0x40019000, 4096) = 0
1786 mmap(NULL, 266240, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40,
-1, 0) = 0x40020000
1786 mprotect(0x40020000, 4096, PROT_NONE) = 0
1786 clone(child_stack=0x40060280,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID|CLONE_DETACHED,
parent_tidptr=0x400609f0, tls=0x40060960, child_tidptr=0x400609f0) = 1787
1786 nanosleep({0, 100000}, <unfinished ...>
1787 --- SIGSEGV (Segmentation fault) @ 0 (0) ---
(3) Sample trace of Apache:
...
945 rt_sigaction(SIGSEGV, {0x43ace0, [], SA_RESTORER|SA_ONESHOT, 0x4113f410},
NULL, 8) = 0
945 rt_sigaction(SIGBUS, {0x43ace0, [], SA_RESTORER|SA_ONESHOT, 0x4113f410},
NULL, 8) = 0
...
945 select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945 clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x40025a60)
= 961
945 wait4(-1, 0x7f7fc3267c, WNOHANG|WSTOPPED, NULL) = 0
945 select(0, NULL, NULL, NULL, {1, 0} <unfinished ...>
961 rt_sigaction(SIGTERM, {0x446b10, [], SA_RESTORER|SA_INTERRUPT,
0x4113f410}, {0x444fd0, [], SA_RESTORER, 0x4113f410}, 8) = 0
961 geteuid() = 0
961 setgid(33) = 0
961 open("/proc/sys/kernel/ngroups_max", O_RDONLY) = 8
961 read(8, "65536\n", 31) = 6
961 close(8) = 0
961 open("/etc/group", O_RDONLY) = 8
961 fcntl(8, F_GETFD) = 0
961 fcntl(8, F_SETFD, FD_CLOEXEC) = 0
961 lseek(8, 0, SEEK_CUR) = 0
961 fstat(8, {st_mode=S_IFREG|0644, st_size=485, ...}) = 0
961 mmap(NULL, 485, PROT_READ, MAP_SHARED, 8, 0) = 0x40019000
961 lseek(8, 485, SEEK_SET) = 485
961 fstat(8, {st_mode=S_IFREG|0644, st_size=485, ...}) = 0
961 munmap(0x40019000, 485) = 0
961 close(8) = 0
961 setgroups(1, [33]) = 0
961 geteuid() = 0
961 setuid(33) = 0
961 rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV USR2 PIPE SYS
RTMIN RT_1], NULL, 8) = 0
961 mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40,
-1, 0) = 0x43981000
961 mprotect(0x43981000, 4096, PROT_NONE) = 0
961 clone(child_stack=0x44181280,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID|CLONE_DETACHED,
parent_tidptr=0x441819f0, tls=0x44181960, child_tidptr=0x441819f0) = 962
961 rt_sigprocmask(SIG_UNBLOCK, [TERM], NULL, 8) = 0
961 rt_sigaction(SIGTERM, {0x445040, [], SA_RESTORER|SA_INTERRUPT,
0x4113f410}, {0x446b10, [], SA_RESTORER|SA_INTERRUPT, 0x4113f410}, 8) = 0
961 read(4, <unfinished ...>
962 --- SIGSEGV (Segmentation fault) @ 0 (0) ---
962 chdir("/etc/apache2") = 0
962 rt_sigaction(SIGSEGV, {SIG_DFL}, {SIG_DFL}, 8) = 0
962 kill(961, SIGSEGV) = 0
961 <... read resumed> 0x7f7fc32657, 1) = ? ERESTARTSYS (To be restarted)
961 --- SIGSEGV (Segmentation fault) @ 0 (0) ---
962 +++ killed by SIGSEGV +++
945 <... select resumed> ) = ? ERESTARTNOHAND (To be restarted)
945 --- SIGCHLD (Child exited) @ 0 (0) ---
945 select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945 wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV}], WNOHANG|WSTOPPED,
NULL) = 961
945 write(6, "[Tue Feb 20 02:46:59 2007] [notice] child pid 961 exit signal
Segmentation fault (11)\n", 86) = 86
945 wait4(-1, 0x7f7fc3267c, WNOHANG|WSTOPPED, NULL) = 0
945 select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945 clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x40025a60)
= 963
... -- a la groundhog day
Even if you can't help, thanks for reading this far!
jez
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
User-mode-linux-user mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-user