unreapable zombies, maybe futex+ptrace+exit

2006-12-18 Thread Albert Cahalan

I have a fun little test program for people to try. It creates zombies
that persist until reboot, despite being reparented to init. Sometimes
it creates processes that block SIGKILL, sit around with pending SIGKILL,
or both.

You'll want:

a. either assembly skills or the ability to run 32-bit x86 code
b. the procps-3.2.7 release, so you can easily view the results
c. the strace program, or some other ptrace-based debugger
d. a recent kernel -- updated Fedora 5 or mainline 2.6.19 will do

Compile like this:
gcc -m32 -std=gnu99 -O2 -o cloninator cloninator.c

Run like this:
strace -f -F ./cloninator

Let the program run for a bit, then do one of a few fun things:

a. hit ^C to stop it
b. run "killall -9 cloninator" to stop it
c. send SIGKILL to the process group (pass the negated process group ID as the PID)
d. send SIGKILL to all your processes (use -1 as PID)

View the results:
ps -Ccloninator -mwostat,ppid,pid,tid,nlwp,pending,sigmask,sigignore,caught,wch

I suggest trying other debuggers. Under a debugger I can't share,
thousands of messed-up zombies get created in under a minute.
With strace, you'll probably get half a dozen after a couple of tries.
You might try gdb, fenris, nightview, and anything else which
uses ptrace to observe something. (Ideas?) Be sure to specify any
options needed to follow child processes; you may need to comment
out the CLONE_VFORK case for wimpy debuggers.

BTW, we can probably now answer this question:

$ egrep -i 'todo.*safe' kernel/*.c
kernel/exit.c:  // TODO: is this safe?
kernel/exit.c:  // TODO: is this safe?

///

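#include <sys/mman.h>
#include <signal.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <asm/unistd.h>

#include <sys/ipc.h>
#include <sys/shm.h>

#include <stdbool.h>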

// early_write is a raw write helper that bypasses libc by issuing the
// write system call directly through int $0x80 (32-bit x86 only).
// The syscall body is compiled out with "#if 0", so the function is
// currently a no-op and the p_* helpers that call it print nothing.
// Change "#if 0" to "#if 1" to get the debug output.
//   fd    - file descriptor to write to
//   buf   - bytes to write
//   count - number of bytes in buf
// The syscall result is discarded; errors are not reported.
static void early_write(int fd, const void *buf, size_t count)
{
#if 0
   unsigned long eax = __NR_write;
   // push and pop because -fPIC probably needs ebx for the GOT base pointer
   __asm__ __volatile__(
   "push %%ebx ; push %1 ; pop %%ebx ; int $0x80; pop %%ebx"
   :"=a"(eax)
   :"r"(fd),"c"(buf),"d"(count),"0"(eax)
   :"memory"
   );
#else
   // Keep the disabled build free of unused-parameter warnings.
   (void)fd;
   (void)buf;
   (void)count;
#endif
}

// p_str writes the NUL-terminated string s to stderr through
// early_write; the terminator itself is not written.
// s is only read, so it is taken as const and string literals can be
// passed without discarding qualifiers.
static void p_str(const char *s)
{
   early_write(STDERR_FILENO, s, strlen(s));
}

// p_hex writes the low 32 bits of u to stderr as exactly eight
// lowercase hexadecimal digits, most significant digit first.
static void p_hex(unsigned long u)
{
   char digits[] = "0123456789abcdef";
   char out[9];
   out[8] = '\0';
   // Walk the nibbles from the highest (shift 28) down to the lowest.
   for (int pos = 0; pos < 8; pos++)
   {
      int shift = (7 - pos) * 4;
      out[pos] = digits[(u >> shift) & 15];
   }
   // Only the eight digits are written, not the terminator.
   early_write(STDERR_FILENO, out, 8);
}

// p_dec writes u to stderr in decimal with no padding; zero prints
// as a single "0".
// The buffer is sized from the width of unsigned long (three decimal
// digits per byte is a safe upper bound), so the function stays in
// bounds on builds where unsigned long is wider than 32 bits, and
// digits are placed by index so no pointer ever steps before the
// start of the array.
static void p_dec(unsigned long u)
{
   char buf[3 * sizeof u + 1];
   size_t pos = sizeof buf - 1;
   size_t count = 0;

   buf[pos] = '\0';
   // Emit digits from least to most significant, filling the buffer
   // backwards; do/while guarantees at least one digit for u == 0.
   do
   {
      buf[--pos] = (char)('0' + u % 10);
      u /= 10;
      count++;
   } while (u);

   early_write(STDERR_FILENO, buf + pos, count);
}


// Futex operation codes, presumably passed as the op argument of futex().
// NOTE(review): values look like FUTEX_WAIT/FUTEX_WAKE from <linux/futex.h> — confirm.
#define FUTEX_WAIT  0
#define FUTEX_WAKE  1


// lock_t is the lock word: a plain int that the lock_* helpers update
// with atomic instructions.
typedef int lock_t;

// Static initializer for a lock_t; the same value init_lock() stores.
#define LOCK_INITIALIZER 0

// init_lock resets the lock word *l to LOCK_INITIALIZER (zero).
static inline void init_lock(lock_t *l)
{
   *l = LOCK_INITIALIZER;
}

// lock_add atomically adds val to *l with a locked xaddl and returns
// the value that was stored, i.e. the previous contents plus val.
static inline int lock_add(lock_t* l, int val)
{
   int prev = val;
   // xaddl exchanges and adds: afterwards *l holds the sum and prev
   // holds what *l contained before the instruction.
   __asm__ __volatile__ (
   "lock; xaddl %1, %0;"
   : "+m" (*l), "+r" (prev)
   :
   : "memory");
   return prev + val;
}

// lock_bts_high_bit atomically sets bit 31 of *l with a locked btsl.
// Returns true when the bit was previously clear (this call set it)
// and false when it was already set.
static inline bool lock_bts_high_bit(lock_t* l)
{
   bool was_clear;
   // btsl copies the old bit into the carry flag; setnc stores 1 when
   // the carry is clear. "q" requests a byte-addressable register.
   __asm__ __volatile__ (
   "lock; btsl $31, %0;\n\t"
   "setnc %1;"
   : "+m" (*l), "=q" (was_clear)
   :
   : "memory");
   return was_clear;
}

// futex issues the futex system call directly through int $0x80
// (32-bit x86 ABI), without going through libc.
//   uaddr   - address of the futex word
//   op      - operation code (FUTEX_WAIT or FUTEX_WAKE)
//   val     - operation-specific value
//   timeout - forwarded to the kernel as the 4th argument; pass NULL
//             for no timeout
//   uaddr2, val3 - accepted for signature compatibility but NOT
//             forwarded, so only operations that ignore the 5th and
//             6th arguments may be used
// Returns the raw value the kernel leaves in eax (a negative errno
// value on failure); errno is not set.
static int futex(int* uaddr, int op, int val,
                 const struct timespec* timeout, int* uaddr2, int val3)
{
   (void)uaddr2;
   (void)val3;
   int eax = __NR_futex;
   // push and pop around the call because -fPIC may reserve ebx.
   // The kernel takes the 4th argument from esi; binding timeout to it
   // ("S") keeps FUTEX_WAIT from reading a stale register as the
   // timeout pointer.
   __asm__ __volatile__(
   "push %%ebx ; push %1 ; pop %%ebx ; int $0x80; pop %%ebx"
   :"=a"(eax)
   :"r"(uaddr),"c"(op),"d"(val),"0"(eax),"S"(timeout)
   :"memory"
   );
   return eax;
}


// lock will wait for and lock a mutex
static void lock(lock_t* l)
{
   // Check the mutex and set held bit
   if (lock_bts_high_bit(l))
   {
   // Got the mutex
   return;
   }

   // Increment wait count
   lock_add(l, 1);

   while (true)
   {
   // Check the mutex and set held bit
   if (lock_bts_high_bit(l))
   {
   // Got the mutex, decrement wait count
   lock_add(l, -1);
   return;
   }

   int val = *l;
   // Ensure the mutex wasn't given up since the check
   if (!(val & 0x8000))
   continue;

   

unreapable zombies, maybe futex+ptrace+exit

2006-12-18 Thread Albert Cahalan

I have a fun little test program for people to try. It creates zombies
that persist until reboot, despite being reparented to init. Sometimes
it creates processes that block SIGKILL, sit around with pending SIGKILL,
or both.

You'll want:

a. either assembly skills or the ability to run 32-bit x86 code
b. the procps-3.2.7 release, so you can easily view the results
c. the strace program, or some other ptrace-based debugger
d. a recent kernel -- updated Fedora 5 or mainline 2.6.19 will do

Compile like this:
gcc -m32 -std=gnu99 -O2 -o cloninator cloninator.c

Run like this:
strace -f -F ./cloninator

Let the program run for a bit, then do one of a few fun things:

a. hit ^C to stop it
b. run "killall -9 cloninator" to stop it
c. send SIGKILL to the process group (pass the negated process group ID as the PID)
d. send SIGKILL to all your processes (use -1 as PID)

View the results:
ps -Ccloninator -mwostat,ppid,pid,tid,nlwp,pending,sigmask,sigignore,caught,wch

I suggest trying other debuggers. Under a debugger I can't share,
thousands of messed-up zombies get created in under a minute.
With strace, you'll probably get half a dozen after a couple of tries.
You might try gdb, fenris, nightview, and anything else which
uses ptrace to observe something. (Ideas?) Be sure to specify any
options needed to follow child processes; you may need to comment
out the CLONE_VFORK case for wimpy debuggers.

BTW, we can probably now answer this question:

$ egrep -i 'todo.*safe' kernel/*.c
kernel/exit.c:  // TODO: is this safe?
kernel/exit.c:  // TODO: is this safe?

///

#include <sys/mman.h>
#include <signal.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <asm/unistd.h>

#include <sys/ipc.h>
#include <sys/shm.h>

#include <stdbool.h>

// early_write is a raw write helper that bypasses libc by issuing the
// write system call directly through int $0x80 (32-bit x86 only).
// The syscall body is compiled out with "#if 0", so the function is
// currently a no-op and the p_* helpers that call it print nothing.
// Change "#if 0" to "#if 1" to get the debug output.
//   fd    - file descriptor to write to
//   buf   - bytes to write
//   count - number of bytes in buf
// The syscall result is discarded; errors are not reported.
static void early_write(int fd, const void *buf, size_t count)
{
#if 0
   unsigned long eax = __NR_write;
   // push and pop because -fPIC probably needs ebx for the GOT base pointer
   __asm__ __volatile__(
   "push %%ebx ; push %1 ; pop %%ebx ; int $0x80; pop %%ebx"
   :"=a"(eax)
   :"r"(fd),"c"(buf),"d"(count),"0"(eax)
   :"memory"
   );
#else
   // Keep the disabled build free of unused-parameter warnings.
   (void)fd;
   (void)buf;
   (void)count;
#endif
}

// p_str writes the NUL-terminated string s to stderr through
// early_write; the terminator itself is not written.
// s is only read, so it is taken as const and string literals can be
// passed without discarding qualifiers.
static void p_str(const char *s)
{
   early_write(STDERR_FILENO, s, strlen(s));
}

// p_hex writes the low 32 bits of u to stderr as exactly eight
// lowercase hexadecimal digits, most significant digit first.
// This restores the string-literal quotes and the ">>" / "&" operators
// that were stripped from this copy of the code.
static void p_hex(unsigned long u)
{
   char digits[] = "0123456789abcdef";
   char out[9];
   out[8] = '\0';
   // Walk the nibbles from the highest (shift 28) down to the lowest.
   for (int pos = 0; pos < 8; pos++)
   {
      int shift = (7 - pos) * 4;
      out[pos] = digits[(u >> shift) & 15];
   }
   // Only the eight digits are written, not the terminator.
   early_write(STDERR_FILENO, out, 8);
}

// p_dec writes u to stderr in decimal with no padding; zero prints
// as a single "0".
// The buffer is sized from the width of unsigned long (three decimal
// digits per byte is a safe upper bound), so the function stays in
// bounds on builds where unsigned long is wider than 32 bits, and
// digits are placed by index so no pointer ever steps before the
// start of the array.
static void p_dec(unsigned long u)
{
   char buf[3 * sizeof u + 1];
   size_t pos = sizeof buf - 1;
   size_t count = 0;

   buf[pos] = '\0';
   // Emit digits from least to most significant, filling the buffer
   // backwards; do/while guarantees at least one digit for u == 0.
   do
   {
      buf[--pos] = (char)('0' + u % 10);
      u /= 10;
      count++;
   } while (u);

   early_write(STDERR_FILENO, buf + pos, count);
}


// Futex operation codes, presumably passed as the op argument of futex().
// NOTE(review): values look like FUTEX_WAIT/FUTEX_WAKE from <linux/futex.h> — confirm.
#define FUTEX_WAIT  0
#define FUTEX_WAKE  1


// lock_t is the lock word: a plain int that the lock_* helpers update
// with atomic instructions.
typedef int lock_t;

// Static initializer for a lock_t; the same value init_lock() stores.
#define LOCK_INITIALIZER 0

// init_lock resets the lock word *l to LOCK_INITIALIZER (zero).
static inline void init_lock(lock_t *l)
{
   *l = LOCK_INITIALIZER;
}

// lock_add atomically adds val to *l with a locked xaddl and returns
// the value that was stored, i.e. the previous contents plus val.
// This restores the quotes around the asm template and constraints
// that were stripped from this copy of the code.
static inline int lock_add(lock_t* l, int val)
{
   int prev = val;
   // xaddl exchanges and adds: afterwards *l holds the sum and prev
   // holds what *l contained before the instruction.
   __asm__ __volatile__ (
   "lock; xaddl %1, %0;"
   : "+m" (*l), "+r" (prev)
   :
   : "memory");
   return prev + val;
}

// lock_bts_high_bit atomically sets bit 31 of *l with a locked btsl.
// Returns true when the bit was previously clear (this call set it)
// and false when it was already set.
// This restores the quotes around the asm template and constraints
// that were stripped from this copy of the code.
static inline bool lock_bts_high_bit(lock_t* l)
{
   bool was_clear;
   // btsl copies the old bit into the carry flag; setnc stores 1 when
   // the carry is clear. "q" requests a byte-addressable register.
   __asm__ __volatile__ (
   "lock; btsl $31, %0;\n\t"
   "setnc %1;"
   : "+m" (*l), "=q" (was_clear)
   :
   : "memory");
   return was_clear;
}

// futex issues the futex system call directly through int $0x80
// (32-bit x86 ABI), without going through libc.
//   uaddr   - address of the futex word
//   op      - operation code (FUTEX_WAIT or FUTEX_WAKE)
//   val     - operation-specific value
//   timeout - forwarded to the kernel as the 4th argument; pass NULL
//             for no timeout
//   uaddr2, val3 - accepted for signature compatibility but NOT
//             forwarded, so only operations that ignore the 5th and
//             6th arguments may be used
// Returns the raw value the kernel leaves in eax (a negative errno
// value on failure); errno is not set.
// This restores the quotes around the asm template and constraints
// that were stripped from this copy of the code.
static int futex(int* uaddr, int op, int val,
                 const struct timespec* timeout, int* uaddr2, int val3)
{
   (void)uaddr2;
   (void)val3;
   int eax = __NR_futex;
   // push and pop around the call because -fPIC may reserve ebx.
   // The kernel takes the 4th argument from esi; binding timeout to it
   // ("S") keeps FUTEX_WAIT from reading a stale register as the
   // timeout pointer.
   __asm__ __volatile__(
   "push %%ebx ; push %1 ; pop %%ebx ; int $0x80; pop %%ebx"
   :"=a"(eax)
   :"r"(uaddr),"c"(op),"d"(val),"0"(eax),"S"(timeout)
   :"memory"
   );
   return eax;
}


// lock will wait for and lock a mutex
static void lock(lock_t* l)
{
   // Check the mutex and set held bit
   if (lock_bts_high_bit(l))
   {
   // Got the mutex
   return;
   }

   // Increment wait count
   lock_add(l, 1);

   while (true)
   {
   // Check the mutex and set held bit
   if (lock_bts_high_bit(l))
   {
   // Got the mutex, decrement wait count
   lock_add(l, -1);
   return;
   }

   int val = *l;
   // Ensure the mutex wasn't given up since the check