Attached, for possible inclusion in the Linux kernel, is a patch against the
linux-2.4.0-test8 kernel that makes core dumps include the thread-specific
information gdb needs to debug multithreaded programs.  Currently only ELF
binaries are supported, but other binary formats can easily add their own
functions to support this feature if desired.

Affected Files:

fs/binfmt_elf.c 
fs/exec.c 
include/linux/binfmts.h 
include/linux/sysctl.h
kernel/sysctl.c 

Summary of the changes: 

To make our changes to ELF core dumping optional, we added an integer named
dumptcores to the proc interface.  It is located at
/proc/sys/kernel/dumptcores.

We added a function pointer named tcore_dump to the end of the
linux_binfmt structure.

In the initializer of the elf_format structure, we set tcore_dump to point
at elf_tcore_dump.  Per the C standard, binfmt initializers that do not
mention this field leave it implicitly set to NULL.

We added a tcore_list_node structure that holds the thread-specific
information that we will output. 

We added several list-manipulation functions to operate on that list.

We added a check in do_coredump: if the binfmt provides tcore_dump, we
call it.

We added the function elf_tcore_dump, which grabs the registers and other
pertinent information from current and stores them on a linked list for
later use.

We modified elf_core_dump so that it creates an additional PT_NOTE segment
in the core file and writes all of the thread-specific information to
that segment.

Usage: 

Run "echo 1 > /proc/sys/kernel/dumptcores" to turn on multithreaded ELF
core dumping.  Any other value results in the previous style of core
dump.  The default value is 0.
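
The flag can also be set programmatically.  The fragment below is only an
illustrative sketch: it assumes the glibc sysctl(2) wrapper available in
this era, and it defines KERN_DUMPTCORES locally (with the value this
patch adds to include/linux/sysctl.h) in case the installed headers are
not patched.

    /* Illustrative only: enable multithreaded core dumps via sysctl(2).
     * Equivalent to "echo 1 > /proc/sys/kernel/dumptcores".
     */
    #include <stdio.h>
    #include <sys/sysctl.h>

    #ifndef KERN_DUMPTCORES
    #define KERN_DUMPTCORES 17      /* value added by this patch */
    #endif

    int main(void)
    {
            int name[2] = { CTL_KERN, KERN_DUMPTCORES };
            int enable = 1;

            if (sysctl(name, 2, NULL, NULL, &enable, sizeof(enable)) < 0) {
                    perror("sysctl");
                    return 1;
            }
            return 0;
    }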

The feature can safely be turned on at any time.  However, if it is
turned off while a multithreaded program is in the middle of dumping a
core, the thread information already collected may be leaked.  Therefore,
once enabled it should not be disabled.

After this, run gdb on the resulting core file as usual; all of the
thread information will be available through gdb's "thread" commands
(for example "info threads").

Tests: 

We tested this patch on programs with several hundred threads running
concurrently.  It was also tested by Spinway, Inc. on some
of their code. 
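
As an illustration only (this program is not part of the patch), a small
test case along the following lines can be used to exercise the feature;
the file name, thread count, and compile command are arbitrary:

    /* Illustrative test program (not part of the patch): spawn a few
     * worker threads, then fault in the main thread so the kernel
     * dumps a core.
     */
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define NTHREADS 8

    static void *worker(void *arg)
    {
            (void)arg;
            /* Stay alive so the thread is present when the core is dumped. */
            for (;;)
                    sleep(1);
            return NULL;
    }

    int main(void)
    {
            pthread_t tid[NTHREADS];
            int i;

            for (i = 0; i < NTHREADS; i++) {
                    if (pthread_create(&tid[i], NULL, worker, NULL) != 0) {
                            perror("pthread_create");
                            exit(1);
                    }
            }

            sleep(1);                       /* let the workers start */
            *(volatile int *)0 = 1;         /* force SIGSEGV and a core dump */
            return 0;
    }

Compile with something like "gcc -o tcore-test tcore-test.c -lpthread",
make sure the core size limit is not zero ("ulimit -c unlimited"), run
the program, and then load the core in gdb and inspect the threads with
"info threads" and "thread <n>".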

Please respond to us with any suggestions/comments about this patch.

Jason Villarreal ([EMAIL PROTECTED])
John Jones ([EMAIL PROTECTED])
diff -b -r -u linux/fs/binfmt_elf.c linux-changed/fs/binfmt_elf.c
--- linux/fs/binfmt_elf.c       Tue Jul 11 15:43:45 2000
+++ linux-changed/fs/binfmt_elf.c       Thu Jul 27 14:45:03 2000
@@ -9,6 +9,16 @@
  * Copyright 1993, 1994: Eric Youngdale ([EMAIL PROTECTED]).
  */
 
+/*
+ * Added support for dumping multithreaded cores.
+ *
+ *  John Jones (jjones @cs.ucr.edu)
+ *  Jason Villarreal ([EMAIL PROTECTED])
+ *  University of California, Riverside
+ *
+ *  Supported by funding from Spinway Inc.
+ */
+
 #include <linux/module.h>
 
 #include <linux/fs.h>
@@ -39,6 +49,13 @@
 
 #include <linux/elf.h>
 
+/* Forward declarations used for multithreaded core dump */
+struct tcore_list_node;
+
+static void append_tcore(struct tcore_list_node* to_append);
+static void remove_tcore(struct tcore_list_node* to_remove);
+static int elf_tcore_dump(long signr, struct pt_regs* regs);
+
 static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
 static int load_elf_library(struct file*);
 extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
@@ -64,7 +81,7 @@
 #define ELF_PAGEALIGN(_v) (((_v) + ELF_EXEC_PAGESIZE - 1) & ~(ELF_EXEC_PAGESIZE - 1))
 
 static struct linux_binfmt elf_format = {
-       NULL, THIS_MODULE, load_elf_binary, load_elf_library, elf_core_dump, ELF_EXEC_PAGESIZE
+  NULL, THIS_MODULE, load_elf_binary, load_elf_library, elf_core_dump, ELF_EXEC_PAGESIZE, elf_tcore_dump
 };
 
 static void set_brk(unsigned long start, unsigned long end)
@@ -965,6 +982,38 @@
 #define DUMP_SEEK(off) \
        if (!dump_seek(file, (off))) \
                goto end_coredump;
+
+/*
+  This is the variable that can be set in proc to determine if we want to
+    dump a multithreaded core or not.  A value of 1 means yes while any
+    other value means no.
+
+    It is located at /proc/sys/kernel/dumptcores
+*/
+
+int dumptcores = 0;
+
+/*  This is the head and tail of a linked list of tcores (thread
+    specific information)
+*/
+
+struct tcore_list_node *tcore_head = NULL, *tcore_tail = NULL;
+
+/* We also need a lock for the linked list in case two people are
+   dumping the core at the same time  */
+
+spinlock_t tcore_lock = SPIN_LOCK_UNLOCKED;
+
+/*  Here is the actual tcore structure */
+
+struct tcore_list_node
+{
+  struct elf_prstatus info;
+  struct tcore_list_node *next;
+  struct tcore_list_node *prev;
+};
+
+
 /*
  * Actual dumper
  *
@@ -972,6 +1021,7 @@
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
+
 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
 {
        int has_dumped = 0;
@@ -989,6 +1039,61 @@
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct elf_prpsinfo psinfo;     /* NT_PRPSINFO */
 
+       int num_tcores = 0; 
+       struct memelfnote *tcore_notes = NULL; 
+
+       if (dumptcores == 1)  {
+
+            struct tcore_list_node *traverse;
+         
+            /* Let's calculate the number of TCores we have. */
+            spin_lock(&tcore_lock);
+            traverse = tcore_head;
+            while(traverse != NULL)  {
+                 if (traverse->info.pr_pgrp == current->pgrp)
+                      ++num_tcores;
+                 traverse = traverse->next;
+            }
+            spin_unlock(&tcore_lock);
+
+            tcore_notes = kmalloc(sizeof(struct memelfnote)*num_tcores, GFP_KERNEL);
+            if (!tcore_notes)
+                 return has_dumped;
+            
+            /* Construct our note sections - don't delete the tcores yet! */
+            {
+                 i = 0;
+            
+                 spin_lock(&tcore_lock);
+                 traverse = tcore_head;
+                 while(traverse != NULL)  {
+                      if (traverse->info.pr_pgrp == current->pgrp)  {
+                           /* The size of the name will be the length of
+                              .reg/ (5) plus the length of the PID
+                              plus 1 for the null character
+                           */
+                           tcore_notes[i].name = kmalloc(20, GFP_KERNEL);
+                           if (!tcore_notes[i].name)  {
+                                spin_unlock(&tcore_lock);
+                                return has_dumped;
+                           }
+                           sprintf((char*)(tcore_notes[i].name),
+                                   ".reg/%d", traverse->info.pr_pid);
+               
+                           /* specify that these are registers */
+               
+                           tcore_notes[i].type = NT_PRSTATUS; 
+                           tcore_notes[i].datasz = sizeof(traverse->info);
+                           tcore_notes[i].data = &(traverse->info);
+                           i++;
+               
+                      }
+                      traverse = traverse->next;
+                 }
+                 spin_unlock(&tcore_lock);       
+            }
+       }  /* End if(dumptcores == 1) */
+
        segs = current->mm->map_count;
 
 #ifdef DEBUG
@@ -1011,7 +1116,10 @@
        elf.e_flags = 0;
        elf.e_ehsize = sizeof(elf);
        elf.e_phentsize = sizeof(struct elf_phdr);
-       elf.e_phnum = segs+1;           /* Include notes */
+       if (dumptcores == 1)
+            elf.e_phnum = segs+1 + 1;  /* Include notes and tcore notes */
+       else
+            elf.e_phnum = segs+1;      /* Just include notes */
        elf.e_shentsize = 0;
        elf.e_shnum = 0;
        elf.e_shstrndx = 0;
@@ -1023,8 +1131,13 @@
        current->flags |= PF_DUMPCORE;
 
        DUMP_WRITE(&elf, sizeof(elf));
+
        offset += sizeof(elf);                          /* Elf header */
-       offset += (segs+1) * sizeof(struct elf_phdr);   /* Program headers */
+
+       if(dumptcores == 1)
+            offset += (segs+1+1) * sizeof(struct elf_phdr);/* Program headers */
+       else
+            offset += (segs+1)*sizeof(struct elf_phdr);
 
        /*
         * Set up the notes in similar form to SVR4 core dumps made
@@ -1145,6 +1258,32 @@
                DUMP_WRITE(&phdr, sizeof(phdr));
        }
 
+       if (dumptcores == 1)  {
+            /*
+              We need to write out our program header for the tcore
+              information
+            */
+            {
+                 struct elf_phdr phdr;
+                 int sz = 0;
+           
+                 for (i=0; i<num_tcores; i++)
+                      sz += notesize(&tcore_notes[i]);
+           
+                 phdr.p_type = PT_NOTE;
+                 phdr.p_offset = offset;
+                 phdr.p_vaddr = 0;
+                 phdr.p_paddr = 0;
+                 phdr.p_filesz = sz;
+                 phdr.p_memsz = 0;
+                 phdr.p_flags = 0;
+                 phdr.p_align = 0;
+                 
+                 offset += phdr.p_filesz;
+                 DUMP_WRITE(&phdr, sizeof(phdr));
+            }
+       }
+
        /* Page-align dumped data */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
@@ -1170,14 +1309,24 @@
                DUMP_WRITE(&phdr, sizeof(phdr));
        }
 
+       /*  write out the original notes */
+
        for(i = 0; i < numnote; i++)
                if (!writenote(&notes[i], file))
                        goto end_coredump;
 
+       if (dumptcores == 1)  {
+            /* Now we need to dump out our note section */
+            for(i = 0; i < num_tcores; i++)
+                if (!writenote(&tcore_notes[i], file))
+                     goto end_coredump;
+       }
        set_fs(fs);
 
        DUMP_SEEK(dataoff);
 
+       /*  Dump all of the data segments */
+
        for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
                unsigned long addr;
 
@@ -1218,6 +1367,39 @@
 
  end_coredump:
        set_fs(fs);
+
+       if (dumptcores == 1)  {
+            /* Clean up all the memory we allocated in this function
+               and in the tcore list
+            */
+            struct tcore_list_node *traverse;
+            struct tcore_list_node *follow;
+
+            /*  All the names in the note sections */
+            for(i=0; i<num_tcores; i++)
+                 kfree(tcore_notes[i].name);
+         
+            /* Now clean up the tcore_notes themselves */
+         
+            kfree(tcore_notes);
+         
+            /*  Clean up any tcores we don't need anymore */
+         
+            spin_lock(&tcore_lock);
+            traverse = tcore_head;
+            follow = traverse;
+            while (traverse != NULL)  {
+                 if(traverse->info.pr_pgrp == current->pgrp)  {
+                      follow = traverse;
+                      traverse = traverse->next;
+                      remove_tcore(follow);
+                 }
+                 else  {
+                      traverse = traverse->next;
+                 }
+            }
+            spin_unlock(&tcore_lock);     
+       }
        return has_dumped;
 }
 #endif         /* USE_ELF_CORE_DUMP */
@@ -1231,6 +1413,149 @@
 {
        /* Remove the COFF and ELF loaders. */
        unregister_binfmt(&elf_format);
+}
+
+
+/*
+
+  Function : append_tcore
+
+  This performs the simple linked list operation of adding a node to the end of the
+   list.  The tcore should alread be allocated.
+
+*/
+
+void append_tcore(struct tcore_list_node *to_append)
+{
+  /* acquire the lock */
+  spin_lock(&tcore_lock);
+
+  /* Special case of an empty list */
+
+  if (tcore_head == NULL)
+    {
+
+      tcore_head = tcore_tail = to_append;
+      to_append->next = NULL;
+      to_append->prev = NULL;
+
+    }
+  else
+    {
+      tcore_tail->next = to_append;
+      to_append->prev = tcore_tail;
+      to_append->next = NULL;
+      tcore_tail = to_append;
+    }
+
+  /* release the lock */
+  spin_unlock(&tcore_lock);
+
+}
+
+/*
+
+  Function: remove_tcore
+
+  This function will free up the memory associated with one tcore
+ 
+*/
+
+void remove_tcore(struct tcore_list_node *to_remove)
+{
+  /* We should already have locked the list */
+
+  if (tcore_head == to_remove && tcore_tail == to_remove)
+    {
+      kfree(to_remove);
+      tcore_head = tcore_tail = NULL;
+      return;
+    }
+
+  if (to_remove == tcore_head)
+    {
+
+      tcore_head = tcore_head->next;
+      kfree(to_remove);
+      /* There had to be a second element in the list or we would have never come here */
+      tcore_head->prev = NULL;
+      return;
+
+    }
+
+  if (to_remove == tcore_tail)
+    {
+      tcore_tail = tcore_tail->prev;
+      kfree(to_remove);
+      /* There had to be a second element in the list or we would have never come here */
+      tcore_tail->next = NULL;
+      return;
+    }
+
+  /* Just somewhere in the middle */
+
+  to_remove->prev->next = to_remove->next;
+  to_remove->next->prev = to_remove->prev;
+  kfree(to_remove);
+}
+
+/*
+ *
+ * In order to add the specific thread information for the elf file format,
+ *  we need to keep a linked list of every threads pr_status and then
+ *  create a single section for them in the final core file.
+ */
+
+int elf_tcore_dump(long signr, struct pt_regs * regs)
+{
+
+  struct tcore_list_node *next_tcore;
+  
+  /* Check to make sure this type of core dumping is expected */
+  if (dumptcores != 1)
+       return 0;
+
+  /* Now we construct the tcore that will be placed on our linked list */
+
+  next_tcore = kmalloc(sizeof(*next_tcore), GFP_KERNEL);
+  if (!next_tcore)  {
+    printk("Cannot allocate memory for the tcore.  I suggest you turn off this 
+feature.\n");
+    return 0;    
+  }
+
+  /* Now we set up our tcore */
+
+  next_tcore->info.pr_info.si_signo = next_tcore->info.pr_cursig = signr;
+  next_tcore->info.pr_sigpend = current->pending.signal.sig[0];
+  next_tcore->info.pr_sighold = current->blocked.sig[0];
+  next_tcore->info.pr_pid = current->pid;
+  next_tcore->info.pr_ppid = current->p_pptr->pid;
+  next_tcore->info.pr_pgrp = current->pgrp;
+  next_tcore->info.pr_sid = current->session;
+  next_tcore->info.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
+  next_tcore->info.pr_utime.tv_usec = CT_TO_USECS(current->times.tms_utime);
+  next_tcore->info.pr_stime.tv_sec = CT_TO_SECS(current->times.tms_stime);
+  next_tcore->info.pr_stime.tv_usec = CT_TO_USECS(current->times.tms_stime);
+  next_tcore->info.pr_cutime.tv_sec = CT_TO_SECS(current->times.tms_cutime);
+  next_tcore->info.pr_cutime.tv_usec = CT_TO_USECS(current->times.tms_cutime);
+  next_tcore->info.pr_cstime.tv_sec = CT_TO_SECS(current->times.tms_cstime);
+  next_tcore->info.pr_cstime.tv_usec = CT_TO_USECS(current->times.tms_cstime);
+  
+#ifdef ELF_CORE_COPY_REGS
+  ELF_CORE_COPY_REGS(next_tcore->info.pr_reg, regs)
+#else
+    if (sizeof(elf_gregset_t) != sizeof(struct pt_regs))
+    {
+      printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n",
+            (long)sizeof(elf_gregset_t), (long)sizeof(struct pt_regs));
+    }
+    else
+      *(struct pt_regs *)&(next_tcore->info.pr_reg) = *regs;
+#endif
+  
+  /*  Now that we have constructed it, we place it on our list and we are done */
+  append_tcore(next_tcore);
+  return 1;
 }
 
 module_init(init_elf_binfmt)
diff -b -r -u linux/fs/exec.c linux-changed/fs/exec.c
--- linux/fs/exec.c     Wed Jul  5 11:31:01 2000
+++ linux-changed/fs/exec.c     Thu Jul 27 14:38:08 2000
@@ -41,6 +41,7 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -903,8 +904,18 @@
        binfmt = current->binfmt;
        if (!binfmt || !binfmt->core_dump)
                goto fail;
-       if (!current->dumpable || atomic_read(&current->mm->mm_users) != 1)
+       if (!current->dumpable)
+               goto fail;
+        if (atomic_read(&current->mm->mm_users) != 1) {
+               /*  If more than one thread has this memory mapped,
+                *  we cannot dump the core yet, but we can dump
+                *  our thread-specific information */
+                if (binfmt->tcore_dump)
+                       binfmt->tcore_dump(signr, regs); 
+                
                goto fail;
+       }
+       
        current->dumpable = 0;
        if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
                goto fail;
diff -b -r -u linux/include/linux/binfmts.h linux-changed/include/linux/binfmts.h
--- linux/include/linux/binfmts.h       Wed Jul 12 22:01:34 2000
+++ linux-changed/include/linux/binfmts.h       Thu Jul 27 12:56:39 2000
@@ -40,6 +40,7 @@
        int (*load_shlib)(struct file *);
        int (*core_dump)(long signr, struct pt_regs * regs, struct file * file);
        unsigned long min_coredump;     /* minimal dump size */
+        int (*tcore_dump)(long signr, struct pt_regs * regs);
 };
 
 extern int register_binfmt(struct linux_binfmt *);
diff -b -r -u linux/include/linux/sysctl.h linux-changed/include/linux/sysctl.h
--- linux/include/linux/sysctl.h        Mon Jun 19 13:42:43 2000
+++ linux-changed/include/linux/sysctl.h        Thu Jul 27 13:32:09 2000
@@ -81,6 +81,7 @@
        KERN_CAP_BSET=14,       /* int: capability bounding set */
        KERN_PANIC=15,          /* int: panic timeout */
        KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
+       KERN_DUMPTCORES=17,
 
        KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
        KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
diff -b -r -u linux/kernel/sysctl.c linux-changed/kernel/sysctl.c
--- linux/kernel/sysctl.c       Mon Jun 26 12:07:44 2000
+++ linux-changed/kernel/sysctl.c       Thu Jul 27 13:31:17 2000
@@ -41,6 +41,7 @@
 
 /* External variables not in a header file. */
 extern int panic_timeout;
+extern int dumptcores;
 extern int console_loglevel, C_A_D;
 extern int bdf_prm[], bdflush_min[], bdflush_max[];
 extern int sysctl_overcommit_memory;
@@ -154,6 +155,8 @@
        {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
         0644, NULL, &proc_doutsstring, &sysctl_string},
        {KERN_PANIC, "panic", &panic_timeout, sizeof(int),
+        0644, NULL, &proc_dointvec},
+       {KERN_DUMPTCORES, "dumptcores", &dumptcores, sizeof(int),
         0644, NULL, &proc_dointvec},
        {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
         0600, NULL, &proc_dointvec_bset},
