On Fri, 29 Dec 2000, Linus Torvalds wrote:

> 
> Ok, there's a test13-pre6 out there now, which does a partial sync with
> Alan, in addition to hopefully fixing the innd shared mapping writeback
> problem for good.  Thanks to Marcelo Tosatti and others..

I've been noticing a problem on my Athlon in which the memory context switching
code appears to conflict with fork(). The problem first appeared with the
test13-pre2 patch, and because nobody else has seen (or at least reported) it
since then, I felt I should look into it a little further.

I narrowed the problem down to a subset of patches from the MM set in
test13-pre2. Reversing the attached 'context.patch' fixes the problem (only for
i386), but I'm not yet sure why. test13-pre2 and up work without any problems
on an Intel cpu (Pentium 180 & P3 800 tested).

Anyway, I can't seem to find out what really changes with the patch except for
the obvious 'void *segments' pointer being moved into a typedef'd struct. The
only thing I can think of is that the compiler treats it differently, but I
think I can safely rule that out. I tried both 2.91.66 and 2.95.2, each with
two different sets of flags for P5 and K7 (-march=i586, and -march=i686
-malign-functions=4), and the problem still occurs on the Athlon. Maybe there's
something I've overlooked in the attached patch. Request for an extra pair of
eyes please. :)


Here are the symptoms I've observed. The parent seems to die as soon as a forked
child exits, which suggests to me that a new LDT isn't being initialized correctly:

root:~> ps -aux
USER       PID %CPU %MEM   VSZ  RSS TTY      STAT START   TIME COMMAND
root         1  1.1  0.4  1228  532 ?        S    21:42   0:05 init [3]
root         2  0.0  0.0     0    0 ?        SW   21:42   0:00 [keventd]
root         3  0.0  0.0     0    0 ?        SW   21:42   0:00 [kswapd]
root         4  0.0  0.0     0    0 ?        SW   21:42   0:00 [kreclaimd]
root         5  0.0  0.0     0    0 ?        SW   21:42   0:00 [bdflush]
root         6  0.0  0.0     0    0 ?        SW   21:42   0:00 [kupdate]
root       289  0.0  0.4  1284  604 ?        S    21:42   0:00 syslogd -m 0
root       299  0.0  0.8  1912 1104 ?        S    21:42   0:00 klogd
root       351  0.0  1.2  9292 1576 ?        S    21:42   0:00 named
root       361  0.0  0.0     0    0 ?        Z    21:42   0:00 [named <defunct>]
root       363  0.0  1.2  9292 1576 ?        S    21:42   0:00 named
root       364  0.0  1.2  9292 1576 ?        S    21:42   0:00 named
root       365  0.0  0.7  2064  936 ?        S    21:42   0:00 /usr/sbin/sshd
..etc
(Note PID 361)

root:~> strace nslookup sunsite.unc.edu
 :
 :
rt_sigaction(SIGINT, {0x4003ce78, ~[], 0x4000000}, NULL, 8) = 0
rt_sigaction(SIGTERM, {0x4003ce78, ~[], 0x4000000}, NULL, 8) = 0
rt_sigaction(SIGPIPE, {SIG_IGN}, NULL, 8) = 0
rt_sigaction(SIGHUP, {SIG_DFL}, NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT TERM], NULL, 8) = 0
getpid()                                = 2615
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
close(3)                                = 0
socket(PF_INET6, SOCK_STREAM, 0)        = -1 ENOSYS (Function not implemented)
socket(PF_INET6, SOCK_STREAM, 0)        = -1 ENOSYS (Function not implemented)
socket(PF_INET6, SOCK_STREAM, 0)        = -1 EAFNOSUPPORT (Address family not supported by protocol)
--- SIGSEGV (Segmentation fault) ---
+++ killed by SIGSEGV +++


---Example parent/child process:

root:~> tar -xzvvf ../pkgs/zgv-5.2.tar.gz
 :
 :
-rw------- rus/users      1356 2000-06-01 11:46:57 zgv-5.2/INSTALL
-rw------- rus/users     17976 1994-08-23 16:09:05 zgv-5.2/COPYING
-rw------- rus/users      1077 1998-08-26 09:24:31 zgv-5.2/README.fonts
-rw------- rus/users       120 2000-04-22 22:46:49 zgv-5.2/AUTHORS
-rw------- rus/users      3714 2000-01-23 16:29:40 zgv-5.2/SECURITY
Segmentation fault (core dumped)

root:~> strace tar -xzvvf ../pkgs/zgv-5.2.tar.gz
 :
 :
open("zgv-5.2/COPYING", O_WRONLY|O_CREAT|O_EXCL|O_LARGEFILE, 0600) = 4
write(4, "\t\t    GNU GENERAL PUBLIC LICENSE"..., 9728) = 9728
read(3, "ccept this License.  Therefore, "..., 10240) = 10240
write(4, "ccept this License.  Therefore, "..., 8248) = 8248
close(4)                                = 0
utime("zgv-5.2/COPYING", [2000/12/29-20:21:16, 1994/08/23-16:09:05]) = 0
chown32("zgv-5.2/COPYING", 500, 100)    = 0
write(1, "-rw------- rus/users      1077 1"..., 72-rw------- rus/users      1077 
1998-08-26 09:24:31 zgv-5.2/README.fonts
) = 72
open("zgv-5.2/README.fonts", O_WRONLY|O_CREAT|O_EXCL|O_LARGEFILE, 0600) = 4
write(4, "The copyright for *.bdf (taken f"..., 1024) = 1024
read(3, "\"as\nis\" without express or impli"..., 10240) = 8192
--- SIGCHLD (Child exited) ---
--- SIGSEGV (Segmentation fault) ---
+++ killed by SIGSEGV +++

Ideas, anyone?

 -Byron

-- 
Byron Stanoszek                         Ph: (330) 644-3059
Systems Programmer                      Fax: (330) 644-8110
Commercial Timesharing Inc.             Email: [EMAIL PROTECTED]
diff -u --recursive --new-file v2.4.0-test12/linux/arch/i386/kernel/ldt.c linux/arch/i386/kernel/ldt.c
--- v2.4.0-test12/linux/arch/i386/kernel/ldt.c  Sat May 20 10:39:58 2000
+++ linux/arch/i386/kernel/ldt.c        Fri Dec 15 13:01:59 2000
@@ -31,7 +31,7 @@
        struct mm_struct * mm = current->mm;
 
        err = 0;
-       if (!mm->segments)
+       if (!mm->context.segments)
                goto out;
 
        size = LDT_ENTRIES*LDT_ENTRY_SIZE;
@@ -39,7 +39,7 @@
                size = bytecount;
 
        err = size;
-       if (copy_to_user(ptr, mm->segments, size))
+       if (copy_to_user(ptr, mm->context.segments, size))
                err = -EFAULT;
 out:
        return err;
@@ -87,13 +87,12 @@
         * limited by MAX_LDT_DESCRIPTORS.
         */
        down(&mm->mmap_sem);
-       if (!mm->segments) {
-               
+       if (!mm->context.segments) {
                error = -ENOMEM;
-               mm->segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
-               if (!mm->segments)
+               mm->context.segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+               if (!mm->context.segments)
                        goto out_unlock;
-               memset(mm->segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+               memset(mm->context.segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
                
                if (atomic_read(&mm->mm_users) > 1)
                        printk(KERN_WARNING "LDT allocated for cloned task!\n");
@@ -104,7 +103,7 @@
                load_LDT(mm);
        }
 
-       lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->segments);
+       lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.segments);
 
        /* Allow LDTs to be cleared by the user. */
        if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
diff -u --recursive --new-file v2.4.0-test12/linux/arch/i386/kernel/process.c linux/arch/i386/kernel/process.c
--- v2.4.0-test12/linux/arch/i386/kernel/process.c      Mon Dec 11 17:59:43 2000
+++ linux/arch/i386/kernel/process.c    Fri Dec 15 15:37:09 2000
@@ -412,15 +412,15 @@
 /*
  * No need to lock the MM as we are the last user
  */
-void release_segments(struct mm_struct *mm)
+void destroy_context(struct mm_struct *mm)
 {
-       void * ldt = mm->segments;
+       void * ldt = mm->context.segments;
 
        /*
         * free the LDT
         */
        if (ldt) {
-               mm->segments = NULL;
+               mm->context.segments = NULL;
                clear_LDT();
                vfree(ldt);
        }
@@ -478,7 +478,7 @@
 void release_thread(struct task_struct *dead_task)
 {
        if (dead_task->mm) {
-               void * ldt = dead_task->mm->segments;
+               void * ldt = dead_task->mm->context.segments;
 
                // temporary debugging check
                if (ldt) {
@@ -493,29 +493,24 @@
  * we do not have to muck with descriptors here, that is
  * done in switch_mm() as needed.
  */
-void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+int init_new_context(struct task_struct *p, struct mm_struct *new_mm)
 {
-       struct mm_struct * old_mm = current->mm;
-       void * old_ldt = old_mm->segments, * ldt;
+       struct mm_struct * old_mm;
+       void *old_ldt, *ldt;
 
-       if (!old_ldt) {
+       ldt = NULL;
+       old_mm = current->mm;
+       if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
                /*
-                * default LDT - use the one from init_task
+                * Completely new LDT, we initialize it from the parent:
                 */
-               new_mm->segments = NULL;
-               return;
-       }
-
-       /*
-        * Completely new LDT, we initialize it from the parent:
-        */
-       ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
-       if (!ldt)
-               printk(KERN_WARNING "ldt allocation failed\n");
-       else
+               ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+               if (!ldt)
+                       return -ENOMEM;
                memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
-       new_mm->segments = ldt;
-       return;
+       }
+       new_mm->context.segments = ldt;
+       return 0;
 }
 
 /*
diff -u --recursive --new-file v2.4.0-test12/linux/include/asm-i386/desc.h linux/include/asm-i386/desc.h
--- v2.4.0-test12/linux/include/asm-i386/desc.h Sat Sep  4 13:06:08 1999
+++ linux/include/asm-i386/desc.h       Fri Dec 15 12:40:53 2000
@@ -82,7 +82,7 @@
 extern inline void load_LDT (struct mm_struct *mm)
 {
        int cpu = smp_processor_id();
-       void *segments = mm->segments;
+       void *segments = mm->context.segments;
        int count = LDT_ENTRIES;
 
        if (!segments) {
diff -u --recursive --new-file v2.4.0-test12/linux/include/asm-i386/mmu.h linux/include/asm-i386/mmu.h
--- v2.4.0-test12/linux/include/asm-i386/mmu.h  Wed Dec 31 16:00:00 1969
+++ linux/include/asm-i386/mmu.h        Fri Dec 15 12:38:24 2000
@@ -0,0 +1,12 @@
+#ifndef __i386_MMU_H
+#define __i386_MMU_H
+
+/*
+ * The i386 doesn't have a mmu context, but
+ * we put the segment information here.
+ */
+typedef struct { 
+       void *segments;
+} mm_context_t;
+
+#endif
diff -u --recursive --new-file v2.4.0-test12/linux/include/asm-i386/mmu_context.h linux/include/asm-i386/mmu_context.h
--- v2.4.0-test12/linux/include/asm-i386/mmu_context.h  Fri Sep  8 12:52:41 2000
+++ linux/include/asm-i386/mmu_context.h        Fri Dec 15 18:29:19 2000
@@ -6,11 +6,9 @@
 #include <asm/atomic.h>
 #include <asm/pgalloc.h>
 
-/*
- * possibly do the LDT unload here?
- */
-#define destroy_context(mm)            do { } while(0)
-#define init_new_context(tsk,mm)       0
+/* Segment information */
+extern void destroy_context(struct mm_struct *);
+extern int init_new_context(struct task_struct *, struct mm_struct *);
 
 #ifdef CONFIG_SMP
 
@@ -33,7 +31,7 @@
                /*
                 * Re-load LDT if necessary
                 */
-               if (prev->segments != next->segments)
+               if (prev->context.segments != next->context.segments)
                        load_LDT(next);
 #ifdef CONFIG_SMP
                cpu_tlbstate[cpu].state = TLBSTATE_OK;
diff -u --recursive --new-file v2.4.0-test12/linux/include/asm-i386/processor.h linux/include/asm-i386/processor.h
--- v2.4.0-test12/linux/include/asm-i386/processor.h    Mon Dec 11 17:59:45 2000
+++ linux/include/asm-i386/processor.h  Fri Dec 15 18:29:18 2000
@@ -427,11 +427,6 @@
  */
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
-extern void release_segments(struct mm_struct * mm);
-extern void forget_segments(void);
-
 /*
  * Return saved PC of a blocked thread.
  */
diff -u --recursive --new-file v2.4.0-test12/linux/include/linux/sched.h linux/include/linux/sched.h
--- v2.4.0-test12/linux/include/linux/sched.h   Mon Dec 11 17:59:45 2000
+++ linux/include/linux/sched.h Fri Dec 15 18:29:19 2000
@@ -18,6 +18,7 @@
 #include <asm/semaphore.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/mmu.h>
 
 #include <linux/smp.h>
 #include <linux/tty.h>
@@ -208,7 +209,6 @@
        int map_count;                          /* number of VMAs */
        struct semaphore mmap_sem;
        spinlock_t page_table_lock;
-       unsigned long context;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
        unsigned long arg_start, arg_end, env_start, env_end;
@@ -217,11 +217,9 @@
        unsigned long cpu_vm_mask;
        unsigned long swap_cnt; /* number of pages to swap on next pass */
        unsigned long swap_address;
-       /*
-        * This is an architecture-specific pointer: the portable
-        * part of Linux does not know about any segments.
-        */
-       void * segments;
+
+       /* Architecture-specific MM context */
+       mm_context_t context;
 };
 
 #define INIT_MM(name) \
@@ -235,7 +233,6 @@
        map_count:      1,                              \
        mmap_sem:       __MUTEX_INITIALIZER(name.mmap_sem), \
        page_table_lock: SPIN_LOCK_UNLOCKED,            \
-       segments:       NULL                            \
 }
 
 struct signal_struct {
diff -u --recursive --new-file v2.4.0-test12/linux/kernel/fork.c linux/kernel/fork.c
--- v2.4.0-test12/linux/kernel/fork.c   Mon Dec 11 17:59:45 2000
+++ linux/kernel/fork.c Fri Dec 15 12:45:58 2000
@@ -133,11 +133,9 @@
        mm->mmap_avl = NULL;
        mm->mmap_cache = NULL;
        mm->map_count = 0;
-       mm->context = 0;
        mm->cpu_vm_mask = 0;
        mm->swap_cnt = 0;
        mm->swap_address = 0;
-       mm->segments = NULL;
        pprev = &mm->mmap;
        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
                struct file *file;
@@ -319,11 +317,6 @@
        up(&current->mm->mmap_sem);
        if (retval)
                goto free_pt;
-
-       /*
-        * child gets a private LDT (if there was an LDT in the parent)
-        */
-       copy_segments(tsk, mm);
 
        if (init_new_context(tsk,mm))
                goto free_pt;
diff -u --recursive --new-file v2.4.0-test12/linux/mm/mmap.c linux/mm/mmap.c
--- v2.4.0-test12/linux/mm/mmap.c       Mon Dec 11 17:59:45 2000
+++ linux/mm/mmap.c     Fri Dec 15 12:57:54 2000
@@ -885,7 +885,6 @@
 {
        struct vm_area_struct * mpnt;
 
-       release_segments(mm);
        spin_lock(&mm->page_table_lock);
        mpnt = mm->mmap;
        mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;

Reply via email to