On Fri, 2007-11-02 at 15:28 +0200, Felipe Contreras wrote:
> On 10/22/07, Thayne Harbaugh <[EMAIL PROTECTED]> wrote:
> >
> > On Sat, 2007-10-20 at 21:34 +0100, Thiemo Seufer wrote:

<SNIP>

> > > Please submit this patch (and resend what you think was missed).
> >
> > I'll get them reworked and re-sent soon.
> >
> > Thanks for the interest.
> 
> Do you have anything that can be tried out already?

Try these:

09_arm_eabitls.patch:  This is for TLS on arm.  We have used it
extensively and it appears quite solid.  It should apply to a stock CVS
tree - although we use it on top of about 15 other patches so YMMV.

40_tls.patch: This is more for i386.  It works mostly, but I don't trust
it.  It doesn't apply to a stock CVS tree.

There are a few things that should be reworked on both of these patches.
I haven't sent them up-stream because I'm working to get foundational
patches accepted first.  The sooner I can get the EFAULT patches
accepted, the sooner I can rework these patches and make them more
suitable for committing.  Right now I don't think either of them
should be committed (although 09_arm_eabitls.patch is better than
40_tls.patch).

Good luck!
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2007-10-15 13:52:07.000000000 -0600
+++ qemu/configure	2007-10-15 13:52:38.000000000 -0600
@@ -102,6 +102,7 @@
 darwin_user="no"
 build_docs="no"
 uname_release=""
+nptl="yes"
 
 # OS specific
 targetos=`uname -s`
@@ -303,6 +304,8 @@
         *)     echo "undefined SPARC architecture. Exiting";exit 1;;
       esac
   ;;
+  --disable-nptl) nptl="no"
+  ;;
   esac
 done
 
@@ -388,6 +391,7 @@
 echo "  --disable-linux-user     disable all linux usermode emulation targets"
 echo "  --enable-darwin-user     enable all darwin usermode emulation targets"
 echo "  --disable-darwin-user    disable all darwin usermode emulation targets"
+echo "  --disable-nptl           disable usermode NPTL guest support"
 echo "  --fmod-lib               path to FMOD library"
 echo "  --fmod-inc               path to FMOD includes"
 echo "  --enable-uname-release=R Return R for uname -r in usermode emulation"
@@ -554,6 +558,23 @@
 }
 EOF
 
+# check NPTL support: the host <sched.h> must define CLONE_SETTLS
+cat > $TMPC <<EOF
+#include <sched.h>
+void foo()
+{
+#ifndef CLONE_SETTLS
+#error bork
+#endif
+}
+EOF
+
+if $cc -c -o $TMPO $TMPC 2> /dev/null ; then
+  :
+else
+   nptl="no"
+fi
+
 ##########################################
 # SDL probe
 
@@ -717,6 +738,7 @@
 echo "Documentation     $build_docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
+echo "NPTL support      $nptl"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -1122,6 +1144,14 @@
             echo "SDL_CFLAGS=`$sdl_config --cflags`" >> $config_mak
         fi
     fi
+else
+    if test "$nptl" = "yes" ; then
+        case "$target_cpu" in
+          arm | armeb | ppc | ppc64)
+            echo "#define USE_NPTL 1" >> $config_h
+          ;;
+        esac
+    fi
 fi
 
 if test "$cocoa" = "yes" ; then
Index: qemu/exec-all.h
===================================================================
--- qemu.orig/exec-all.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/exec-all.h	2007-10-15 13:52:32.000000000 -0600
@@ -391,170 +391,7 @@
 extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
 extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
 
-#if defined(__powerpc__)
-static inline int testandset (int *p)
-{
-    int ret;
-    __asm__ __volatile__ (
-                          "0:    lwarx %0,0,%1\n"
-                          "      xor. %0,%3,%0\n"
-                          "      bne 1f\n"
-                          "      stwcx. %2,0,%1\n"
-                          "      bne- 0b\n"
-                          "1:    "
-                          : "=&r" (ret)
-                          : "r" (p), "r" (1), "r" (0)
-                          : "cr0", "memory");
-    return ret;
-}
-#elif defined(__i386__)
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#elif defined(__x86_64__)
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#elif defined(__s390__)
-static inline int testandset (int *p)
-{
-    int ret;
-
-    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
-			  "   jl    0b"
-			  : "=&d" (ret)
-			  : "r" (1), "a" (p), "0" (*p)
-			  : "cc", "memory" );
-    return ret;
-}
-#elif defined(__alpha__)
-static inline int testandset (int *p)
-{
-    int ret;
-    unsigned long one;
-
-    __asm__ __volatile__ ("0:	mov 1,%2\n"
-			  "	ldl_l %0,%1\n"
-			  "	stl_c %2,%1\n"
-			  "	beq %2,1f\n"
-			  ".subsection 2\n"
-			  "1:	br 0b\n"
-			  ".previous"
-			  : "=r" (ret), "=m" (*p), "=r" (one)
-			  : "m" (*p));
-    return ret;
-}
-#elif defined(__sparc__)
-static inline int testandset (int *p)
-{
-	int ret;
-
-	__asm__ __volatile__("ldstub	[%1], %0"
-			     : "=r" (ret)
-			     : "r" (p)
-			     : "memory");
-
-	return (ret ? 1 : 0);
-}
-#elif defined(__arm__)
-static inline int testandset (int *spinlock)
-{
-    register unsigned int ret;
-    __asm__ __volatile__("swp %0, %1, [%2]"
-                         : "=r"(ret)
-                         : "0"(1), "r"(spinlock));
-
-    return ret;
-}
-#elif defined(__mc68000)
-static inline int testandset (int *p)
-{
-    char ret;
-    __asm__ __volatile__("tas %1; sne %0"
-                         : "=r" (ret)
-                         : "m" (p)
-                         : "cc","memory");
-    return ret;
-}
-#elif defined(__ia64)
-
-#include <ia64intrin.h>
-
-static inline int testandset (int *p)
-{
-    return __sync_lock_test_and_set (p, 1);
-}
-#elif defined(__mips__)
-static inline int testandset (int *p)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-	"	.set push		\n"
-	"	.set noat		\n"
-	"	.set mips2		\n"
-	"1:	li	$1, 1		\n"
-	"	ll	%0, %1		\n"
-	"	sc	$1, %1		\n"
-	"	beqz	$1, 1b		\n"
-	"	.set pop		"
-	: "=r" (ret), "+R" (*p)
-	:
-	: "memory");
-
-    return ret;
-}
-#else
-#error unimplemented CPU support
-#endif
-
-typedef int spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED 0
-
-#if defined(CONFIG_USER_ONLY)
-static inline void spin_lock(spinlock_t *lock)
-{
-    while (testandset(lock));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-    *lock = 0;
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return !testandset(lock);
-}
-#else
-static inline void spin_lock(spinlock_t *lock)
-{
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return 1;
-}
-#endif
+#include "spinlock.h"
 
 extern spinlock_t tb_lock;
 
Index: qemu/linux-user/arm/syscall.h
===================================================================
--- qemu.orig/linux-user/arm/syscall.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/linux-user/arm/syscall.h	2007-10-15 13:52:13.000000000 -0600
@@ -28,7 +28,9 @@
 #define ARM_SYSCALL_BASE	0x900000
 #define ARM_THUMB_SYSCALL	0
 
-#define ARM_NR_cacheflush (ARM_SYSCALL_BASE + 0xf0000 + 2)
+#define ARM_NR_BASE       0xf0000
+#define ARM_NR_cacheflush (ARM_NR_BASE + 2)
+#define ARM_NR_set_tls    (ARM_NR_BASE + 5)
 
 #define ARM_NR_semihosting	  0x123456
 #define ARM_NR_thumb_semihosting  0xAB
Index: qemu/linux-user/main.c
===================================================================
--- qemu.orig/linux-user/main.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/main.c	2007-10-15 13:52:41.000000000 -0600
@@ -325,6 +325,50 @@
     }
 }
 
+/* Handle a jump to the kernel code page: emulate the helper at regs[15]
+   and return to the caller.  Returns 0 if handled, 1 if unknown.  */
+static int do_kernel_trap(CPUARMState *env)
+{
+    uint32_t addr;
+    uint32_t *ptr;
+    uint32_t cpsr;
+
+    switch (env->regs[15]) {
+    case 0xffff0fc0: /* __kernel_cmpxchg: r0 = old, r1 = new, r2 = ptr */
+        /* XXX: This only works between threads, not between processes.
+           Use native atomic operations.  */
+        /* ??? This probably breaks horribly if the access segfaults.  */
+        cpu_lock();
+        ptr = (uint32_t *)g2h(env->regs[2]);
+        cpsr = cpsr_read(env);
+        if (*ptr == env->regs[0]) {
+            *ptr = env->regs[1];
+            env->regs[0] = 0;
+            cpsr |= CPSR_C;
+        } else {
+            env->regs[0] = -1;
+            cpsr &= ~CPSR_C;
+        }
+        cpsr_write(env, cpsr, CPSR_C);
+        cpu_unlock();
+        break;
+    case 0xffff0fe0: /* __kernel_get_tls */
+        env->regs[0] = env->cp15.c13_tls;
+        break;
+    default:
+        return 1;
+    }
+    /* Jump back to the caller; bit 0 of lr selects Thumb state.  */
+    addr = env->regs[14];
+    if (addr & 1) {
+        env->thumb = 1;
+        addr &= ~1;
+    }
+    env->regs[15] = addr;
+
+    return 0;
+}
+
 void cpu_loop(CPUARMState *env)
 {
     int trapnr;
@@ -381,10 +425,8 @@
                     }
                 }
 
-                if (n == ARM_NR_cacheflush) {
-                    arm_cache_flush(env->regs[0], env->regs[1]);
-                } else if (n == ARM_NR_semihosting
-                           || n == ARM_NR_thumb_semihosting) {
+                if (n == ARM_NR_semihosting
+                    || n == ARM_NR_thumb_semihosting) {
                     env->regs[0] = do_arm_semihosting (env);
                 } else if (n == 0 || n >= ARM_SYSCALL_BASE
                            || (env->thumb && n == ARM_THUMB_SYSCALL)) {
@@ -395,6 +437,26 @@
                         n -= ARM_SYSCALL_BASE;
                         env->eabi = 0;
                     }
+                    if (n > ARM_NR_BASE) {
+                        switch (n)
+                          {
+                          case ARM_NR_cacheflush:
+                              arm_cache_flush(env->regs[0], env->regs[1]);
+                              break;
+#ifdef USE_NPTL
+                          case ARM_NR_set_tls:
+                              cpu_set_tls(env, env->regs[0]);
+                              env->regs[0] = 0;
+                              break;
+#endif
+                          default:
+                              printf ("Error: Bad syscall: %x\n", n);
+                              env->regs[0] = -TARGET_ENOSYS;
+                              goto error;
+                          }
+                      }
+                    else
+                      {
                     env->regs[0] = do_syscall(env,
                                               n,
                                               env->regs[0],
@@ -403,7 +465,9 @@
                                               env->regs[3],
                                               env->regs[4],
                                               env->regs[5]);
+                      }
                 } else {
+                    printf ("Error: Bad syscall: %x\n", n);
                     goto error;
                 }
             }
@@ -441,6 +505,10 @@
                   }
             }
             break;
+        case EXCP_KERNEL_TRAP:
+            if (do_kernel_trap(env))
+              goto error;
+            break;
         default:
         error:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
@@ -2069,6 +2137,10 @@
         for(i = 0; i < 16; i++) {
             env->regs[i] = regs->uregs[i];
         }
+        /* Register the magic kernel code page.  The cpu will generate a
+           special exception when it tries to execute code here.  We can't
+           put real code here because it may be in use by the host kernel.  */
+        page_set_flags(0xffff0000, 0xffff0fff, 0);
     }
 #elif defined(TARGET_SPARC)
     {
Index: qemu/linux-user/qemu.h
===================================================================
--- qemu.orig/linux-user/qemu.h	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/qemu.h	2007-10-15 13:52:32.000000000 -0600
@@ -84,6 +84,9 @@
 #endif
     int used; /* non zero if used */
     struct image_info *info;
+#ifdef USE_NPTL
+    uint32_t *child_tidptr;
+#endif
     uint8_t stack[0];
 } __attribute__((aligned(16))) TaskState;
 
Index: qemu/linux-user/syscall.c
===================================================================
--- qemu.orig/linux-user/syscall.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/syscall.c	2007-10-15 13:52:39.000000000 -0600
@@ -70,9 +70,18 @@
 #include <linux/kd.h>
 
 #include "qemu.h"
+#include "spinlock.h"
 
 //#define DEBUG
 
+#ifdef USE_NPTL
+#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
+    CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
+#else
+/* XXX: Hardcode the above values.  */
+#define CLONE_NPTL_FLAGS2 0
+#endif
+
 #if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC) \
     || defined(TARGET_M68K) || defined(TARGET_SH4)
 /* 16 bit uid wrappers emulation */
@@ -2313,9 +2322,19 @@
    thread/process */
 #define NEW_STACK_SIZE 8192
 
+#ifdef USE_NPTL
+static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
 static int clone_func(void *arg)
 {
     CPUState *env = arg;
+#ifdef USE_NPTL
+    /* Wait until the parent has finshed initializing the tls state.  */
+    while (!spin_trylock(&nptl_lock))
+        usleep(1);
+    spin_unlock(&nptl_lock);
+#endif
     cpu_loop(env);
     /* never exits */
     return 0;
@@ -2324,11 +2343,19 @@
 /* do_fork() Must return host values and target errnos (unlike most
    do_*() functions). */
-int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp)
+int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp,
+            uint32_t *parent_tidptr, void *newtls,
+            uint32_t *child_tidptr)
 {
     int ret;
     TaskState *ts;
     uint8_t *new_stack;
     CPUState *new_env;
+#ifdef USE_NPTL
+    unsigned int nptl_flags;
+
+    if (flags & CLONE_PARENT_SETTID)
+        *parent_tidptr = gettid();
+#endif
 
     if (flags & CLONE_VM) {
         ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
@@ -2396,16 +2423,64 @@
 #error unsupported target CPU
 #endif
         new_env->opaque = ts;
+#ifdef USE_NPTL
+        nptl_flags = flags;
+        flags &= ~CLONE_NPTL_FLAGS2;
+
+        if (nptl_flags & CLONE_CHILD_CLEARTID) {
+            ts->child_tidptr = child_tidptr;
+        }
+
+        if (nptl_flags & CLONE_SETTLS)
+            cpu_set_tls (new_env, newtls);
+
+        /* Grab the global cpu lock so that the thread setup appears
+           atomic.  */
+        if (nptl_flags & CLONE_CHILD_SETTID)
+            spin_lock(&nptl_lock);
+
+#else
+        if (flags & CLONE_NPTL_FLAGS2)
+            return -EINVAL;
+#endif
 #ifdef __ia64__
         ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
 #else
 	ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
 #endif
+#ifdef USE_NPTL
+        if (ret != -1) {
+            if (nptl_flags & CLONE_CHILD_SETTID)
+                *child_tidptr = ret;
+        }
+
+        /* Allow the child to continue.  */
+        if (nptl_flags & CLONE_CHILD_SETTID)
+            spin_unlock(&nptl_lock);
+#endif
     } else {
         /* if no CLONE_VM, we consider it is a fork */
-        if ((flags & ~CSIGNAL) != 0)
+        if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
             return -EINVAL;
         ret = fork();
+#ifdef USE_NPTL
+        /* There is a race condition here.  The parent process could
+           theoretically read the TID in the child process before the child
+           tid is set.  This would require using either ptrace
+           (not implemented) or having *_tidptr to point at a shared memory
+           mapping.  We can't repeat the spinlock hack used above because
+           the child process gets its own copy of the lock.  */
+        if (ret == 0) {
+            /* Child Process.  */
+            if (flags & CLONE_CHILD_SETTID)
+                *child_tidptr = gettid();
+            ts = (TaskState *)env->opaque;
+            if (flags & CLONE_CHILD_CLEARTID)
+                ts->child_tidptr = child_tidptr;
+            if (flags & CLONE_SETTLS)
+                cpu_set_tls (env, newtls);
+        }
+#endif
     }
     return ret;
 }
@@ -2727,7 +2802,7 @@
         ret = do_brk(arg1);
         break;
     case TARGET_NR_fork:
-        ret = get_errno(do_fork(cpu_env, SIGCHLD, 0));
+        ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, NULL, NULL, NULL));
         break;
 #ifdef TARGET_NR_waitpid
     case TARGET_NR_waitpid:
@@ -4170,7 +4245,8 @@
         ret = get_errno(fsync(arg1));
         break;
     case TARGET_NR_clone:
-        ret = get_errno(do_fork(cpu_env, arg1, arg2));
+        ret = get_errno(do_fork(cpu_env, arg1, arg2, (uint32_t *)arg3,
+                                (void *)arg4, (uint32_t *)arg5));
         break;
 #ifdef __NR_exit_group
         /* new thread calls */
@@ -4590,7 +4666,8 @@
 #endif
 #ifdef TARGET_NR_vfork
     case TARGET_NR_vfork:
-        ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0));
+        ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+                                NULL, NULL, NULL));
         break;
 #endif
 #ifdef TARGET_NR_ugetrlimit
Index: qemu/spinlock.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ qemu/spinlock.h	2007-10-15 13:52:13.000000000 -0600
@@ -0,0 +1,188 @@
+/*
+ * Atomic operation helper include
+ *
+ *  Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef SPINLOCK_H
+#define SPINLOCK_H
+
+#if defined(__powerpc__)
+static inline int testandset (int *p)
+{
+    int ret;
+    __asm__ __volatile__ (
+                          "0:    lwarx %0,0,%1\n"
+                          "      xor. %0,%3,%0\n"
+                          "      bne 1f\n"
+                          "      stwcx. %2,0,%1\n"
+                          "      bne- 0b\n"
+                          "1:    "
+                          : "=&r" (ret)
+                          : "r" (p), "r" (1), "r" (0)
+                          : "cr0", "memory");
+    return ret;
+}
+#elif defined(__i386__)
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#elif defined(__x86_64__)
+static inline int testandset (int *p)
+{
+    long int readval = 0;
+
+    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+                          : "+m" (*p), "+a" (readval)
+                          : "r" (1)
+                          : "cc");
+    return readval;
+}
+#elif defined(__s390__)
+static inline int testandset (int *p)
+{
+    int ret;
+
+    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
+                          "   jl    0b"
+                          : "=&d" (ret)
+                          : "r" (1), "a" (p), "0" (*p)
+                          : "cc", "memory" );
+    return ret;
+}
+#elif defined(__alpha__)
+static inline int testandset (int *p)
+{
+    int ret;
+    unsigned long one;
+
+    __asm__ __volatile__ ("0:	mov 1,%2\n"
+                          "	ldl_l %0,%1\n"
+                          "	stl_c %2,%1\n"
+                          "	beq %2,1f\n"
+                          ".subsection 2\n"
+                          "1:	br 0b\n"
+                          ".previous"
+                          : "=r" (ret), "=m" (*p), "=r" (one)
+                          : "m" (*p));
+    return ret;
+}
+#elif defined(__sparc__)
+static inline int testandset (int *p)
+{
+        int ret;
+
+        __asm__ __volatile__("ldstub	[%1], %0"
+                             : "=r" (ret)
+                             : "r" (p)
+                             : "memory");
+
+        return (ret ? 1 : 0);
+}
+#elif defined(__arm__)
+static inline int testandset (int *spinlock)
+{
+    register unsigned int ret;
+    __asm__ __volatile__("swp %0, %1, [%2]"
+                         : "=r"(ret)
+                         : "0"(1), "r"(spinlock));
+
+    return ret;
+}
+#elif defined(__mc68000)
+/* Atomically test-and-set the lock word at P with tas; nonzero means it was
+   already set.  The operand must be *p: tas has to hit the lock itself.  */
+static inline int testandset (int *p)
+{
+    char ret;
+    __asm__ __volatile__("tas %1; sne %0"
+                         : "=r" (ret) : "m" (*p) : "cc","memory");
+    return ret;
+}
+#elif defined(__ia64)
+
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+    return __sync_lock_test_and_set (p, 1);
+}
+#elif defined(__mips__)
+static inline int testandset (int *p)
+{
+    int ret;
+
+    __asm__ __volatile__ (
+        "	.set push		\n"
+        "	.set noat		\n"
+        "	.set mips2		\n"
+        "1:	li	$1, 1		\n"
+        "	ll	%0, %1		\n"
+        "	sc	$1, %1		\n"
+        "	beqz	$1, 1b		\n"
+        "	.set pop		"
+        : "=r" (ret), "+R" (*p)
+        :
+        : "memory");
+
+    return ret;
+}
+#else
+#error unimplemented CPU support
+#endif
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+    while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+    *lock = 0;
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return 1;
+}
+#endif
+
+#endif
Index: qemu/target-arm/cpu.h
===================================================================
--- qemu.orig/target-arm/cpu.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/cpu.h	2007-10-15 13:52:13.000000000 -0600
@@ -37,6 +37,7 @@
 #define EXCP_IRQ             5
 #define EXCP_FIQ             6
 #define EXCP_BKPT            7
+#define EXCP_KERNEL_TRAP     8   /* Jumped to kernel code page.  */
 
 typedef void ARMWriteCPFunc(void *opaque, int cp_info,
                             int srcreg, int operand, uint32_t value);
@@ -98,6 +99,7 @@
         uint32_t c9_data;
         uint32_t c13_fcse; /* FCSE PID.  */
         uint32_t c13_context; /* Context ID.  */
+        uint32_t c13_tls; /* Thread ID */
         uint32_t c15_cpar; /* XScale Coprocessor Access Register */
         uint32_t c15_ticonfig; /* TI925T configuration byte.  */
         uint32_t c15_i_max; /* Maximum D-cache dirty line index.  */
@@ -174,6 +176,15 @@
 int cpu_arm_signal_handler(int host_signum, void *pinfo,
                            void *puc);
 
+void cpu_lock(void);
+void cpu_unlock(void);
+#if defined(USE_NPTL)
+static inline void cpu_set_tls(CPUARMState *env, void *newtls)
+{
+  env->cp15.c13_tls = (uint32_t)(long)newtls;
+}
+#endif
+
 #define CPSR_M (0x1f)
 #define CPSR_T (1 << 5)
 #define CPSR_F (1 << 6)
@@ -185,7 +196,11 @@
 #define CPSR_J (1 << 24)
 #define CPSR_IT_0_1 (3 << 25)
 #define CPSR_Q (1 << 27)
-#define CPSR_NZCV (0xf << 28)
+#define CPSR_V (1 << 28)
+#define CPSR_C (1 << 29)
+#define CPSR_Z (1 << 30)
+#define CPSR_N (1 << 31)
+#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
 
 #define CACHED_CPSR_BITS (CPSR_T | CPSR_Q | CPSR_NZCV)
 /* Return the current CPSR value.  */
Index: qemu/target-arm/exec.h
===================================================================
--- qemu.orig/target-arm/exec.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/exec.h	2007-10-15 13:52:13.000000000 -0600
@@ -68,8 +68,6 @@
 
 /* In op_helper.c */
 
-void cpu_lock(void);
-void cpu_unlock(void);
 void helper_set_cp(CPUState *, uint32_t, uint32_t);
 uint32_t helper_get_cp(CPUState *, uint32_t);
 void helper_set_cp15(CPUState *, uint32_t, uint32_t);
Index: qemu/target-arm/op.c
===================================================================
--- qemu.orig/target-arm/op.c	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/op.c	2007-10-15 13:52:13.000000000 -0600
@@ -891,6 +891,12 @@
     cpu_loop_exit();
 }
 
+void OPPROTO op_kernel_trap(void)
+{
+    env->exception_index = EXCP_KERNEL_TRAP;
+    cpu_loop_exit();
+}
+
 /* VFP support.  We follow the convention used for VFP instrunctions:
    Single precition routines have a "s" suffix, double precision a
    "d" suffix.  */
Index: qemu/target-arm/op_mem.h
===================================================================
--- qemu.orig/target-arm/op_mem.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/op_mem.h	2007-10-15 13:52:13.000000000 -0600
@@ -1,5 +1,6 @@
 /* ARM memory operations.  */
 
+void helper_ld(uint32_t);
 /* Load from address T1 into T0.  */
 #define MEM_LD_OP(name) \
 void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
Index: qemu/target-arm/translate.c
===================================================================
--- qemu.orig/target-arm/translate.c	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/translate.c	2007-10-15 13:52:13.000000000 -0600
@@ -3555,6 +3555,15 @@
     nb_gen_labels = 0;
     lj = -1;
     do {
+#ifdef CONFIG_USER_ONLY
+        /* Intercept jump to the magic kernel page.  */
+        if (dc->pc > 0xffff0000) {
+            gen_op_kernel_trap();
+            dc->is_jmp = DISAS_UPDATE;
+            break;
+        }
+#endif
+
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
                 if (env->breakpoints[j] == dc->pc) {
Index: qemu/arm.ld
===================================================================
--- qemu.orig/arm.ld	2007-10-15 13:52:07.000000000 -0600
+++ qemu/arm.ld	2007-10-15 13:52:13.000000000 -0600
@@ -26,6 +26,10 @@
     { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
   .rela.rodata   :
     { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+  .rel.tdata     : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+  .rela.tdata    : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+  .rel.tbss      : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+  .rela.tbss     : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
   .rel.got       : { *(.rel.got)		}
   .rela.got      : { *(.rela.got)		}
   .rel.ctors     : { *(.rel.ctors)	}
Index: qemu/target-ppc/cpu.h
===================================================================
--- qemu.orig/target-ppc/cpu.h	2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-ppc/cpu.h	2007-10-15 13:52:13.000000000 -0600
@@ -589,6 +589,12 @@
 void do_interrupt (CPUPPCState *env);
 void ppc_hw_interrupt (CPUPPCState *env);
 void cpu_loop_exit (void);
+#if defined(USE_NPTL)
+static inline void cpu_set_tls(CPUPPCState *env, void *newtls)
+{
+  env->gpr[2] = (uint32_t)(long)newtls;
+}
+#endif
 
 void dump_stack (CPUPPCState *env);
 
Index: qemu/linux-user/main.c
===================================================================
--- qemu.orig/linux-user/main.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/main.c	2007-10-15 13:52:29.000000000 -0600
@@ -156,7 +156,7 @@
     p[1] = tswapl(e2);
 }
 
-uint64_t gdt_table[6];
+uint64_t gdt_table[9];
 uint64_t idt_table[256];
 
 /* only dpl matters as we do only user space emulation */
Index: qemu/linux-user/syscall.c
===================================================================
--- qemu.orig/linux-user/syscall.c	2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/syscall.c	2007-10-15 13:52:30.000000000 -0600
@@ -183,6 +183,7 @@
 #define __NR_sys_symlinkat __NR_symlinkat
 #define __NR_sys_syslog __NR_syslog
 #define __NR_sys_tgkill __NR_tgkill
+#define __NR_sys_clone __NR_clone
 #define __NR_sys_tkill __NR_tkill
 #define __NR_sys_unlinkat __NR_unlinkat
 #define __NR_sys_utimensat __NR_utimensat
@@ -258,6 +259,7 @@
 #if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
 _syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
 #endif
+_syscall5(int,sys_clone, int, flags, void *, child_stack, int *, parent_tidptr, struct user_desc *, newtls, int *, child_tidptr)
 #if defined(TARGET_NR_tkill) && defined(__NR_tkill)
 _syscall2(int,sys_tkill,int,tid,int,sig)
 #endif
@@ -2416,6 +2418,81 @@
     return ret;
 }
 
+/* Emulate set_thread_area(): install the segment descriptor described by
+   the guest struct at PTR into GDT slot 6..8.  An entry_number of -1 asks
+   for the first free slot, which is written back.  Returns 0 or -EINVAL.  */
+int do_set_thread_area(CPUX86State *env, target_ulong ptr)
+{
+    uint64_t *gdt_table = g2h(env->gdt.base);
+    struct target_modify_ldt_ldt_s ldt_info;
+    struct target_modify_ldt_ldt_s *target_ldt_info;
+    int seg_32bit, contents, read_exec_only, limit_in_pages;
+    int seg_not_present, useable, i;
+    uint32_t *lp, entry_1, entry_2;
+
+    lock_user_struct(target_ldt_info, ptr, 1);
+    ldt_info.entry_number = tswap32(target_ldt_info->entry_number);
+    ldt_info.base_addr = tswapl(target_ldt_info->base_addr);
+    ldt_info.limit = tswap32(target_ldt_info->limit);
+    ldt_info.flags = tswap32(target_ldt_info->flags);
+    if (ldt_info.entry_number == -1) {
+        /* Search all three TLS slots (6, 7 and 8) for an unused one.  */
+        for (i = 6; i <= 8; i++)
+            if (gdt_table[i] == 0) {
+                ldt_info.entry_number = i;
+                target_ldt_info->entry_number = tswap32(i);
+                break;
+            }
+    }
+    /* entry_number may have been updated above, so copy the struct back.  */
+    unlock_user_struct(target_ldt_info, ptr, 1);
+
+    if (ldt_info.entry_number < 6 || ldt_info.entry_number > 8)
+        return -EINVAL;
+    seg_32bit = ldt_info.flags & 1;
+    contents = (ldt_info.flags >> 1) & 3;
+    read_exec_only = (ldt_info.flags >> 3) & 1;
+    limit_in_pages = (ldt_info.flags >> 4) & 1;
+    seg_not_present = (ldt_info.flags >> 5) & 1;
+    useable = (ldt_info.flags >> 6) & 1;
+
+    if (contents == 3 && seg_not_present == 0)
+        return -EINVAL;
+
+    /* NOTE: same code as Linux kernel.  Allow the entry to be cleared.  */
+    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+        if ((contents == 0             &&
+             read_exec_only == 1       &&
+             seg_32bit == 0            &&
+             limit_in_pages == 0       &&
+             seg_not_present == 1      &&
+             useable == 0 )) {
+            entry_1 = 0;
+            entry_2 = 0;
+            goto install;
+        }
+    }
+
+    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+        (ldt_info.limit & 0x0ffff);
+    entry_2 = (ldt_info.base_addr & 0xff000000) |
+        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+        (ldt_info.limit & 0xf0000) |
+        ((read_exec_only ^ 1) << 9) |
+        (contents << 10) |
+        ((seg_not_present ^ 1) << 15) |
+        (seg_32bit << 22) |
+        (limit_in_pages << 23) |
+        (useable << 20) |
+        0x7000;
+
+    /* Install the new entry ...  */
+install:
+    lp = (uint32_t *)(gdt_table + ldt_info.entry_number);
+    lp[0] = tswap32(entry_1);
+    lp[1] = tswap32(entry_2);
+    return 0;
+}
 #endif /* defined(TARGET_I386) */
 
 /* this stack is the equivalent of the kernel stack associated with a
@@ -2426,40 +2503,62 @@
 static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
 #endif
 
-static int clone_func(void *arg)
+static int clone_func(CPUState *cloneenv)
 {
-    CPUState *env = arg;
 #ifdef USE_NPTL
     /* Wait until the parent has finshed initializing the tls state.  */
     while (!spin_trylock(&nptl_lock))
         usleep(1);
     spin_unlock(&nptl_lock);
 #endif
-    cpu_loop(env);
+    cpu_loop(cloneenv);
     /* never exits */
     return 0;
 }
 
+#ifdef __ia64__
+#define clone(...) __clone2(__VA_ARGS__)
+#endif
+
 /* do_fork() Must return host values and target errnos (unlike most
  * do_*() functions).
  */
 int do_fork(CPUState *env, unsigned int flags, target_ulong newsp,
-            uint32_t *parent_tidptr, void *newtls,
-            uint32_t *child_tidptr)
+            target_ulong parent_tidptr, target_ulong newtls,
+            target_ulong child_tidptr)
 {
     int ret;
     TaskState *ts;
     uint8_t *new_stack;
     CPUState *new_env;
 #ifdef USE_NPTL
+    unsigned long parent_tid=gettid();
+#if defined(TARGET_I386)
+    uint64_t *new_gdt_table;
+#endif
     unsigned int nptl_flags;
 
-    if (flags & CLONE_PARENT_SETTID)
-        *parent_tidptr = gettid();
+    /* check for invalid combinations */
+    if (((flags & CLONE_PARENT_SETTID) && !parent_tidptr)
+        || ((flags & CLONE_CHILD_SETTID) && !child_tidptr))
+        return -EINVAL;
+
+    if (flags & CLONE_CHILD_SETTID
+        && !access_ok(VERIFY_WRITE, child_tidptr, sizeof(target_ulong)))
+        return -EFAULT;
+
+    if (flags & CLONE_PARENT_SETTID) {
+        if (!access_ok(VERIFY_WRITE, parent_tidptr, sizeof(target_ulong)))
+            return -EFAULT;
+        /* Address is writable: store parent_tid (caller's gettid()) for the guest. */
+        tput32(parent_tidptr, parent_tid);
+    }
 #endif
 
     if (flags & CLONE_VM) {
         ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
+        if (!ts)
+            return -ENOMEM;
         memset(ts, 0, sizeof(TaskState));
         new_stack = ts->stack;
         ts->used = 1;
@@ -2471,6 +2570,31 @@
 #if defined(TARGET_I386)
         if (!newsp)
             newsp = env->regs[R_ESP];
+#ifdef USE_NPTL
+        new_gdt_table = malloc(9 * 8);
+        if (!new_gdt_table) {
+                free(ts);
+                free(new_env);
+                return -ENOMEM;
+        }
+        /* Copy main GDT table from parent, but clear TLS entries */
+        memcpy(new_gdt_table, g2h(env->gdt.base), 6 * 8);
+        memset(&new_gdt_table[6], 0, 3 * 8);
+        new_env->gdt.base = h2g(new_gdt_table);
+        if (flags & CLONE_SETTLS) {
+               ret = do_set_thread_area(new_env, newtls);
+               if (ret) {
+                       free(ts);
+                       free(new_env);
+                       free(new_gdt_table);
+                       return ret;
+               }
+        }
+#endif /* USE_NPTL */
+
+        cpu_x86_load_seg(new_env, R_FS, new_env->segs[R_FS].selector);
+        cpu_x86_load_seg(new_env, R_GS, new_env->segs[R_GS].selector);
+
         new_env->regs[R_ESP] = newsp;
         new_env->regs[R_EAX] = 0;
 #elif defined(TARGET_ARM)
@@ -2540,18 +2664,22 @@
             spin_lock(&nptl_lock);
 
 #else
-        if (flags & CLONE_NPTL_FLAGS2)
+        if (flags & CLONE_NPTL_FLAGS2) {
+            free(ts);
+            free(new_env);
+#ifdef USE_NPTL
+            free(new_gdt_table);
+#endif
             return -EINVAL;
+	}
 #endif
-#ifdef __ia64__
-        ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#else
 	ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#endif
 #ifdef USE_NPTL
-        if (ret != -1) {
-            if (nptl_flags & CLONE_CHILD_SETTID)
-                *child_tidptr = ret;
+        if (ret != -1 && nptl_flags & CLONE_CHILD_SETTID) {
+            if (ret==0) // only in client memory for fork()
+                tput32(child_tidptr, gettid());
+            else if (flags & CLONE_VM) // real threads need it too
+                tput32(child_tidptr, ret);
         }
 
         /* Allow the child to continue.  */
@@ -2562,7 +2690,7 @@
         /* if no CLONE_VM, we consider it is a fork */
         if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
             return -EINVAL;
-        ret = fork();
+        ret = sys_clone(flags & ~CLONE_NPTL_FLAGS2, 0, g2h(parent_tidptr), NULL, g2h(child_tidptr));
 #ifdef USE_NPTL
         /* There is a race condition here.  The parent process could
            theoretically read the TID in the child process before the child
@@ -2570,10 +2698,13 @@
            (not implemented) or having *_tidptr to point at a shared memory
            mapping.  We can't repeat the spinlock hack used above because
            the child process gets its own copy of the lock.  */
+        if (flags & CLONE_CHILD_SETTID) {
+            if (ret == 0)
+                tput32(child_tidptr, gettid());
+            else if (flags & CLONE_VM)
+                tput32(child_tidptr, ret);
+        }
         if (ret == 0) {
-            /* Child Process.  */
-            if (flags & CLONE_CHILD_SETTID)
-                *child_tidptr = gettid();
             ts = (TaskState *)env->opaque;
             if (flags & CLONE_CHILD_CLEARTID)
                 ts->child_tidptr = child_tidptr;
@@ -2585,6 +2716,10 @@
     return ret;
 }
 
+#ifdef __ia64__
+#undef clone
+#endif
+
 static target_long do_fcntl(int fd, int cmd, target_ulong arg)
 {
     struct flock fl;
@@ -2940,7 +3075,7 @@
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
-        /* XXX: should free thread stack and CPU env */
+        /* XXX: should free thread stack, GDT and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
         break;
@@ -5420,12 +5555,12 @@
 #ifdef TARGET_NR_set_thread_area
     case TARGET_NR_set_thread_area:
 #ifdef TARGET_MIPS
-      ((CPUMIPSState *) cpu_env)->tls_value = arg1;
-      ret = 0;
-      break;
+        ((CPUMIPSState *) cpu_env)->tls_value = arg1;
+        ret = 0;
 #else
-      goto unimplemented_nowarn;
+        ret = get_errno(do_set_thread_area(cpu_env, arg1));
 #endif
+        break;
 #endif
 #ifdef TARGET_NR_get_thread_area
     case TARGET_NR_get_thread_area:
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2007-10-15 13:52:13.000000000 -0600
+++ qemu/configure	2007-10-15 13:52:13.000000000 -0600
@@ -1153,7 +1153,7 @@
 else
     if test "$nptl" = "yes" ; then
         case "$target_cpu" in
-          arm | armeb | ppc | ppc64)
+          arm | armeb | i386 | ppc | ppc64)
             echo "#define USE_NPTL 1" >> $config_h
           ;;
         esac
Index: qemu/target-i386/cpu.h
===================================================================
--- qemu.orig/target-i386/cpu.h	2007-10-15 13:52:06.000000000 -0600
+++ qemu/target-i386/cpu.h	2007-10-15 13:52:13.000000000 -0600
@@ -567,6 +567,9 @@
 int cpu_get_pic_interrupt(CPUX86State *s);
 /* MSDOS compatibility mode FPU exception support */
 void cpu_set_ferr(CPUX86State *s);
+#if defined(USE_NPTL)
+#define cpu_set_tls(...) do {} while(0)
+#endif
 
 /* this function must always be used to load data in the segment
    cache: it synchronizes the hflags with the segment cache values */

Reply via email to