On Fri, 2007-11-02 at 15:28 +0200, Felipe Contreras wrote: > On 10/22/07, Thayne Harbaugh <[EMAIL PROTECTED]> wrote: > > > > On Sat, 2007-10-20 at 21:34 +0100, Thiemo Seufer wrote:
<SNIP> > > > Please submit this patch (and resend what you think was missed). > > > > I'll get them reworked and re-sent soon. > > > > Thanks for the interest. > > Do you have anything that can be tried out already? Try these: 09_arm_eabitls.patch: This is for TLS on arm. We have used it extensively and it appears quite solid. It should apply to a stock CVS tree - although we use it on top of about 15 other patches so YMMV. 40_tls.patch: This is more for i386. It works mostly, but I don't trust it. It doesn't apply to a stock CVS tree. There are a few things that should be reworked on both of these patches. I haven't sent them upstream because I'm working to get foundational patches accepted first. The sooner I can get the EFAULT patches accepted, the sooner I can rework these patches and make them more acceptable to be committed. Right now I don't think either of them should be committed (although 09_arm_eabitls.patch is better than 40_tls.patch). Good luck!
Index: qemu/configure =================================================================== --- qemu.orig/configure 2007-10-15 13:52:07.000000000 -0600 +++ qemu/configure 2007-10-15 13:52:38.000000000 -0600 @@ -102,6 +102,7 @@ darwin_user="no" build_docs="no" uname_release="" +nptl="yes" # OS specific targetos=`uname -s` @@ -303,6 +304,8 @@ *) echo "undefined SPARC architecture. Exiting";exit 1;; esac ;; + --disable-nptl) nptl="no" + ;; esac done @@ -388,6 +391,7 @@ echo " --disable-linux-user disable all linux usermode emulation targets" echo " --enable-darwin-user enable all darwin usermode emulation targets" echo " --disable-darwin-user disable all darwin usermode emulation targets" +echo " --disable-nptl disable usermode NPTL guest support" echo " --fmod-lib path to FMOD library" echo " --fmod-inc path to FMOD includes" echo " --enable-uname-release=R Return R for uname -r in usermode emulation" @@ -554,6 +558,23 @@ } EOF +# check NPTL support +cat > $TMPC <<EOF +#include <sched.h> +void foo() +{ +#ifndef CLONE_SETTLS +#error bork +#endif +} +EOF + +if $cc -c -o $TMPO $TMPC 2> /dev/null ; then + : +else + nptl="no" +fi + ########################################## # SDL probe @@ -717,6 +738,7 @@ echo "Documentation $build_docs" [ ! 
-z "$uname_release" ] && \ echo "uname -r $uname_release" +echo "NPTL support $nptl" if test $sdl_too_old = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -1122,6 +1144,14 @@ echo "SDL_CFLAGS=`$sdl_config --cflags`" >> $config_mak fi fi +else + if test "$nptl" = "yes" ; then + case "$target_cpu" in + arm | armeb | ppc | ppc64) + echo "#define USE_NPTL 1" >> $config_h + ;; + esac + fi fi if test "$cocoa" = "yes" ; then Index: qemu/exec-all.h =================================================================== --- qemu.orig/exec-all.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/exec-all.h 2007-10-15 13:52:32.000000000 -0600 @@ -391,170 +391,7 @@ extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4]; extern void *io_mem_opaque[IO_MEM_NB_ENTRIES]; -#if defined(__powerpc__) -static inline int testandset (int *p) -{ - int ret; - __asm__ __volatile__ ( - "0: lwarx %0,0,%1\n" - " xor. %0,%3,%0\n" - " bne 1f\n" - " stwcx. %2,0,%1\n" - " bne- 0b\n" - "1: " - : "=&r" (ret) - : "r" (p), "r" (1), "r" (0) - : "cr0", "memory"); - return ret; -} -#elif defined(__i386__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__x86_64__) -static inline int testandset (int *p) -{ - long int readval = 0; - - __asm__ __volatile__ ("lock; cmpxchgl %2, %0" - : "+m" (*p), "+a" (readval) - : "r" (1) - : "cc"); - return readval; -} -#elif defined(__s390__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" - " jl 0b" - : "=&d" (ret) - : "r" (1), "a" (p), "0" (*p) - : "cc", "memory" ); - return ret; -} -#elif defined(__alpha__) -static inline int testandset (int *p) -{ - int ret; - unsigned long one; - - __asm__ __volatile__ ("0: mov 1,%2\n" - " ldl_l %0,%1\n" - " stl_c %2,%1\n" - " beq %2,1f\n" - ".subsection 2\n" - "1: br 0b\n" - ".previous" - : 
"=r" (ret), "=m" (*p), "=r" (one) - : "m" (*p)); - return ret; -} -#elif defined(__sparc__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__("ldstub [%1], %0" - : "=r" (ret) - : "r" (p) - : "memory"); - - return (ret ? 1 : 0); -} -#elif defined(__arm__) -static inline int testandset (int *spinlock) -{ - register unsigned int ret; - __asm__ __volatile__("swp %0, %1, [%2]" - : "=r"(ret) - : "0"(1), "r"(spinlock)); - - return ret; -} -#elif defined(__mc68000) -static inline int testandset (int *p) -{ - char ret; - __asm__ __volatile__("tas %1; sne %0" - : "=r" (ret) - : "m" (p) - : "cc","memory"); - return ret; -} -#elif defined(__ia64) - -#include <ia64intrin.h> - -static inline int testandset (int *p) -{ - return __sync_lock_test_and_set (p, 1); -} -#elif defined(__mips__) -static inline int testandset (int *p) -{ - int ret; - - __asm__ __volatile__ ( - " .set push \n" - " .set noat \n" - " .set mips2 \n" - "1: li $1, 1 \n" - " ll %0, %1 \n" - " sc $1, %1 \n" - " beqz $1, 1b \n" - " .set pop " - : "=r" (ret), "+R" (*p) - : - : "memory"); - - return ret; -} -#else -#error unimplemented CPU support -#endif - -typedef int spinlock_t; - -#define SPIN_LOCK_UNLOCKED 0 - -#if defined(CONFIG_USER_ONLY) -static inline void spin_lock(spinlock_t *lock) -{ - while (testandset(lock)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - *lock = 0; -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return !testandset(lock); -} -#else -static inline void spin_lock(spinlock_t *lock) -{ -} - -static inline void spin_unlock(spinlock_t *lock) -{ -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return 1; -} -#endif +#include "spinlock.h" extern spinlock_t tb_lock; Index: qemu/linux-user/arm/syscall.h =================================================================== --- qemu.orig/linux-user/arm/syscall.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/linux-user/arm/syscall.h 2007-10-15 13:52:13.000000000 -0600 @@ -28,7 +28,9 @@ 
#define ARM_SYSCALL_BASE 0x900000 #define ARM_THUMB_SYSCALL 0 -#define ARM_NR_cacheflush (ARM_SYSCALL_BASE + 0xf0000 + 2) +#define ARM_NR_BASE 0xf0000 +#define ARM_NR_cacheflush (ARM_NR_BASE + 2) +#define ARM_NR_set_tls (ARM_NR_BASE + 5) #define ARM_NR_semihosting 0x123456 #define ARM_NR_thumb_semihosting 0xAB Index: qemu/linux-user/main.c =================================================================== --- qemu.orig/linux-user/main.c 2007-10-15 13:52:13.000000000 -0600 +++ qemu/linux-user/main.c 2007-10-15 13:52:41.000000000 -0600 @@ -325,6 +325,50 @@ } } +/* Handle a jump to the kernel code page. */ +static int +do_kernel_trap(CPUARMState *env) +{ + uint32_t addr; + uint32_t *ptr; + uint32_t cpsr; + + switch (env->regs[15]) { + case 0xffff0fc0: /* __kernel_cmpxchg */ + /* XXX: This only works between threads, not between processes. + Use native atomic operations. */ + /* ??? This probably breaks horribly if the access segfaults. */ + cpu_lock(); + ptr = (uint32_t *)env->regs[2]; + cpsr = cpsr_read(env); + if (*ptr == env->regs[0]) { + *ptr = env->regs[1]; + env->regs[0] = 0; + cpsr |= CPSR_C; + } else { + env->regs[0] = -1; + cpsr &= ~CPSR_C; + } + cpsr_write(env, cpsr, CPSR_C); + cpu_unlock(); + break; + case 0xffff0fe0: /* __kernel_get_tls */ + env->regs[0] = env->cp15.c13_tls; + break; + default: + return 1; + } + /* Jump back to the caller. 
*/ + addr = env->regs[14]; + if (addr & 1) { + env->thumb = 1; + addr &= ~1; + } + env->regs[15] = addr; + + return 0; +} + void cpu_loop(CPUARMState *env) { int trapnr; @@ -381,10 +425,8 @@ } } - if (n == ARM_NR_cacheflush) { - arm_cache_flush(env->regs[0], env->regs[1]); - } else if (n == ARM_NR_semihosting - || n == ARM_NR_thumb_semihosting) { + if (n == ARM_NR_semihosting + || n == ARM_NR_thumb_semihosting) { env->regs[0] = do_arm_semihosting (env); } else if (n == 0 || n >= ARM_SYSCALL_BASE || (env->thumb && n == ARM_THUMB_SYSCALL)) { @@ -395,6 +437,26 @@ n -= ARM_SYSCALL_BASE; env->eabi = 0; } + if (n > ARM_NR_BASE) { + switch (n) + { + case ARM_NR_cacheflush: + arm_cache_flush(env->regs[0], env->regs[1]); + break; +#ifdef USE_NPTL + case ARM_NR_set_tls: + cpu_set_tls(env, env->regs[0]); + env->regs[0] = 0; + break; +#endif + default: + printf ("Error: Bad syscall: %x\n", n); + env->regs[0] = -TARGET_ENOSYS; + goto error; + } + } + else + { env->regs[0] = do_syscall(env, n, env->regs[0], @@ -403,7 +465,9 @@ env->regs[3], env->regs[4], env->regs[5]); + } } else { + printf ("Error: Bad syscall: %x\n", n); goto error; } } @@ -441,6 +505,10 @@ } } break; + case EXCP_KERNEL_TRAP: + if (do_kernel_trap(env)) + goto error; + break; default: error: fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n", @@ -2069,6 +2137,10 @@ for(i = 0; i < 16; i++) { env->regs[i] = regs->uregs[i]; } + /* Register the magic kernel code page. The cpu will generate a + special exception when it tries to execute code here. We can't + put real code here because it may be in use by the host kernel. 
*/ + page_set_flags(0xffff0000, 0xffff0fff, 0); } #elif defined(TARGET_SPARC) { Index: qemu/linux-user/qemu.h =================================================================== --- qemu.orig/linux-user/qemu.h 2007-10-15 13:52:13.000000000 -0600 +++ qemu/linux-user/qemu.h 2007-10-15 13:52:32.000000000 -0600 @@ -84,6 +84,9 @@ #endif int used; /* non zero if used */ struct image_info *info; +#ifdef USE_NPTL + uint32_t *child_tidptr; +#endif uint8_t stack[0]; } __attribute__((aligned(16))) TaskState; Index: qemu/linux-user/syscall.c =================================================================== --- qemu.orig/linux-user/syscall.c 2007-10-15 13:52:13.000000000 -0600 +++ qemu/linux-user/syscall.c 2007-10-15 13:52:39.000000000 -0600 @@ -70,9 +70,18 @@ #include <linux/kd.h> #include "qemu.h" +#include "spinlock.h" //#define DEBUG +#ifdef USE_NPTL +#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \ + CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) +#else +/* XXX: Hardcode the above values. */ +#define CLONE_NPTL_FLAGS2 0 +#endif + #if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC) \ || defined(TARGET_M68K) || defined(TARGET_SH4) /* 16 bit uid wrappers emulation */ @@ -2313,9 +2322,19 @@ thread/process */ #define NEW_STACK_SIZE 8192 +#ifdef USE_NPTL +static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED; +#endif + static int clone_func(void *arg) { CPUState *env = arg; +#ifdef USE_NPTL + /* Wait until the parent has finshed initializing the tls state. */ + while (!spin_trylock(&nptl_lock)) + usleep(1); + spin_unlock(&nptl_lock); +#endif cpu_loop(env); /* never exits */ return 0; @@ -2324,11 +2343,19 @@ /* do_fork() Must return host values and target errnos (unlike most do_*() functions). 
*/ -int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp) +int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp, + uint32_t *parent_tidptr, void *newtls, + uint32_t *child_tidptr) { int ret; TaskState *ts; uint8_t *new_stack; CPUState *new_env; +#ifdef USE_NPTL + unsigned int nptl_flags; + + if (flags & CLONE_PARENT_SETTID) + *parent_tidptr = gettid(); +#endif if (flags & CLONE_VM) { ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE); @@ -2396,16 +2423,64 @@ #error unsupported target CPU #endif new_env->opaque = ts; +#ifdef USE_NPTL + nptl_flags = flags; + flags &= ~CLONE_NPTL_FLAGS2; + + if (nptl_flags & CLONE_CHILD_CLEARTID) { + ts->child_tidptr = child_tidptr; + } + + if (nptl_flags & CLONE_SETTLS) + cpu_set_tls (new_env, newtls); + + /* Grab the global cpu lock so that the thread setup appears + atomic. */ + if (nptl_flags & CLONE_CHILD_SETTID) + spin_lock(&nptl_lock); + +#else + if (flags & CLONE_NPTL_FLAGS2) + return -EINVAL; +#endif #ifdef __ia64__ ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); #else ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); #endif +#ifdef USE_NPTL + if (ret != -1) { + if (nptl_flags & CLONE_CHILD_SETTID) + *child_tidptr = ret; + } + + /* Allow the child to continue. */ + if (nptl_flags & CLONE_CHILD_SETTID) + spin_unlock(&nptl_lock); +#endif } else { /* if no CLONE_VM, we consider it is a fork */ - if ((flags & ~CSIGNAL) != 0) + if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) return -EINVAL; ret = fork(); +#ifdef USE_NPTL + /* There is a race condition here. The parent process could + theoretically read the TID in the child process before the child + tid is set. This would require using either ptrace + (not implemented) or having *_tidptr to point at a shared memory + mapping. We can't repeat the spinlock hack used above because + the child process gets its own copy of the lock. */ + if (ret == 0) { + /* Child Process. 
*/ + if (flags & CLONE_CHILD_SETTID) + *child_tidptr = gettid(); + ts = (TaskState *)env->opaque; + if (flags & CLONE_CHILD_CLEARTID) + ts->child_tidptr = child_tidptr; + if (flags & CLONE_SETTLS) + cpu_set_tls (env, newtls); + } +#endif } return ret; } @@ -2727,7 +2802,7 @@ ret = do_brk(arg1); break; case TARGET_NR_fork: - ret = get_errno(do_fork(cpu_env, SIGCHLD, 0)); + ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, NULL, NULL, NULL)); break; #ifdef TARGET_NR_waitpid case TARGET_NR_waitpid: @@ -4170,7 +4245,8 @@ ret = get_errno(fsync(arg1)); break; case TARGET_NR_clone: - ret = get_errno(do_fork(cpu_env, arg1, arg2)); + ret = get_errno(do_fork(cpu_env, arg1, arg2, (uint32_t *)arg3, + (void *)arg4, (uint32_t *)arg5)); break; #ifdef __NR_exit_group /* new thread calls */ @@ -4590,7 +4666,8 @@ #endif #ifdef TARGET_NR_vfork case TARGET_NR_vfork: - ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0)); + ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + NULL, NULL, NULL)); break; #endif #ifdef TARGET_NR_ugetrlimit Index: qemu/spinlock.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ qemu/spinlock.h 2007-10-15 13:52:13.000000000 -0600 @@ -0,0 +1,188 @@ +/* + * Atomic operation helper include + * + * Copyright (c) 2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef SPINLOCK_H +#define SPINLOCK_H + +#if defined(__powerpc__) +static inline int testandset (int *p) +{ + int ret; + __asm__ __volatile__ ( + "0: lwarx %0,0,%1\n" + " xor. %0,%3,%0\n" + " bne 1f\n" + " stwcx. %2,0,%1\n" + " bne- 0b\n" + "1: " + : "=&r" (ret) + : "r" (p), "r" (1), "r" (0) + : "cr0", "memory"); + return ret; +} +#elif defined(__i386__) +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__x86_64__) +static inline int testandset (int *p) +{ + long int readval = 0; + + __asm__ __volatile__ ("lock; cmpxchgl %2, %0" + : "+m" (*p), "+a" (readval) + : "r" (1) + : "cc"); + return readval; +} +#elif defined(__s390__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n" + " jl 0b" + : "=&d" (ret) + : "r" (1), "a" (p), "0" (*p) + : "cc", "memory" ); + return ret; +} +#elif defined(__alpha__) +static inline int testandset (int *p) +{ + int ret; + unsigned long one; + + __asm__ __volatile__ ("0: mov 1,%2\n" + " ldl_l %0,%1\n" + " stl_c %2,%1\n" + " beq %2,1f\n" + ".subsection 2\n" + "1: br 0b\n" + ".previous" + : "=r" (ret), "=m" (*p), "=r" (one) + : "m" (*p)); + return ret; +} +#elif defined(__sparc__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__("ldstub [%1], %0" + : "=r" (ret) + : "r" (p) + : "memory"); + + return (ret ? 
1 : 0); +} +#elif defined(__arm__) +static inline int testandset (int *spinlock) +{ + register unsigned int ret; + __asm__ __volatile__("swp %0, %1, [%2]" + : "=r"(ret) + : "0"(1), "r"(spinlock)); + + return ret; +} +#elif defined(__mc68000) +static inline int testandset (int *p) +{ + char ret; + __asm__ __volatile__("tas %1; sne %0" + : "=r" (ret) + : "m" (p) + : "cc","memory"); + return ret; +} +#elif defined(__ia64) + +#include <ia64intrin.h> + +static inline int testandset (int *p) +{ + return __sync_lock_test_and_set (p, 1); +} +#elif defined(__mips__) +static inline int testandset (int *p) +{ + int ret; + + __asm__ __volatile__ ( + " .set push \n" + " .set noat \n" + " .set mips2 \n" + "1: li $1, 1 \n" + " ll %0, %1 \n" + " sc $1, %1 \n" + " beqz $1, 1b \n" + " .set pop " + : "=r" (ret), "+R" (*p) + : + : "memory"); + + return ret; +} +#else +#error unimplemented CPU support +#endif + +typedef int spinlock_t; + +#define SPIN_LOCK_UNLOCKED 0 + +#if defined(CONFIG_USER_ONLY) +static inline void spin_lock(spinlock_t *lock) +{ + while (testandset(lock)); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + *lock = 0; +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return !testandset(lock); +} +#else +static inline void spin_lock(spinlock_t *lock) +{ +} + +static inline void spin_unlock(spinlock_t *lock) +{ +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return 1; +} +#endif + +#endif Index: qemu/target-arm/cpu.h =================================================================== --- qemu.orig/target-arm/cpu.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-arm/cpu.h 2007-10-15 13:52:13.000000000 -0600 @@ -37,6 +37,7 @@ #define EXCP_IRQ 5 #define EXCP_FIQ 6 #define EXCP_BKPT 7 +#define EXCP_KERNEL_TRAP 8 /* Jumped to kernel code page. */ typedef void ARMWriteCPFunc(void *opaque, int cp_info, int srcreg, int operand, uint32_t value); @@ -98,6 +99,7 @@ uint32_t c9_data; uint32_t c13_fcse; /* FCSE PID. 
*/ uint32_t c13_context; /* Context ID. */ + uint32_t c13_tls; /* Thread ID */ uint32_t c15_cpar; /* XScale Coprocessor Access Register */ uint32_t c15_ticonfig; /* TI925T configuration byte. */ uint32_t c15_i_max; /* Maximum D-cache dirty line index. */ @@ -174,6 +176,15 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo, void *puc); +void cpu_lock(void); +void cpu_unlock(void); +#if defined(USE_NPTL) +static inline void cpu_set_tls(CPUARMState *env, void *newtls) +{ + env->cp15.c13_tls = (uint32_t)(long)newtls; +} +#endif + #define CPSR_M (0x1f) #define CPSR_T (1 << 5) #define CPSR_F (1 << 6) @@ -185,7 +196,11 @@ #define CPSR_J (1 << 24) #define CPSR_IT_0_1 (3 << 25) #define CPSR_Q (1 << 27) -#define CPSR_NZCV (0xf << 28) +#define CPSR_V (1 << 28) +#define CPSR_C (1 << 29) +#define CPSR_Z (1 << 30) +#define CPSR_N (1 << 31) +#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V) #define CACHED_CPSR_BITS (CPSR_T | CPSR_Q | CPSR_NZCV) /* Return the current CPSR value. */ Index: qemu/target-arm/exec.h =================================================================== --- qemu.orig/target-arm/exec.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-arm/exec.h 2007-10-15 13:52:13.000000000 -0600 @@ -68,8 +68,6 @@ /* In op_helper.c */ -void cpu_lock(void); -void cpu_unlock(void); void helper_set_cp(CPUState *, uint32_t, uint32_t); uint32_t helper_get_cp(CPUState *, uint32_t); void helper_set_cp15(CPUState *, uint32_t, uint32_t); Index: qemu/target-arm/op.c =================================================================== --- qemu.orig/target-arm/op.c 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-arm/op.c 2007-10-15 13:52:13.000000000 -0600 @@ -891,6 +891,12 @@ cpu_loop_exit(); } +void OPPROTO op_kernel_trap(void) +{ + env->exception_index = EXCP_KERNEL_TRAP; + cpu_loop_exit(); +} + /* VFP support. We follow the convention used for VFP instrunctions: Single precition routines have a "s" suffix, double precision a "d" suffix. 
*/ Index: qemu/target-arm/op_mem.h =================================================================== --- qemu.orig/target-arm/op_mem.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-arm/op_mem.h 2007-10-15 13:52:13.000000000 -0600 @@ -1,5 +1,6 @@ /* ARM memory operations. */ +void helper_ld(uint32_t); /* Load from address T1 into T0. */ #define MEM_LD_OP(name) \ void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \ Index: qemu/target-arm/translate.c =================================================================== --- qemu.orig/target-arm/translate.c 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-arm/translate.c 2007-10-15 13:52:13.000000000 -0600 @@ -3555,6 +3555,15 @@ nb_gen_labels = 0; lj = -1; do { +#ifdef CONFIG_USER_ONLY + /* Intercept jump to the magic kernel page. */ + if (dc->pc > 0xffff0000) { + gen_op_kernel_trap(); + dc->is_jmp = DISAS_UPDATE; + break; + } +#endif + if (env->nb_breakpoints > 0) { for(j = 0; j < env->nb_breakpoints; j++) { if (env->breakpoints[j] == dc->pc) { Index: qemu/arm.ld =================================================================== --- qemu.orig/arm.ld 2007-10-15 13:52:07.000000000 -0600 +++ qemu/arm.ld 2007-10-15 13:52:13.000000000 -0600 @@ -26,6 +26,10 @@ { *(.rel.rodata) *(.rel.gnu.linkonce.r*) } .rela.rodata : { *(.rela.rodata) *(.rela.gnu.linkonce.r*) } + .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } + .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } + .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } + .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } .rel.got : { *(.rel.got) } .rela.got : { *(.rela.got) } .rel.ctors : { *(.rel.ctors) } Index: qemu/target-ppc/cpu.h =================================================================== --- qemu.orig/target-ppc/cpu.h 2007-10-15 13:52:07.000000000 -0600 +++ qemu/target-ppc/cpu.h 2007-10-15 13:52:13.000000000 -0600 @@ -589,6 +589,12 @@ void do_interrupt (CPUPPCState *env); void 
ppc_hw_interrupt (CPUPPCState *env); void cpu_loop_exit (void); +#if defined(USE_NPTL) +static inline void cpu_set_tls(CPUPPCState *env, void *newtls) +{ + env->gpr[2] = (uint32_t)(long)newtls; +} +#endif void dump_stack (CPUPPCState *env);
Index: qemu/linux-user/main.c =================================================================== --- qemu.orig/linux-user/main.c 2007-10-15 13:52:13.000000000 -0600 +++ qemu/linux-user/main.c 2007-10-15 13:52:29.000000000 -0600 @@ -156,7 +156,7 @@ p[1] = tswapl(e2); } -uint64_t gdt_table[6]; +uint64_t gdt_table[9]; uint64_t idt_table[256]; /* only dpl matters as we do only user space emulation */ Index: qemu/linux-user/syscall.c =================================================================== --- qemu.orig/linux-user/syscall.c 2007-10-15 13:52:13.000000000 -0600 +++ qemu/linux-user/syscall.c 2007-10-15 13:52:30.000000000 -0600 @@ -183,6 +183,7 @@ #define __NR_sys_symlinkat __NR_symlinkat #define __NR_sys_syslog __NR_syslog #define __NR_sys_tgkill __NR_tgkill +#define __NR_sys_clone __NR_clone #define __NR_sys_tkill __NR_tkill #define __NR_sys_unlinkat __NR_unlinkat #define __NR_sys_utimensat __NR_utimensat @@ -258,6 +259,7 @@ #if defined(TARGET_NR_tgkill) && defined(__NR_tgkill) _syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig) #endif +_syscall5(int,sys_clone, int, flags, void *, child_stack, int *, parent_tidptr, struct user_desc *, newtls, int *, child_tidptr) #if defined(TARGET_NR_tkill) && defined(__NR_tkill) _syscall2(int,sys_tkill,int,tid,int,sig) #endif @@ -2416,6 +2418,81 @@ return ret; } +int do_set_thread_area(CPUX86State *env, target_ulong ptr) +{ + uint64_t *gdt_table = g2h(env->gdt.base); + struct target_modify_ldt_ldt_s ldt_info; + struct target_modify_ldt_ldt_s *target_ldt_info; + int seg_32bit, contents, read_exec_only, limit_in_pages; + int seg_not_present, useable; + uint32_t *lp, entry_1, entry_2; + int i; + SegmentCache *sc = &env->segs[R_GS]; + + lock_user_struct(target_ldt_info, ptr, 1); + ldt_info.entry_number = tswap32(target_ldt_info->entry_number); + ldt_info.base_addr = tswapl(target_ldt_info->base_addr); + ldt_info.limit = tswap32(target_ldt_info->limit); + ldt_info.flags = tswap32(target_ldt_info->flags); + if 
(ldt_info.entry_number == -1) { + for (i=6; i<8; i++) + if (gdt_table[i] == 0) { + ldt_info.entry_number = i; + target_ldt_info->entry_number = tswap32(i); + break; + } + } + unlock_user_struct(target_ldt_info, ptr, 0); + + if (ldt_info.entry_number < 6 || ldt_info.entry_number > 8) + return -EINVAL; + seg_32bit = ldt_info.flags & 1; + contents = (ldt_info.flags >> 1) & 3; + read_exec_only = (ldt_info.flags >> 3) & 1; + limit_in_pages = (ldt_info.flags >> 4) & 1; + seg_not_present = (ldt_info.flags >> 5) & 1; + useable = (ldt_info.flags >> 6) & 1; + + if (contents == 3) { + if (seg_not_present == 0) + return -EINVAL; + } + + /* NOTE: same code as Linux kernel */ + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if ((contents == 0 && + read_exec_only == 1 && + seg_32bit == 0 && + limit_in_pages == 0 && + seg_not_present == 1 && + useable == 0 )) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + entry_2 = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000) >> 16) | + (ldt_info.limit & 0xf0000) | + ((read_exec_only ^ 1) << 9) | + (contents << 10) | + ((seg_not_present ^ 1) << 15) | + (seg_32bit << 22) | + (limit_in_pages << 23) | + (useable << 20) | + 0x7000; + + /* Install the new entry ... */ +install: + lp = (uint32_t *)(gdt_table + ldt_info.entry_number); + lp[0] = tswap32(entry_1); + lp[1] = tswap32(entry_2); + return 0; +} #endif /* defined(TARGET_I386) */ /* this stack is the equivalent of the kernel stack associated with a @@ -2426,40 +2503,62 @@ static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED; #endif -static int clone_func(void *arg) +static int clone_func(CPUState *cloneenv) { - CPUState *env = arg; #ifdef USE_NPTL /* Wait until the parent has finshed initializing the tls state. 
*/ while (!spin_trylock(&nptl_lock)) usleep(1); spin_unlock(&nptl_lock); #endif - cpu_loop(env); + cpu_loop(cloneenv); /* never exits */ return 0; } +#ifdef __ia64__ +#define clone(...) __clone2(__VA_ARGS__) +#endif + /* do_fork() Must return host values and target errnos (unlike most * do_*() functions). */ int do_fork(CPUState *env, unsigned int flags, target_ulong newsp, - uint32_t *parent_tidptr, void *newtls, - uint32_t *child_tidptr) + target_ulong parent_tidptr, target_ulong newtls, + target_ulong child_tidptr) { int ret; TaskState *ts; uint8_t *new_stack; CPUState *new_env; #ifdef USE_NPTL + unsigned long parent_tid=gettid(); +#if defined(TARGET_I386) + uint64_t *new_gdt_table; +#endif unsigned int nptl_flags; - if (flags & CLONE_PARENT_SETTID) - *parent_tidptr = gettid(); + /* check for invalid combinations */ + if (((flags & CLONE_PARENT_SETTID) && !parent_tidptr) + || ((flags & CLONE_CHILD_SETTID) && !child_tidptr)) + return -EINVAL; + + if (flags & CLONE_CHILD_SETTID + && !access_ok(VERIFY_WRITE, child_tidptr, sizeof(target_ulong))) + return -EFAULT; + + if (flags & CLONE_PARENT_SETTID + && !access_ok(VERIFY_WRITE, parent_tidptr, sizeof(target_ulong))) { + return -EFAULT; + if (flags & CLONE_PARENT_SETTID) + tput32(parent_tidptr, parent_tid); + } #endif if (flags & CLONE_VM) { ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE); + if (!ts) + return -ENOMEM; memset(ts, 0, sizeof(TaskState)); new_stack = ts->stack; ts->used = 1; @@ -2471,6 +2570,31 @@ #if defined(TARGET_I386) if (!newsp) newsp = env->regs[R_ESP]; +#ifdef USE_NPTL + new_gdt_table = malloc(9 * 8); + if (!new_gdt_table) { + free(ts); + free(new_env); + return -ENOMEM; + } + /* Copy main GDT table from parent, but clear TLS entries */ + memcpy(new_gdt_table, g2h(env->gdt.base), 6 * 8); + memset(&new_gdt_table[6], 0, 3 * 8); + new_env->gdt.base = h2g(new_gdt_table); + if (flags & CLONE_SETTLS) { + ret = do_set_thread_area(new_env, newtls); + if (ret) { + free(ts); + free(new_env); + 
free(new_gdt_table); + return ret; + } + } +#endif /* USE_NPTL */ + + cpu_x86_load_seg(new_env, R_FS, new_env->segs[R_FS].selector); + cpu_x86_load_seg(new_env, R_GS, new_env->segs[R_GS].selector); + new_env->regs[R_ESP] = newsp; new_env->regs[R_EAX] = 0; #elif defined(TARGET_ARM) @@ -2540,18 +2664,22 @@ spin_lock(&nptl_lock); #else - if (flags & CLONE_NPTL_FLAGS2) + if (flags & CLONE_NPTL_FLAGS2) { + free(ts); + free(new_env); +#ifdef USE_NPTL + free(new_gdt_table); +#endif return -EINVAL; + } #endif -#ifdef __ia64__ - ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); -#else ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env); -#endif #ifdef USE_NPTL - if (ret != -1) { - if (nptl_flags & CLONE_CHILD_SETTID) - *child_tidptr = ret; + if (ret != -1 && nptl_flags & CLONE_CHILD_SETTID) { + if (ret==0) // only in client memory for fork() + tput32(child_tidptr, gettid()); + else if (flags & CLONE_VM) // real threads need it too + tput32(child_tidptr, ret); } /* Allow the child to continue. */ @@ -2562,7 +2690,7 @@ /* if no CLONE_VM, we consider it is a fork */ if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) return -EINVAL; - ret = fork(); + ret = sys_clone(flags & ~CLONE_NPTL_FLAGS2, 0, g2h(parent_tidptr), NULL, g2h(child_tidptr)); #ifdef USE_NPTL /* There is a race condition here. The parent process could theoretically read the TID in the child process before the child @@ -2570,10 +2698,13 @@ (not implemented) or having *_tidptr to point at a shared memory mapping. We can't repeat the spinlock hack used above because the child process gets its own copy of the lock. */ + if (flags & CLONE_CHILD_SETTID) { + if (ret == 0) + tput32(child_tidptr, gettid()); + else if (flags & CLONE_VM) + tput32(child_tidptr, ret); + } if (ret == 0) { - /* Child Process. 
*/ - if (flags & CLONE_CHILD_SETTID) - *child_tidptr = gettid(); ts = (TaskState *)env->opaque; if (flags & CLONE_CHILD_CLEARTID) ts->child_tidptr = child_tidptr; @@ -2585,6 +2716,10 @@ return ret; } +#ifdef __ia64__ +#undef clone +#endif + static target_long do_fcntl(int fd, int cmd, target_ulong arg) { struct flock fl; @@ -2940,7 +3075,7 @@ _mcleanup(); #endif gdb_exit(cpu_env, arg1); - /* XXX: should free thread stack and CPU env */ + /* XXX: should free thread stack, GDT and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ break; @@ -5420,12 +5555,12 @@ #ifdef TARGET_NR_set_thread_area case TARGET_NR_set_thread_area: #ifdef TARGET_MIPS - ((CPUMIPSState *) cpu_env)->tls_value = arg1; - ret = 0; - break; + ((CPUMIPSState *) cpu_env)->tls_value = arg1; + ret = 0; #else - goto unimplemented_nowarn; + ret = get_errno(do_set_thread_area(cpu_env, arg1)); #endif + break; #endif #ifdef TARGET_NR_get_thread_area case TARGET_NR_get_thread_area: Index: qemu/configure =================================================================== --- qemu.orig/configure 2007-10-15 13:52:13.000000000 -0600 +++ qemu/configure 2007-10-15 13:52:13.000000000 -0600 @@ -1153,7 +1153,7 @@ else if test "$nptl" = "yes" ; then case "$target_cpu" in - arm | armeb | ppc | ppc64) + arm | armeb | i386 | ppc | ppc64) echo "#define USE_NPTL 1" >> $config_h ;; esac Index: qemu/target-i386/cpu.h =================================================================== --- qemu.orig/target-i386/cpu.h 2007-10-15 13:52:06.000000000 -0600 +++ qemu/target-i386/cpu.h 2007-10-15 13:52:13.000000000 -0600 @@ -567,6 +567,9 @@ int cpu_get_pic_interrupt(CPUX86State *s); /* MSDOS compatibility mode FPU exception support */ void cpu_set_ferr(CPUX86State *s); +#if defined(USE_NPTL) +#define cpu_set_tls(...) do {} while(0) +#endif /* this function must always be used to load data in the segment cache: it synchronizes the hflags with the segment cache values */