Richard Henderson <richard.hender...@linaro.org> writes:
> Notice the magic page during translate, much like we already > do for the arm32 commpage. At runtime, raise an exception to > return cpu_loop for emulation. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/i386/cpu.h | 1 + > linux-user/i386/cpu_loop.c | 104 +++++++++++++++++++++++++++++++++++++ > target/i386/translate.c | 16 +++++- > 3 files changed, 120 insertions(+), 1 deletion(-) > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 164d038d1f..3fb2d2a986 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1000,6 +1000,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; > > #define EXCP_VMEXIT 0x100 /* only for system emulation */ > #define EXCP_SYSCALL 0x101 /* only for user emulation */ > +#define EXCP_VSYSCALL 0x102 /* only for user emulation */ > > /* i386-specific interrupt pending bits. */ > #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 > diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c > index e217cca5ee..8b7c9f7337 100644 > --- a/linux-user/i386/cpu_loop.c > +++ b/linux-user/i386/cpu_loop.c > @@ -92,6 +92,105 @@ static void gen_signal(CPUX86State *env, int sig, int > code, abi_ptr addr) > queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); > } > > +#ifdef TARGET_X86_64 > +static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) > +{ > + /* > + * For all the vsyscalls, NULL means "don't write anything" not > + * "write it at address 0". > + */ > + if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) { > + return true; > + } > + > + gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); > + return false; > +} > + > +/* > + * Since v3.1, the kernel traps and emulates the vsyscall page. > + * Entry points other than the official generate SIGSEGV. > + */ > +static void emulate_vsyscall(CPUX86State *env) > +{ > + int syscall; > + abi_ulong ret; > + uint64_t caller; > + > + /* > + * Validate the entry point. 
We have already validated the page > + * during translation, now verify the offset. > + */ > + switch (env->eip & ~TARGET_PAGE_MASK) { > + case 0x000: > + syscall = TARGET_NR_gettimeofday; > + break; > + case 0x400: > + syscall = TARGET_NR_time; > + break; > + case 0x800: > + syscall = TARGET_NR_getcpu; > + break; > + default: > + sigsegv: > + /* Like force_sig(SIGSEGV). */ > + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); > + return; > + } > + > + /* > + * Validate the return address. > + * Note that the kernel treats this the same as an invalid entry point. > + */ > + if (get_user_u64(caller, env->regs[R_ESP])) { > + goto sigsegv; > + } > + > + /* > + * Validate the the pointer arguments. > + */ > + switch (syscall) { > + case TARGET_NR_gettimeofday: > + if (!write_ok_or_segv(env, env->regs[R_EDI], > + sizeof(struct target_timeval)) || > + !write_ok_or_segv(env, env->regs[R_ESI], > + sizeof(struct target_timezone))) { > + return; > + } > + break; > + case TARGET_NR_time: > + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { > + return; > + } > + break; > + case TARGET_NR_getcpu: > + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || > + !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { > + return; > + } > + break; > + default: > + g_assert_not_reached(); > + } > + > + /* > + * Perform the syscall. None of the vsyscalls should need restarting, > + * and all faults should have been caught above. > + */ > + ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], > + env->regs[R_EDX], env->regs[10], env->regs[8], > + env->regs[9], 0, 0); How come the register ABI for this syscall is different from the others?
I can see why the syscall number doesn't come from EAX, but the other registers are a different set from those used for normal syscalls, which might be why: > + g_assert(ret != -TARGET_ERESTARTSYS); > + g_assert(ret != -TARGET_QEMU_ESIGRETURN); > + g_assert(ret != -TARGET_EFAULT); I'm seeing an EFAULT on the gettimeofday failure: #0 do_syscall (cpu_env=cpu_env@entry=0x5555577d2b10, num=num@entry=96, arg1=0, arg2=0, arg3=4211016, arg4=8, arg5=274888677184, arg6=274886295415, arg7=0, arg8=0) at /home/alex/lsrc/qemu.git/linux-user/syscall.c:12076 #1 0x0000555555609b6e in emulate_vsyscall (env=0x5555577d2b10) at /home/alex/lsrc/qemu.git/linux-user/x86_64/../i386/cpu_loop.c:180 #2 cpu_loop (env=0x5555577d2b10) at /home/alex/lsrc/qemu.git/linux-user/x86_64/../i386/cpu_loop.c:246 #3 0x000055555559640e in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at /home/alex/lsrc/qemu.git/linux-user/main.c:865 arg1/arg2 don't seem right here. > + env->regs[R_EAX] = ret; > + > + /* Emulate a ret instruction to leave the vsyscall page.
*/ > + env->eip = caller; > + env->regs[R_ESP] += 8; > +} > +#endif > + > void cpu_loop(CPUX86State *env) > { > CPUState *cs = env_cpu(env); > @@ -141,6 +240,11 @@ void cpu_loop(CPUX86State *env) > env->regs[R_EAX] = ret; > } > break; > +#endif > +#ifdef TARGET_X86_64 > + case EXCP_VSYSCALL: > + emulate_vsyscall(env); > + break; > #endif > case EXCP0B_NOSEG: > case EXCP0C_STACK: > diff --git a/target/i386/translate.c b/target/i386/translate.c > index 7c99ef1385..391b4ef149 100644 > --- a/target/i386/translate.c > +++ b/target/i386/translate.c > @@ -8555,7 +8555,21 @@ static bool i386_tr_breakpoint_check(DisasContextBase > *dcbase, CPUState *cpu, > static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) > { > DisasContext *dc = container_of(dcbase, DisasContext, base); > - target_ulong pc_next = disas_insn(dc, cpu); > + target_ulong pc_next; > + > +#if defined(TARGET_X86_64) && \ > + defined(CONFIG_USER_ONLY) && \ > + defined(CONFIG_LINUX) > + /* > + * Detect entry into the vsyscall page and invoke the syscall. > + */ > + if ((dc->base.pc_next & TARGET_PAGE_MASK) == 0xffffffffff600000ull) { > + gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next); > + return; > + } > +#endif > + > + pc_next = disas_insn(dc, cpu); > > if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) { > /* if single step mode, we generate only one instruction and -- Alex Bennée