This is an automated email from the ASF dual-hosted git repository. acassis pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nuttx.git
commit d51ceccd7b64a1b04835cb6caece8fadf24263ea Author: p-szafonimateusz <[email protected]> AuthorDate: Sat Jul 20 11:58:54 2024 +0200 arch/x86_64: add syscalls support arch/x86_64: add syscalls support Signed-off-by: p-szafonimateusz <[email protected]> --- arch/x86_64/include/intel64/arch.h | 69 +++++++-- arch/x86_64/include/intel64/irq.h | 18 +++ arch/x86_64/include/irq.h | 17 +++ arch/x86_64/include/syscall.h | 52 ++++--- arch/x86_64/src/common/CMakeLists.txt | 4 + arch/x86_64/src/common/Make.defs | 4 + arch/x86_64/src/common/x86_64_internal.h | 2 +- arch/x86_64/src/common/x86_64_syscall.c | 163 +++++++++++++++++++++ arch/x86_64/src/intel64/intel64_cpu.c | 40 +++++ .../src/intel64/intel64_fullcontextrestore.S | 5 +- arch/x86_64/src/intel64/intel64_head.S | 152 ++++++++++++++++++- arch/x86_64/src/intel64/intel64_saveusercontext.S | 9 ++ arch/x86_64/src/intel64/intel64_start.c | 4 - arch/x86_64/src/intel64/intel64_vectors.S | 2 +- 14 files changed, 488 insertions(+), 53 deletions(-) diff --git a/arch/x86_64/include/intel64/arch.h b/arch/x86_64/include/intel64/arch.h index 1d0cff6057..f562e73046 100644 --- a/arch/x86_64/include/intel64/arch.h +++ b/arch/x86_64/include/intel64/arch.h @@ -83,7 +83,12 @@ /* GDT Definitions */ -/* Starting from third selector to confirm the syscall interface */ +/* Selector configuration must comply with the requirements of SYSCALL + * and SYSRET instructions. These definitions must match the GDT format + * in intel64_head.S:g_gdt64_low. + * + * For details look at the comment in intel64_cpu.c about MSR_STAR write. 
+ */ #define X86_GDT_ENTRY_SIZE 0x8 @@ -93,24 +98,44 @@ #define X86_GDT_DATA_SEL_NUM 2 # define X86_GDT_DATA_SEL (X86_GDT_DATA_SEL_NUM * X86_GDT_ENTRY_SIZE) -/* The first TSS entry */ - -#define X86_GDT_ISTL_SEL_NUM 6 -#define X86_GDT_ISTH_SEL_NUM (X86_GDT_ISTL_SEL_NUM + 1) +#define X86_GDT_USERDATA_SEL_NUM 6 +# define X86_GDT_USERDATA_SEL (X86_GDT_USERDATA_SEL_NUM * X86_GDT_ENTRY_SIZE) -#define X86_GDT_BASE 0x0000000000000000 -#define X86_GDT_LIMIT 0x000f00000000ffff +#define X86_GDT_USERCODE_SEL_NUM 7 +# define X86_GDT_USERCODE_SEL (X86_GDT_USERCODE_SEL_NUM * X86_GDT_ENTRY_SIZE) -#define X86_GDT_FLAG_LONG 0x0020000000000000 - -#define X86_GDT_ACC_PR 0x0000800000000000 -#define X86_GDT_ACC_SEG 0x0000100000000000 -#define X86_GDT_ACC_EX 0x0000080000000000 -#define X86_GDT_ACC_WR 0x0000020000000000 +/* The first TSS entry */ -#define X86_GDT_CODE64_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + X86_GDT_ACC_EX) -#define X86_GDT_CODE32_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + X86_GDT_ACC_EX) -#define X86_GDT_DATA_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + X86_GDT_ACC_WR) +#define X86_GDT_ISTL_SEL_NUM 8 +#define X86_GDT_ISTH_SEL_NUM (X86_GDT_ISTL_SEL_NUM + 1) + +#define X86_GDT_BASE 0x0000000000000000 +#define X86_GDT_LIMIT 0x000f00000000ffff + +#define X86_GDT_FLAG_LONG 0x0020000000000000 + +#define X86_GDT_ACC_PR 0x0000800000000000 +#define X86_GDT_ACC_USER 0x0000600000000000 +#define X86_GDT_ACC_SEG 0x0000100000000000 +#define X86_GDT_ACC_EX 0x0000080000000000 +#define X86_GDT_ACC_WR 0x0000020000000000 + +#define X86_GDT_CODE64_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + \ + X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + \ + X86_GDT_ACC_SEG + X86_GDT_ACC_EX) +#define X86_GDT_CODE32_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + \ + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \ + X86_GDT_ACC_EX) +#define X86_GDT_DATA_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + \ + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \ 
+ X86_GDT_ACC_WR) +#define X86_GDT_CODEUSER_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + \ + X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + \ + X86_GDT_ACC_SEG + X86_GDT_ACC_EX + \ + X86_GDT_ACC_USER) +#define X86_GDT_DATAUSER_ENTRY (X86_GDT_BASE + X86_GDT_LIMIT + \ + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \ + X86_GDT_ACC_WR + X86_GDT_ACC_USER) /* CR0 Definitions */ @@ -213,9 +238,20 @@ /* MSR Definitions */ +#define MSR_STAR 0xc0000081 +#define MSR_STAR_CSSYSCALL(x) (((uint64_t)x) << 32) +#define MSR_STAR_CSSYSRET(x) (((uint64_t)x) << 48) + +#define MSR_LSTAR 0xc0000082 /* Target RIP for PM64 callers */ +#define MSR_CSTAR 0xc0000083 /* Target RIP for CM callers */ +#define MSR_FMASK 0xc0000084 /* RFLAGS mask for SYSCALL */ + #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNELGS_BASE 0xc0000102 /* kernel GS base (for SWAPGS) */ #define MSR_EFER 0xc0000080 +# define EFER_SCE 0x00000001 # define EFER_LME 0x00000100 #define MSR_MTRR_DEF_TYPE 0x000002ff @@ -508,6 +544,7 @@ void x86_64_check_and_enable_capability(void); extern void __enable_sse_avx(void); extern void __revoke_low_memory(void); extern void __enable_pcid(void); +extern void x86_64_syscall_entry(void); #ifdef __cplusplus #define EXTERN extern "C" diff --git a/arch/x86_64/include/intel64/irq.h b/arch/x86_64/include/intel64/irq.h index 03764c4418..7dcb0e102e 100644 --- a/arch/x86_64/include/intel64/irq.h +++ b/arch/x86_64/include/intel64/irq.h @@ -499,6 +499,15 @@ enum ioapic_trigger_mode TRIGGER_LEVEL_ACTIVE_LOW = (1 << 15) | (1 << 13), }; +/* This structure represents the return state from a system call */ + +#ifdef CONFIG_LIB_SYSCALL +struct xcpt_syscall_s +{ + uintptr_t sysreturn; /* The return address */ +}; +#endif + /* This struct defines the way the registers are stored */ struct xcptcontext @@ -514,6 +523,15 @@ struct xcptcontext /* Register save area - allocated from stack in up_initial_state() */ uint64_t *regs; + +#ifdef CONFIG_LIB_SYSCALL + /* 
The following array holds information needed to return from each nested + * system call. + */ + + uint8_t nsyscalls; + struct xcpt_syscall_s syscall[CONFIG_SYS_NNEST]; +#endif }; #endif diff --git a/arch/x86_64/include/irq.h b/arch/x86_64/include/irq.h index 13cd9ead59..92024a910f 100644 --- a/arch/x86_64/include/irq.h +++ b/arch/x86_64/include/irq.h @@ -45,6 +45,13 @@ # include <arch/intel64/irq.h> #endif +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +#define X86_64_CPUPRIV_USTACK_OFFSET (16) +#define X86_64_CPUPRIV_UVBASE_OFFSET (24) + /**************************************************************************** * Public Data ****************************************************************************/ @@ -65,6 +72,16 @@ struct intel64_cpu_s */ uint64_t *current_regs; + +#ifdef CONFIG_LIB_SYSCALL + /* Current user RSP for syscall */ + + uint64_t *ustack; + + /* Userspace virtual address */ + + uint64_t *uvbase; +#endif }; /**************************************************************************** diff --git a/arch/x86_64/include/syscall.h b/arch/x86_64/include/syscall.h index 6e35fc0bb9..5e27629991 100644 --- a/arch/x86_64/include/syscall.h +++ b/arch/x86_64/include/syscall.h @@ -36,6 +36,10 @@ * Pre-processor Definitions ****************************************************************************/ +/* Configuration ************************************************************/ + +#define CONFIG_SYS_RESERVED 0 + /**************************************************************************** * Public Types ****************************************************************************/ @@ -57,15 +61,6 @@ extern "C" #define EXTERN extern #endif -void enable_syscall(void); -void syscall_entry(void); -uint64_t syscall_handler(unsigned long nbr, uintptr_t parm1, uintptr_t parm2, - uintptr_t parm3, uintptr_t parm4, uintptr_t 
parm5, - uintptr_t parm6); -uint64_t linux_interface(unsigned long nbr, uintptr_t parm1, uintptr_t parm2, - uintptr_t parm3, uintptr_t parm4, uintptr_t parm5, - uintptr_t parm6); - /* SWI with SYS_ call number and six parameters */ static inline uintptr_t sys_call6(unsigned int nbr, uintptr_t parm1, @@ -126,24 +121,33 @@ static inline uintptr_t sys_call6(unsigned int nbr, uintptr_t parm1, uintptr_t parm4, uintptr_t parm5, uintptr_t parm6) { - register uint64_t reg0 __asm__("rax") = (uint64_t)(nbr); - register uint64_t reg1 __asm__("rdi") = (uint64_t)(parm1); - register uint64_t reg2 __asm__("rsi") = (uint64_t)(parm2); - register uint64_t reg3 __asm__("rdx") = (uint64_t)(parm3); - register uint64_t reg4 __asm__("r10") = (uint64_t)(parm4); - register uint64_t reg5 __asm__("r8") = (uint64_t)(parm5); - register uint64_t reg6 __asm__("r9") = (uint64_t)(parm6); - - __asm__ __volatile__ + uint64_t ret; + + /* Registers modified by syscall instruction: + * RCX = RIP + * R11 = RFLAGS + * RIP = IA32_LSTAR (x86_64_syscall_entry) + */ + + __asm__ volatile ( - "syscall" - : "=r"(reg0) - : "r"(reg0), "r"(reg1), "r"(reg2), - "r"(reg3), "r"(reg4), "r"(reg5), "r"(reg6) - : "memory" + "movq %1, %%rax\n" + "movq %2, %%rdi\n" + "movq %3, %%rsi\n" + "movq %4, %%rdx\n" + "movq %5, %%r10\n" + "movq %6, %%r8\n" + "movq %7, %%r9\n" + "syscall\n" + "movq %%rax, %0\n" + : "=r"(ret) + : "rm"(nbr), "rm"(parm1), "rm"(parm2), + "rm"(parm3), "rm"(parm4), "rm"(parm5), + "rm"(parm6) + : "memory", "rcx", "r11" ); - return reg0; + return ret; } #undef EXTERN diff --git a/arch/x86_64/src/common/CMakeLists.txt b/arch/x86_64/src/common/CMakeLists.txt index 37d8dc1f3a..8e11fb3fa3 100644 --- a/arch/x86_64/src/common/CMakeLists.txt +++ b/arch/x86_64/src/common/CMakeLists.txt @@ -35,6 +35,10 @@ if(CONFIG_ARCH_HAVE_FORK) list(APPEND SRCS x86_64_fork.c fork.S) endif() +if(CONFIG_LIB_SYSCALL) + list(APPEND SRCS x86_64_syscall.c) +endif() + if(CONFIG_PCI) list(APPEND SRCS x86_64_pci.c) endif() diff --git 
a/arch/x86_64/src/common/Make.defs b/arch/x86_64/src/common/Make.defs index 52c74c6ba4..a13057704e 100644 --- a/arch/x86_64/src/common/Make.defs +++ b/arch/x86_64/src/common/Make.defs @@ -30,6 +30,10 @@ CMN_CSRCS += x86_64_fork.c CMN_ASRCS += fork.S endif +ifeq ($(CONFIG_LIB_SYSCALL),y) +CMN_CSRCS += x86_64_syscall.c +endif + ifeq ($(CONFIG_PCI),y) CMN_CSRCS += x86_64_pci.c endif diff --git a/arch/x86_64/src/common/x86_64_internal.h b/arch/x86_64/src/common/x86_64_internal.h index 878fe73b81..b1f972aefc 100644 --- a/arch/x86_64/src/common/x86_64_internal.h +++ b/arch/x86_64/src/common/x86_64_internal.h @@ -232,7 +232,7 @@ void x86_64_lowputs(const char *str); void x86_64_restore_auxstate(struct tcb_s *rtcb); void x86_64_checktasks(void); -void x86_64_syscall(uint64_t *regs); +uint64_t *x86_64_syscall(uint64_t *regs); #ifdef CONFIG_ARCH_MULTIBOOT2 void x86_64_mb2_fbinitialize(struct multiboot_tag_framebuffer *tag); diff --git a/arch/x86_64/src/common/x86_64_syscall.c b/arch/x86_64/src/common/x86_64_syscall.c new file mode 100644 index 0000000000..5e52b5347c --- /dev/null +++ b/arch/x86_64/src/common/x86_64_syscall.c @@ -0,0 +1,163 @@ +/**************************************************************************** + * arch/x86_64/src/common/x86_64_syscall.c + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. The + * ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <nuttx/config.h> + +#include <inttypes.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <debug.h> +#include <syscall.h> + +#include <nuttx/addrenv.h> +#include <nuttx/arch.h> +#include <nuttx/sched.h> +#include <nuttx/addrenv.h> + +#include "x86_64_internal.h" + +/**************************************************************************** + * Private Types + ****************************************************************************/ + +/* Syscall function */ + +typedef uintptr_t (*syscall_stub_t)(int nbr, + uintptr_t parm1, uintptr_t parm2, + uintptr_t parm3, uintptr_t parm4, + uintptr_t parm5, uintptr_t parm6); + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: dump_syscall + * + * Description: + * Dump the syscall registers + * + ****************************************************************************/ + +static void dump_syscall(const char *tag, uint64_t *regs) +{ + unsigned int cmd = regs[REG_RAX]; + +#ifdef CONFIG_LIB_SYSCALL + if (cmd >= CONFIG_SYS_RESERVED) + { + svcinfo("SYSCALL %s: cmd: %d name: %s\n", tag, + cmd, g_funcnames[cmd - CONFIG_SYS_RESERVED]); + } + else +#endif + { + svcinfo("SYSCALL %s: cmd: %d\n", tag, cmd); + } + + svcinfo(" RSP: %" PRIx64 " RCX: %" PRIx64 "\n", + regs[REG_RSP], regs[REG_RCX]); +} + +/**************************************************************************** + * Public Functions + 
****************************************************************************/ + +/**************************************************************************** + * Name: x86_64_syscall + * + * Description: + * Syscall handler called from x86_64_syscall_entry(). + * Current registers stored in regs argument. + * The syscall is called with: + * + * - RAX = system call command, and + * - RDI, RSI, RDX, R10, R8, R9 = variable number of arguments depending + * on the system call. + * + ****************************************************************************/ + +uint64_t *x86_64_syscall(uint64_t *regs) +{ + unsigned int cmd = regs[REG_RAX]; + uint64_t arg1 = regs[REG_RDI]; + uint64_t arg2 = regs[REG_RSI]; + uint64_t arg3 = regs[REG_RDX]; + uint64_t arg4 = regs[REG_R10]; + uint64_t arg5 = regs[REG_R8]; + uint64_t arg6 = regs[REG_R9]; + uintptr_t ret = 0; + + /* The syscall command is in RAX on entry */ + + dump_syscall("Entry", regs); + + /* Handle the syscall according to the command in RAX */ + + switch (cmd) + { + /* This is not an architecture-specific system call. If NuttX is + * built as a standalone kernel with a system call interface, then + * all of the additional system calls must be handled as in the + * default case. 
+ */ + + default: + { + int nbr = cmd - CONFIG_SYS_RESERVED; + struct tcb_s *rtcb = nxsched_self(); + syscall_stub_t stub = (syscall_stub_t)g_stublookup[nbr]; + + DEBUGASSERT(nbr < SYS_nsyscalls); + DEBUGASSERT(rtcb->xcp.nsyscalls < CONFIG_SYS_NNEST); + + /* Setup nested syscall */ + + rtcb->xcp.syscall[rtcb->xcp.nsyscalls].sysreturn = regs[REG_RCX]; + rtcb->xcp.nsyscalls += 1; + + /* Call syscall function */ + + ret = stub(nbr, arg1, arg2, arg3, arg4, arg5, arg6); + + /* Setup return from nested syscall */ + + rtcb->xcp.nsyscalls -= 1; + regs[REG_RCX] = rtcb->xcp.syscall[rtcb->xcp.nsyscalls].sysreturn; + + break; + } + } + + dump_syscall("Exit", regs); + + /* Store return value in RAX register */ + + regs[REG_RAX] = ret; + + /* Return pointer to regs */ + + return regs; +} diff --git a/arch/x86_64/src/intel64/intel64_cpu.c b/arch/x86_64/src/intel64/intel64_cpu.c index 30fcd95f32..2a6565d984 100644 --- a/arch/x86_64/src/intel64/intel64_cpu.c +++ b/arch/x86_64/src/intel64/intel64_cpu.c @@ -242,6 +242,10 @@ void x86_64_cpu_init(void) g_cpu_priv[i].loapic_id = lapic->apic_id; g_cpu_priv[i].id = i; g_cpu_priv[i].ready = false; +#ifdef CONFIG_LIB_SYSCALL + g_cpu_priv[i].ustack = NULL; + g_cpu_priv[i].uvbase = (uint64_t *)CONFIG_ARCH_TEXT_VBASE; +#endif /* Store private CPU in TSS */ @@ -380,4 +384,40 @@ void x86_64_cpu_priv_set(uint8_t cpu) /* Store private data pointer to GSBASE */ write_gsbase((uintptr_t)&g_cpu_priv[cpu]); + +#ifdef CONFIG_LIB_SYSCALL + /* Configure SYSCALL instruction entry point */ + + write_msr(MSR_LSTAR, (uintptr_t)x86_64_syscall_entry); + + /* Configure CS selection for SYSCALL (kernel) and SYSRET (userspace). + * + * Segment selection for SYSCALL works like this: + * + * CS.Selector = IA32_STAR[47:32] + * SS.Selector := IA32_STAR[47:32] + 8 + * + * This requires that we have to fill GDT with kernel code segment + * first and after that we can put kernel data segment. 
+ * + * Segment selection for SYSRET has a really weird setup for 64-bit + * operand size: + * + * CS.Selector = IA32_STAR[63:48]+16 + * SS.Selector = IA32_STAR[63:48]+8 + * + * This requires that we have to fill GDT with user data segment + * first and after that we can put user code segment (differently + * than for kernel segments). Then this instruction needs to + * set CS segment for SYSRET at (USERDATA_SEL - 8) to work + * correctly. + */ + + write_msr(MSR_STAR, MSR_STAR_CSSYSCALL(X86_GDT_CODE_SEL) | + MSR_STAR_CSSYSRET(X86_GDT_USERDATA_SEL - 8)); + + /* Mask applied to RFLAGS when making a syscall */ + + write_msr(MSR_FMASK, X86_64_RFLAGS_IF | X86_64_RFLAGS_DF); +#endif } diff --git a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S index 55b1ee871d..79b9d64308 100644 --- a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S +++ b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S @@ -130,9 +130,12 @@ x86_64_fullcontextrestore: movq (8*REG_RAX)(%rdi), %rax - /* Restore the correct value of EAX and then return */ + /* Restore the correct value of RDI */ popq %rdi + + /* Pops 5 things at once: RIP, CS, RFLAGS RSP and SS */ + iretq .size x86_64_fullcontextrestore, . - x86_64_fullcontextrestore .end diff --git a/arch/x86_64/src/intel64/intel64_head.S b/arch/x86_64/src/intel64/intel64_head.S index 144b97f68f..d07bcdd482 100644 --- a/arch/x86_64/src/intel64/intel64_head.S +++ b/arch/x86_64/src/intel64/intel64_head.S @@ -53,6 +53,12 @@ # define X86_CR4_FPU_VAL (X86_CR4_OSXFSR | X86_CR4_XMMEXCPT) #endif +/* XSAVE header data offset */ + +#define X86_XSAVE_XSTATEBV_OFFSET 512 +#define X86_XSAVE_XCOMPBC_OFFSET 520 +#define X86_XSAVE_RESERVED0_OFFSET 528 + /* Memory Map: _sbss is the start of the BSS region (see ld.script) _ebss is * the end of the BSS region (see ld.script). The idle task stack starts at * the end of BSS and is of size CONFIG_IDLETHREAD_STACKSIZE. 
The IDLE thread @@ -72,6 +78,10 @@ .global __enable_sse_avx .global __enable_pcid .global __revoke_low_memory +#ifdef CONFIG_LIB_SYSCALL + .global x86_64_syscall_entry + .global x86_64_syscall +#endif .global __nxstart /* __nxstart is defined elsewhere */ .global nx_start /* nx_start is defined elsewhere */ .global x86_64_ap_boot /* x86_64_ap_boot is defined elsewhere */ @@ -336,7 +346,11 @@ start64_init: movl $MSR_EFER, %ecx rdmsr +#ifdef CONFIG_LIB_SYSCALL + or $(EFER_LME | EFER_SCE), %eax +#else or $EFER_LME, %eax +#endif wrmsr /* Enable paging related bits in CR0 */ @@ -520,6 +534,126 @@ __enable_pcid: .size __enable_pcid, . - __enable_pcid +#ifdef CONFIG_LIB_SYSCALL + /**************************************************************************** + * Name: x86_64_syscall_entry + * + * Description: + * Landing point for syscall instruction. + * + * At this point RFLAGS are masked with MSR_FMASK + * - IF=0 (interrupts are disabled) + * - DF=0 + * + ****************************************************************************/ + + .type x86_64_syscall_entry, @function +x86_64_syscall_entry: + /* Store current RSP on CPU private data first */ + movq %rsp, %gs:X86_64_CPUPRIV_USTACK_OFFSET + + /* Store return address - we need free register to store + * CPU context but at this point we don't have any. 
+ */ + pushq %rcx + + /* Get aligned registers area */ + movq %rsp, %rcx + sub $8, %rcx + sub $((XCPTCONTEXT_REGS + 8) * 8), %rcx + add $(0x3f), %rcx + and $(0xffffffffffffffc0), %rcx + + /* Syscall arguments */ + movq %rax, (8*REG_RAX)(%rcx) + movq %rdi, (8*REG_RDI)(%rcx) + movq %rsi, (8*REG_RSI)(%rcx) + movq %rdx, (8*REG_RDX)(%rcx) + movq %r10, (8*REG_R10)(%rcx) + movq %r8, (8*REG_R8)(%rcx) + movq %r9, (8*REG_R9)(%rcx) + + /* Callee registers */ + movq %rbx, (8*REG_RBX)(%rcx) + movq %r11, (8*REG_R11)(%rcx) + movq %r12, (8*REG_R12)(%rcx) + movq %r13, (8*REG_R13)(%rcx) + movq %r14, (8*REG_R14)(%rcx) + movq %r15, (8*REG_R15)(%rcx) + movq %rbp, (8*REG_RBP)(%rcx) + +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Save xmm registers */ + fxsaveq (%rcx) +#else +# ifdef CONFIG_ARCH_CHIP_INTEL64_QEMU + /* BUGFIX for QEMU: make sure that xsave header is zeroed! + * QEMU doesn't clear these fields during xsave, so if the memory region + * for xsave state was not cleared before use, there may be junk data there, + * that cause xrstor to crash later. 
+ */ + movq $0, (X86_XSAVE_XSTATEBV_OFFSET)(%rcx) + movq $0, (X86_XSAVE_XCOMPBC_OFFSET)(%rcx) + movq $0, (X86_XSAVE_RESERVED0_OFFSET)(%rcx) +# endif + + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xsave (%rcx) +#endif + + /* Save RCX */ + popq (8*REG_RCX)(%rcx) + + /* Store user stack pointer */ + pushq %gs:X86_64_CPUPRIV_USTACK_OFFSET + popq (8*REG_RSP)(%rcx) + + /* Move stack pointer after registers area */ + movq %rcx, %rsp + + /* Return value from syscall stored in rax */ + movq %rcx, %rdi + call x86_64_syscall + movq %rax, %rdi + +#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE + /* Restore xmm registers */ + fxrstorq (%rdi) +#else + movl $XSAVE_STATE_COMPONENTS, %eax + xor %edx, %edx + xrstor (%rdi) +#endif + + /* Restore original user RSP */ + movq (8*REG_RSP)(%rdi), %rsp + + /* Restore CPU registers - regs are on RDI now */ + movq (8*REG_RBX)(%rdi), %rbx + movq (8*REG_R11)(%rdi), %r11 + movq (8*REG_R12)(%rdi), %r12 + movq (8*REG_R13)(%rdi), %r13 + movq (8*REG_R14)(%rdi), %r14 + movq (8*REG_R15)(%rdi), %r15 + movq (8*REG_RBP)(%rdi), %rbp + + movq (8*REG_RAX)(%rdi), %rax + movq (8*REG_RSI)(%rdi), %rsi + movq (8*REG_RDX)(%rdi), %rdx + movq (8*REG_R10)(%rdi), %r10 + movq (8*REG_RCX)(%rdi), %rcx + + movq (8*REG_R11)(%rdi), %r11 + movq (8*REG_RDI)(%rdi), %rdi + + /* Return to address pointed in RCX - must be on stack */ + pushq %rcx + ret + + .size x86_64_syscall_entry, . - x86_64_syscall_entry +#endif + /**************************************************************************** * .data ****************************************************************************/ @@ -537,12 +671,18 @@ g_ist64_low: /* GDT for 64 bit long mode */ .align(16) g_gdt64_low: - .quad 0 - .quad X86_GDT_CODE64_ENTRY - .quad X86_GDT_DATA_ENTRY - .quad X86_GDT_CODE32_ENTRY - .quad X86_GDT_DATA_ENTRY - .quad X86_GDT_CODE64_ENTRY + /* Must be in sequence required by SYSCALL and SYSRET. + * For details look at the comment in intel64_cpu.c about MSR_STAR write. 
+ */ + .quad 0 /* 0x00: null descriptor */ + .quad X86_GDT_CODE64_ENTRY /* 0x08: kernel code 64 */ + .quad X86_GDT_DATA_ENTRY /* 0x10: kernel data */ + .quad X86_GDT_CODE32_ENTRY /* 0x18: kernel code 32 */ + .quad X86_GDT_DATA_ENTRY /* 0x20: kernel data */ + .quad X86_GDT_CODE64_ENTRY /* 0x28: kernel code 64 */ + .quad X86_GDT_DATAUSER_ENTRY /* 0x30: user data */ + .quad X86_GDT_CODEUSER_ENTRY /* 0x38: user code */ + g_gdt64_ist_low: /* TSS segment low + segment high per CPU */ .fill CONFIG_SMP_NCPUS * 16, 1, 0 diff --git a/arch/x86_64/src/intel64/intel64_saveusercontext.S b/arch/x86_64/src/intel64/intel64_saveusercontext.S index dd3c87ce6e..e0bd64eea4 100644 --- a/arch/x86_64/src/intel64/intel64_saveusercontext.S +++ b/arch/x86_64/src/intel64/intel64_saveusercontext.S @@ -103,6 +103,15 @@ up_saveusercontext: movq %rbp, (8*REG_RBP)(%rdi) +#ifdef CONFIG_LIB_SYSCALL + /* Save CS and SS if we support syscalls */ + xor %rax, %rax + mov %cs, %ax + movq %rax, (8*REG_CS)(%rdi) + mov %ss, %ax + movq %rax, (8*REG_SS)(%rdi) +#endif + /* Save EAX=1. This will be the "apparent" return value from this * function when context is switch back to this thread. The non-zero * return value is the indication that we have been resumed. 
diff --git a/arch/x86_64/src/intel64/intel64_start.c b/arch/x86_64/src/intel64/intel64_start.c index fdd18b15eb..e176801271 100644 --- a/arch/x86_64/src/intel64/intel64_start.c +++ b/arch/x86_64/src/intel64/intel64_start.c @@ -198,10 +198,6 @@ void __nxstart(void) x86_64_timer_calibrate_freq(); -#ifdef CONFIG_LIB_SYSCALL - enable_syscall(); -#endif - /* Store CPU IDs */ x86_64_cpu_priv_set(0); diff --git a/arch/x86_64/src/intel64/intel64_vectors.S b/arch/x86_64/src/intel64/intel64_vectors.S index 5317374572..dbd95432b2 100644 --- a/arch/x86_64/src/intel64/intel64_vectors.S +++ b/arch/x86_64/src/intel64/intel64_vectors.S @@ -923,7 +923,7 @@ irq_common: add $8, %rsp - iretq /* Pops 5 things at once: CS, RIP, RFLAGS and SS and RSP */ + iretq /* Pops 5 things at once: RIP, CS, RFLAGS RSP and SS */ .size irq_common, . - irq_common .end
