This is an automated email from the ASF dual-hosted git repository.

acassis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit d51ceccd7b64a1b04835cb6caece8fadf24263ea
Author: p-szafonimateusz <[email protected]>
AuthorDate: Sat Jul 20 11:58:54 2024 +0200

    arch/x86_64: add syscalls support
    
    arch/x86_64: add syscalls support
    
    Signed-off-by: p-szafonimateusz <[email protected]>
---
 arch/x86_64/include/intel64/arch.h                 |  69 +++++++--
 arch/x86_64/include/intel64/irq.h                  |  18 +++
 arch/x86_64/include/irq.h                          |  17 +++
 arch/x86_64/include/syscall.h                      |  52 ++++---
 arch/x86_64/src/common/CMakeLists.txt              |   4 +
 arch/x86_64/src/common/Make.defs                   |   4 +
 arch/x86_64/src/common/x86_64_internal.h           |   2 +-
 arch/x86_64/src/common/x86_64_syscall.c            | 163 +++++++++++++++++++++
 arch/x86_64/src/intel64/intel64_cpu.c              |  40 +++++
 .../src/intel64/intel64_fullcontextrestore.S       |   5 +-
 arch/x86_64/src/intel64/intel64_head.S             | 152 ++++++++++++++++++-
 arch/x86_64/src/intel64/intel64_saveusercontext.S  |   9 ++
 arch/x86_64/src/intel64/intel64_start.c            |   4 -
 arch/x86_64/src/intel64/intel64_vectors.S          |   2 +-
 14 files changed, 488 insertions(+), 53 deletions(-)

diff --git a/arch/x86_64/include/intel64/arch.h 
b/arch/x86_64/include/intel64/arch.h
index 1d0cff6057..f562e73046 100644
--- a/arch/x86_64/include/intel64/arch.h
+++ b/arch/x86_64/include/intel64/arch.h
@@ -83,7 +83,12 @@
 
 /* GDT Definitions */
 
-/* Starting from third selector to confirm the syscall interface */
+/* Selector configuration must comply with the requirements of SYSCALL
+ * and SYSRET instructions. These definitions must match the GDT format
+ * in intel64_head.S:g_gdt64_low.
+ *
+ * For details look at the comment in intel64_cpu.c about MSR_STAR write.
+ */
 
 #define X86_GDT_ENTRY_SIZE        0x8
 
@@ -93,24 +98,44 @@
 #define X86_GDT_DATA_SEL_NUM      2
 #  define X86_GDT_DATA_SEL        (X86_GDT_DATA_SEL_NUM * X86_GDT_ENTRY_SIZE)
 
-/* The first TSS entry */
-
-#define X86_GDT_ISTL_SEL_NUM    6
-#define X86_GDT_ISTH_SEL_NUM    (X86_GDT_ISTL_SEL_NUM + 1)
+#define X86_GDT_USERDATA_SEL_NUM  6
+#  define X86_GDT_USERDATA_SEL    (X86_GDT_USERDATA_SEL_NUM * 
X86_GDT_ENTRY_SIZE)
 
-#define X86_GDT_BASE      0x0000000000000000
-#define X86_GDT_LIMIT     0x000f00000000ffff
+#define X86_GDT_USERCODE_SEL_NUM  7
+#  define X86_GDT_USERCODE_SEL    (X86_GDT_USERCODE_SEL_NUM * 
X86_GDT_ENTRY_SIZE)
 
-#define X86_GDT_FLAG_LONG 0x0020000000000000
-
-#define X86_GDT_ACC_PR    0x0000800000000000
-#define X86_GDT_ACC_SEG   0x0000100000000000
-#define X86_GDT_ACC_EX    0x0000080000000000
-#define X86_GDT_ACC_WR    0x0000020000000000
+/* The first TSS entry */
 
-#define X86_GDT_CODE64_ENTRY    (X86_GDT_BASE + X86_GDT_LIMIT + 
X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + X86_GDT_ACC_SEG + X86_GDT_ACC_EX)
-#define X86_GDT_CODE32_ENTRY    (X86_GDT_BASE + X86_GDT_LIMIT + X86_GDT_ACC_PR 
+ X86_GDT_ACC_SEG + X86_GDT_ACC_EX)
-#define X86_GDT_DATA_ENTRY      (X86_GDT_BASE + X86_GDT_LIMIT + X86_GDT_ACC_PR 
+ X86_GDT_ACC_SEG + X86_GDT_ACC_WR)
+#define X86_GDT_ISTL_SEL_NUM      8
+#define X86_GDT_ISTH_SEL_NUM      (X86_GDT_ISTL_SEL_NUM + 1)
+
+#define X86_GDT_BASE              0x0000000000000000
+#define X86_GDT_LIMIT             0x000f00000000ffff
+
+#define X86_GDT_FLAG_LONG         0x0020000000000000
+
+#define X86_GDT_ACC_PR            0x0000800000000000
+#define X86_GDT_ACC_USER          0x0000600000000000
+#define X86_GDT_ACC_SEG           0x0000100000000000
+#define X86_GDT_ACC_EX            0x0000080000000000
+#define X86_GDT_ACC_WR            0x0000020000000000
+
+#define X86_GDT_CODE64_ENTRY      (X86_GDT_BASE + X86_GDT_LIMIT + \
+                                   X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + \
+                                   X86_GDT_ACC_SEG + X86_GDT_ACC_EX)
+#define X86_GDT_CODE32_ENTRY      (X86_GDT_BASE + X86_GDT_LIMIT + \
+                                   X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \
+                                   X86_GDT_ACC_EX)
+#define X86_GDT_DATA_ENTRY        (X86_GDT_BASE + X86_GDT_LIMIT + \
+                                   X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \
+                                   X86_GDT_ACC_WR)
+#define X86_GDT_CODEUSER_ENTRY    (X86_GDT_BASE + X86_GDT_LIMIT +       \
+                                   X86_GDT_FLAG_LONG + X86_GDT_ACC_PR + \
+                                   X86_GDT_ACC_SEG + X86_GDT_ACC_EX +   \
+                                   X86_GDT_ACC_USER)
+#define X86_GDT_DATAUSER_ENTRY    (X86_GDT_BASE + X86_GDT_LIMIT +     \
+                                   X86_GDT_ACC_PR + X86_GDT_ACC_SEG + \
+                                   X86_GDT_ACC_WR + X86_GDT_ACC_USER)
 
 /* CR0 Definitions */
 
@@ -213,9 +238,20 @@
 
 /* MSR Definitions */
 
+#define MSR_STAR                0xc0000081
+#define   MSR_STAR_CSSYSCALL(x) (((uint64_t)x) << 32)
+#define   MSR_STAR_CSSYSRET(x)  (((uint64_t)x) << 48)
+
+#define MSR_LSTAR               0xc0000082 /* Target RIP for PM64 callers */
+#define MSR_CSTAR               0xc0000083 /* Target RIP for CM callers */
+#define MSR_FMASK               0xc0000084 /* RFLAGS mask for SYSCALL */
+
 #define MSR_FS_BASE             0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE             0xc0000101 /* 64bit GS base */
+#define MSR_KERNELGS_BASE       0xc0000102 /* kernel GS base (for SWAPGS) */
 
 #define MSR_EFER                0xc0000080
+#  define EFER_SCE              0x00000001
 #  define EFER_LME              0x00000100
 
 #define MSR_MTRR_DEF_TYPE       0x000002ff
@@ -508,6 +544,7 @@ void x86_64_check_and_enable_capability(void);
 extern void __enable_sse_avx(void);
 extern void __revoke_low_memory(void);
 extern void __enable_pcid(void);
+extern void x86_64_syscall_entry(void);
 
 #ifdef __cplusplus
 #define EXTERN extern "C"
diff --git a/arch/x86_64/include/intel64/irq.h 
b/arch/x86_64/include/intel64/irq.h
index 03764c4418..7dcb0e102e 100644
--- a/arch/x86_64/include/intel64/irq.h
+++ b/arch/x86_64/include/intel64/irq.h
@@ -499,6 +499,15 @@ enum ioapic_trigger_mode
   TRIGGER_LEVEL_ACTIVE_LOW = (1 << 15) | (1 << 13),
 };
 
+/* This structure represents the return state from a system call */
+
+#ifdef CONFIG_LIB_SYSCALL
+struct xcpt_syscall_s
+{
+  uintptr_t sysreturn;   /* The return address */
+};
+#endif
+
 /* This struct defines the way the registers are stored */
 
 struct xcptcontext
@@ -514,6 +523,15 @@ struct xcptcontext
   /* Register save area - allocated from stack in up_initial_state() */
 
   uint64_t *regs;
+
+#ifdef CONFIG_LIB_SYSCALL
+  /* The following array holds information needed to return from each nested
+   * system call.
+   */
+
+  uint8_t nsyscalls;
+  struct xcpt_syscall_s syscall[CONFIG_SYS_NNEST];
+#endif
 };
 #endif
 
diff --git a/arch/x86_64/include/irq.h b/arch/x86_64/include/irq.h
index 13cd9ead59..92024a910f 100644
--- a/arch/x86_64/include/irq.h
+++ b/arch/x86_64/include/irq.h
@@ -45,6 +45,13 @@
 #  include <arch/intel64/irq.h>
 #endif
 
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define X86_64_CPUPRIV_USTACK_OFFSET  (16)
+#define X86_64_CPUPRIV_UVBASE_OFFSET  (24)
+
 /****************************************************************************
  * Public Data
  ****************************************************************************/
@@ -65,6 +72,16 @@ struct intel64_cpu_s
  */
 
   uint64_t *current_regs;
+
+#ifdef CONFIG_LIB_SYSCALL
+  /* Current user RSP for syscall */
+
+  uint64_t *ustack;
+
+  /* Userspace virtual address */
+
+  uint64_t *uvbase;
+#endif
 };
 
 /****************************************************************************
diff --git a/arch/x86_64/include/syscall.h b/arch/x86_64/include/syscall.h
index 6e35fc0bb9..5e27629991 100644
--- a/arch/x86_64/include/syscall.h
+++ b/arch/x86_64/include/syscall.h
@@ -36,6 +36,10 @@
  * Pre-processor Definitions
  ****************************************************************************/
 
+/* Configuration ************************************************************/
+
+#define CONFIG_SYS_RESERVED 0
+
 /****************************************************************************
  * Public Types
  ****************************************************************************/
@@ -57,15 +61,6 @@ extern "C"
 #define EXTERN extern
 #endif
 
-void enable_syscall(void);
-void syscall_entry(void);
-uint64_t syscall_handler(unsigned long nbr, uintptr_t parm1, uintptr_t parm2,
-                         uintptr_t parm3, uintptr_t parm4, uintptr_t parm5,
-                         uintptr_t parm6);
-uint64_t linux_interface(unsigned long nbr, uintptr_t parm1, uintptr_t parm2,
-                         uintptr_t parm3, uintptr_t parm4, uintptr_t parm5,
-                         uintptr_t parm6);
-
 /* SWI with SYS_ call number and six parameters */
 
 static inline uintptr_t sys_call6(unsigned int nbr, uintptr_t parm1,
@@ -126,24 +121,33 @@ static inline uintptr_t sys_call6(unsigned int nbr, 
uintptr_t parm1,
                                   uintptr_t parm4, uintptr_t parm5,
                                   uintptr_t parm6)
 {
-  register uint64_t reg0 __asm__("rax") = (uint64_t)(nbr);
-  register uint64_t reg1 __asm__("rdi") = (uint64_t)(parm1);
-  register uint64_t reg2 __asm__("rsi") = (uint64_t)(parm2);
-  register uint64_t reg3 __asm__("rdx") = (uint64_t)(parm3);
-  register uint64_t reg4 __asm__("r10") = (uint64_t)(parm4);
-  register uint64_t reg5 __asm__("r8") = (uint64_t)(parm5);
-  register uint64_t reg6 __asm__("r9") = (uint64_t)(parm6);
-
-  __asm__ __volatile__
+  uint64_t ret;
+
+  /* SYSCALL overwrites RCX (return RIP) and R11 (RFLAGS) and jumps to
+   * IA32_LSTAR (x86_64_syscall_entry).  The registers loaded by hand below
+   * are declared clobbered as well, so the compiler can neither place an
+   * input operand in one of them nor keep a live value there.
+   */
+
+  __asm__ volatile
   (
-    "syscall"
-    : "=r"(reg0)
-    : "r"(reg0), "r"(reg1), "r"(reg2),
-      "r"(reg3), "r"(reg4), "r"(reg5), "r"(reg6)
-    : "memory"
+    "movq %1, %%rax\n"
+    "movq %2, %%rdi\n"
+    "movq %3, %%rsi\n"
+    "movq %4, %%rdx\n"
+    "movq %5, %%r10\n"
+    "movq %6, %%r8\n"
+    "movq %7, %%r9\n"
+    "syscall\n"
+    "movq %%rax, %0\n"
+    : "=r"(ret)
+    : "rm"((uint64_t)nbr), "rm"(parm1), "rm"(parm2),
+      "rm"(parm3), "rm"(parm4), "rm"(parm5),
+      "rm"(parm6)
+    : "memory", "rcx", "r11", "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9"
   );
 
-  return reg0;
+  return ret;
 }
 
 #undef EXTERN
diff --git a/arch/x86_64/src/common/CMakeLists.txt 
b/arch/x86_64/src/common/CMakeLists.txt
index 37d8dc1f3a..8e11fb3fa3 100644
--- a/arch/x86_64/src/common/CMakeLists.txt
+++ b/arch/x86_64/src/common/CMakeLists.txt
@@ -35,6 +35,10 @@ if(CONFIG_ARCH_HAVE_FORK)
   list(APPEND SRCS x86_64_fork.c fork.S)
 endif()
 
+if(CONFIG_LIB_SYSCALL)
+  list(APPEND SRCS x86_64_syscall.c)
+endif()
+
 if(CONFIG_PCI)
   list(APPEND SRCS x86_64_pci.c)
 endif()
diff --git a/arch/x86_64/src/common/Make.defs b/arch/x86_64/src/common/Make.defs
index 52c74c6ba4..a13057704e 100644
--- a/arch/x86_64/src/common/Make.defs
+++ b/arch/x86_64/src/common/Make.defs
@@ -30,6 +30,10 @@ CMN_CSRCS += x86_64_fork.c
 CMN_ASRCS += fork.S
 endif
 
+ifeq ($(CONFIG_LIB_SYSCALL),y)
+CMN_CSRCS += x86_64_syscall.c
+endif
+
 ifeq ($(CONFIG_PCI),y)
 CMN_CSRCS += x86_64_pci.c
 endif
diff --git a/arch/x86_64/src/common/x86_64_internal.h 
b/arch/x86_64/src/common/x86_64_internal.h
index 878fe73b81..b1f972aefc 100644
--- a/arch/x86_64/src/common/x86_64_internal.h
+++ b/arch/x86_64/src/common/x86_64_internal.h
@@ -232,7 +232,7 @@ void x86_64_lowputs(const char *str);
 void x86_64_restore_auxstate(struct tcb_s *rtcb);
 void x86_64_checktasks(void);
 
-void x86_64_syscall(uint64_t *regs);
+uint64_t *x86_64_syscall(uint64_t *regs);
 
 #ifdef CONFIG_ARCH_MULTIBOOT2
 void x86_64_mb2_fbinitialize(struct multiboot_tag_framebuffer *tag);
diff --git a/arch/x86_64/src/common/x86_64_syscall.c 
b/arch/x86_64/src/common/x86_64_syscall.c
new file mode 100644
index 0000000000..5e52b5347c
--- /dev/null
+++ b/arch/x86_64/src/common/x86_64_syscall.c
@@ -0,0 +1,163 @@
+/****************************************************************************
+ * arch/x86_64/src/common/x86_64_syscall.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <nuttx/config.h>
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <debug.h>
+#include <syscall.h>
+
+#include <nuttx/addrenv.h>
+#include <nuttx/arch.h>
+#include <nuttx/sched.h>
+#include <nuttx/addrenv.h>
+
+#include "x86_64_internal.h"
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+/* Syscall function */
+
+typedef uintptr_t (*syscall_stub_t)(int nbr,
+                                    uintptr_t parm1, uintptr_t parm2,
+                                    uintptr_t parm3, uintptr_t parm4,
+                                    uintptr_t parm5, uintptr_t parm6);
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Name: dump_syscall
+ *
+ * Description:
+ *   Log the syscall command (with its name when in range), RSP and RCX
+ *
+ ****************************************************************************/
+
+static void dump_syscall(const char *tag, uint64_t *regs)
+{
+  unsigned int cmd = regs[REG_RAX];
+
+#ifdef CONFIG_LIB_SYSCALL
+  if (cmd >= CONFIG_SYS_RESERVED && cmd < CONFIG_SYS_RESERVED + SYS_nsyscalls)
+    {
+      svcinfo("SYSCALL %s: cmd: %u name: %s\n", tag,
+              cmd, g_funcnames[cmd - CONFIG_SYS_RESERVED]);
+    }
+  else
+#endif
+    {
+      svcinfo("SYSCALL %s: cmd: %u\n", tag, cmd);
+    }
+
+  svcinfo("  RSP: %" PRIx64 " RCX: %" PRIx64 "\n",
+          regs[REG_RSP], regs[REG_RCX]);
+}
+
+/****************************************************************************
+ * Public Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Name: x86_64_syscall
+ *
+ * Description:
+ *   Syscall handler called from x86_64_syscall_entry().
+ *   Current registers stored in regs argument.
+ *   The syscall is called with:
+ *
+ *     - RAX = system call command, and
+ *     - RDI, RSI, RDX, R10, R8, R9 = variable number of arguments depending
+ *       on the system call.
+ *
+ ****************************************************************************/
+
+uint64_t *x86_64_syscall(uint64_t *regs)
+{
+  unsigned int cmd  = regs[REG_RAX];
+  uint64_t     arg1 = regs[REG_RDI];
+  uint64_t     arg2 = regs[REG_RSI];
+  uint64_t     arg3 = regs[REG_RDX];
+  uint64_t     arg4 = regs[REG_R10];
+  uint64_t     arg5 = regs[REG_R8];
+  uint64_t     arg6 = regs[REG_R9];
+  uintptr_t    ret  = 0;
+
+  /* The syscall command is in RAX on entry */
+
+  dump_syscall("Entry", regs);
+
+  /* Handle the syscall according to the command in RAX */
+
+  switch (cmd)
+    {
+      /* There are no architecture-specific system calls handled here:
+       * every command is dispatched through the stub lookup table in
+       * the default case.
+       */
+
+      default:
+        {
+          int             nbr  = cmd - CONFIG_SYS_RESERVED;
+          struct tcb_s   *rtcb = nxsched_self();
+          syscall_stub_t  stub;
+
+          DEBUGASSERT(nbr >= 0 && nbr < SYS_nsyscalls);
+          DEBUGASSERT(rtcb->xcp.nsyscalls < CONFIG_SYS_NNEST);
+
+          /* Setup nested syscall: remember the return address (RCX) */
+
+          rtcb->xcp.syscall[rtcb->xcp.nsyscalls].sysreturn = regs[REG_RCX];
+          rtcb->xcp.nsyscalls += 1;
+
+          /* Fetch the stub only after nbr was validated, then call it */
+
+          stub = (syscall_stub_t)g_stublookup[nbr];
+          ret  = stub(nbr, arg1, arg2, arg3, arg4, arg5, arg6);
+
+          /* Setup return from nested syscall */
+
+          rtcb->xcp.nsyscalls -= 1;
+          regs[REG_RCX] = rtcb->xcp.syscall[rtcb->xcp.nsyscalls].sysreturn;
+
+          break;
+        }
+    }
+
+  dump_syscall("Exit", regs);
+
+  /* Store return value in RAX register */
+
+  regs[REG_RAX] = ret;
+
+  /* Return pointer to regs - the entry code restores context from it */
+
+  return regs;
+}
diff --git a/arch/x86_64/src/intel64/intel64_cpu.c 
b/arch/x86_64/src/intel64/intel64_cpu.c
index 30fcd95f32..2a6565d984 100644
--- a/arch/x86_64/src/intel64/intel64_cpu.c
+++ b/arch/x86_64/src/intel64/intel64_cpu.c
@@ -242,6 +242,10 @@ void x86_64_cpu_init(void)
           g_cpu_priv[i].loapic_id = lapic->apic_id;
           g_cpu_priv[i].id        = i;
           g_cpu_priv[i].ready     = false;
+#ifdef CONFIG_LIB_SYSCALL
+          g_cpu_priv[i].ustack    = NULL;
+          g_cpu_priv[i].uvbase    = (uint64_t *)CONFIG_ARCH_TEXT_VBASE;
+#endif
 
           /* Store private CPU in TSS */
 
@@ -380,4 +384,40 @@ void x86_64_cpu_priv_set(uint8_t cpu)
   /* Store private data pointer to GSBASE */
 
   write_gsbase((uintptr_t)&g_cpu_priv[cpu]);
+
+#ifdef CONFIG_LIB_SYSCALL
+  /* Configure SYSCALL instruction entry point */
+
+  write_msr(MSR_LSTAR, (uintptr_t)x86_64_syscall_entry);
+
+  /* Configure CS selection for SYSCALL (kernel) and SYSRET (userspace).
+   *
+   * Segment selection for SYSCALL works like this:
+   *
+   *          CS.Selector = IA32_STAR[47:32]
+   *          SS.Selector := IA32_STAR[47:32] + 8
+   *
+   *       This requires that we fill the GDT with the kernel code segment
+   *       first and after that we can put the kernel data segment.
+   *
+   * Segment selection for SYSRET has a really weird setup for 64-bit
+   * operand size:
+   *
+   *           CS.Selector = IA32_STAR[63:48]+16
+   *           SS.Selector = IA32_STAR[63:48]+8
+   *
+   *       This requires that we fill the GDT with the user data segment
+   *       first and after that we can put the user code segment (unlike
+   *       the kernel segments). The SYSRET base written to IA32_STAR must
+   *       therefore be (USERDATA_SEL - 8) for both selectors to resolve
+   *       correctly.
+   */
+
+  write_msr(MSR_STAR, MSR_STAR_CSSYSCALL(X86_GDT_CODE_SEL) |
+            MSR_STAR_CSSYSRET(X86_GDT_USERDATA_SEL - 8));
+
+  /* Mask applied to RFLAGS when making a syscall */
+
+  write_msr(MSR_FMASK, X86_64_RFLAGS_IF | X86_64_RFLAGS_DF);
+#endif
 }
diff --git a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S 
b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S
index 55b1ee871d..79b9d64308 100644
--- a/arch/x86_64/src/intel64/intel64_fullcontextrestore.S
+++ b/arch/x86_64/src/intel64/intel64_fullcontextrestore.S
@@ -130,9 +130,12 @@ x86_64_fullcontextrestore:
 
        movq    (8*REG_RAX)(%rdi), %rax
 
-       /* Restore the correct value of EAX and then return */
+       /* Restore the correct value of RDI */
 
        popq    %rdi
+
+       /* Pops 5 things at once: RIP, CS, RFLAGS RSP and SS */
+
        iretq
        .size x86_64_fullcontextrestore, . - x86_64_fullcontextrestore
        .end
diff --git a/arch/x86_64/src/intel64/intel64_head.S 
b/arch/x86_64/src/intel64/intel64_head.S
index 144b97f68f..d07bcdd482 100644
--- a/arch/x86_64/src/intel64/intel64_head.S
+++ b/arch/x86_64/src/intel64/intel64_head.S
@@ -53,6 +53,12 @@
 #  define X86_CR4_FPU_VAL (X86_CR4_OSXFSR | X86_CR4_XMMEXCPT)
 #endif
 
+/* XSAVE header data offset */
+
+#define X86_XSAVE_XSTATEBV_OFFSET  512
+#define X86_XSAVE_XCOMPBC_OFFSET   520
+#define X86_XSAVE_RESERVED0_OFFSET 528
+
 /* Memory Map: _sbss is the start of the BSS region (see ld.script) _ebss is
  * the end of the BSS region (see ld.script). The idle task stack starts at
  * the end of BSS and is of size CONFIG_IDLETHREAD_STACKSIZE.  The IDLE thread
@@ -72,6 +78,10 @@
        .global    __enable_sse_avx
        .global    __enable_pcid
        .global    __revoke_low_memory
+#ifdef CONFIG_LIB_SYSCALL
+       .global    x86_64_syscall_entry
+       .global    x86_64_syscall
+#endif
        .global    __nxstart                       /* __nxstart is defined 
elsewhere */
        .global    nx_start                        /* nx_start is defined 
elsewhere */
        .global    x86_64_ap_boot                  /* x86_64_ap_boot is defined 
elsewhere */
@@ -336,7 +346,11 @@ start64_init:
 
        movl    $MSR_EFER,  %ecx
        rdmsr
+#ifdef CONFIG_LIB_SYSCALL
+       or      $(EFER_LME | EFER_SCE),  %eax
+#else
        or      $EFER_LME,  %eax
+#endif
        wrmsr
 
        /* Enable paging related bits in CR0 */
@@ -520,6 +534,126 @@ __enable_pcid:
 
        .size   __enable_pcid, . - __enable_pcid
 
+#ifdef CONFIG_LIB_SYSCALL
+  /****************************************************************************
+  * Name: x86_64_syscall_entry
+  *
+  * Description:
+  *   Landing point for SYSCALL instruction (address loaded into MSR_LSTAR).
+  *
+  *   At this point RFLAGS are masked with MSR_FMASK
+  *     - IF=0 (interrupts are disabled)
+  *     - DF=0
+  *
+  ****************************************************************************/
+
+       .type   x86_64_syscall_entry, @function
+x86_64_syscall_entry:
+       /* Store current RSP on CPU private data first */
+       movq   %rsp, %gs:X86_64_CPUPRIV_USTACK_OFFSET
+
+       /* Push the return address (left in RCX by SYSCALL) on the current
+        * stack, so that RCX is free to address the register save area.
+        */
+       pushq   %rcx
+
+       /* Get 64-byte aligned registers area below the current RSP */
+       movq   %rsp, %rcx
+       sub    $8, %rcx
+       sub    $((XCPTCONTEXT_REGS + 8) * 8), %rcx
+       add    $(0x3f), %rcx
+       and    $(0xffffffffffffffc0), %rcx
+
+       /* Syscall arguments */
+       movq    %rax, (8*REG_RAX)(%rcx)
+       movq    %rdi, (8*REG_RDI)(%rcx)
+       movq    %rsi, (8*REG_RSI)(%rcx)
+       movq    %rdx, (8*REG_RDX)(%rcx)
+       movq    %r10, (8*REG_R10)(%rcx)
+       movq    %r8,  (8*REG_R8)(%rcx)
+       movq    %r9,  (8*REG_R9)(%rcx)
+
+       /* Callee-saved registers, plus R11 (RFLAGS stored by SYSCALL) */
+       movq    %rbx, (8*REG_RBX)(%rcx)
+       movq    %r11, (8*REG_R11)(%rcx)
+       movq    %r12, (8*REG_R12)(%rcx)
+       movq    %r13, (8*REG_R13)(%rcx)
+       movq    %r14, (8*REG_R14)(%rcx)
+       movq    %r15, (8*REG_R15)(%rcx)
+       movq    %rbp, (8*REG_RBP)(%rcx)
+
+#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE
+       /* Save xmm registers */
+       fxsaveq (%rcx)
+#else
+#  ifdef CONFIG_ARCH_CHIP_INTEL64_QEMU
+       /* BUGFIX for QEMU: make sure that xsave header is zeroed!
+        * QEMU doesn't clear these fields during xsave, so if the memory
+        * region for xsave state was not cleared before use, there may be
+        * junk data there, that causes xrstor to crash later.
+        */
+       movq    $0, (X86_XSAVE_XSTATEBV_OFFSET)(%rcx)
+       movq    $0, (X86_XSAVE_XCOMPBC_OFFSET)(%rcx)
+       movq    $0, (X86_XSAVE_RESERVED0_OFFSET)(%rcx)
+#  endif
+
+       movl    $XSAVE_STATE_COMPONENTS, %eax
+       xor     %edx, %edx
+       xsave   (%rcx)
+#endif
+
+       /* Save RCX: pop the return address pushed above into the regs area */
+       popq    (8*REG_RCX)(%rcx)
+
+       /* Store user stack pointer (RSP value saved at entry) */
+       pushq   %gs:X86_64_CPUPRIV_USTACK_OFFSET
+       popq    (8*REG_RSP)(%rcx)
+
+       /* Move stack pointer after registers area */
+       movq    %rcx, %rsp
+
+       /* Call handler with regs in RDI; it returns the regs pointer in RAX */
+       movq    %rcx, %rdi
+       call    x86_64_syscall
+       movq    %rax, %rdi
+
+#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE
+       /* Restore xmm registers */
+       fxrstorq (%rdi)
+#else
+       movl    $XSAVE_STATE_COMPONENTS, %eax
+       xor     %edx, %edx
+       xrstor  (%rdi)
+#endif
+
+       /* Restore original user RSP */
+       movq    (8*REG_RSP)(%rdi), %rsp
+
+       /* Restore CPU registers from regs in RDI (R8/R9 are not reloaded) */
+       movq    (8*REG_RBX)(%rdi), %rbx
+       movq    (8*REG_R11)(%rdi), %r11
+       movq    (8*REG_R12)(%rdi), %r12
+       movq    (8*REG_R13)(%rdi), %r13
+       movq    (8*REG_R14)(%rdi), %r14
+       movq    (8*REG_R15)(%rdi), %r15
+       movq    (8*REG_RBP)(%rdi), %rbp
+
+       movq    (8*REG_RAX)(%rdi), %rax
+       movq    (8*REG_RSI)(%rdi), %rsi
+       movq    (8*REG_RDX)(%rdi), %rdx
+       movq    (8*REG_R10)(%rdi), %r10
+       movq    (8*REG_RCX)(%rdi), %rcx
+
+       movq    (8*REG_R11)(%rdi), %r11   /* NOTE(review): repeats load above */
+       movq    (8*REG_RDI)(%rdi), %rdi
+
+       /* Return via RET to the address in RCX; RFLAGS is not reloaded here */
+       pushq   %rcx
+       ret
+
+       .size   x86_64_syscall_entry, . - x86_64_syscall_entry
+#endif
+
 /****************************************************************************
  * .data
  ****************************************************************************/
@@ -537,12 +671,18 @@ g_ist64_low:
        /* GDT for 64 bit long mode */
        .align(16)
 g_gdt64_low:
-       .quad   0
-       .quad   X86_GDT_CODE64_ENTRY
-       .quad   X86_GDT_DATA_ENTRY
-       .quad   X86_GDT_CODE32_ENTRY
-       .quad   X86_GDT_DATA_ENTRY
-       .quad   X86_GDT_CODE64_ENTRY
+       /* Must be in sequence required by SYSCALL and SYSRET.
+        * For details look at the comment in intel64_cpu.c about MSR_STAR 
write.
+        */
+       .quad   0                      /* 0x00: null descriptor */
+       .quad   X86_GDT_CODE64_ENTRY   /* 0x08: kernel code 64 */
+       .quad   X86_GDT_DATA_ENTRY     /* 0x10: kernel data */
+       .quad   X86_GDT_CODE32_ENTRY   /* 0x18: kernel code 32 */
+       .quad   X86_GDT_DATA_ENTRY     /* 0x20: kernel data */
+       .quad   X86_GDT_CODE64_ENTRY   /* 0x28: kernel code 64 */
+       .quad   X86_GDT_DATAUSER_ENTRY /* 0x30: user data */
+       .quad   X86_GDT_CODEUSER_ENTRY /* 0x38: user code */
+
 g_gdt64_ist_low:
        /* TSS segment low + segment high per CPU */
        .fill   CONFIG_SMP_NCPUS * 16, 1, 0
diff --git a/arch/x86_64/src/intel64/intel64_saveusercontext.S 
b/arch/x86_64/src/intel64/intel64_saveusercontext.S
index dd3c87ce6e..e0bd64eea4 100644
--- a/arch/x86_64/src/intel64/intel64_saveusercontext.S
+++ b/arch/x86_64/src/intel64/intel64_saveusercontext.S
@@ -103,6 +103,15 @@ up_saveusercontext:
 
        movq    %rbp, (8*REG_RBP)(%rdi)
 
+#ifdef CONFIG_LIB_SYSCALL
+       /* Save CS and SS if we support syscalls */
+       xor     %rax, %rax
+       mov     %cs, %ax
+       movq    %rax, (8*REG_CS)(%rdi)
+       mov     %ss, %ax
+       movq    %rax, (8*REG_SS)(%rdi)
+#endif
+
        /* Save EAX=1.  This will be the "apparent" return value from this
         * function when context is switch back to this thread.  The non-zero
         * return value is the indication that we have been resumed.
diff --git a/arch/x86_64/src/intel64/intel64_start.c 
b/arch/x86_64/src/intel64/intel64_start.c
index fdd18b15eb..e176801271 100644
--- a/arch/x86_64/src/intel64/intel64_start.c
+++ b/arch/x86_64/src/intel64/intel64_start.c
@@ -198,10 +198,6 @@ void __nxstart(void)
 
   x86_64_timer_calibrate_freq();
 
-#ifdef CONFIG_LIB_SYSCALL
-  enable_syscall();
-#endif
-
   /* Store CPU IDs */
 
   x86_64_cpu_priv_set(0);
diff --git a/arch/x86_64/src/intel64/intel64_vectors.S 
b/arch/x86_64/src/intel64/intel64_vectors.S
index 5317374572..dbd95432b2 100644
--- a/arch/x86_64/src/intel64/intel64_vectors.S
+++ b/arch/x86_64/src/intel64/intel64_vectors.S
@@ -923,7 +923,7 @@ irq_common:
 
        add     $8, %rsp
 
-       iretq                /* Pops 5 things at once: CS, RIP, RFLAGS and SS 
and RSP */
+       iretq                /* Pops 5 things at once: RIP, CS, RFLAGS RSP and 
SS */
        .size   irq_common, . - irq_common
        .end
 

Reply via email to