This patch makes each thread switch stacks on the SYSCALL instruction.
This is possible because each thread, which is created through class
thread, now has two stacks: _stack and _sys_stack.

_sys_stack is a new stack introduced by this patch. It is used only by
its unique owner, a thread, while that thread executes a system call.
When no system call is being executed, _stack, which is the application
stack, is used.

Fixes #808.

Signed-off-by: HawxChen <yingjheng.c...@gmail.com>
---
 arch/x64/arch-setup.cc  |  3 +++
 arch/x64/arch-switch.hh | 23 +++++++++++++++++++++++
 arch/x64/arch-tls.hh    |  5 +++++
 arch/x64/entry.S        |  8 ++++++--
 core/sched.cc           |  4 ++++
 include/osv/sched.hh    | 10 ++++++++++
 6 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/arch/x64/arch-setup.cc b/arch/x64/arch-setup.cc
index 325c26a..f7f67f2 100644
--- a/arch/x64/arch-setup.cc
+++ b/arch/x64/arch-setup.cc
@@ -227,6 +227,9 @@ static const int IA_32_STAR_SYSCALL_SHIFT = 32;
 namespace processor {
 void init_syscall() {
     unsigned long cs = gdt_cs;
+
+    //There is no mode switch between system-call code and user code.
+    //In short, their address space stays the same.
     processor::wrmsr(msr::IA32_STAR,  (cs << CS_SELECTOR_SHIFT) << 
IA_32_STAR_SYSCALL_SHIFT);
     // lstar is where syscall set rip so we set it to syscall_entry
     processor::wrmsr(msr::IA32_LSTAR, 
reinterpret_cast<uint64_t>(syscall_entry));
diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh
index d1a039a..6a87ff3 100644
--- a/arch/x64/arch-switch.hh
+++ b/arch/x64/arch-switch.hh
@@ -99,6 +99,18 @@ void thread::switch_to_first()
            "r10", "r11", "r12", "r13", "r14", "r15", "memory");
 }
 
+void thread::init_sys_stack() { // prepares _attr._sys_stack, the stack used during system calls
+    auto& stack = _attr._sys_stack;
+
+    if(!stack.size) { // zero size requested: fall back to a single page
+        stack.size = PAGE_SIZE;
+    }
+
+    if(!stack.begin) { // no buffer supplied: allocate one of stack.size bytes
+        stack.begin = malloc(stack.size); // NOTE(review): the malloc() result is not checked here
+    }
+}
+
 void thread::init_stack()
 {
     auto& stack = _attr._stack;
@@ -116,10 +128,15 @@ void thread::init_stack()
     _state.exception_stack = _arch.exception_stack + 
sizeof(_arch.exception_stack);
 }
 
+void thread::setup_tcb_stack() { // stores the end address (begin + size) of _sys_stack in the TCB
+    _tcb->stack_addr = static_cast<void*>  
((static_cast<void*>(_attr._sys_stack.begin)) + _attr._sys_stack.size);
+} // NOTE(review): the sum above is arithmetic on void*, a compiler extension rather than standard C++
 void thread::setup_tcb()
 {
+    //_tcb's stack will be configured in thread::setup_tcb_stack();
     assert(tls.size);
 
+
     void* user_tls_data;
     size_t user_tls_size = 0;
     if (_app_runtime) {
@@ -148,6 +165,12 @@ void thread::setup_tcb()
     _tcb->tls_base = p + user_tls_size;
 }
 
+void thread::free_sys_stack() // releases the buffer that _attr._sys_stack.begin points to
+{
+    assert(_attr._sys_stack.begin); // a buffer is expected to be present at this point
+    free(_attr._sys_stack.begin); // NOTE(review): also runs if begin came from the caller's attr - confirm ownership
+}
+
 void thread::free_tcb()
 {
     if (_app_runtime) {
diff --git a/arch/x64/arch-tls.hh b/arch/x64/arch-tls.hh
index 1bf86fd..d6a517c 100644
--- a/arch/x64/arch-tls.hh
+++ b/arch/x64/arch-tls.hh
@@ -8,9 +8,14 @@
 #ifndef ARCH_TLS_HH
 #define ARCH_TLS_HH
 
+//Don't change the declaration order of the existing members.
+//Please add new members at the end.
 struct thread_control_block {
     thread_control_block* self;
     void* tls_base;
+    //FIXME: Through the linker and TLS, stack_addr could be removed.
+    //FIXME: The discussion in issue #808 provides details.
+    void* stack_addr; //This field is used in arch/x64/entry.S for %fs's 
offset.
 };
 
 #endif /* ARCH_TLS_HH */
diff --git a/arch/x64/entry.S b/arch/x64/entry.S
index 04d809d..e4dba46 100644
--- a/arch/x64/entry.S
+++ b/arch/x64/entry.S
@@ -172,8 +172,9 @@ syscall_entry:
 
     # Skip the "red zone" allowed by the AMD64 ABI (the caller used a
     # SYSCALL instruction and doesn't know he called a function):
-    subq $128, %rsp
+    //subq $128, %rsp
 
+    xchg %rsp, %fs:16
     # Align the stack to 16 bytes. We align it now because of limitations of
     # the CFI language, but need to ensure it is still aligned before we call
     # syscall_wrapper(), so must ensure that the number of pushes below are
@@ -283,8 +284,11 @@ syscall_entry:
     # pop the stack value in flag register
     popfq
 
+/* If we use the same stack between application and system call
     #undo red-zone skip without altering restored flags
-    lea 128(%rsp), %rsp
+    //lea 128(%rsp), %rsp
+*/
+    xchg %rsp, %fs:16
 
     # jump to rcx where the syscall instruction put rip
     # (sysret would leave rxc cloberred so we have nothing to do to restore it)
diff --git a/core/sched.cc b/core/sched.cc
index e65aa94..3b3cb8d 100644
--- a/core/sched.cc
+++ b/core/sched.cc
@@ -974,7 +974,10 @@ thread::thread(std::function<void ()> func, attr attr, 
bool main, bool app)
     if (!main && sched::s_current) {
         remote_thread_local_var(s_current) = this;
     }
+
     init_stack();
+    init_sys_stack();
+    setup_tcb_stack();
 
     if (_attr._detached) {
         _detach_state.store(detach_state::detached);
@@ -1058,6 +1061,7 @@ thread::~thread()
         delete[] _tls[i];
     }
     free_tcb();
+    free_sys_stack();
     rcu_dispose(_detached_state.release());
 }
 
diff --git a/include/osv/sched.hh b/include/osv/sched.hh
index dada8f5..fedd0d4 100644
--- a/include/osv/sched.hh
+++ b/include/osv/sched.hh
@@ -328,6 +328,11 @@ class thread : private timer_base::client {
 private:
     struct detached_state;
 public:
+    struct sys_stack_info { // describes the per-thread stack used while executing system calls
+        sys_stack_info() : begin(nullptr), size(PAGE_SIZE) { } // no buffer yet; size defaults to one page
+        void* begin; // start of the buffer; thread::init_sys_stack() allocates it when this is null
+        size_t size; // buffer size in bytes
+    };
     struct stack_info {
         stack_info();
         stack_info(void* begin, size_t size);
@@ -338,6 +343,8 @@ public:
     };
     struct attr {
         stack_info _stack;
+        sys_stack_info _sys_stack;
+
         cpu *_pinned_cpu;
         bool _detached;
         std::array<char, 16> _name = {};
@@ -602,9 +609,12 @@ private:
     void prepare_wait();
     void wait();
     void stop_wait();
+    void init_sys_stack();
     void init_stack();
     void setup_tcb();
+    void setup_tcb_stack();
     void free_tcb();
+    void free_sys_stack();
     void complete() __attribute__((__noreturn__));
     template <class Action>
     inline void do_wake_with(Action action, unsigned 
allowed_initial_states_mask);
-- 
2.7.4

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to