This patch changes exception handling mechanism to use dedicated
exception stack instead of the default stack provided for kernel and
application threads. This is critical to support Golang apps which
are known to use tiny stacks in coroutines, and the exception handler of
the svc instruction cannot use a single byte of the application stack in
such a case. Having a separate exception stack has other benefits for debugging
and will allow future implementation of "lazy" stacks. This also makes
aarch64 port similar to x64 where we use dedicated stacks as well.

To support dedicated stacks, we take advantage of the fact that at every
exception level but EL0 there are two stack registers available -
SP_ELx and SP_EL0. OSv runs at the exception level EL1 and in boot.S
selects SP_EL1 to be used by default. The SP is effectively an alias to
one of the two stack registers and can be changed by setting the system
register SPSel (stack selector).

This patch changes all exception handlers (both synchronous and
asynchronous (interrupts)) in entry.S to switch to the new exception
stack before pushing a frame by setting the SPSel to #0 which makes
SP point to SP_EL0. We have to switch to SP_EL0 even in the case of a
nested exception taken while already on SP_EL0 because, per the ARM
specification, the SP is always reset to SP_ELx (in our case SP_EL1)
after taking an
exception. The typical case of nested exception is handling of a page
fault where we enable exceptions downstream in the page fault handler
(arch/aarch64/mmu.cc) and it may be interrupted by an asynchronous
exception like a timer one. To that end we also add the exception
handlers for curr_el_sp0, which the system invokes when code is running
with SP pointing to SP_EL0.

Finally, we also change the context switch code in sched.S to make
it save not only the default stack register but also explicitly save
SP_EL0, SP_EL1 and SPSel for the old thread and then restore those
from arch_thread_state for new thread. This makes context switch
slightly more expensive and has been measured to add around 5% of
overhead.

This patch effectively enhances OSv to allow running Golang apps on
AArch64.

Fixes #1155

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 arch/aarch64/arch-cpu.hh          |  1 +
 arch/aarch64/arch-switch.hh       | 12 +++--
 arch/aarch64/arch-thread-state.hh |  3 ++
 arch/aarch64/entry.S              | 73 +++++++++++++++++++++----------
 arch/aarch64/sched.S              | 18 +++++++-
 5 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
index 15edbdaa..8848d880 100644
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -33,6 +33,7 @@ struct arch_cpu {
 };
 
 struct arch_thread {
+    char exception_stack[4096*4] __attribute__((aligned(16)));
 };
 
 struct arch_fpu {
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
index 0401a4b8..c8848605 100644
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -33,13 +33,15 @@ void thread::switch_to_first()
     remote_thread_local_var(percpu_base) = _detached_state->_cpu->percpu_base;
 
     asm volatile("\n"
-                 "ldp x29, x0, %2  \n"
-                 "ldp x22, x21, %3 \n"
+                 "ldp x29, x0, %3  \n"
+                 "ldp x22, x21, %4 \n"
                  "mov sp, x22      \n"
+                 "ldr x22, %5      \n"
+                 "msr sp_el0, x22  \n"
                  "blr x21          \n"
                  : // No output operands - this is to designate the input 
operands as earlyclobbers
-                   "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp)
-                 : "Ump"(this->_state.fp), "Ump"(this->_state.sp)
+                "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp), 
"=&Ump"(this->_state.exception_sp)
+                 : "Ump"(this->_state.fp), "Ump"(this->_state.sp), 
"Ump"(this->_state.exception_sp)
                  : "x0", "x19", "x20", "x21", "x22", "x23", "x24",
                    "x25", "x26", "x27", "x28", "x30", "memory");
 }
@@ -59,6 +61,8 @@ void thread::init_stack()
     _state.thread = this;
     _state.sp = stacktop;
     _state.pc = reinterpret_cast<void*>(thread_main);
+    _state.exception_sp = _arch.exception_stack + 
sizeof(_arch.exception_stack);
+    _state.stack_selector = 1; //Select SP_ELx
 }
 
 void thread::setup_tcb()
diff --git a/arch/aarch64/arch-thread-state.hh 
b/arch/aarch64/arch-thread-state.hh
index 6f1b680d..f6a27ff2 100644
--- a/arch/aarch64/arch-thread-state.hh
+++ b/arch/aarch64/arch-thread-state.hh
@@ -15,6 +15,9 @@ struct thread_state {
     void* sp;
     void* pc;
     void* tcb;
+
+    void* exception_sp; //SP_EL0
+    u64 stack_selector; //1 - selects SP_ELx (default), 0 - selects SP_EL0 
(exceptions)
 };
 
 #endif /* ARCH_THREAD_STATE_HH_ */
diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S
index 25354359..8322ee90 100644
--- a/arch/aarch64/entry.S
+++ b/arch/aarch64/entry.S
@@ -57,9 +57,16 @@ exception_vectors:
         vector_entry lower_el_aarch32 fiq
         vector_entry lower_el_aarch32 serror
 
-/* keep in sync with the struct in exceptions.hh */
-.macro push_state_to_exception_frame
-        sub     sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, 
SP
+/* keep in sync with the struct in exceptions.hh
+   the switch argument (1 or 0) indicates if we would be switching from
+   SP_ELx -> SP_EL0 (1) or staying on the same stack - SP_EL0 -> SP_EL0 (0) */
+.macro push_state_to_exception_frame switch
+        // Regardless which stack (pointed by SP_ELx or SP_EL0) was in use when
+        // exception was taken, the stack is always reset to SP_ELx before 
exception
+        // handler is executed. To make sure the exception handler uses the 
exception
+        // stack pointed by SP_EL0 we need to set SPSEL to #0.
+        msr spsel, #0               // switch to exception stack by selecting 
SP_EL0
+        sub     sp, sp, #48         // make space for align2, align1+ESR, 
PSTATE, PC, SP
         .cfi_adjust_cfa_offset 48
         push_pair x28, x29
         push_pair x26, x27
@@ -76,7 +83,13 @@ exception_vectors:
         push_pair x4, x5
         push_pair x2, x3
         push_pair x0, x1
+        .if \switch == 1
+        msr spsel, #1                // switch to regular stack (SP_ELx) for 
brief moment to read it
+        mov     x1, sp               // fetch SP of regular stack (spsel 1)
+        msr spsel, #0                // switch back to exception stack
+        .else
         add     x1, sp, #288         // x1 := old SP (48 + 16 * 15 = 288)
+        .endif
         mrs     x2, elr_el1
         mrs     x3, spsr_el1
         stp     x30, x1, [sp, #240]  // store lr, old SP
@@ -102,6 +115,10 @@ exception_vectors:
         pop_pair x24, x25
         pop_pair x26, x27
         pop_pair x28, x29
+        // please note we do not need to explicitly switch the stack when 
returning
+        // from exception by resetting the stack selector register, as it will
+        // happen automatically based on the value of spsr_el1 which we 
restored above
+        // (the spsr_el1 holds PSTATE and EL and SP selector)
         ldr     x30, [sp], #48
         .cfi_adjust_cfa_offset -48
 .endm /* pop_state_to_exception_frame */
@@ -143,7 +160,7 @@ entry_\level\()_\type:
         .cfi_offset x30, -32 // Point to the elr register located at the -32 
offset
                              // of the exception frame to help gdb link to the
                              // address when interrupt was raised
-        push_state_to_exception_frame
+        push_state_to_exception_frame 1
         mrs     x1, esr_el1
         str     w1, [sp, #272] // Store Exception Syndrom Register in the frame
         mov     x0, sp         // Save exception_frame to x0
@@ -165,8 +182,6 @@ entry_\level\()_\type:
 .equ EX_TYPE_FIQ, 0x2
 .equ EX_TYPE_SERROR, 0x3
 
-entry_unexpected_exception curr_el_sp0, sync, #CURR_EL_SP0, #EX_TYPE_SYNC
-entry_unexpected_exception curr_el_sp0, irq, #CURR_EL_SP0, #EX_TYPE_IRQ
 entry_unexpected_exception curr_el_sp0, fiq, #CURR_EL_SP0, #EX_TYPE_FIQ
 entry_unexpected_exception curr_el_sp0, serror, #CURR_EL_SP0, #EX_TYPE_SERROR
 
@@ -183,38 +198,39 @@ entry_unexpected_exception lower_el_aarch32, irq, 
#LOWER_EL_AARCH32, #EX_TYPE_IR
 entry_unexpected_exception lower_el_aarch32, fiq, #LOWER_EL_AARCH32, 
#EX_TYPE_FIQ
 entry_unexpected_exception lower_el_aarch32, serror, #LOWER_EL_AARCH32, 
#EX_TYPE_SERROR
 
-.global entry_curr_el_spx_sync
-.hidden entry_curr_el_spx_sync
-.type entry_curr_el_spx_sync, @function
-entry_curr_el_spx_sync:
+.macro entry_curr_el_sync stack, switch
+.global entry_curr_el_sp\stack\()_sync
+.hidden entry_curr_el_sp\stack\()_sync
+.type entry_curr_el_sp\stack\()_sync, @function
+entry_curr_el_sp\stack\()_sync:
         .cfi_startproc simple
         .cfi_signal_frame
         .cfi_def_cfa sp, 0
         .cfi_offset x30, -32 // Point to the elr register located at the -32 
offset
                              // of the exception frame to help gdb link to the
                              // address when interrupt was raised
-        push_state_to_exception_frame
+        push_state_to_exception_frame \switch
         mrs     x1, esr_el1
         str     w1, [sp, #272] // Store Exception Syndrom Register in the frame
         ubfm    x2, x1, #ESR_EC_BEG, #ESR_EC_END // Exception Class -> X2
         ubfm    x3, x1, #ESR_FLT_BEG, #ESR_FLT_END // FLT -> X3
         cmp     x2, #ESR_EC_SVC64
-        b.eq    handle_system_call
+        b.eq    handle_system_call_sp\stack
         cmp     x2, #ESR_EC_DATA_ABORT
-        b.eq    handle_mem_abort
+        b.eq    handle_mem_abort_sp\stack
         cmp     x2, #ESR_EC_INSN_ABORT
-        b.ne    unexpected_sync_exception
-handle_mem_abort:
-        cbz     x3, unexpected_sync_exception
+        b.ne    unexpected_sync_exception_sp\stack
+handle_mem_abort_sp\stack:
+        cbz     x3, unexpected_sync_exception_sp\stack
         cmp     x3, #3
-        b.hi    unexpected_sync_exception
+        b.hi    unexpected_sync_exception_sp\stack
 
         mov     x0, sp  // save exception_frame to x0
         bl      page_fault
         pop_state_from_exception_frame
         eret
         .cfi_endproc
-handle_system_call:
+handle_system_call_sp\stack:
         .cfi_startproc
         //see https://man7.org/linux/man-pages/man2/syscall.2.html for details
         //about calling convention for arm64
@@ -237,7 +253,7 @@ handle_system_call:
         pop_state_from_exception_frame
         eret
         .cfi_endproc
-unexpected_sync_exception:
+unexpected_sync_exception_sp\stack:
         .cfi_startproc
         mov     x0, sp  // save exception_frame to x0
         mov     x1, #CURR_EL_SPX
@@ -246,23 +262,32 @@ unexpected_sync_exception:
         pop_state_from_exception_frame
         bl      abort
         .cfi_endproc
+.endm
+
+entry_curr_el_sync 0, 0 // the synchronous exception handler used when the 
SP_EL0 is active
+entry_curr_el_sync x, 1 // the synchronous exception handler used when the 
SP_ELx is active
 
-.global entry_curr_el_spx_irq
-.hidden entry_curr_el_spx_irq
-.type entry_curr_el_spx_irq, @function
-entry_curr_el_spx_irq:
+.macro entry_curr_el_irq stack, switch
+.global entry_curr_el_sp\stack\()_irq
+.hidden entry_curr_el_sp\stack\()_irq
+.type entry_curr_el_sp\stack\()_irq, @function
+entry_curr_el_sp\stack\()_irq:
         .cfi_startproc simple
         .cfi_signal_frame
         .cfi_def_cfa sp, 0
         .cfi_offset x30, -32 // Point to the elr register located at the -32 
offset
                              // of the exception frame to help gdb link to the
                              // address when interrupt was raised
-        push_state_to_exception_frame
+        push_state_to_exception_frame \switch
         mov     x0, sp
         bl      interrupt // extern "C"
         pop_state_from_exception_frame
         eret
         .cfi_endproc
+.endm
+
+entry_curr_el_irq 0, 0 // the asynchronous exception handler used when the 
SP_EL0 is active
+entry_curr_el_irq x, 1 // the asynchronous exception handler used when the 
SP_ELx is active
 
 .global call_signal_handler_thunk
 .hidden call_signal_handler_thunk
diff --git a/arch/aarch64/sched.S b/arch/aarch64/sched.S
index 5949459a..ab049a6f 100644
--- a/arch/aarch64/sched.S
+++ b/arch/aarch64/sched.S
@@ -39,15 +39,31 @@ reschedule_from_interrupt:
         isb
 
         str     x29, [x0, #0]     //Save frame pointer of the old thread
+
+        mrs     x2, spsel         //Fetch old thread stack selector
+        msr     spsel, #1         //Select SP_ELx
         mov     x3, sp            //Fetch old thread stack pointer
+
         adr     x4, 1f            //Fetch old thread instruction point
         stp     x3, x4, [x0, #16] //Save old thread sp and pc
 
+        msr     spsel, #0         //Select SP_EL0
+        mov     x3, sp            //Fetch old thread exception stack pointer
+        stp     x3, x2, [x0, #40] //Save old thread exception stack pointer 
and stack selector
+
         ldp     x29, x0, [x1, #0] //Set frame pointer of the new thread and 
this (x0) of the new thread
                                   //Please note that the pc may point to 
thread_main_c(thread*) which is
                                   //why we have to set x0 (1st argument) to 
new thread object
         ldp     x3, x4, [x1, #16] //Fetch new thread sp and pc
-        mov     sp, x3            //Set new thread stack pointer
+
+        msr     spsel, #1         //Select SP_ELx
+        mov     sp, x3            //Restore new thread stack pointer
+
+        ldp     x3, x2, [x1, #40] //Load new thread exception stack pointer 
and stack selector
+        msr     spsel, #0         //Select SP_EL0
+        mov     sp, x3            //Restore new thread exception stack pointer
+        msr     spsel, x2         //Restore new thread stack selector 
(1-SP_ELx,0-SP_EL0)
+
         blr     x4                //Jump to the new thread pc
 
 1:
-- 
2.27.0

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220429041005.13475-1-jwkozaczuk%40gmail.com.

Reply via email to