This patch changes the exception handling mechanism to use a dedicated exception stack instead of the default stack provided for kernel and application threads. This is critical to support Golang apps, which are known to use tiny stacks in goroutines; in such a case the exception handler for the svc instruction cannot use a single byte of the application stack. Having a separate exception stack also has benefits for debugging and will allow a future implementation of "lazy" stacks. Finally, it makes the aarch64 port similar to the x64 one, where we use dedicated stacks as well.
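At its core, each thread simply gets its own small stack reserved for exception handling, and the top of that region becomes the initial value of SP_EL0. A condensed C++ sketch of the data side (illustrative only; the helper exception_stack_top() is hypothetical and stands in for the init_stack()/switch_to_first() logic in the diff below):

    struct arch_thread {
        // dedicated 16K per-thread exception stack; AArch64 requires stack
        // pointers to be 16-byte aligned
        char exception_stack[4096*4] __attribute__((aligned(16)));
    };

    // stacks grow down, so SP_EL0 starts at the top of the region
    static inline void* exception_stack_top(arch_thread& arch)
    {
        return arch.exception_stack + sizeof(arch.exception_stack);
    }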
To support dedicated stacks, we take advantage of the fact that at every exception level except EL0 there are two stack registers available - SP_ELx and SP_EL0. OSv runs at exception level EL1 and in boot.S selects SP_EL1 to be used by default. The SP register is effectively an alias of one of these two, and which one it aliases can be changed by setting the system register SPSel (the stack selector).

This patch changes all exception handlers (both synchronous and asynchronous, i.e. interrupts) in entry.S to switch to the new exception stack before pushing a frame, by setting SPSel to #0, which makes SP point to SP_EL0. We have to switch to SP_EL0 even in the case of a nested exception, when we are already on SP_EL0, because per the ARM specification the SP is always reset to SP_ELx (in our case SP_EL1) upon taking an exception. The typical case of a nested exception is the handling of a page fault, where we enable exceptions downstream in the page fault handler (arch/aarch64/mmu.cc) and may then be interrupted by an asynchronous exception such as a timer interrupt. To that end we also add exception handlers for the curr_el_sp0 vectors, which the CPU invokes when an exception is taken while code is running with SP pointing to SP_EL0.

Finally, we also change the context switch code in sched.S to save not only the default stack register but explicitly SP_EL0, SP_EL1 and SPSel for the old thread, and then restore those from arch_thread_state for the new thread. This makes the context switch slightly more expensive; it has been measured to add around 5% of overhead.

This patch effectively enhances OSv to allow running Golang apps on AArch64.
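For reference, the SPSel mechanics that the entry.S and sched.S changes revolve around can be condensed into two helpers (an illustrative C++ sketch, not code from this patch):

    #include <cstdint>

    // Which stack register does SP currently alias? (PSTATE.SP, aka SPSel)
    static inline uint64_t current_stack_selector()
    {
        uint64_t sel;
        asm volatile("mrs %0, spsel" : "=r"(sel));
        return sel & 1; // 0 - SP aliases SP_EL0, 1 - SP aliases SP_EL1
    }

    // Read SP_EL0 (the exception stack pointer) explicitly. Note this is only
    // legal while SPSel selects SP_ELx; when SPSel is 0, SP_EL0 is simply SP.
    static inline void* read_exception_sp()
    {
        void* sp;
        asm volatile("mrs %0, sp_el0" : "=r"(sp));
        return sp;
    }

There is no MRS form for SP_EL1 at EL1 at all, which is why the context switch code in sched.S below has to toggle SPSel and read the plain sp alias for each bank in turn.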
Fixes #1155

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 arch/aarch64/arch-cpu.hh          |  1 +
 arch/aarch64/arch-switch.hh       | 12 +++--
 arch/aarch64/arch-thread-state.hh |  3 ++
 arch/aarch64/entry.S              | 73 +++++++++++++++++++++----------
 arch/aarch64/sched.S              | 18 +++++++-
 5 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/arch/aarch64/arch-cpu.hh b/arch/aarch64/arch-cpu.hh
index 15edbdaa..8848d880 100644
--- a/arch/aarch64/arch-cpu.hh
+++ b/arch/aarch64/arch-cpu.hh
@@ -33,6 +33,7 @@ struct arch_cpu {
 };
 
 struct arch_thread {
+    char exception_stack[4096*4] __attribute__((aligned(16)));
 };
 
 struct arch_fpu {
diff --git a/arch/aarch64/arch-switch.hh b/arch/aarch64/arch-switch.hh
index 0401a4b8..c8848605 100644
--- a/arch/aarch64/arch-switch.hh
+++ b/arch/aarch64/arch-switch.hh
@@ -33,13 +33,15 @@ void thread::switch_to_first()
     remote_thread_local_var(percpu_base) = _detached_state->_cpu->percpu_base;
 
     asm volatile("\n"
-                 "ldp x29, x0, %2 \n"
-                 "ldp x22, x21, %3 \n"
+                 "ldp x29, x0, %3 \n"
+                 "ldp x22, x21, %4 \n"
                  "mov sp, x22 \n"
+                 "ldr x22, %5 \n"
+                 "msr sp_el0, x22 \n"
                  "blr x21 \n"
                  : // No output operands - this is to designate the input operands as earlyclobbers
-                 "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp)
-                 : "Ump"(this->_state.fp), "Ump"(this->_state.sp)
+                 "=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp), "=&Ump"(this->_state.exception_sp)
+                 : "Ump"(this->_state.fp), "Ump"(this->_state.sp), "Ump"(this->_state.exception_sp)
                  : "x0", "x19", "x20", "x21", "x22", "x23", "x24", "x25",
                    "x26", "x27", "x28", "x30", "memory");
 }
@@ -59,6 +61,8 @@ void thread::init_stack()
     _state.thread = this;
     _state.sp = stacktop;
     _state.pc = reinterpret_cast<void*>(thread_main);
+    _state.exception_sp = _arch.exception_stack + sizeof(_arch.exception_stack);
+    _state.stack_selector = 1; //Select SP_ELx
 }
 
 void thread::setup_tcb()
diff --git a/arch/aarch64/arch-thread-state.hh b/arch/aarch64/arch-thread-state.hh
index 6f1b680d..f6a27ff2 100644
--- a/arch/aarch64/arch-thread-state.hh
+++ b/arch/aarch64/arch-thread-state.hh
@@ -15,6 +15,9 @@ struct thread_state {
     void* sp;
     void* pc;
     void* tcb;
+
+    void* exception_sp;  //SP_EL0
+    u64 stack_selector;  //1 - selects SP_ELx (default), 0 - selects SP_EL0 (exceptions)
 };
 
 #endif /* ARCH_THREAD_STATE_HH_ */
diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S
index 25354359..8322ee90 100644
--- a/arch/aarch64/entry.S
+++ b/arch/aarch64/entry.S
@@ -57,9 +57,16 @@ exception_vectors:
     vector_entry lower_el_aarch32 fiq
     vector_entry lower_el_aarch32 serror
 
-/* keep in sync with the struct in exceptions.hh */
-.macro push_state_to_exception_frame
-    sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
+/* keep in sync with the struct in exceptions.hh
+   the switch argument (1 or 0) indicates if we would be switching from
+   SP_ELx -> SP_EL0 (1) or staying on the same stack - SP_EL0 -> SP_EL0 (0) */
+.macro push_state_to_exception_frame switch
+    // Regardless which stack (pointed by SP_ELx or SP_EL0) was in use when
+    // exception was taken, the stack is always reset to SP_ELx before exception
+    // handler is executed. To make sure the exception handler uses the exception
+    // stack pointed by SP_EL0 we need to set SPSEL to #0.
+    msr spsel, #0 // switch to exception stack by selecting SP_EL0
+    sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
 .cfi_adjust_cfa_offset 48
     push_pair x28, x29
     push_pair x26, x27
@@ -76,7 +83,13 @@ exception_vectors:
     push_pair x4, x5
     push_pair x2, x3
     push_pair x0, x1
+    .if \switch == 1
+    msr spsel, #1 // switch to regular stack (SP_ELx) for brief moment to read it
+    mov x1, sp    // fetch SP of regular stack (spsel 1)
+    msr spsel, #0 // switch back to exception stack
+    .else
     add x1, sp, #288 // x1 := old SP (48 + 16 * 15 = 288)
+    .endif
     mrs x2, elr_el1
     mrs x3, spsr_el1
     stp x30, x1, [sp, #240] // store lr, old SP
@@ -102,6 +115,10 @@ exception_vectors:
     pop_pair x24, x25
     pop_pair x26, x27
     pop_pair x28, x29
+    // please note we do not need to explicitly switch the stack when returning
+    // from exception by resetting the stack selector register, as it will
+    // happen automatically based on the value of spsr_el1 which we restored above
+    // (the spsr_el1 holds PSTATE and EL and SP selector)
     ldr x30, [sp], #48
 .cfi_adjust_cfa_offset -48
 .endm /* pop_state_to_exception_frame */
@@ -143,7 +160,7 @@ entry_\level\()_\type:
     .cfi_offset x30, -32 // Point to the elr register located at the -32 offset
                          // of the exception frame to help gdb link to the
                          // address when interrupt was raised
-    push_state_to_exception_frame
+    push_state_to_exception_frame 1
    mrs x1, esr_el1
     str w1, [sp, #272] // Store Exception Syndrom Register in the frame
     mov x0, sp // Save exception_frame to x0
@@ -165,8 +182,6 @@ entry_\level\()_\type:
 .equ EX_TYPE_FIQ, 0x2
 .equ EX_TYPE_SERROR, 0x3
 
-entry_unexpected_exception curr_el_sp0, sync, #CURR_EL_SP0, #EX_TYPE_SYNC
-entry_unexpected_exception curr_el_sp0, irq, #CURR_EL_SP0, #EX_TYPE_IRQ
 entry_unexpected_exception curr_el_sp0, fiq, #CURR_EL_SP0, #EX_TYPE_FIQ
 entry_unexpected_exception curr_el_sp0, serror, #CURR_EL_SP0, #EX_TYPE_SERROR
 
@@ -183,38 +198,39 @@ entry_unexpected_exception lower_el_aarch32, irq, #LOWER_EL_AARCH32, #EX_TYPE_IRQ
 entry_unexpected_exception lower_el_aarch32, fiq, #LOWER_EL_AARCH32, #EX_TYPE_FIQ
 entry_unexpected_exception lower_el_aarch32, serror, #LOWER_EL_AARCH32, #EX_TYPE_SERROR
 
-.global entry_curr_el_spx_sync
-.hidden entry_curr_el_spx_sync
-.type entry_curr_el_spx_sync, @function
-entry_curr_el_spx_sync:
+.macro entry_curr_el_sync stack, switch
+.global entry_curr_el_sp\stack\()_sync
+.hidden entry_curr_el_sp\stack\()_sync
+.type entry_curr_el_sp\stack\()_sync, @function
+entry_curr_el_sp\stack\()_sync:
     .cfi_startproc simple
     .cfi_signal_frame
     .cfi_def_cfa sp, 0
     .cfi_offset x30, -32 // Point to the elr register located at the -32 offset
                          // of the exception frame to help gdb link to the
                          // address when interrupt was raised
-    push_state_to_exception_frame
+    push_state_to_exception_frame \switch
     mrs x1, esr_el1
     str w1, [sp, #272] // Store Exception Syndrom Register in the frame
     ubfm x2, x1, #ESR_EC_BEG, #ESR_EC_END // Exception Class -> X2
     ubfm x3, x1, #ESR_FLT_BEG, #ESR_FLT_END // FLT -> X3
     cmp x2, #ESR_EC_SVC64
-    b.eq handle_system_call
+    b.eq handle_system_call_sp\stack
     cmp x2, #ESR_EC_DATA_ABORT
-    b.eq handle_mem_abort
+    b.eq handle_mem_abort_sp\stack
     cmp x2, #ESR_EC_INSN_ABORT
-    b.ne unexpected_sync_exception
-handle_mem_abort:
-    cbz x3, unexpected_sync_exception
+    b.ne unexpected_sync_exception_sp\stack
+handle_mem_abort_sp\stack:
+    cbz x3, unexpected_sync_exception_sp\stack
     cmp x3, #3
-    b.hi unexpected_sync_exception
+    b.hi unexpected_sync_exception_sp\stack
     mov x0, sp // save exception_frame to x0
     bl page_fault
     pop_state_from_exception_frame
     eret
     .cfi_endproc
 
-handle_system_call:
+handle_system_call_sp\stack:
     .cfi_startproc
     //see https://man7.org/linux/man-pages/man2/syscall.2.html for details
     //about calling convention for arm64
@@ -237,7 +253,7 @@ handle_system_call:
     pop_state_from_exception_frame
     eret
     .cfi_endproc
-unexpected_sync_exception:
+unexpected_sync_exception_sp\stack:
     .cfi_startproc
     mov x0, sp // save exception_frame to x0
     mov x1, #CURR_EL_SPX
@@ -246,23 +262,32 @@ unexpected_sync_exception:
     pop_state_from_exception_frame
     bl abort
     .cfi_endproc
+.endm
+
+entry_curr_el_sync 0, 0 // the synchronous exception handler used when the SP_EL0 is active
+entry_curr_el_sync x, 1 // the synchronous exception handler used when the SP_ELx is active
 
-.global entry_curr_el_spx_irq
-.hidden entry_curr_el_spx_irq
-.type entry_curr_el_spx_irq, @function
-entry_curr_el_spx_irq:
+.macro entry_curr_el_irq stack, switch
+.global entry_curr_el_sp\stack\()_irq
+.hidden entry_curr_el_sp\stack\()_irq
+.type entry_curr_el_sp\stack\()_irq, @function
+entry_curr_el_sp\stack\()_irq:
     .cfi_startproc simple
     .cfi_signal_frame
     .cfi_def_cfa sp, 0
     .cfi_offset x30, -32 // Point to the elr register located at the -32 offset
                          // of the exception frame to help gdb link to the
                          // address when interrupt was raised
-    push_state_to_exception_frame
+    push_state_to_exception_frame \switch
     mov x0, sp
     bl interrupt // extern "C"
     pop_state_from_exception_frame
     eret
     .cfi_endproc
+.endm
+
+entry_curr_el_irq 0, 0 // the asynchronous exception handler used when the SP_EL0 is active
+entry_curr_el_irq x, 1 // the asynchronous exception handler used when the SP_ELx is active
 
 .global call_signal_handler_thunk
 .hidden call_signal_handler_thunk
diff --git a/arch/aarch64/sched.S b/arch/aarch64/sched.S
index 5949459a..ab049a6f 100644
--- a/arch/aarch64/sched.S
+++ b/arch/aarch64/sched.S
@@ -39,15 +39,31 @@ reschedule_from_interrupt:
     isb
 
     str x29, [x0, #0]     //Save frame pointer of the old thread
+
+    mrs x2, spsel         //Fetch old thread stack selector
+    msr spsel, #1         //Select SP_ELx
     mov x3, sp            //Fetch old thread stack pointer
+
     adr x4, 1f            //Fetch old thread instruction point
     stp x3, x4, [x0, #16] //Save old thread sp and pc
+    msr spsel, #0         //Select SP_EL0
+    mov x3, sp            //Fetch old thread exception stack pointer
+    stp x3, x2, [x0, #40] //Save old thread exception stack pointer and stack selector
+
     ldp x29, x0, [x1, #0] //Set frame pointer of the new thread and this (x0) of the new thread
     //Please note that the pc may point to thread_main_c(thread*) which is
     //why we have to set x0 (1st argument) to new thread object
     ldp x3, x4, [x1, #16] //Fetch new thread sp and pc
-    mov sp, x3            //Set new thread stack pointer
+
+    msr spsel, #1         //Select SP_ELx
+    mov sp, x3            //Restore new thread stack pointer
+
+    ldp x3, x2, [x1, #40] //Load new thread exception stack pointer and stack selector
+    msr spsel, #0         //Select SP_EL0
+    mov sp, x3            //Restore new thread exception stack pointer
+    msr spsel, x2         //Restore new thread stack selector (1-SP_ELx,0-SP_EL0)
+
     blr x4                //Jump to the new thread pc
 1:
-- 
2.27.0