This patch implements the signal handling on aarch64. I will not be repeating the details of what it changes and why as it is quite well explained in the code changes.
But in essence, this patch updates the build_signal_frame() which I believe was based on the x86_64 version of it with the changes specific to aarch64. It also adds missing handling of the SA_ONSTACK flag. Secondly, this patch also enhances entry.S to implement the call_signal_handler_thunk which is probably the most tricky part. The call_signal_handler_thunk is called on exit from a page fault as an effect of build_signal_frame() setting the field `elr` into the exception frame. Unlike on x86_64, the stack pointer register (sp) is not changed automatically based on the content of the frame on exit. To that end, the call_signal_handler_thunk has to carefully switch to SP_EL0 (exception stack) to read the value of the sp field from the exception frame in order to set SP_EL1 which is used normally for non-expection-handling by kernel and apps. Eventually, it calls the call_signal_handler() which is implemented logically in similar fashion as on x86_64 except for different registers. Finally, this patch also enables 3 unit tests to run on aarch64. Fixes #1154 Fixes #1151 Fixes #1152 Fixes #1153 Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com> --- arch/aarch64/entry.S | 52 +++++++++++++---- arch/aarch64/exceptions.hh | 2 +- arch/aarch64/signal.cc | 115 +++++++++++++++++++++++++++++++++++-- scripts/test.py | 9 --------- 4 files changed, 152 insertions(+), 26 deletions(-) diff --git a/arch/aarch64/entry.S b/arch/aarch64/entry.S index 8322ee90..8cbc0f57 100644 --- a/arch/aarch64/entry.S +++ b/arch/aarch64/entry.S @@ -92,8 +92,10 @@ exception_vectors: .endif mrs x2, elr_el1 mrs x3, spsr_el1 + mrs x4, far_el1 stp x30, x1, [sp, #240] // store lr, old SP stp x2, x3, [sp, #256] // store elr_el1, spsr_el1 + str x4, [sp, #280] // store far_el1 .endm /* push_state_to_exception_frame */ .macro pop_state_from_exception_frame @@ -294,18 +296,44 @@ entry_curr_el_irq x, 1 // the asynchronous exception handler used when the SP_EL call_signal_handler_thunk: .type call_signal_handler_thunk, @function .cfi_startproc simple - # stack contains a signal_frame - /* - .cfi_offset reg, offset - ... - mov x0, sp - call call_signal_handler - # FIXME: fpu - - pop_pair... - add sp, sp, 16 # error_code - */ - ret + .cfi_signal_frame + .cfi_def_cfa %sp, 0 + .cfi_offset x30, -32 // Point to the elr register located at the -32 offset + // of the exception frame to help gdb link to the + // address when interrupt was raised + + # The call_signal_handler_thunk gets called on exit from the synchronous exception + # (most likely page fault handler) as a result of build_signal_frame placing the address + # of call_signal_handler_thunk into elr field of the exception frame. + + # On exit from the exception, the stack selector is reset to point to SP_EL1 which + # is where we are now. However the build_signal_frame() placed the address of the stack + # we are supposed to use in the field 'sp' of the original exception frame still present + # on the exception stack (please note the exception have been disabled). So in order + # to read the value of the 'sp' field we need to switch back briefly to the exception + # stack. + mrs x1, SPsel + msr SPsel, #0 // switch back to SP_EL0 so we can see original exception frame + ldr x0, [sp, #-40] // read 'sp' field placed by build_signal_frame() in the original exception frame + msr SPsel, x1 // switch stack selector to the original value + mov sp, x0 // set sp to the stack setup by build_signal_frame() + // sp points to the signal frame and original exception frame at the same time + //TODO: Fix cfa to help debugger + msr daifclr, #2 // enable interrupts which were disabled by build_signal_frame() + isb + + bl call_signal_handler //x0 (1st argument) points to the signal frame + + pop_state_from_exception_frame + # Adjust stack pointer by the remaining part of the signal frame to get back + # to the position in the stack we should be according to the logic in build_signal_frame(). + add sp, sp, #288 + # Please note we may not be on the original stack when exception was triggered. + # We would be IF the signal handler was executed on the same stack. However if user set + # up his own stack and passed using sigalstack() with SA_ONSTACK to make it handle + # out of stack page fault, we would instead stay on that user stack rather than restore + # to the one that was exhausted. + eret .cfi_endproc // Keep fpu_state_save/load in sync with struct fpu_state in arch/aarch64/processor.hh diff --git a/arch/aarch64/exceptions.hh b/arch/aarch64/exceptions.hh index de98dc52..e78cbe8f 100644 --- a/arch/aarch64/exceptions.hh +++ b/arch/aarch64/exceptions.hh @@ -27,7 +27,7 @@ struct exception_frame { u64 spsr; u32 esr; u32 align1; - u64 align2; /* align to 16 */ + u64 far; void *get_pc(void) { return (void *)elr; } unsigned int get_error(void) { return esr; } diff --git a/arch/aarch64/signal.cc b/arch/aarch64/signal.cc index 7f3cbc15..2a00e595 100644 --- a/arch/aarch64/signal.cc +++ b/arch/aarch64/signal.cc @@ -14,6 +14,10 @@ #include <arch-cpu.hh> #include <osv/debug.hh> +namespace osv { +extern struct sigaction signal_actions[]; +}; + namespace arch { struct signal_frame { @@ -35,20 +39,123 @@ void build_signal_frame(exception_frame* ef, const siginfo_t& si, const struct sigaction& sa) { - void* sp = reinterpret_cast<void*>(ef->sp); - sp -= sizeof(signal_frame); + // If an alternative signal stack was defined for this thread with + // sigaltstack() and the SA_ONSTACK flag was specified, we should run + // the signal handler on that stack. Otherwise, we need to run further + // down the same stack the thread was using when it received the signal: + void *sp = nullptr; + if (sa.sa_flags & SA_ONSTACK) { + stack_t sigstack; + sigaltstack(nullptr, &sigstack); + if (!(sigstack.ss_flags & SS_DISABLE)) { + // ss_sp points to the beginning of the stack region, but aarch64 + // stacks grow downward, from the end of the region + sp = sigstack.ss_sp + sigstack.ss_size; + } + } + if (!sp) { + sp = reinterpret_cast<void*>(ef->sp); + } + sp -= sizeof(signal_frame); // Make space for signal frame on stack sp = align_down(sp, 16); signal_frame* frame = static_cast<signal_frame*>(sp); - frame->state = *ef; + frame->state = *ef; // Save original exception frame and si and sa on stack frame->si = si; frame->sa = sa; + // Exit from this exception will make it call call_signal_handler_thunk ef->elr = reinterpret_cast<ulong>(call_signal_handler_thunk); + // On x86_64 exiting from exception sets stack pointer to the value we + // would adjust in the exception frame. On aarch64 the stack pointer is not reset + // automatically to the value of the field sp and we have to rely on + // call_signal_handler_thunk to manually set it. ef->sp = reinterpret_cast<ulong>(sp); + // To make sure it happens correctly we need to disable exceptions + // so that call_signal_handler_thunk has a chance to execute this logic. + ef->spsr |= processor::daif_i; } } +// This is called on exit from a synchronous exception after build_signal_frame() void call_signal_handler(arch::signal_frame* frame) { - processor::halt_no_interrupts(); + sched::fpu_lock fpu; + SCOPE_LOCK(fpu); + if (frame->sa.sa_flags & SA_SIGINFO) { + ucontext_t uc = {}; + auto& mcontext = uc.uc_mcontext; + auto& f = frame->state; + mcontext.regs[0] = f.regs[0]; + mcontext.regs[1] = f.regs[1]; + mcontext.regs[2] = f.regs[2]; + mcontext.regs[3] = f.regs[3]; + mcontext.regs[4] = f.regs[4]; + mcontext.regs[5] = f.regs[5]; + mcontext.regs[6] = f.regs[6]; + mcontext.regs[7] = f.regs[7]; + mcontext.regs[8] = f.regs[8]; + mcontext.regs[9] = f.regs[9]; + mcontext.regs[10] = f.regs[10]; + mcontext.regs[11] = f.regs[11]; + mcontext.regs[12] = f.regs[12]; + mcontext.regs[13] = f.regs[13]; + mcontext.regs[14] = f.regs[14]; + mcontext.regs[15] = f.regs[15]; + mcontext.regs[16] = f.regs[16]; + mcontext.regs[17] = f.regs[17]; + mcontext.regs[18] = f.regs[18]; + mcontext.regs[19] = f.regs[19]; + mcontext.regs[20] = f.regs[20]; + mcontext.regs[21] = f.regs[21]; + mcontext.regs[22] = f.regs[22]; + mcontext.regs[23] = f.regs[23]; + mcontext.regs[24] = f.regs[24]; + mcontext.regs[25] = f.regs[25]; + mcontext.regs[26] = f.regs[26]; + mcontext.regs[27] = f.regs[27]; + mcontext.regs[28] = f.regs[28]; + mcontext.regs[29] = f.regs[29]; + mcontext.regs[30] = f.regs[30]; + mcontext.sp = f.sp; + mcontext.pc = f.elr; + mcontext.pstate = f.spsr; + mcontext.fault_address = f.far; + frame->sa.sa_sigaction(frame->si.si_signo, &frame->si, &uc); + f.regs[0] = mcontext.regs[0]; + f.regs[1] = mcontext.regs[1]; + f.regs[2] = mcontext.regs[2]; + f.regs[3] = mcontext.regs[3]; + f.regs[4] = mcontext.regs[4]; + f.regs[5] = mcontext.regs[5]; + f.regs[6] = mcontext.regs[6]; + f.regs[7] = mcontext.regs[7]; + f.regs[8] = mcontext.regs[8]; + f.regs[9] = mcontext.regs[9]; + f.regs[10] = mcontext.regs[10]; + f.regs[11] = mcontext.regs[11]; + f.regs[12] = mcontext.regs[12]; + f.regs[13] = mcontext.regs[13]; + f.regs[14] = mcontext.regs[14]; + f.regs[15] = mcontext.regs[15]; + f.regs[16] = mcontext.regs[16]; + f.regs[17] = mcontext.regs[17]; + f.regs[18] = mcontext.regs[18]; + f.regs[19] = mcontext.regs[19]; + f.regs[20] = mcontext.regs[20]; + f.regs[21] = mcontext.regs[21]; + f.regs[22] = mcontext.regs[22]; + f.regs[23] = mcontext.regs[23]; + f.regs[24] = mcontext.regs[24]; + f.regs[25] = mcontext.regs[25]; + f.regs[26] = mcontext.regs[26]; + f.regs[27] = mcontext.regs[27]; + f.regs[28] = mcontext.regs[28]; + f.regs[29] = mcontext.regs[29]; + f.regs[30] = mcontext.regs[30]; + f.sp = mcontext.sp; + f.elr = mcontext.pc; + f.spsr = mcontext.pstate; + } else { + frame->sa.sa_handler(frame->si.si_signo); + } } diff --git a/scripts/test.py b/scripts/test.py index e036adbe..5b4c7ae3 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -31,14 +31,6 @@ firecracker_disabled_list= [ "tcp_close_without_reading_on_qemu" ] -#At this point there are 128 out of 134 unit tests that pass on aarch64. -#The remaining ones are disabled below until we fix the issues that prevent them from passing. -aarch64_disabled_list= [ - "tst-sigaltstack.so", #Most likely fails due to the lack of signals support on aarch64 - see #1151 - "tst-elf-permissions.so", #Hangs - most likely fails due to the lack of signals support on aarch64 - see #1152 - "tst-mmap.so", #Hangs - most likely fails due to the lack of signals support on aarch64 - see #1153 -] - class TestRunnerTest(SingleCommandTest): def __init__(self, name): super(TestRunnerTest, self).__init__(name, '/tests/%s' % name) @@ -185,7 +177,6 @@ if __name__ == "__main__": disabled_list.extend(qemu_disabled_list) if cmdargs.arch == 'aarch64': - disabled_list.extend(aarch64_disabled_list) if host_arch != cmdargs.arch: #Until the issue #1143 is resolved, we need to force running with 2 CPUs in TCG mode run_py_args = run_py_args + ['-c', '2'] -- 2.27.0 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220503221608.41944-1-jwkozaczuk%40gmail.com.