Module: xenomai-3 Branch: arm64 Commit: 02e4fab4215698e28157c0d4145e524acf1b60d8 URL: http://git.xenomai.org/?p=xenomai-3.git;a=commit;h=02e4fab4215698e28157c0d4145e524acf1b60d8
Author: Dmitriy Cherkasov <dmit...@mperpetuo.com> Date: Fri Sep 11 17:58:54 2015 -0700 cobalt/arm64: add basic FPU support --- kernel/cobalt/arch/arm64/Kconfig | 2 +- .../cobalt/arch/arm64/include/asm/xenomai/fptest.h | 11 +- .../cobalt/arch/arm64/include/asm/xenomai/thread.h | 17 +- .../arch/arm64/include/asm/xenomai/uapi/fptest.h | 83 +++-- kernel/cobalt/arch/arm64/thread.c | 329 +++++--------------- lib/cobalt/arch/arm64/features.c | 4 +- 6 files changed, 153 insertions(+), 293 deletions(-) diff --git a/kernel/cobalt/arch/arm64/Kconfig b/kernel/cobalt/arch/arm64/Kconfig index dd5a8c6..27b5026 100644 --- a/kernel/cobalt/arch/arm64/Kconfig +++ b/kernel/cobalt/arch/arm64/Kconfig @@ -8,7 +8,7 @@ config XENO_ARCH_WANT_TIP def_bool y config XENO_ARCH_FPU - def_bool VFP + def_bool y config XENO_ARCH_SYS3264 def_bool n diff --git a/kernel/cobalt/arch/arm64/include/asm/xenomai/fptest.h b/kernel/cobalt/arch/arm64/include/asm/xenomai/fptest.h index a76f1e6..743d758 100644 --- a/kernel/cobalt/arch/arm64/include/asm/xenomai/fptest.h +++ b/kernel/cobalt/arch/arm64/include/asm/xenomai/fptest.h @@ -20,15 +20,10 @@ #define _COBALT_ARM_ASM_FPTEST_H #include <linux/errno.h> +#include <asm/xenomai/uapi/fptest.h> #include <asm/hwcap.h> -#ifdef CONFIG_VFP -#define have_vfp (elf_hwcap & HWCAP_VFP) -#else /* !CONFIG_VFP */ -#define have_vfp 0 -#endif /* !CONFIG_VFP */ - -#include <asm/xenomai/uapi/fptest.h> +#define have_fp (elf_hwcap & HWCAP_FP) static inline int fp_kernel_supported(void) { @@ -46,7 +41,7 @@ static inline void fp_linux_end(void) static inline int fp_detect(void) { - return have_vfp ? __COBALT_HAVE_VFP : 0; + return have_fp ? __COBALT_HAVE_FPU : 0; } #endif /* _COBALT_ARM_ASM_FPTEST_H */ diff --git a/kernel/cobalt/arch/arm64/include/asm/xenomai/thread.h b/kernel/cobalt/arch/arm64/include/asm/xenomai/thread.h index a8d7ed4..bfcceb4 100644 --- a/kernel/cobalt/arch/arm64/include/asm/xenomai/thread.h +++ b/kernel/cobalt/arch/arm64/include/asm/xenomai/thread.h @@ -21,21 +21,12 @@ #include <asm-generic/xenomai/thread.h> -#ifdef CONFIG_XENO_ARCH_FPU -#ifdef CONFIG_VFP -#include <asm/vfp.h> -#endif /* CONFIG_VFP */ -#endif /* !CONFIG_XENO_ARCH_FPU */ struct xnarchtcb { struct xntcb core; #ifdef CONFIG_XENO_ARCH_FPU -#ifdef CONFIG_VFP - union vfp_state *fpup; + struct fpsimd_state *fpup; #define xnarch_fpu_ptr(tcb) ((tcb)->fpup) -#else -#define xnarch_fpu_ptr(tcb) NULL -#endif #endif struct { unsigned long pc; @@ -67,7 +58,7 @@ static inline void xnarch_enter_root(struct xnthread *root) { } int xnarch_escalate(void); -#if defined(CONFIG_XENO_ARCH_FPU) && defined(CONFIG_VFP) +#if defined(CONFIG_XENO_ARCH_FPU) static inline void xnarch_init_root_tcb(struct xnthread *thread) { @@ -88,7 +79,7 @@ void xnarch_switch_fpu(struct xnthread *from, struct xnthread *thread); int xnarch_handle_fpu_fault(struct xnthread *from, struct xnthread *to, struct ipipe_trap_data *d); -#else /* !CONFIG_XENO_ARCH_FPU || !CONFIG_VFP */ +#else /* !CONFIG_XENO_ARCH_FPU */ static inline void xnarch_init_root_tcb(struct xnthread *thread) { } static inline void xnarch_init_shadow_tcb(struct xnthread *thread) { } @@ -114,7 +105,7 @@ static inline int xnarch_handle_fpu_fault(struct xnthread *from, { return 0; } -#endif /* !CONFIG_XENO_ARCH_FPU || !CONFIG_VFP */ +#endif /* !CONFIG_XENO_ARCH_FPU */ static inline void xnarch_enable_kfpu(void) { } diff --git a/kernel/cobalt/arch/arm64/include/asm/xenomai/uapi/fptest.h b/kernel/cobalt/arch/arm64/include/asm/xenomai/uapi/fptest.h index 65a3e31..25bc976 100644 --- a/kernel/cobalt/arch/arm64/include/asm/xenomai/uapi/fptest.h +++ b/kernel/cobalt/arch/arm64/include/asm/xenomai/uapi/fptest.h @@ -18,53 +18,86 @@ #ifndef _COBALT_ARM_ASM_UAPI_FPTEST_H #define _COBALT_ARM_ASM_UAPI_FPTEST_H -#ifdef __aarch64__ -/* CP10 and CP11, used for the FP/NEON operations, are already excluded from -the list of valid operands for the generic coprocessor instructions */ -#define __COBALT_HAVE_VFP 0 -#else -#define __COBALT_HAVE_VFP 0x1 -#endif +#define __COBALT_HAVE_FPU 0x1 static inline void fp_regs_set(int features, unsigned int val) { -#if __COBALT_HAVE_VFP != 0 - unsigned long long e[16]; + + unsigned long long e[32]; unsigned int i; - if (features & __COBALT_HAVE_VFP) { - for (i = 0; i < 16; i++) + if (features & __COBALT_HAVE_FPU) { + + for (i = 0; i < 32; i++) e[i] = val; - /* vldm %0!, {d0-d15}, - AKA fldmiax %0!, {d0-d15} */ - __asm__ __volatile__("ldc p11, cr0, [%0],#32*4": - "=r"(i): "0"(&e[0]): "memory"); + __asm__ __volatile__("ldp d0, d1, [%0, #8 * 0] \n\ + ldp d2, d3, [%0, #8 * 2] \n\ + ldp d4, d5, [%0, #8 * 4]\n\ + ldp d6, d7, [%0, #8 * 6]\n\ + ldp d8, d9, [%0, #8 * 8]\n\ + ldp d10, d11, [%0, #8 * 10]\n\ + ldp d12, d13, [%0, #8 * 12]\n\ + ldp d14, d15, [%0, #8 * 14]\n\ + ldp d16, d17, [%0, #8 * 16]\n\ + ldp d18, d19, [%0, #8 * 18]\n\ + ldp d20, d21, [%0, #8 * 20]\n\ + ldp d22, d23, [%0, #8 * 22]\n\ + ldp d24, d25, [%0, #8 * 24]\n\ + ldp d26, d27, [%0, #8 * 26]\n\ + ldp d28, d29, [%0, #8 * 28]\n\ + ldp d30, d31, [%0, #8 * 30]" + + : /* No outputs. */ + : "r"(&e[0]) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory" + ); } -#endif + } static inline unsigned int fp_regs_check(int features, unsigned int val, int (*report)(const char *fmt, ...)) { unsigned int result = val; -#if __COBALT_HAVE_VFP != 0 + unsigned int i; - unsigned long long e[16]; + unsigned long long e[32]; + + if (features & __COBALT_HAVE_FPU) { + + __asm__ __volatile__("stp d0, d1, [%0, #8 * 0] \n\ + stp d2, d3, [%0, #8 * 2] \n\ + stp d4, d5, [%0, #8 * 4]\n\ + stp d6, d7, [%0, #8 * 6]\n\ + stp d8, d9, [%0, #8 * 8]\n\ + stp d10, d11, [%0, #8 * 10]\n\ + stp d12, d13, [%0, #8 * 12]\n\ + stp d14, d15, [%0, #8 * 14]\n\ + stp d16, d17, [%0, #8 * 16]\n\ + stp d18, d19, [%0, #8 * 18]\n\ + stp d20, d21, [%0, #8 * 20]\n\ + stp d22, d23, [%0, #8 * 22]\n\ + stp d24, d25, [%0, #8 * 24]\n\ + stp d26, d27, [%0, #8 * 26]\n\ + stp d28, d29, [%0, #8 * 28]\n\ + stp d30, d31, [%0, #8 * 30]" + + : /* No outputs. */ + : "r"(&e[0]) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory" + ); + - if (features & __COBALT_HAVE_VFP) { - /* vstm %0!, {d0-d15}, - AKA fstmiax %0!, {d0-d15} */ - __asm__ __volatile__("stc p11, cr0, [%0],#32*4": - "=r"(i): "0"(&e[0]): "memory"); - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) if (e[i] != val) { report("d%d: %llu != %u\n", i, e[i], val); result = e[i]; } } -#endif return result; } diff --git a/kernel/cobalt/arch/arm64/thread.c b/kernel/cobalt/arch/arm64/thread.c index 3168e28..316525f6 100644 --- a/kernel/cobalt/arch/arm64/thread.c +++ b/kernel/cobalt/arch/arm64/thread.c @@ -3,6 +3,9 @@ * * ARM port * Copyright (C) 2005 Stelian Pop + * + * ARM64 port + * Copyright (C) 2015 Dmitriy Cherkasov <dmit...@mperpetuo.com> * * Xenomai is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by @@ -26,291 +29,120 @@ #include <linux/jump_label.h> #include <asm/mmu_context.h> #include <cobalt/kernel/thread.h> +#include <asm/fpsimd.h> +#include <asm/processor.h> +#include <asm/hw_breakpoint.h> -struct static_key __xeno_vfp_key = STATIC_KEY_INIT_TRUE; -asmlinkage void __asm_thread_trampoline(void); +#if defined(CONFIG_XENO_ARCH_FPU) -#if defined(CONFIG_XENO_ARCH_FPU) && defined(CONFIG_VFP) - -static unsigned int vfp_checked; static DEFINE_MUTEX(vfp_check_lock); -asmlinkage void __asm_vfp_save(union vfp_state *vfp, unsigned int fpexc); - -asmlinkage void __asm_vfp_load(union vfp_state *vfp, unsigned int cpu); - -#define do_vfp_fmrx(_vfp_) \ - ({ \ - u32 __v; \ - asm volatile("mrc p10, 7, %0, " __stringify(_vfp_) \ - ", cr0, 0 @ fmrx %0, " #_vfp_: \ - "=r" (__v)); \ - __v; \ - }) - -#define do_vfp_fmxr(_vfp_,_var_) \ - asm volatile("mcr p10, 7, %0, " __stringify(_vfp_) \ - ", cr0, 0 @ fmxr " #_vfp_ ", %0": \ - /* */ : "r" (_var_)) - -extern union vfp_state *vfp_current_hw_state[NR_CPUS]; - -static inline union vfp_state *get_fpu_owner(void) -{ - union vfp_state *vfp_owner; - unsigned int cpu; -#ifdef CONFIG_SMP - unsigned int fpexc; -#endif - -#if __LINUX_ARM_ARCH__ <= 6 - if (!static_key_true(&__xeno_vfp_key)) - return NULL; -#endif - -#ifdef CONFIG_SMP - fpexc = do_vfp_fmrx(FPEXC); - if (!(fpexc & FPEXC_EN)) - return NULL; -#endif - - cpu = ipipe_processor_id(); - vfp_owner = vfp_current_hw_state[cpu]; - if (!vfp_owner) - return NULL; - -#ifdef CONFIG_SMP - if (vfp_owner->hard.cpu != cpu) - return NULL; -#endif /* SMP */ - - return vfp_owner; -} - -#define do_disable_vfp(fpexc) \ - do_vfp_fmxr(FPEXC, fpexc & ~FPEXC_EN) - -#define XNARCH_VFP_ANY_EXC \ - (FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK) - -#define do_enable_vfp() \ - ({ \ - unsigned _fpexc = do_vfp_fmrx(FPEXC) | FPEXC_EN; \ - do_vfp_fmxr(FPEXC, _fpexc & ~XNARCH_VFP_ANY_EXC); \ - _fpexc; \ - }) int xnarch_fault_fpu_p(struct ipipe_trap_data *d) { - /* This function does the same thing to decode the faulting instruct as - "call_fpe" in arch/arm/entry-armv.S */ - static unsigned copro_to_exc[16] = { - IPIPE_TRAP_UNDEFINSTR, - /* FPE */ - IPIPE_TRAP_FPU, IPIPE_TRAP_FPU, - IPIPE_TRAP_UNDEFINSTR, -#ifdef CONFIG_CRUNCH - IPIPE_TRAP_FPU, IPIPE_TRAP_FPU, IPIPE_TRAP_FPU, -#else /* !CONFIG_CRUNCH */ - IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, -#endif /* !CONFIG_CRUNCH */ - IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, -#ifdef CONFIG_VFP - IPIPE_TRAP_VFP, IPIPE_TRAP_VFP, -#else /* !CONFIG_VFP */ - IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, -#endif /* !CONFIG_VFP */ - IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, - IPIPE_TRAP_UNDEFINSTR, IPIPE_TRAP_UNDEFINSTR, - }; - unsigned instr, exc, cp; - char *pc; - - if (d->exception == IPIPE_TRAP_FPU) - return 1; - - if (d->exception == IPIPE_TRAP_VFP) - goto trap_vfp; - - if (d->exception != IPIPE_TRAP_UNDEFINSTR) - return 0; - - pc = (char *) xnarch_fault_pc(d); - if (unlikely(thumb_mode(d->regs))) { - unsigned short thumbh, thumbl; - -#if defined(CONFIG_ARM_THUMB) && __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_V7) -#if __LINUX_ARM_ARCH__ < 7 - if (cpu_architecture() < CPU_ARCH_ARMv7) -#else - if (0) -#endif /* arch < 7 */ -#endif /* thumb && arch >= 6 && cpu_v7 */ - return 0; - - thumbh = *(unsigned short *) pc; - thumbl = *((unsigned short *) pc + 1); - - if ((thumbh & 0x0000f800) < 0x0000e800) - return 0; - instr = (thumbh << 16) | thumbl; - -#ifdef CONFIG_NEON - if ((instr & 0xef000000) == 0xef000000 - || (instr & 0xff100000) == 0xf9000000) - goto trap_vfp; -#endif - } else { - instr = *(unsigned *) pc; - -#ifdef CONFIG_NEON - if ((instr & 0xfe000000) == 0xf2000000 - || (instr & 0xff100000) == 0xf4000000) - goto trap_vfp; -#endif - } - - if ((instr & 0x0c000000) != 0x0c000000) - return 0; - - cp = (instr & 0x00000f00) >> 8; -#ifdef CONFIG_IWMMXT - /* We need something equivalent to _TIF_USING_IWMMXT for Xenomai kernel - threads */ - if (cp <= 1) { - d->exception = IPIPE_TRAP_FPU; - return 1; - } -#endif - - exc = copro_to_exc[cp]; - if (exc == IPIPE_TRAP_VFP) { - trap_vfp: - /* If an exception is pending, the VFP fault is not really an - "FPU unavailable" fault, so we return undefinstr in that - case, the nucleus will let linux handle the fault. */ - exc = do_vfp_fmrx(FPEXC); - if (exc & (FPEXC_EX|FPEXC_DEX) - || ((exc & FPEXC_EN) && do_vfp_fmrx(FPSCR) & FPSCR_IXE)) - exc = IPIPE_TRAP_UNDEFINSTR; - else - exc = IPIPE_TRAP_VFP; - } + /* FPU never trapped, this will be a fault */ + return 0; +} - d->exception = exc; - return exc != IPIPE_TRAP_UNDEFINSTR; +static inline struct fpsimd_state *get_fpu_owner(struct xnarchtcb *tcb) { + return &(tcb->core.tsp->fpsimd_state); } void xnarch_leave_root(struct xnthread *root) { struct xnarchtcb *rootcb = xnthread_archtcb(root); - rootcb->fpup = get_fpu_owner(); + rootcb->fpup = get_fpu_owner(rootcb); } void xnarch_save_fpu(struct xnthread *thread) { - struct xnarchtcb *tcb = &thread->tcb; - if (tcb->fpup) - __asm_vfp_save(tcb->fpup, do_enable_vfp()); + struct xnarchtcb *tcb = &(thread->tcb); + if (xnarch_fpu_ptr(tcb)) + fpsimd_save_state(tcb->fpup); } void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to) { - union vfp_state *const from_fpup = from ? from->tcb.fpup : NULL; - unsigned cpu = ipipe_processor_id(); - - if (xnthread_test_state(to, XNROOT) == 0) { - union vfp_state *const to_fpup = to->tcb.fpup; - unsigned fpexc = do_enable_vfp(); + struct fpsimd_state *const from_fpup = from ? from->tcb.fpup : NULL; - if (from_fpup == to_fpup) - return; + /* always switch, no lazy switching */ - if (from_fpup) - __asm_vfp_save(from_fpup, fpexc); + struct fpsimd_state *const to_fpup = to->tcb.fpup; + + if (from_fpup == to_fpup) + return; + + if (from_fpup) + fpsimd_save_state(from_fpup); + + fpsimd_load_state(to_fpup); + + /* always set FPU enabled */ + xnthread_set_state(to, XNFPU); - __asm_vfp_load(to_fpup, cpu); - } else { - /* - * We are restoring the Linux current thread. The FPU - * can be disabled, so that a fault will occur if the - * newly switched thread uses the FPU, to allow the - * kernel handler to pick the correct FPU context, and - * save in the same move the last used RT context. - */ - vfp_current_hw_state[cpu] = from_fpup; -#ifdef CONFIG_SMP - /* - * On SMP, since returning to FPU disabled mode means - * that we have to save fpu, avoid doing it if - * current FPU context belongs to the task we are - * switching to. - */ - if (from_fpup) { - union vfp_state *const current_task_fpup = - &to->tcb.core.tip->vfpstate; - const unsigned fpdis = do_vfp_fmrx(FPEXC); - const unsigned fpen = fpdis | FPEXC_EN; - - do_vfp_fmxr(FPEXC, fpen & ~XNARCH_VFP_ANY_EXC); - if (from_fpup == current_task_fpup) - return; - - __asm_vfp_save(from_fpup, fpen); - do_vfp_fmxr(FPEXC, fpdis); - } -#endif - } } int xnarch_handle_fpu_fault(struct xnthread *from, struct xnthread *to, struct ipipe_trap_data *d) { - spl_t s; + /* FPU always enabled, faults force exit to Linux */ + return 0; +} - if (xnthread_test_state(to, XNFPU)) - /* FPU is already enabled, probably an exception */ - return 0; +void xnarch_init_shadow_tcb(struct xnthread *thread) +{ + struct xnarchtcb *tcb = xnthread_archtcb(thread); -#if __LINUX_ARM_ARCH__ <= 6 - if (!static_key_true(&__xeno_vfp_key)) - /* VFP instruction emitted, on a cpu without VFP, this - is an error */ - return 0; -#endif + tcb->fpup = &(tcb->core.host_task->thread.fpsimd_state); - xnlock_get_irqsave(&nklock, s); - xnthread_set_state(to, XNFPU); - xnlock_put_irqrestore(&nklock, s); + /* XNFPU is always set, no lazy switching */ + xnthread_set_state(thread, XNFPU); +} +#endif /* CONFIG_XENO_ARCH_FPU */ - xnarch_switch_fpu(from, to); - /* Retry faulting instruction */ - d->regs->ARM_pc = xnarch_fault_pc(d); - return 1; -} +/* Switch support functions */ -void xnarch_init_shadow_tcb(struct xnthread *thread) +static void xnarch_tls_thread_switch(struct task_struct *next) { - struct xnarchtcb *tcb = xnthread_archtcb(thread); + unsigned long tpidr, tpidrro; - tcb->fpup = &task_thread_info(tcb->core.host_task)->vfpstate; + if (!is_compat_task()) { + asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + current->thread.tp_value = tpidr; + } - if (vfp_checked == 0) { - mutex_lock(&vfp_check_lock); - if (vfp_checked == 0) { - if ((elf_hwcap & HWCAP_VFP) == 0) - static_key_slow_dec(&__xeno_vfp_key); - vfp_checked = 1; - } - mutex_unlock(&vfp_check_lock); + if (is_compat_thread(task_thread_info(next))) { + tpidr = 0; + tpidrro = next->thread.tp_value; + } else { + tpidr = next->thread.tp_value; + tpidrro = 0; } - /* XNFPU is set upon first FPU fault */ - xnthread_clear_state(thread, XNFPU); + asm( + " msr tpidr_el0, %0\n" + " msr tpidrro_el0, %1" + : : "r" (tpidr), "r" (tpidrro)); +} + +#ifdef CONFIG_PID_IN_CONTEXTIDR +static inline void xnarch_contextidr_thread_switch(struct task_struct *next) +{ + asm( + " msr contextidr_el1, %0\n" + " isb" + : + : "r" (task_pid_nr(next))); +} +#else +static inline void xnarch_contextidr_thread_switch(struct task_struct *next) +{ } -#endif /* CONFIG_XENO_ARCH_FPU && CONFIG_VFP*/ +#endif + +/*/Switch support functions */ void xnarch_switch_to(struct xnthread *out, struct xnthread *in) { @@ -338,7 +170,16 @@ void xnarch_switch_to(struct xnthread *out, struct xnthread *in) enter_lazy_tlb(prev_mm, next); } - __switch_to(out_tcb->core.tip->task, in_tcb->core.tip->task); + xnarch_tls_thread_switch(in_tcb->core.tip->task); + xnarch_contextidr_thread_switch(in_tcb->core.tip->task); + + /* + * Complete any pending TLB or cache maintenance on this CPU in case + * the thread migrates to a different CPU. + */ + dsb(ish); + + cpu_switch_to(out_tcb->core.tip->task, in_tcb->core.tip->task); } int xnarch_escalate(void) diff --git a/lib/cobalt/arch/arm64/features.c b/lib/cobalt/arch/arm64/features.c index 7c7e76a..f5253a6 100644 --- a/lib/cobalt/arch/arm64/features.c +++ b/lib/cobalt/arch/arm64/features.c @@ -94,8 +94,8 @@ int cobalt_fp_detect(void) while (fgets(buffer, sizeof(buffer), fp)) { if(strncmp(buffer, "Features", sizeof("Features") - 1)) continue; - if (strstr(buffer, "vfp")) { - features |= __COBALT_HAVE_VFP; + if (strstr(buffer, "fp")) { + features |= __COBALT_HAVE_FPU; break; } } _______________________________________________ Xenomai-git mailing list Xenomai-git@xenomai.org http://xenomai.org/mailman/listinfo/xenomai-git