ARM64 SMP support

This is the rest of the pieces of SMP support for arm64, major changes:

switch to ticket lock instead of simple lock
add code to print core type (printed from each core as it spins up)

IPI support is not yet added with this diff, however this should be 
already reasonably functional. FP context is saved on entry and when
runnable tasks are made available, 'sev' will wake sleeping processors.
The mips64_ipi_init() comment below is a reminder to implement IPI.

cpubus and late attach of the processor were added so that the device
to spin up the cores would be attached and available.


diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c
new file mode 100644
index 00000000000..6a6f10a9727
--- /dev/null
+++ b/sys/arch/arm64/arm64/cpu.c
@@ -0,0 +1,336 @@
+/*     $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 Dale Rahn <dr...@dalerahn.com>
+ * Copyright (c) 1997-2004 Opsycon AB (www.opsycon.se)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/atomic.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <dev/rndvar.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/cpu.h>
+
+#include <machine/bus.h>
+#include <machine/intr.h>
+#include <machine/fdt.h>
+#include <machine/cpufunc.h>
+
+#include <arm64/arm64/arm64var.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/fdt.h>
+
+int    cpumatch(struct device *, void *, void *);
+void   cpuattach(struct device *, struct device *, void *);
+
+#ifdef MULTIPROCESSOR
+extern struct cpu_info cpu_info_primary;
+struct cpu_info *cpu_info_list = &cpu_info_primary;
+struct cpuset cpus_running;
+struct cpu_info *cpu_info_secondaries[MAXCPUS];
+void cpu_boot_secondary(struct cpu_info *ci);
+void cpu_hatch_secondary(struct cpu_info *ci);
+
+#endif
+
+struct cpu_cores {
+       int      id;
+       char    *name;
+};
+
+struct cpu_cores cpu_cores_none[] = {
+       { 0x0, "Unknown" },
+};
+
+struct cpu_cores cpu_cores_arm[] = {
+       { CPU_PART_CORTEX_A53, "Cortex-A53" },
+       { CPU_PART_CORTEX_A57, "Cortex-A57" },
+       { 0x0, "Unknown" },
+};
+
+/* arm cores makers */
+const struct implementers {
+       int                       id;
+       char                     *name;
+       struct cpu_cores        *corelist;
+} cpu_implementers[] = {
+       { CPU_IMPL_ARM,         "ARM",          cpu_cores_arm },
+       { CPU_IMPL_BROADCOM,    "Broadcom",     cpu_cores_none },
+       { 0,                    "",     NULL },
+};
+
+
+/*
+ * Identify the calling CPU core by decoding MIDR_EL1 and print the
+ * implementer, part name and revision, e.g. " ARM Cortex-A53 r0p4".
+ */
+void
+cpu_identify(struct cpu_info *ci)
+{
+       uint64_t midr, impl, part;
+       char *impl_name = "Unknown";
+       char *part_name = "Unknown";
+       struct cpu_cores *coreselecter = NULL;
+       int i;
+
+       midr = READ_SPECIALREG(midr_el1);
+
+       impl = CPU_IMPL(midr);
+       part = CPU_PART(midr);
+
+       for (i = 0; cpu_implementers[i].id != 0; i++) {
+               if (cpu_implementers[i].id == impl) {
+                       impl_name = cpu_implementers[i].name;
+                       coreselecter = cpu_implementers[i].corelist;
+                       break;
+               }
+       }
+
+       /*
+        * Testing impl_name against NULL was dead code (it is
+        * initialized to "Unknown"); test the core list instead so an
+        * unknown implementer does not dereference a NULL coreselecter.
+        */
+       if (coreselecter != NULL) {
+               for (i = 0; coreselecter[i].id != 0; i++) {
+                       if (part == coreselecter[i].id) {
+                               part_name = coreselecter[i].name;
+                               break;
+                       }
+               }
+               printf(" %s %s r%dp%d", impl_name, part_name, CPU_VAR(midr),
+                   CPU_REV(midr));
+       } else {
+               printf(" unknown implementer");
+       }
+}
+
+/*
+ * Attach a cpu device.  The primary cpu (unit 0) is identified
+ * directly; each secondary cpu gets a cpu_info allocated, is hatched
+ * via firmware and identifies itself from its own core.
+ */
+void
+cpuattach(struct device *parent, struct device *dev, void *aux)
+{
+       struct fdt_attach_args *faa = aux;
+       struct cpu_info *ci;
+       int cpuno = dev->dv_unit;
+
+       if (cpuno == 0) {
+               ci = &cpu_info_primary;
+#ifdef MULTIPROCESSOR
+               ci->ci_flags |= CPUF_RUNNING | CPUF_PRESENT | CPUF_PRIMARY;
+               cpuset_add(&cpus_running, ci);
+#endif
+       }
+#ifdef MULTIPROCESSOR
+       else {
+               ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK|M_ZERO);
+               cpu_info_secondaries[cpuno - 1] = ci;
+               cpu_info[cpuno] = ci;
+               /* insert after the primary so list order stays stable */
+               ci->ci_next = cpu_info_list->ci_next;
+               cpu_info_list->ci_next = ci;
+               ci->ci_flags |= CPUF_AP;
+       }
+#else
+       else {
+               /*
+                * No cpu_info exists for secondary cpus without
+                * MULTIPROCESSOR; returning here avoids using an
+                * uninitialized ci below.
+                */
+               printf("cpu skipped\n");
+               return;
+       }
+#endif
+
+       ci->ci_cpuid = cpuno;
+       ci->ci_dev = dev;
+
+       printf(":");
+
+       int node = faa->fa_node;
+       int len;
+       if (faa->fa_node != 0) {
+               // pull out fdt info.
+               char     buffer[128];
+               len = OF_getprop(node, "enable-method", buffer, sizeof(buffer));
+               if (len > 4 && strcmp (buffer, "psci") == 0) {
+                       uint32_t reg;
+                       /* for cpu nodes "reg" holds the MPIDR PSCI needs */
+                       reg = faa->fa_reg[0].addr;
+                       if (reg != ~0) {
+                               ci->ci_mpidr = reg;
+                               printf(" psci %x", reg);
+                       } else {
+                               ci->ci_flags = 0;
+                               /* no conversion in the format: drop the arg */
+                               printf(" disabled");
+                       }
+               }
+       }
+
+       if (ci->ci_flags & CPUF_AP) {
+               int timeout = 100000;
+               ncpusfound++;
+               cpu_hatch_secondary(ci);
+               atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
+               /* the secondary prints its own identification */
+               while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 && --timeout)
+                       delay(1000);
+               if (timeout == 0) {
+                       printf(": FAILED TO IDENTIFY");
+               }
+       } else {
+               cpu_identify(ci);
+       }
+
+       printf("\n");
+}
+
+#ifdef MULTIPROCESSOR
+/*
+ * Look up the cpu_info for a given cpu number; NULL when no cpu with
+ * that id is on the cpu_info list.
+ */
+struct cpu_info *
+get_cpu_info(int cpuno)
+{
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci = NULL;
+
+       CPU_INFO_FOREACH(cii, ci) {
+               if (ci->ci_cpuid == cpuno)
+                       break;
+       }
+       /* ci is NULL here when the loop ran off the end of the list */
+       return ci;
+}
+
+void cpu_hatch(void);
+
+/*
+ * Called once autoconf is complete: seed each application processor,
+ * set up its scheduler state, and release it to run.
+ */
+void
+cpu_boot_secondary_processors(void)
+{
+       struct cpu_info *ci;
+       CPU_INFO_ITERATOR cii;
+
+       CPU_INFO_FOREACH(cii, ci) {
+               if ((ci->ci_flags & CPUF_AP) == 0)
+                       continue;
+               if (ci->ci_flags & CPUF_PRIMARY)
+                       continue;
+
+               ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
+               sched_init_cpu(ci);
+               cpu_boot_secondary(ci);
+       }
+
+       /* TODO: initialize IPIs here once implemented, in the spirit of */
+       //mips64_ipi_init();
+}
+
+volatile int cpu_ready;
+volatile int cpu_running;
+
+extern int (*cpu_on_fn)(uint64_t, uint64_t);
+
+/*
+ * Prepare a secondary cpu so firmware can start it: allocate its
+ * kernel stack, publish this cpu_info and the kernel ttbr1 where the
+ * locore hatch code can find them, then ask firmware (cpu_on_fn,
+ * e.g. PSCI CPU_ON) to start the core at cpu_hatch.
+ */
+void
+cpu_hatch_secondary(struct cpu_info *ci)
+{
+       extern uint64_t pmap_avail_kvo;
+       extern paddr_t cpu_hatch_ci;
+
+       //printf(" spinning up cpu %d %p", ci->ci_mpidr, ci);
+       uint64_t kstack = uvm_km_alloc (kernel_map, USPACE+1024);
+       if (kstack == 0) {
+               panic("no stack for cpu\n");
+       }
+       /* EL1 stack grows down from just below the end of the USPACE area */
+       ci->ci_el1_stkend = kstack +USPACE-16;
+       //ci->ci_el2_stkend = kstack +USPACE+512-16;
+       //ci->ci_el3_stkend = kstack +USPACE+1024-16;
+
+       cpu_ready = 0;
+       cpu_running = 0;
+       /* the hatch code runs PA==VA, so publish the physical address of ci */
+       pmap_extract(pmap_kernel(), (vaddr_t)ci, &cpu_hatch_ci);
+       /* ... and the virtual address it must switch to once the MMU is on */
+       ci->ci_ci = (uint64_t)ci;
+
+       uint64_t ttbr1;
+       asm ("mrs %x0, ttbr1_el1": "=r"(ttbr1));
+       ci->ci_ttbr1 = ttbr1;
+       /* new core starts with caches/MMU off: write the data back to RAM */
+       cpu_dcache_wb_range((vaddr_t)&cpu_hatch_ci, 8);
+       cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
+
+       if (cpu_on_fn != 0)
+               cpu_on_fn(ci->ci_mpidr, (uint64_t)cpu_hatch+pmap_avail_kvo);
+       
+}
+
+/*
+ * Release one hatched cpu: set CPUF_GO, make the store visible (dsb)
+ * and wake the core from wfe (sev), then wait for it to report
+ * CPUF_RUNNING.  NOTE(review): there is no timeout here; a core that
+ * never comes up hangs boot — confirm that is acceptable.
+ */
+void 
+cpu_boot_secondary(struct cpu_info *ci)
+{
+       atomic_setbits_int(&ci->ci_flags, CPUF_GO);
+       asm ("dsb sy");
+       asm ("sev");
+
+       while ((ci->ci_flags & CPUF_RUNNING) == 0) {
+               delay(10);
+       }
+}
+
+/*
+ * C entry point for a secondary cpu (called from cpu_hatch in
+ * locore.S once it runs on kernel page tables with its own stack).
+ * Handshake with cpuattach(): announce presence, identify when asked,
+ * wait for CPUF_GO, finish per-cpu setup and enter the scheduler.
+ * This function does not return.
+ */
+void
+cpu_start_secondary(struct cpu_info *ci)
+{
+       int             s;
+       uint64_t        tcr;
+
+       ncpus++;
+       ci->ci_flags |= CPUF_PRESENT;
+       asm ("dsb sy");
+
+       /* wait for the primary cpu (cpuattach) to request identification */
+       while ((ci->ci_flags & CPUF_IDENTIFY) == 0) {
+               delay (10);
+       }
+       cpu_identify(ci);
+       atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
+       asm ("dsb sy");
+
+       /* sleep until cpu_boot_secondary() sets CPUF_GO and sends sev */
+       while ((ci->ci_flags & CPUF_GO) == 0) {
+               asm ("wfe");
+       }
+
+       /* size T0SZ for the user address space on this core too */
+       tcr = READ_SPECIALREG(tcr_el1);
+       tcr &= ~TCR_T0SZ(0x3f);
+       tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
+       WRITE_SPECIALREG(tcr_el1, tcr);
+
+       s=splhigh();
+       cpu_startclock();
+
+       nanouptime(&ci->ci_schedstate.spc_runtime);
+
+       spllower(IPL_NONE);
+
+       atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
+
+        SCHED_LOCK(s);
+       cpu_switchto(NULL, sched_chooseproc());
+}
+
+/*
+ * Kick another cpu that may be parked in wfe so it notices new
+ * runnable work.  sev wakes every core; a targeted IPI (not yet
+ * implemented on arm64) would be more precise.
+ */
+void
+cpu_unidle(struct cpu_info *ci)
+{
+       if (ci != curcpu())
+               //mips64_send_ipi(ci->ci_cpuid, MIPS64_IPI_NOP);
+               // should this be sev or ipi ?
+               asm volatile ("sev");
+}
+#endif
diff --git a/sys/arch/arm64/arm64/genassym.cf b/sys/arch/arm64/arm64/genassym.cf
index 7ca785549f7..f6b2735c7dd 100644
--- a/sys/arch/arm64/arm64/genassym.cf
+++ b/sys/arch/arm64/arm64/genassym.cf
@@ -54,6 +54,10 @@ member SF_SC sf_sc
 
 struct cpu_info
 member CI_CURPROC ci_curproc
+member CI_EL1_STKEND ci_el1_stkend
+member CI_TTBR1 ci_ttbr1
+member CI_CI ci_ci
+
 
 struct proc
 member P_ASTPENDING p_md.md_astpending
diff --git a/sys/arch/arm64/arm64/lock_machdep.c 
b/sys/arch/arm64/arm64/lock_machdep.c
new file mode 100644
index 00000000000..6cc2457f4ac
--- /dev/null
+++ b/sys/arch/arm64/arm64/lock_machdep.c
@@ -0,0 +1,167 @@
+/*     $OpenBSD: lock_machdep.c,v 1.10 2016/03/19 11:34:22 mpi Exp $   */
+
+/*
+ * Copyright (c) 2007 Artur Grabowski <a...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/atomic.h>
+#include <machine/lock.h>
+
+#include <ddb/db_output.h>
+
+#define SPINLOCK_SPIN_HOOK asm volatile ("wfe");
+#define SPINLOCK_WAKEUP asm volatile ("sev");
+
+/* Reset a ticket lock: no owner, no waiters, per-cpu state cleared. */
+void
+__mp_lock_init(struct __mp_lock *mpl)
+{
+       mpl->mpl_ticket = 0;
+       mpl->mpl_users = 0;
+       memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus));
+}
+
+#if defined(MP_LOCKDEBUG)
+#ifndef DDB
+#error "MP_LOCKDEBUG requires DDB"
+#endif
+
+/* CPU-dependent timing, needs this to be settable from ddb. */
+extern int __mp_lock_spinout;
+#endif
+
+/*
+ * Spin (sleeping in wfe) until our ticket 'me' is being served.
+ * With MP_LOCKDEBUG, drop into ddb after __mp_lock_spinout
+ * iterations, but keep spinning afterwards: the previous version
+ * returned without holding the lock once the spinout hit, which
+ * corrupts the lock protocol.
+ */
+static __inline void
+__mp_lock_spin(struct __mp_lock *mpl, u_int me)
+{
+#ifndef MP_LOCKDEBUG
+       while (mpl->mpl_ticket != me)
+               SPINLOCK_SPIN_HOOK;
+#else
+       int nticks = __mp_lock_spinout;
+
+       while (mpl->mpl_ticket != me) {
+               SPINLOCK_SPIN_HOOK;
+               if (--nticks <= 0) {
+                       db_printf("__mp_lock(%p): lock spun out\n", mpl);
+                       Debugger();
+                       /* rearm so we complain again rather than return */
+                       nticks = __mp_lock_spinout;
+               }
+       }
+#endif
+}
+
+/*
+ * Atomically add 'value' to *var with an ldxr/stxr loop and return
+ * the previous value (used to take a ticket number).
+ * NOTE(review): ldxr/stxr carry no acquire/release ordering; only the
+ * "memory" clobber (a compiler barrier) is present here — confirm
+ * whether ldaxr or an explicit dmb is required for lock acquisition.
+ */
+static inline u_int
+fetch_and_add(volatile u_int *var, u_int value)
+{
+       unsigned int written, new, old;
+
+       asm volatile (
+           "1: ldxr %w0, [%x4]         \n\t"
+           "   add %w2, %w0, %w3       \n\t"
+           "   stxr %w1, %w2, [%x4]    \n\t"
+           "   cbnz %w1, 1b            \n\t"
+           : "=&r" (old), "=&r" (written), "=&r" (new)
+           : "r" (value), "r" (var)
+           : "memory"
+       );
+
+       return (old);
+}
+
+
+/*
+ * Take the big lock.  A ticket is drawn only on the outermost
+ * acquisition (recursion just bumps mplc_depth); interrupts are
+ * blocked around the depth/ticket update so an interrupt handler
+ * taking the lock on this cpu sees a consistent pair.
+ */
+void
+__mp_lock(struct __mp_lock *mpl)
+{
+       struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+       int psw;
+
+       psw = disable_interrupts();
+       if (cpu->mplc_depth++ == 0)
+               cpu->mplc_ticket = fetch_and_add(&mpl->mpl_users, 1);
+       restore_interrupts(psw);
+
+       __mp_lock_spin(mpl, cpu->mplc_ticket);
+}
+
+/*
+ * Drop one recursion level; when the outermost hold is released,
+ * serve the next ticket and sev-wake any cpu sleeping in wfe.
+ */
+void
+__mp_unlock(struct __mp_lock *mpl)
+{
+       struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+       int psw;
+
+#ifdef MP_LOCKDEBUG
+       if (!__mp_lock_held(mpl)) {
+               db_printf("__mp_unlock(%p): not held lock\n", mpl);
+               Debugger();
+       }
+#endif
+
+       psw = disable_interrupts();
+       if (--cpu->mplc_depth == 0)
+               mpl->mpl_ticket++;
+       SPINLOCK_WAKEUP;
+       restore_interrupts(psw);
+}
+
+/*
+ * Fully release the lock regardless of recursion depth and return
+ * the depth that was held (for a later __mp_acquire_count()).
+ */
+int
+__mp_release_all(struct __mp_lock *mpl)
+{
+       struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+       int rv, psw;
+
+       psw = disable_interrupts();
+       rv = cpu->mplc_depth;
+       cpu->mplc_depth = 0;
+       mpl->mpl_ticket++;
+       /*
+        * Wake any cpu parked in wfe, exactly as __mp_unlock() does;
+        * without the sev a waiter could sleep until an unrelated
+        * event arrives.
+        */
+       SPINLOCK_WAKEUP;
+       restore_interrupts(psw);
+
+       return (rv);
+}
+
+/*
+ * Reduce the recursion depth to exactly one and return the number of
+ * levels released.  The lock itself remains held (ticket unchanged).
+ */
+int
+__mp_release_all_but_one(struct __mp_lock *mpl)
+{
+       struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+       int rv = cpu->mplc_depth - 1;
+
+#ifdef MP_LOCKDEBUG
+       if (!__mp_lock_held(mpl)) {
+               db_printf("__mp_release_all_but_one(%p): not held lock\n", mpl);
+               Debugger();
+       }
+#endif
+
+       cpu->mplc_depth = 1;
+
+       return (rv);
+}
+
+/* Re-take the lock 'count' times (undo of __mp_release_all()). */
+void
+__mp_acquire_count(struct __mp_lock *mpl, int count)
+{
+       int n = count;
+
+       while (n--)
+               __mp_lock(mpl);
+}
+
+/* Is the big lock currently held by this cpu? */
+int
+__mp_lock_held(struct __mp_lock *mpl)
+{
+       struct __mp_lock_cpu *me = &mpl->mpl_cpus[cpu_number()];
+
+       if (me->mplc_depth <= 0)
+               return (0);
+       return (me->mplc_ticket == mpl->mpl_ticket);
+}
diff --git a/sys/arch/arm64/arm64/locore.S b/sys/arch/arm64/arm64/locore.S
index b472afc4b4b..ca240a0c291 100644
--- a/sys/arch/arm64/arm64/locore.S
+++ b/sys/arch/arm64/arm64/locore.S
@@ -1,5 +1,19 @@
 /* $OpenBSD: locore.S,v 1.15 2017/02/17 19:14:58 patrick Exp $ */
 /*-
+ * Copyright (c) 2016 Dale Rahn <dr...@dalerahn.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
  * Copyright (c) 2012-2014 Andrew Turner
  * All rights reserved.
  *
@@ -181,8 +195,15 @@ virtdone:
 /*
  * If we are started in EL2, configure the required hypervisor
  * registers and drop to EL1.
+ *
+ * API: x0-x9 are scratch
+ *     x21, x22, x23 have content to not touch
+ *     x24 - EL2 stack (small, physical)
+ *     x25 - EL3 stack (small, physical)
+ *     x26 - 0 secondary processor,  1 boot processor
  */
 drop_to_el1:
+       msr     SPSel, #1
        mrs     x1, CurrentEL
        lsr     x1, x1, #2
        cmp     x1, #0x2
@@ -620,7 +641,47 @@ switch_mmu_kernel:
        isb
        ret
 
+#ifdef MULTIPROCESSOR
+       .globl cpu_hatch
+       .type cpu_hatch,%function
+cpu_hatch:
+       adr     x0, .Lcpu_hatch_ci
+
+       bl      get_virt_delta
+
+       ldr     x0, [x0]
+       sub     x0, x0, x29
+       ldr     x0, [x0]
+       mov     x26, #0
+
+       bl      drop_to_el1
+
+
+       adr     x27, .Lpagetable
+       ldr     x27, [x27] 
+       sub     x27, x27, x29 // VA -> PA
+       add     x27, x27, #PAGE_SIZE * 2 // at ttbr2
+       add     x27, x27, #PAGE_SIZE * 2 // at ttbr1
+       add     x27, x27, #PAGE_SIZE * 2 // at ttbr0
+       ldr     x26, [x0, CI_TTBR1]
+
+       ldr     x0, [x0, #CI_CI ]
+       bl      start_mmu
+       msr     tpidr_el1, x0
+       ldr     x1, [x0, #CI_EL1_STKEND ]
+       mov     sp, x1
+
+       adr     x1, .Lcpu_start
+       ldr     x1, [x1]
+       blr     x1
+       b       .
 
+       .align 3
+.Lcpu_start:
+       .xword cpu_start_secondary
+.Lcpu_hatch_ci:
+       .xword cpu_hatch_ci
+#endif
 
        .align 3
 mair:
@@ -650,6 +711,8 @@ abort:
        .data
        .global _C_LABEL(esym)
 _C_LABEL(esym): .xword   _C_LABEL(end)
+       .global _C_LABEL(cpu_hatch_ci)
+_C_LABEL(cpu_hatch_ci):     .xword   0
 
        //.section .init_pagetable
 data_align_pad:
@@ -685,6 +748,11 @@ init_pt_va:
 initstack:
        .space  USPACE
 initstack_end:
+       // leave some space for an EL2/EL3 stack just in case.
+       .space 512
+initstack_el2_end:
+       .space 512
+initstack_el3_end:
 
        .text
 ENTRY(sigcode)
diff --git a/sys/arch/arm64/arm64/machdep.c b/sys/arch/arm64/arm64/machdep.c
index cc6766d0c19..368286e2d45 100644
--- a/sys/arch/arm64/arm64/machdep.c
+++ b/sys/arch/arm64/arm64/machdep.c
@@ -56,6 +56,10 @@ int stdout_node = 0;
 
 void (*cpuresetfn)(void);
 void (*powerdownfn)(void);
+int (*cpu_suspend_fn)(void);
+int (*cpu_off_fn)(void);
+int (*cpu_on_fn)(uint64_t, uint64_t);
+
 
 int cold = 1;
 
@@ -212,18 +216,23 @@ consinit()
 void
 cpu_idle_enter()
 {
+       asm ("msr daifset, #2");
 }
 
 void
 cpu_idle_cycle()
 {
-       restore_daif(0x0); // enable interrupts
        __asm volatile("wfi");
+       // briefly allow interrupts
+       asm ("msr daifclr, #2");
+       asm ("isb");
+       asm ("msr daifset, #2");
 }
 
 void
 cpu_idle_leave()
 {
+       asm ("msr daifclr, #2");
 }
 
 
diff --git a/sys/arch/arm64/conf/GENERIC b/sys/arch/arm64/conf/GENERIC
index d240c2b8356..e2ea982ad1e 100644
--- a/sys/arch/arm64/conf/GENERIC
+++ b/sys/arch/arm64/conf/GENERIC
@@ -48,6 +48,9 @@ uk*           at scsibus?
 ampintc*       at fdt?
 agtimer*       at fdt?
 
+cpubus0                at mainbus?
+cpu0           at cpubus?
+
 # NS16550 compatible serial ports
 com*           at fdt?
 
diff --git a/sys/arch/arm64/conf/GENERIC.MP b/sys/arch/arm64/conf/GENERIC.MP
new file mode 100644
index 00000000000..c9c64084f54
--- /dev/null
+++ b/sys/arch/arm64/conf/GENERIC.MP
@@ -0,0 +1,8 @@
+#       $OpenBSD: GENERIC.MP,v 1.11 2014/09/03 07:44:33 blambert Exp $
+
+include "arch/arm64/conf/GENERIC"
+
+option  MULTIPROCESSOR
+#option MP_LOCKDEBUG
+
+cpu*           at cpubus?
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64
index bba0574f2fb..e1ab2ee661e 100644
--- a/sys/arch/arm64/conf/files.arm64
+++ b/sys/arch/arm64/conf/files.arm64
@@ -23,6 +23,7 @@ file  arch/arm64/arm64/sig_machdep.c
 file   arch/arm64/arm64/syscall.c
 file   arch/arm64/arm64/sys_machdep.c
 
+file   arch/arm64/arm64/cpu.c
 file   arch/arm64/arm64/intr.c
 file   arch/arm64/arm64/softintr.c
 file   arch/arm64/arm64/vfp.c
@@ -32,6 +33,8 @@ file  arch/arm64/arm64/ast.c
 file   arch/arm64/arm64/arm64_mutex.c
 
 file   arch/arm64/arm64/cpufunc_asm.S
+file   arch/arm64/arm64/lock_machdep.c         multiprocessor
+
 file   arch/arm64/arm64/support.S
 file   arch/arm64/arm64/bus_dma.c
 
@@ -75,6 +78,13 @@ include "dev/wscons/files.wscons"
 include "dev/rasops/files.rasops"
 include "dev/wsfont/files.wsfont"
 
+device  cpubus {}
+attach  cpubus at mainbus
+
+device  cpu {}
+attach  cpu at cpubus with cpu_fdt
+file    arch/arm64/dev/cpu_fdt.c
+
 #
 # Machine-independent HID support
 #
diff --git a/sys/arch/arm64/dev/cpu_fdt.c b/sys/arch/arm64/dev/cpu_fdt.c
new file mode 100644
index 00000000000..e4d6f03831e
--- /dev/null
+++ b/sys/arch/arm64/dev/cpu_fdt.c
@@ -0,0 +1,187 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2011,2015 Dale Rahn <dr...@dalerahn.com>
+ * Copyright (c) 2013 Patrick Wildt <patr...@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/device.h>
+#include <sys/kernel.h>
+#include <machine/fdt.h>
+#include <sys/evcount.h>
+
+#include <machine/bus.h>
+#include <machine/intr.h>
+#include <arm64/arm64/arm64var.h>
+#include <arm64/dev/mainbus.h>
+
+#include <dev/ofw/fdt.h>
+#include <dev/ofw/openfirm.h>
+
+int            cpu_fdt_match(struct device *, void *, void *);
+void           cpuattach(struct device *, struct device *, void *);
+int            cpubus_match(struct device *, void *, void *);
+void           cpubus_attach(struct device *, struct device *, void *);
+void           cpu_attach_deferred(struct device *self);
+
+struct cpubus_softc {
+       struct device           sc_dev;
+       int             sc_pnode;
+       int                     sc_acells;
+       int                     sc_scells;
+};
+
+struct cpu_softc {
+       struct device   sc_dev;
+};
+
+
+struct cfattach cpubus_ca = {
+       sizeof (struct cpubus_softc), cpubus_match, cpubus_attach
+};
+
+/*
+ * Match the cpubus pseudo-device on mainbus.
+ * NOTE(review): this returns 1 (match) when ma_name is NOT "cpu" —
+ * the strcmp() result is used unnegated.  Verify against how mainbus
+ * proposes candidate names; at first glance the test looks inverted.
+ */
+int
+cpubus_match(struct device *parent, void *cfdata, void *aux)
+{
+       union mainbus_attach_args *ma = aux;
+
+       if (strcmp(ma->ma_name, "cpu"))
+               return 1;
+       return 0;
+}
+
+void cpubus_attach_node(struct device *self, int node);
+
+/*
+ * Attach cpubus: locate the /cpus node, remember its cell sizes, and
+ * defer attaching the individual cpu nodes until the rest of
+ * autoconf has run (so the device used to spin up cores exists).
+ */
+void
+cpubus_attach(struct device *parent, struct device *self, void *aux)
+{
+       struct cpubus_softc *sc = (struct cpubus_softc *)self;
+       int pnode;
+
+       /* terminate the autoconf line even on the failure path below */
+       printf("\n");
+
+       /* OF_finddevice() returns -1, not 0, when the node is missing */
+       pnode = OF_finddevice("/cpus");
+       if (pnode <= 0) {
+               return ;
+       }
+       sc->sc_pnode = pnode;
+
+       sc->sc_acells = OF_getpropint(pnode, "#address-cells", 1);
+       sc->sc_scells = OF_getpropint(pnode, "#size-cells", 0);
+
+       config_defer(self, cpu_attach_deferred);
+}
+
+/* Deferred pass: walk every child of /cpus and try to attach it. */
+void
+cpu_attach_deferred(struct device *self)
+{
+       struct cpubus_softc *sc = (struct cpubus_softc *)self;
+       int child;
+
+       for (child = OF_child(sc->sc_pnode); child != 0;
+           child = OF_peer(child))
+               cpubus_attach_node(self, child);
+}
+
+/*
+ * Submatch hook: defer entirely to the child driver's match routine;
+ * cpu nodes carry no locators to check here.
+ */
+int
+cpubus_submatch(struct device *self, void *match, void *aux)
+{
+       struct cfdata *cf = match;
+
+       return (*cf->cf_attach->ca_match)(self, match, aux);
+}
+
+/*
+ * Turn one /cpus child node into fdt_attach_args and offer it to the
+ * cpu driver.  The "reg" property is decoded with the parent's
+ * #address-cells/#size-cells into fa_reg[]; for cpu nodes reg is the
+ * value passed to PSCI (the MPIDR) — see cpuattach().
+ */
+void
+cpubus_attach_node(struct device *self, int node)
+{
+       struct cpubus_softc *sc = (struct cpubus_softc *)self;
+       struct fdt_attach_args   fa;
+       char buffer[128];
+       int i, len, line;
+       uint32_t                *cell, *reg;
+
+       /* skip nodes without "compatible": nothing can match them */
+       if (!OF_getprop(node, "compatible", buffer, sizeof(buffer)))
+               return;
+
+       memset(&fa, 0, sizeof(fa));
+       fa.fa_name = "";
+       fa.fa_node = node;
+       fa.fa_acells = sc->sc_acells;
+       fa.fa_scells = sc->sc_scells;
+
+       len = OF_getproplen(node, "reg");
+
+       /* bytes per (address, size) tuple in "reg" */
+       line = (sc->sc_acells + sc->sc_scells) * sizeof(uint32_t);
+       if (len > 0 && line > 0 && (len % line) == 0) {
+               reg = malloc(len, M_TEMP, M_WAITOK);
+               OF_getpropintarray(node, "reg", reg, len);
+
+               fa.fa_reg = malloc((len / line) * sizeof(struct fdt_reg),
+                   M_DEVBUF, M_WAITOK | M_ZERO);
+               fa.fa_nreg = (len / line);
+
+               /* fold 1- or 2-cell big-endian values into addr/size */
+               for (i = 0, cell = reg; i < len / line; i++) {
+                       if (sc->sc_acells >= 1)
+                               fa.fa_reg[i].addr = cell[0];
+                       if (sc->sc_acells == 2) {
+                               fa.fa_reg[i].addr <<= 32;
+                               fa.fa_reg[i].addr |= cell[1];
+                       }
+                       cell += sc->sc_acells;
+                       if (sc->sc_scells >= 1)
+                               fa.fa_reg[i].size = cell[0];
+                       if (sc->sc_scells == 2) {
+                               fa.fa_reg[i].size <<= 32;
+                               fa.fa_reg[i].size |= cell[1];
+                       }
+                       cell += sc->sc_scells;
+               }
+
+               free(reg, M_TEMP, len);
+       }
+       config_found_sm(self, &fa, NULL, cpubus_submatch);
+
+       /* free(NULL, ..., 0) is a no-op when there was no "reg" */
+       free(fa.fa_reg, M_DEVBUF, fa.fa_nreg * sizeof(struct fdt_reg));
+}
+
+struct cfdriver cpubus_cd = {
+       NULL, "cpubus", DV_DULL
+};
+
+struct cfattach cpu_fdt_ca = {
+       sizeof (struct cpu_softc), cpu_fdt_match, cpuattach
+};
+
+struct cfdriver cpu_cd = {
+       NULL, "cpu", DV_DULL
+};
+
+
+/*
+ * Match cpu nodes by their compatible string.  The previous
+ * OF_getprop() into a local buffer was dead code and is removed.
+ */
+int
+cpu_fdt_match(struct device *parent, void *cfdata, void *aux)
+{
+       struct fdt_attach_args *faa = aux;
+
+       // XXX  arm64 — extend this list as more cores are supported
+       if (OF_is_compatible(faa->fa_node, "arm,cortex-a53") ||
+           OF_is_compatible(faa->fa_node, "arm,cortex-a57"))
+               return (1);
+
+       return 0;
+}
diff --git a/sys/arch/arm64/include/cpu.h b/sys/arch/arm64/include/cpu.h
index 2497cb33ccf..abb66cb3559 100644
--- a/sys/arch/arm64/include/cpu.h
+++ b/sys/arch/arm64/include/cpu.h
@@ -103,11 +103,29 @@ struct cpu_info {
 #endif
        int                     ci_want_resched;
 
+// MULTIPROCESSOR, but not ifdefed because of locore offsets.
+       struct srp_hazard       ci_srp_hazards[SRP_HAZARD_NUM];
+       volatile int            ci_flags;
+       uint64_t                ci_ttbr1;
+       int                     ci_mpidr; 
+       uint64_t                ci_el1_stkend;
+
+       // contains virtual address of ci, for when cpu is running PA==VA
+       uint64_t                ci_ci;
+
 #ifdef GPROF
        struct gmonparam        *ci_gmon;
 #endif
 };
 
+#define CPUF_PRIMARY           (1<<0)
+#define CPUF_AP                        (1<<1)
+#define CPUF_IDENTIFY          (1<<2)
+#define CPUF_IDENTIFIED                (1<<3)
+#define CPUF_PRESENT           (1<<4)
+#define CPUF_GO                        (1<<5)
+#define CPUF_RUNNING           (1<<6)
+
 static inline struct cpu_info *
 curcpu(void)
 {
@@ -134,10 +152,10 @@ extern struct cpu_info *cpu_info_list;
 #define CPU_INFO_ITERATOR              int
 #define CPU_INFO_FOREACH(cii, ci)      for (cii = 0, ci = cpu_info_list; \
                                            ci != NULL; ci = ci->ci_next)
+void cpu_unidle(struct cpu_info *ci);
 
 #define CPU_INFO_UNIT(ci)      ((ci)->ci_dev ? (ci)->ci_dev->dv_unit : 0)
 #define MAXCPUS        8
-#define cpu_unidle(ci)
 
 extern struct cpu_info *cpu_info[MAXCPUS];
 
@@ -158,7 +176,11 @@ void cpu_boot_secondary_processors(void);
  * process as soon as possible.
  */
 
+#ifdef MULTIPROCESSOR
+#define signotify(p)            (aston(p), cpu_unidle((p)->p_cpu))
+#else
 #define signotify(p)            setsoftast()
+#endif
 
 /*
  * Preempt the current process if in interrupt from user mode,
@@ -248,17 +270,31 @@ disable_irq_daif_ret()
 #define restore_interrupts(old_daif)                                   \
        restore_daif(old_daif)
 
+void   cpu_startclock(void);
+
 void   delay (unsigned);
 #define        DELAY(x)        delay(x)
 
-#endif /* !_LOCORE */
+/* CPU Identification */
+#define CPU_IMPL_ARM            0x41
+#define CPU_IMPL_BROADCOM       0x42
 
-#endif /* _KERNEL */
+#define CPU_PART_CORTEX_A53     0xD03
+#define CPU_PART_CORTEX_A57     0xD07
+
+#define CPU_IMPL(midr)  (((midr) >> 24) & 0xff)
+#define CPU_PART(midr)  (((midr) >> 4) & 0xfff)
+#define CPU_VAR(midr)   (((midr) >> 20) & 0xf)
+#define CPU_REV(midr)   (((midr) >> 0) & 0xf)
 
 #ifdef MULTIPROCESSOR
 #include <sys/mplock.h>
 #endif /* MULTIPROCESSOR */
 
+#endif /* !_LOCORE */
+
+#endif /* _KERNEL */
+
 #endif /* !_MACHINE_CPU_H_ */
 
 /* End of cpu.h */
diff --git a/sys/arch/arm64/include/mplock.h b/sys/arch/arm64/include/mplock.h
index f85c6df27d8..d429d9a018a 100644
--- a/sys/arch/arm64/include/mplock.h
+++ b/sys/arch/arm64/include/mplock.h
@@ -34,8 +34,8 @@ struct __mp_lock_cpu {
 
 struct __mp_lock {
        struct __mp_lock_cpu    mpl_cpus[MAXCPUS];
-       atomic_int              mpl_ticket;
-       atomic_int              mpl_users;
+       volatile u_int          mpl_ticket;
+       volatile u_int          mpl_users;
 };
 
 #ifndef _LOCORE

Dale Rahn                               dr...@dalerahn.com

Reply via email to