Module Name: src
Committed By: maxv
Date: Sun Feb 11 09:39:37 UTC 2018
Modified Files:
src/sys/arch/amd64/amd64: machdep.c
src/sys/arch/x86/conf: files.x86
Added Files:
src/sys/arch/x86/x86: svs.c
Log Message:
Move SVS into x86/svs.c
To generate a diff of this commit:
cvs rdiff -u -r1.297 -r1.298 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.92 -r1.93 src/sys/arch/x86/conf/files.x86
cvs rdiff -u -r0 -r1.1 src/sys/arch/x86/x86/svs.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.297 src/sys/arch/amd64/amd64/machdep.c:1.298
--- src/sys/arch/amd64/amd64/machdep.c:1.297 Sun Feb 4 17:03:21 2018
+++ src/sys/arch/amd64/amd64/machdep.c Sun Feb 11 09:39:36 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: machdep.c,v 1.297 2018/02/04 17:03:21 maxv Exp $ */
+/* $NetBSD: machdep.c,v 1.298 2018/02/11 09:39:36 maxv Exp $ */
/*
* Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.297 2018/02/04 17:03:21 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.298 2018/02/11 09:39:36 maxv Exp $");
/* #define XENDEBUG_LOW */
@@ -123,7 +123,6 @@ __KERNEL_RCSID(0, "$NetBSD: machdep.c,v
#include "opt_realmem.h"
#include "opt_xen.h"
#include "opt_kaslr.h"
-#include "opt_svs.h"
#ifndef XEN
#include "opt_physmem.h"
#endif
@@ -2236,391 +2235,3 @@ mm_md_direct_mapped_phys(paddr_t paddr,
return true;
}
#endif
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef SVS
-/*
- * Separate Virtual Space
- *
- * A per-cpu L4 page is maintained in ci_svs_updirpa. During each context
- * switch to a user pmap, updirpa is populated with the entries of the new
- * pmap, minus what we don't want to have mapped in userland.
- *
- * Note on locking/synchronization here:
- *
- * (a) Touching ci_svs_updir without holding ci_svs_mtx first is *not*
- * allowed.
- *
- * (b) pm_kernel_cpus contains the set of CPUs that have the pmap loaded
- * in their CR3 register. It must *not* be replaced by pm_cpus.
- *
- * (c) When a context switch on the current CPU is made from a user LWP
- * towards a kernel LWP, CR3 is not updated. Therefore, the pmap's
- * pm_kernel_cpus still contains the current CPU. It implies that the
- * remote CPUs that execute other threads of the user process we just
- * left will keep synchronizing us against their changes.
- *
- * List of areas that are removed from userland:
- * PTE Space [OK]
- * Direct Map [OK]
- * Remote PCPU Areas [OK]
- * Kernel Heap [OK]
- * Kernel Image [OK]
- *
- * TODO:
- *
- * (a) The NMI stack is not double-entered. Therefore if we ever receive
- * an NMI and leave it, the content of the stack will be visible to
- * userland (via Meltdown). Normally we never leave NMIs, unless a
- * privileged user launched PMCs. That's unlikely to happen, our PMC
- * support is pretty minimal.
- *
- * (b) Enable SVS depending on the CPU model, and add a sysctl to disable
- * it dynamically.
- *
- * (c) Narrow down the entry points: hide the 'jmp handler' instructions.
- * This makes sense on GENERIC_KASLR kernels.
- *
- * (d) Right now there is only one global LDT, and that's not compatible
- * with USER_LDT.
- */
-
-struct svs_utls {
- paddr_t kpdirpa;
- uint64_t scratch;
- vaddr_t rsp0;
-};
-
-static pd_entry_t *
-svs_tree_add(struct cpu_info *ci, vaddr_t va)
-{
- extern const vaddr_t ptp_masks[];
- extern const int ptp_shifts[];
- extern const long nbpd[];
- pd_entry_t *dstpde;
- size_t i, pidx, mod;
- struct vm_page *pg;
- paddr_t pa;
-
- dstpde = ci->ci_svs_updir;
- mod = (size_t)-1;
-
- for (i = PTP_LEVELS; i > 1; i--) {
- pidx = pl_i(va % mod, i);
-
- if (!pmap_valid_entry(dstpde[pidx])) {
- pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
- if (pg == 0)
- panic("%s: failed to allocate PA for CPU %d\n",
- __func__, cpu_index(ci));
- pa = VM_PAGE_TO_PHYS(pg);
-
- dstpde[pidx] = PG_V | PG_RW | pa;
- }
-
- pa = (paddr_t)(dstpde[pidx] & PG_FRAME);
- dstpde = (pd_entry_t *)PMAP_DIRECT_MAP(pa);
- mod = nbpd[i-1];
- }
-
- return dstpde;
-}
-
-static void
-svs_page_add(struct cpu_info *ci, vaddr_t va)
-{
- pd_entry_t *srcpde, *dstpde, pde;
- size_t idx, pidx;
- paddr_t pa;
-
- /* Create levels L4, L3 and L2. */
- dstpde = svs_tree_add(ci, va);
-
- pidx = pl1_i(va % NBPD_L2);
-
- /*
- * If 'va' is in a large page, we need to compute its physical
- * address manually.
- */
- idx = pl2_i(va);
- srcpde = L2_BASE;
- if (!pmap_valid_entry(srcpde[idx])) {
- panic("%s: L2 page not mapped", __func__);
- }
- if (srcpde[idx] & PG_PS) {
- pa = srcpde[idx] & PG_2MFRAME;
- pa += (paddr_t)(va % NBPD_L2);
- pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa;
-
- if (pmap_valid_entry(dstpde[pidx])) {
- panic("%s: L1 page already mapped", __func__);
- }
- dstpde[pidx] = pde;
- return;
- }
-
- /*
- * Normal page, just copy the PDE.
- */
- idx = pl1_i(va);
- srcpde = L1_BASE;
- if (!pmap_valid_entry(srcpde[idx])) {
- panic("%s: L1 page not mapped", __func__);
- }
- if (pmap_valid_entry(dstpde[pidx])) {
- panic("%s: L1 page already mapped", __func__);
- }
- dstpde[pidx] = srcpde[idx];
-}
-
-static void
-svs_rsp0_init(struct cpu_info *ci)
-{
- const cpuid_t cid = cpu_index(ci);
- vaddr_t va, rsp0;
- pd_entry_t *pd;
- size_t pidx;
-
- rsp0 = (vaddr_t)&pcpuarea->ent[cid].rsp0;
-
- /* The first page is a redzone. */
- va = rsp0 + PAGE_SIZE;
-
- /* Create levels L4, L3 and L2. */
- pd = svs_tree_add(ci, va);
-
- /* Get the info for L1. */
- pidx = pl1_i(va % NBPD_L2);
- if (pmap_valid_entry(pd[pidx])) {
- panic("%s: rsp0 page already mapped", __func__);
- }
-
- ci->ci_svs_rsp0_pte = (pt_entry_t *)&pd[pidx];
- ci->ci_svs_rsp0 = rsp0 + PAGE_SIZE + sizeof(struct trapframe);
- ci->ci_svs_ursp0 = ci->ci_svs_rsp0 - sizeof(struct trapframe);
- ci->ci_svs_krsp0 = 0;
-}
-
-static void
-svs_utls_init(struct cpu_info *ci)
-{
- const vaddr_t utlsva = (vaddr_t)&pcpuarea->utls;
- struct svs_utls *utls;
- struct vm_page *pg;
- pd_entry_t *pd;
- size_t pidx;
- paddr_t pa;
- vaddr_t va;
-
- /* Create levels L4, L3 and L2. */
- pd = svs_tree_add(ci, utlsva);
-
- /* Allocate L1. */
- pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
- if (pg == 0)
- panic("%s: failed to allocate PA for CPU %d\n", __func__,
- cpu_index(ci));
- pa = VM_PAGE_TO_PHYS(pg);
-
- /* Enter L1. */
- if (pmap_valid_entry(L1_BASE[pl1_i(utlsva)])) {
- panic("%s: local page already mapped", __func__);
- }
- pidx = pl1_i(utlsva % NBPD_L2);
- if (pmap_valid_entry(pd[pidx])) {
- panic("%s: L1 page already mapped", __func__);
- }
- pd[pidx] = PG_V | PG_RW | pmap_pg_nx | pa;
-
- /*
- * Now, allocate a VA in the kernel map, that points to the UTLS
- * page.
- */
- va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
- UVM_KMF_VAONLY|UVM_KMF_NOWAIT);
- if (va == 0) {
- panic("%s: unable to allocate VA\n", __func__);
- }
- pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
- pmap_update(pmap_kernel());
-
- ci->ci_svs_utls = va;
-
- /* Initialize the constant fields of the UTLS page */
- utls = (struct svs_utls *)ci->ci_svs_utls;
- utls->rsp0 = ci->ci_svs_rsp0;
-}
-
-static void
-svs_range_add(struct cpu_info *ci, vaddr_t va, size_t size)
-{
- size_t i, n;
-
- KASSERT(size % PAGE_SIZE == 0);
- n = size / PAGE_SIZE;
- for (i = 0; i < n; i++) {
- svs_page_add(ci, va + i * PAGE_SIZE);
- }
-}
-
-void
-cpu_svs_init(struct cpu_info *ci)
-{
- extern char __text_user_start;
- extern char __text_user_end;
- const cpuid_t cid = cpu_index(ci);
- struct vm_page *pg;
-
- KASSERT(ci != NULL);
-
- pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
- if (pg == 0)
- panic("%s: failed to allocate L4 PA for CPU %d\n",
- __func__, cpu_index(ci));
- ci->ci_svs_updirpa = VM_PAGE_TO_PHYS(pg);
-
- ci->ci_svs_updir = (pt_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
- UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
- if (ci->ci_svs_updir == NULL)
- panic("%s: failed to allocate L4 VA for CPU %d\n",
- __func__, cpu_index(ci));
-
- pmap_kenter_pa((vaddr_t)ci->ci_svs_updir, ci->ci_svs_updirpa,
- VM_PROT_READ | VM_PROT_WRITE, 0);
-
- pmap_update(pmap_kernel());
-
- ci->ci_svs_kpdirpa = pmap_pdirpa(pmap_kernel(), 0);
-
- mutex_init(&ci->ci_svs_mtx, MUTEX_DEFAULT, IPL_VM);
-
- svs_page_add(ci, (vaddr_t)&pcpuarea->idt);
- svs_page_add(ci, (vaddr_t)&pcpuarea->ldt);
- svs_range_add(ci, (vaddr_t)&pcpuarea->ent[cid],
- offsetof(struct pcpu_entry, rsp0));
- svs_range_add(ci, (vaddr_t)&__text_user_start,
- (vaddr_t)&__text_user_end - (vaddr_t)&__text_user_start);
-
- svs_rsp0_init(ci);
- svs_utls_init(ci);
-}
-
-void
-svs_pmap_sync(struct pmap *pmap, int index)
-{
- CPU_INFO_ITERATOR cii;
- struct cpu_info *ci;
- cpuid_t cid;
-
- KASSERT(pmap != NULL);
- KASSERT(pmap != pmap_kernel());
- KASSERT(mutex_owned(pmap->pm_lock));
- KASSERT(kpreempt_disabled());
- KASSERT(index < 255);
-
- for (CPU_INFO_FOREACH(cii, ci)) {
- cid = cpu_index(ci);
-
- if (!kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
- continue;
- }
-
- /* take the lock and check again */
- mutex_enter(&ci->ci_svs_mtx);
- if (kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
- ci->ci_svs_updir[index] = pmap->pm_pdir[index];
- }
- mutex_exit(&ci->ci_svs_mtx);
- }
-}
-
-void
-svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
-{
- struct cpu_info *ci = curcpu();
- struct svs_utls *utls;
- struct pcb *pcb;
- pt_entry_t *pte;
- uintptr_t rsp0;
- vaddr_t va;
-
- if (newlwp->l_flag & LW_SYSTEM) {
- return;
- }
-
-#ifdef DIAGNOSTIC
- if (oldlwp != NULL && !(oldlwp->l_flag & LW_SYSTEM)) {
- pcb = lwp_getpcb(oldlwp);
- rsp0 = pcb->pcb_rsp0;
- va = rounddown(rsp0, PAGE_SIZE);
- KASSERT(ci->ci_svs_krsp0 == rsp0 - sizeof(struct trapframe));
- pte = ci->ci_svs_rsp0_pte;
- KASSERT(*pte == L1_BASE[pl1_i(va)]);
- }
-#endif
-
- pcb = lwp_getpcb(newlwp);
- rsp0 = pcb->pcb_rsp0;
- va = rounddown(rsp0, PAGE_SIZE);
-
- /* Update the kernel rsp0 in cpu_info */
- ci->ci_svs_krsp0 = rsp0 - sizeof(struct trapframe);
- KASSERT((ci->ci_svs_krsp0 % PAGE_SIZE) ==
- (ci->ci_svs_ursp0 % PAGE_SIZE));
-
- utls = (struct svs_utls *)ci->ci_svs_utls;
- utls->scratch = 0;
-
- /*
- * Enter the user rsp0. We don't need to flush the TLB here, since
- * the user page tables are not loaded.
- */
- pte = ci->ci_svs_rsp0_pte;
- *pte = L1_BASE[pl1_i(va)];
-}
-
-static inline pt_entry_t
-svs_pte_atomic_read(struct pmap *pmap, size_t idx)
-{
- /*
- * XXX: We don't have a basic atomic_fetch_64 function?
- */
- return atomic_cas_64(&pmap->pm_pdir[idx], 666, 666);
-}
-
-/*
- * We may come here with the pmap unlocked. So read its PTEs atomically. If
- * a remote CPU is updating them at the same time, it's not a problem: the
- * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
- * synchronized properly.
- */
-void
-svs_pdir_switch(struct pmap *pmap)
-{
- struct cpu_info *ci = curcpu();
- struct svs_utls *utls;
- pt_entry_t pte;
- size_t i;
-
- KASSERT(kpreempt_disabled());
- KASSERT(pmap != pmap_kernel());
-
- ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
-
- /* Update the info in the UTLS page */
- utls = (struct svs_utls *)ci->ci_svs_utls;
- utls->kpdirpa = ci->ci_svs_kpdirpa;
-
- mutex_enter(&ci->ci_svs_mtx);
-
- /* User slots. */
- for (i = 0; i < 255; i++) {
- pte = svs_pte_atomic_read(pmap, i);
- ci->ci_svs_updir[i] = pte;
- }
-
- mutex_exit(&ci->ci_svs_mtx);
-}
-#endif
-
Index: src/sys/arch/x86/conf/files.x86
diff -u src/sys/arch/x86/conf/files.x86:1.92 src/sys/arch/x86/conf/files.x86:1.93
--- src/sys/arch/x86/conf/files.x86:1.92 Mon Jan 22 19:37:45 2018
+++ src/sys/arch/x86/conf/files.x86 Sun Feb 11 09:39:37 2018
@@ -1,4 +1,4 @@
-# $NetBSD: files.x86,v 1.92 2018/01/22 19:37:45 jdolecek Exp $
+# $NetBSD: files.x86,v 1.93 2018/02/11 09:39:37 maxv Exp $
# options for MP configuration through the MP spec
defflag opt_mpbios.h MPBIOS MPVERBOSE MPDEBUG MPBIOS_SCANPCI
@@ -97,6 +97,7 @@ file arch/x86/x86/pmap.c machdep
file arch/x86/x86/x86_tlb.c machdep
file arch/x86/x86/pmc.c machdep
file arch/x86/x86/procfs_machdep.c procfs
+file arch/x86/x86/svs.c machdep & svs
file arch/x86/x86/sys_machdep.c machdep
file arch/x86/x86/syscall.c machdep
file arch/x86/x86/tsc.c machdep
Added files:
Index: src/sys/arch/x86/x86/svs.c
diff -u /dev/null src/sys/arch/x86/x86/svs.c:1.1
--- /dev/null Sun Feb 11 09:39:37 2018
+++ src/sys/arch/x86/x86/svs.c Sun Feb 11 09:39:37 2018
@@ -0,0 +1,426 @@
+/* $NetBSD: svs.c,v 1.1 2018/02/11 09:39:37 maxv Exp $ */
+
+/*
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Maxime Villard.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.1 2018/02/11 09:39:37 maxv Exp $");
+
+#include "opt_svs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_page.h>
+
+/*
+ * Separate Virtual Space
+ *
+ * A per-cpu L4 page is maintained in ci_svs_updirpa. During each context
+ * switch to a user pmap, updirpa is populated with the entries of the new
+ * pmap, minus what we don't want to have mapped in userland.
+ *
+ * Note on locking/synchronization here:
+ *
+ * (a) Touching ci_svs_updir without holding ci_svs_mtx first is *not*
+ * allowed.
+ *
+ * (b) pm_kernel_cpus contains the set of CPUs that have the pmap loaded
+ * in their CR3 register. It must *not* be replaced by pm_cpus.
+ *
+ * (c) When a context switch on the current CPU is made from a user LWP
+ * towards a kernel LWP, CR3 is not updated. Therefore, the pmap's
+ * pm_kernel_cpus still contains the current CPU. It implies that the
+ * remote CPUs that execute other threads of the user process we just
+ * left will keep synchronizing us against their changes.
+ *
+ * List of areas that are removed from userland:
+ * PTE Space [OK]
+ * Direct Map [OK]
+ * Remote PCPU Areas [OK]
+ * Kernel Heap [OK]
+ * Kernel Image [OK]
+ *
+ * TODO:
+ *
+ * (a) The NMI stack is not double-entered. Therefore if we ever receive
+ * an NMI and leave it, the content of the stack will be visible to
+ * userland (via Meltdown). Normally we never leave NMIs, unless a
+ * privileged user launched PMCs. That's unlikely to happen, our PMC
+ * support is pretty minimal.
+ *
+ * (b) Enable SVS depending on the CPU model, and add a sysctl to disable
+ * it dynamically.
+ *
+ * (c) Narrow down the entry points: hide the 'jmp handler' instructions.
+ * This makes sense on GENERIC_KASLR kernels.
+ *
+ * (d) Right now there is only one global LDT, and that's not compatible
+ * with USER_LDT.
+ */
+
+struct svs_utls { /* layout of the page that ci_svs_utls points to */
+ paddr_t kpdirpa; /* copy of ci_svs_kpdirpa, written by svs_pdir_switch() */
+ uint64_t scratch; /* zeroed by svs_lwp_switch(); not otherwise used in this file */
+ vaddr_t rsp0; /* copy of ci_svs_rsp0, written once by svs_utls_init() */
+};
+/*
+ * svs_tree_add: walk the per-CPU user page tree of 'ci' for 'va', from
+ * level PTP_LEVELS down to level 2, allocating a zeroed page for every
+ * intermediate entry that is not valid yet.
+ *
+ * Returns the direct-map address of the table reached after the last
+ * step. The callers in this file index that table with
+ * pl1_i(va % NBPD_L2). Panics if a page cannot be allocated.
+ *
+ * NOTE(review): ptp_masks and ptp_shifts are not referenced by name in
+ * this function; presumably the pl_i() macro expands to them - confirm.
+ */
+static pd_entry_t *
+svs_tree_add(struct cpu_info *ci, vaddr_t va)
+{
+ extern const vaddr_t ptp_masks[];
+ extern const int ptp_shifts[];
+ extern const long nbpd[];
+ pd_entry_t *dstpde;
+ size_t i, pidx, mod;
+ struct vm_page *pg;
+ paddr_t pa;
+
+ dstpde = ci->ci_svs_updir; /* start from the per-CPU top-level page */
+ mod = (size_t)-1; /* leaves any va below (size_t)-1 unchanged on the first pass */
+
+ for (i = PTP_LEVELS; i > 1; i--) {
+ pidx = pl_i(va % mod, i); /* slot of 'va' in the level-i table */
+
+ if (!pmap_valid_entry(dstpde[pidx])) {
+ pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (pg == 0)
+ panic("%s: failed to allocate PA for CPU %d\n",
+ __func__, cpu_index(ci));
+ pa = VM_PAGE_TO_PHYS(pg);
+
+ dstpde[pidx] = PG_V | PG_RW | pa; /* hook the new zeroed page into this level */
+ }
+
+ pa = (paddr_t)(dstpde[pidx] & PG_FRAME);
+ dstpde = (pd_entry_t *)PMAP_DIRECT_MAP(pa); /* descend, via the direct map */
+ mod = nbpd[i-1]; /* next pass reduces 'va' modulo nbpd[i-1] */
+ }
+
+ return dstpde;
+}
+/*
+ * svs_page_add: enter the page containing 'va' into the per-CPU user
+ * page tree of 'ci', using the translation read through L2_BASE and
+ * L1_BASE.
+ *
+ * The upper levels are created by svs_tree_add(). The leaf entry is then
+ * either built from a large-page (PG_PS) L2 entry, or copied from the
+ * source L1 entry. Panics if the source is not mapped, or if the
+ * destination slot is already valid.
+ */
+static void
+svs_page_add(struct cpu_info *ci, vaddr_t va)
+{
+ pd_entry_t *srcpde, *dstpde, pde;
+ size_t idx, pidx;
+ paddr_t pa;
+
+ /* Create levels L4, L3 and L2. */
+ dstpde = svs_tree_add(ci, va);
+
+ pidx = pl1_i(va % NBPD_L2); /* destination slot in the table returned above */
+
+ /*
+ * If 'va' is in a large page, we need to compute its physical
+ * address manually: take the 2MB frame of the L2 entry, add the
+ * offset of 'va' within that 2MB region, and keep the remaining
+ * bits of the L2 entry except PG_PS.
+ *
+ * NOTE(review): the full (va % NBPD_L2) is added, so a 'va' that is
+ * not page-aligned would leak its low bits into the entry. The
+ * callers in this file appear to pass page-aligned addresses -
+ * confirm.
+ */
+ idx = pl2_i(va);
+ srcpde = L2_BASE;
+ if (!pmap_valid_entry(srcpde[idx])) {
+ panic("%s: L2 page not mapped", __func__);
+ }
+ if (srcpde[idx] & PG_PS) {
+ pa = srcpde[idx] & PG_2MFRAME;
+ pa += (paddr_t)(va % NBPD_L2);
+ pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa;
+
+ if (pmap_valid_entry(dstpde[pidx])) {
+ panic("%s: L1 page already mapped", __func__);
+ }
+ dstpde[pidx] = pde;
+ return;
+ }
+
+ /*
+ * Normal page, just copy the source L1 entry as is.
+ */
+ idx = pl1_i(va);
+ srcpde = L1_BASE;
+ if (!pmap_valid_entry(srcpde[idx])) {
+ panic("%s: L1 page not mapped", __func__);
+ }
+ if (pmap_valid_entry(dstpde[pidx])) {
+ panic("%s: L1 page already mapped", __func__);
+ }
+ dstpde[pidx] = srcpde[idx];
+}
+/*
+ * svs_rsp0_init: prepare the rsp0 slot of 'ci' in its per-CPU user page
+ * tree.
+ *
+ * Builds the upper levels for the page that follows
+ * pcpuarea->ent[cid].rsp0 and remembers the address of its L1 entry in
+ * ci_svs_rsp0_pte. The entry itself is not filled here; in this file it
+ * is written by svs_lwp_switch(). Also initializes ci_svs_rsp0,
+ * ci_svs_ursp0 and ci_svs_krsp0. Panics if the L1 entry is already valid.
+ */
+static void
+svs_rsp0_init(struct cpu_info *ci)
+{
+ const cpuid_t cid = cpu_index(ci);
+ vaddr_t va, rsp0;
+ pd_entry_t *pd;
+ size_t pidx;
+
+ rsp0 = (vaddr_t)&pcpuarea->ent[cid].rsp0;
+
+ /* The first page is a redzone. */
+ va = rsp0 + PAGE_SIZE;
+
+ /* Create levels L4, L3 and L2. */
+ pd = svs_tree_add(ci, va);
+
+ /* Get the info for L1. */
+ pidx = pl1_i(va % NBPD_L2);
+ if (pmap_valid_entry(pd[pidx])) {
+ panic("%s: rsp0 page already mapped", __func__);
+ }
+
+ ci->ci_svs_rsp0_pte = (pt_entry_t *)&pd[pidx]; /* slot rewritten on each switch to a user LWP */
+ ci->ci_svs_rsp0 = rsp0 + PAGE_SIZE + sizeof(struct trapframe);
+ ci->ci_svs_ursp0 = ci->ci_svs_rsp0 - sizeof(struct trapframe); /* i.e. rsp0 + PAGE_SIZE, start of the page after the redzone */
+ ci->ci_svs_krsp0 = 0; /* set for real by svs_lwp_switch() */
+}
+/*
+ * svs_utls_init: allocate the UTLS page of 'ci' and map it twice.
+ *
+ * A zeroed page is entered in the per-CPU user page tree at the address
+ * of pcpuarea->utls (that address does not depend on 'ci'; only the tree
+ * and the backing page do), and at a freshly allocated kernel VA that is
+ * stored in ci_svs_utls. The rsp0 field of the page is then set from
+ * ci_svs_rsp0, so this relies on svs_rsp0_init() having run first, which
+ * is the order used by cpu_svs_init().
+ *
+ * Panics if the page or the VA cannot be allocated, or if either the
+ * L1_BASE entry or the per-CPU tree slot for the UTLS address is already
+ * valid.
+ */
+static void
+svs_utls_init(struct cpu_info *ci)
+{
+ const vaddr_t utlsva = (vaddr_t)&pcpuarea->utls;
+ struct svs_utls *utls;
+ struct vm_page *pg;
+ pd_entry_t *pd;
+ size_t pidx;
+ paddr_t pa;
+ vaddr_t va;
+
+ /* Create levels L4, L3 and L2. */
+ pd = svs_tree_add(ci, utlsva);
+
+ /* Allocate the zeroed physical page that backs the UTLS area. */
+ pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (pg == 0)
+ panic("%s: failed to allocate PA for CPU %d\n", __func__,
+ cpu_index(ci));
+ pa = VM_PAGE_TO_PHYS(pg);
+
+ /*
+ * Enter the page at L1 of the per-CPU tree. The UTLS address is
+ * expected to have no valid entry when read through L1_BASE.
+ */
+ if (pmap_valid_entry(L1_BASE[pl1_i(utlsva)])) {
+ panic("%s: local page already mapped", __func__);
+ }
+ pidx = pl1_i(utlsva % NBPD_L2);
+ if (pmap_valid_entry(pd[pidx])) {
+ panic("%s: L1 page already mapped", __func__);
+ }
+ pd[pidx] = PG_V | PG_RW | pmap_pg_nx | pa;
+
+ /*
+ * Now, allocate a VA in the kernel map, that points to the UTLS
+ * page. Only the VA is allocated (UVM_KMF_VAONLY); the page obtained
+ * above is entered behind it with pmap_kenter_pa().
+ */
+ va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+ UVM_KMF_VAONLY|UVM_KMF_NOWAIT);
+ if (va == 0) {
+ panic("%s: unable to allocate VA\n", __func__);
+ }
+ pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
+ pmap_update(pmap_kernel());
+
+ ci->ci_svs_utls = va;
+
+ /* Initialize the constant fields of the UTLS page */
+ utls = (struct svs_utls *)ci->ci_svs_utls;
+ utls->rsp0 = ci->ci_svs_rsp0;
+}
+/*
+ * svs_range_add: call svs_page_add() on every page of the range that
+ * starts at 'va' and is 'size' bytes long. 'size' must be a multiple of
+ * PAGE_SIZE (asserted); a size of zero adds nothing.
+ */
+static void
+svs_range_add(struct cpu_info *ci, vaddr_t va, size_t size)
+{
+ size_t i, n;
+
+ KASSERT(size % PAGE_SIZE == 0);
+ n = size / PAGE_SIZE; /* number of pages to add */
+ for (i = 0; i < n; i++) {
+ svs_page_add(ci, va + i * PAGE_SIZE);
+ }
+}
+/*
+ * cpu_svs_init: set up the SVS state of 'ci'.
+ *
+ * Allocates a zeroed page for the per-CPU top-level directory
+ * (ci_svs_updirpa), maps it at a kernel VA (ci_svs_updir), points
+ * ci_svs_kpdirpa at the kernel pmap's directory, and initializes
+ * ci_svs_mtx. It then enters into the per-CPU tree: the IDT page, the
+ * LDT page, the part of this CPU's pcpu entry that precedes rsp0, and
+ * the [__text_user_start, __text_user_end) range. Finally the rsp0 slot
+ * and the UTLS page are set up.
+ *
+ * Panics if any allocation fails.
+ */
+void
+cpu_svs_init(struct cpu_info *ci)
+{
+ extern char __text_user_start;
+ extern char __text_user_end;
+ const cpuid_t cid = cpu_index(ci);
+ struct vm_page *pg;
+
+ KASSERT(ci != NULL); /* NOTE(review): cid above is already computed from ci before this check */
+
+ pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (pg == 0)
+ panic("%s: failed to allocate L4 PA for CPU %d\n",
+ __func__, cpu_index(ci));
+ ci->ci_svs_updirpa = VM_PAGE_TO_PHYS(pg);
+
+ ci->ci_svs_updir = (pt_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
+ UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
+ if (ci->ci_svs_updir == NULL)
+ panic("%s: failed to allocate L4 VA for CPU %d\n",
+ __func__, cpu_index(ci));
+
+ pmap_kenter_pa((vaddr_t)ci->ci_svs_updir, ci->ci_svs_updirpa,
+ VM_PROT_READ | VM_PROT_WRITE, 0); /* back the VA with the page allocated above */
+
+ pmap_update(pmap_kernel());
+
+ ci->ci_svs_kpdirpa = pmap_pdirpa(pmap_kernel(), 0); /* replaced by svs_pdir_switch() */
+
+ mutex_init(&ci->ci_svs_mtx, MUTEX_DEFAULT, IPL_VM);
+
+ svs_page_add(ci, (vaddr_t)&pcpuarea->idt);
+ svs_page_add(ci, (vaddr_t)&pcpuarea->ldt);
+ svs_range_add(ci, (vaddr_t)&pcpuarea->ent[cid],
+ offsetof(struct pcpu_entry, rsp0)); /* everything in this CPU's entry before rsp0 */
+ svs_range_add(ci, (vaddr_t)&__text_user_start,
+ (vaddr_t)&__text_user_end - (vaddr_t)&__text_user_start);
+
+ svs_rsp0_init(ci); /* must precede svs_utls_init(), which reads ci_svs_rsp0 */
+ svs_utls_init(ci);
+}
+/*
+ * svs_pmap_sync: propagate slot 'index' of pmap->pm_pdir to the per-CPU
+ * directory (ci_svs_updir) of every CPU that is set in
+ * pmap->pm_kernel_cpus.
+ *
+ * Asserted preconditions: 'pmap' is a non-kernel pmap whose pm_lock is
+ * held by the caller, preemption is disabled, and 'index' is below 255.
+ */
+void
+svs_pmap_sync(struct pmap *pmap, int index)
+{
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ cpuid_t cid;
+
+ KASSERT(pmap != NULL);
+ KASSERT(pmap != pmap_kernel());
+ KASSERT(mutex_owned(pmap->pm_lock));
+ KASSERT(kpreempt_disabled());
+ KASSERT(index < 255);
+
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ cid = cpu_index(ci);
+
+ if (!kcpuset_isset(pmap->pm_kernel_cpus, cid)) { /* first check, without ci_svs_mtx */
+ continue;
+ }
+
+ /* take the lock and check again */
+ mutex_enter(&ci->ci_svs_mtx);
+ if (kcpuset_isset(pmap->pm_kernel_cpus, cid)) {
+ ci->ci_svs_updir[index] = pmap->pm_pdir[index];
+ }
+ mutex_exit(&ci->ci_svs_mtx);
+ }
+}
+/*
+ * svs_lwp_switch: update the SVS rsp0 state of the current CPU when
+ * switching to 'newlwp'.
+ *
+ * Nothing is done when 'newlwp' has LW_SYSTEM set. Otherwise
+ * ci_svs_krsp0 is set to the new LWP's pcb_rsp0 minus one trapframe, the
+ * UTLS scratch field is cleared, and the L1 entry of the page containing
+ * pcb_rsp0 is copied into the per-CPU rsp0 slot (ci_svs_rsp0_pte).
+ *
+ * 'oldlwp' is only looked at under DIAGNOSTIC, and may be NULL.
+ */
+void
+svs_lwp_switch(struct lwp *oldlwp, struct lwp *newlwp)
+{
+ struct cpu_info *ci = curcpu();
+ struct svs_utls *utls;
+ struct pcb *pcb;
+ pt_entry_t *pte;
+ uintptr_t rsp0;
+ vaddr_t va;
+
+ if (newlwp->l_flag & LW_SYSTEM) {
+ return;
+ }
+
+#ifdef DIAGNOSTIC
+ if (oldlwp != NULL && !(oldlwp->l_flag & LW_SYSTEM)) { /* check that the state left by the previous switch still matches oldlwp */
+ pcb = lwp_getpcb(oldlwp);
+ rsp0 = pcb->pcb_rsp0;
+ va = rounddown(rsp0, PAGE_SIZE);
+ KASSERT(ci->ci_svs_krsp0 == rsp0 - sizeof(struct trapframe));
+ pte = ci->ci_svs_rsp0_pte;
+ KASSERT(*pte == L1_BASE[pl1_i(va)]);
+ }
+#endif
+
+ pcb = lwp_getpcb(newlwp);
+ rsp0 = pcb->pcb_rsp0;
+ va = rounddown(rsp0, PAGE_SIZE); /* page that contains the new LWP's rsp0 */
+
+ /* Update the kernel rsp0 in cpu_info */
+ ci->ci_svs_krsp0 = rsp0 - sizeof(struct trapframe);
+ KASSERT((ci->ci_svs_krsp0 % PAGE_SIZE) ==
+ (ci->ci_svs_ursp0 % PAGE_SIZE)); /* both must sit at the same offset within their page */
+
+ utls = (struct svs_utls *)ci->ci_svs_utls;
+ utls->scratch = 0;
+
+ /*
+ * Enter the user rsp0. We don't need to flush the TLB here, since
+ * the user page tables are not loaded.
+ */
+ pte = ci->ci_svs_rsp0_pte;
+ *pte = L1_BASE[pl1_i(va)];
+}
+/*
+ * svs_pte_atomic_read: return slot 'idx' of pmap->pm_pdir, read with an
+ * atomic operation.
+ */
+static inline pt_entry_t
+svs_pte_atomic_read(struct pmap *pmap, size_t idx)
+{
+ /*
+ * XXX: We don't have a basic atomic_fetch_64 function?
+ *
+ * A compare-and-swap whose expected and new values are equal is used
+ * as a load instead: whether or not the comparison succeeds, the
+ * slot keeps its value. The constant 666 is arbitrary.
+ * NOTE(review): this relies on atomic_cas_64() returning the value
+ * found in memory - confirm against atomic_cas(3).
+ */
+ return atomic_cas_64(&pmap->pm_pdir[idx], 666, 666);
+}
+
+/*
+ * svs_pdir_switch: make the per-CPU directory of the current CPU reflect
+ * 'pmap'.
+ *
+ * Records the physical address of the pmap's page directory in
+ * ci_svs_kpdirpa and in the UTLS page, then copies slots 0 to 254 of
+ * pmap->pm_pdir into ci_svs_updir while holding ci_svs_mtx. The caller
+ * must have preemption disabled and must not pass the kernel pmap (both
+ * asserted).
+ *
+ * We may come here with the pmap unlocked. So read its PTEs atomically. If
+ * a remote CPU is updating them at the same time, it's not a problem: the
+ * remote CPU will call svs_pmap_sync afterwards, and our updirpa will be
+ * synchronized properly.
+ */
+void
+svs_pdir_switch(struct pmap *pmap)
+{
+ struct cpu_info *ci = curcpu();
+ struct svs_utls *utls;
+ pt_entry_t pte;
+ size_t i;
+
+ KASSERT(kpreempt_disabled());
+ KASSERT(pmap != pmap_kernel());
+
+ ci->ci_svs_kpdirpa = pmap_pdirpa(pmap, 0);
+
+ /* Update the info in the UTLS page */
+ utls = (struct svs_utls *)ci->ci_svs_utls;
+ utls->kpdirpa = ci->ci_svs_kpdirpa;
+
+ mutex_enter(&ci->ci_svs_mtx);
+
+ /* User slots. Same bound as the index check in svs_pmap_sync(). */
+ for (i = 0; i < 255; i++) {
+ pte = svs_pte_atomic_read(pmap, i);
+ ci->ci_svs_updir[i] = pte;
+ }
+
+ mutex_exit(&ci->ci_svs_mtx);
+}