The branch main has been updated by br: URL: https://cgit.FreeBSD.org/src/commit/?id=03b330e1916f468b16f7dbd0b7bd67b567a1eb1e
commit 03b330e1916f468b16f7dbd0b7bd67b567a1eb1e Author: Ruslan Bukin <b...@freebsd.org> AuthorDate: 2024-06-05 13:08:35 +0000 Commit: Ruslan Bukin <b...@freebsd.org> CommitDate: 2024-06-05 13:36:57 +0000 riscv: add stage 2 translation to pmap. Add basic stage 2 translation support (guest-physical to host-physical). The RISC-V hypervisor spec[1] introduces new translation schemes: Sv32x4, Sv39x4, Sv48x4 and Sv57x4. In each case, the size of the incoming address is widened by 2 bits (e.g. Sv39 becomes a 41-bit system). To accommodate the 2 extra bits, the root page table (only) is expanded by a factor of four to be 16 KiB instead of the usual 4 KiB. The rest of the page table system (including the PTE format) is similar. This gives us 4x the memory space in each scheme, but it does not make sense to support all that memory for now. Allocate the required number of pages for the top directory in the stage 2 case, but leave the additional pages unused. 1. https://github.com/riscv/riscv-isa-manual/blob/main/src/hypervisor.adoc Reviewed by: mhorne Sponsored by: UKRI Differential Revision: https://reviews.freebsd.org/D45481 --- sys/riscv/include/pmap.h | 8 ++++++++ sys/riscv/riscv/pmap.c | 46 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/sys/riscv/include/pmap.h b/sys/riscv/include/pmap.h index e10cbacb6e1f..8123231144bb 100644 --- a/sys/riscv/include/pmap.h +++ b/sys/riscv/include/pmap.h @@ -67,6 +67,12 @@ struct md_page { vm_memattr_t pv_memattr; }; +enum pmap_stage { + PM_INVALID, + PM_STAGE1, + PM_STAGE2, +}; + struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics */ @@ -76,6 +82,7 @@ struct pmap { TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ struct vm_radix pm_root; + enum pmap_stage pm_stage; }; typedef struct pmap *pmap_t; @@ -134,6 +141,7 @@ vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void 
pmap_kremove_device(vm_offset_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma); +int pmap_pinit_stage(pmap_t, enum pmap_stage); bool pmap_page_is_mapped(vm_page_t m); bool pmap_ps_enabled(pmap_t); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 0bdf3be8ea39..8176975b049c 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -632,6 +632,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_top = (pd_entry_t *)l1pt; + kernel_pmap_store.pm_stage = PM_STAGE1; PMAP_LOCK_INIT(kernel_pmap); TAILQ_INIT(&kernel_pmap->pm_pvchunk); vm_radix_init(&kernel_pmap->pm_root); @@ -1324,6 +1325,7 @@ pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + pmap->pm_stage = PM_STAGE1; pmap->pm_top = kernel_pmap->pm_top; pmap->pm_satp = pmap_satp_mode() | (vtophys(pmap->pm_top) >> PAGE_SHIFT); @@ -1334,23 +1336,35 @@ pmap_pinit0(pmap_t pmap) } int -pmap_pinit(pmap_t pmap) +pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage) { vm_paddr_t topphys; - vm_page_t mtop; + vm_page_t m; size_t i; - mtop = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO | - VM_ALLOC_WAITOK); + /* + * Top directory is 4 pages in hypervisor case. + * Current address space layout makes 3 of them unused. 
+ */ + if (stage == PM_STAGE1) + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO | + VM_ALLOC_WAITOK); + else + m = vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | VM_ALLOC_ZERO, + 4, 0, ~0ul, L2_SIZE, 0, VM_MEMATTR_DEFAULT); - topphys = VM_PAGE_TO_PHYS(mtop); + topphys = VM_PAGE_TO_PHYS(m); pmap->pm_top = (pd_entry_t *)PHYS_TO_DMAP(topphys); pmap->pm_satp = pmap_satp_mode() | (topphys >> PAGE_SHIFT); + pmap->pm_stage = stage; bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); CPU_ZERO(&pmap->pm_active); + if (stage == PM_STAGE2) + goto finish; + if (pmap_mode == PMAP_MODE_SV39) { /* * Copy L1 entries from the kernel pmap. This must be done with @@ -1371,12 +1385,20 @@ pmap_pinit(pmap_t pmap) pmap->pm_top[i] = kernel_pmap->pm_top[i]; } +finish: TAILQ_INIT(&pmap->pm_pvchunk); vm_radix_init(&pmap->pm_root); return (1); } +int +pmap_pinit(pmap_t pmap) +{ + + return (pmap_pinit_stage(pmap, PM_STAGE1)); +} + /* * This routine is called if the desired page table page does not exist. * @@ -1609,6 +1631,8 @@ void pmap_release(pmap_t pmap) { vm_page_t m; + int npages; + int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", @@ -1616,15 +1640,23 @@ pmap_release(pmap_t pmap) KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); + if (pmap->pm_stage == PM_STAGE2) + goto finish; + if (pmap_mode == PMAP_MODE_SV39) { mtx_lock(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock(&allpmaps_lock); } +finish: + npages = pmap->pm_stage == PM_STAGE2 ? 4 : 1; m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_top)); - vm_page_unwire_noq(m); - vm_page_free(m); + for (i = 0; i < npages; i++) { + vm_page_unwire_noq(m); + vm_page_free(m); + m++; + } } static int