Module Name: src Committed By: thorpej Date: Sat Apr 18 03:27:13 UTC 2020
Modified Files: src/sys/uvm: uvm_extern.h uvm_map.c Log Message: Add an API to get a reference on the identity of an individual byte of virtual memory, a "virtual object address". This is not a reference to a physical byte of memory, per se, but a reference to a byte residing in a page, owned by a unique UVM object (either a uobj or an anon). Two separate address+addresses space tuples that reference the same byte in an object (such as a location in a shared memory segment) will resolve to equivalent virtual object addresses. Even if the residency status of the page changes, the virtual object address remains unchanged. struct uvm_voaddr -- a structure that encapsulates this address reference. uvm_voaddr_acquire() -- a function to acquire this address reference, given a vm_map and a vaddr_t. uvm_voaddr_release() -- a function to release this address reference. uvm_voaddr_compare() -- a function to compare two such address references. uvm_voaddr_acquire() resolves the COW status of the object address before acquiring. In collaboration with riastradh@ and chs@. To generate a diff of this commit: cvs rdiff -u -r1.222 -r1.223 src/sys/uvm/uvm_extern.h cvs rdiff -u -r1.378 -r1.379 src/sys/uvm/uvm_map.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/uvm/uvm_extern.h diff -u src/sys/uvm/uvm_extern.h:1.222 src/sys/uvm/uvm_extern.h:1.223 --- src/sys/uvm/uvm_extern.h:1.222 Sun Mar 22 18:32:42 2020 +++ src/sys/uvm/uvm_extern.h Sat Apr 18 03:27:13 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_extern.h,v 1.222 2020/03/22 18:32:42 ad Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.223 2020/04/18 03:27:13 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -606,6 +606,40 @@ extern struct vm_map *kernel_map; extern struct vm_map *phys_map; /* + * uvm_voaddr: + * + * This structure encapsulates UVM's unique virtual object address + * for an individual byte inside a pageable page. Pageable pages can + * be owned by either a uvm_object (UVM_VOADDR_TYPE_OBJECT) or a + * vm_anon (UVM_VOADDR_TYPE_ANON). + * + * In each case, the byte offset into the owning object + * (uvm_object or vm_anon) is included in the ID, so that + * two different offsets into the same page have distinct + * IDs. + * + * Note that the page does not necessarily have to be resident + * in order to know the virtual object address. However, it + * is required that any pending copy-on-write is resolved. + * + * When someone wants a virtual object address, an extra reference + * is taken on the owner while the caller uses the ID. This + * ensures that the identity is stable for the duration of its + * use. 
+ */ +struct uvm_voaddr { + enum { + UVM_VOADDR_TYPE_OBJECT = 1, + UVM_VOADDR_TYPE_ANON = 2, + } type; + union { + struct uvm_object *uobj; + struct vm_anon *anon; + }; + voff_t offset; +}; + +/* * macros */ @@ -710,6 +744,12 @@ void uvmspace_free(struct vmspace *); void uvmspace_share(struct proc *, struct proc *); void uvmspace_unshare(struct lwp *); +bool uvm_voaddr_acquire(struct vm_map *, vaddr_t, + struct uvm_voaddr *); +void uvm_voaddr_release(struct uvm_voaddr *); +int uvm_voaddr_compare(const struct uvm_voaddr *, + const struct uvm_voaddr *); + void uvm_whatis(uintptr_t, void (*)(const char *, ...)); /* uvm_meter.c */ Index: src/sys/uvm/uvm_map.c diff -u src/sys/uvm/uvm_map.c:1.378 src/sys/uvm/uvm_map.c:1.379 --- src/sys/uvm/uvm_map.c:1.378 Fri Apr 10 17:26:46 2020 +++ src/sys/uvm/uvm_map.c Sat Apr 18 03:27:13 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.c,v 1.378 2020/04/10 17:26:46 ad Exp $ */ +/* $NetBSD: uvm_map.c,v 1.379 2020/04/18 03:27:13 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.378 2020/04/10 17:26:46 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.379 2020/04/18 03:27:13 thorpej Exp $"); #include "opt_ddb.h" #include "opt_pax.h" @@ -4781,6 +4781,270 @@ uvm_map_unlock_entry(struct vm_map_entry } } +/* + * uvm_voaddr_acquire: returns the virtual object address corresponding + * to the specified virtual address. + * + * => resolves COW so the true page identity is tracked. 
+ * + * => acquires a reference on the page's owner (uvm_object or vm_anon) + */ +bool +uvm_voaddr_acquire(struct vm_map * const map, vaddr_t const va, + struct uvm_voaddr * const voaddr) +{ + struct vm_map_entry *entry; + struct vm_anon *anon = NULL; + bool result = false; + bool exclusive = false; + void (*unlock_fn)(struct vm_map *); + + UVMHIST_FUNC("uvm_voaddr_acquire"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=%#jx,va=%jx)", (uintptr_t)map, va, 0, 0); + + const vaddr_t start = trunc_page(va); + const vaddr_t end = round_page(va+1); + + lookup_again: + if (__predict_false(exclusive)) { + vm_map_lock(map); + unlock_fn = vm_map_unlock; + } else { + vm_map_lock_read(map); + unlock_fn = vm_map_unlock_read; + } + + if (__predict_false(!uvm_map_lookup_entry(map, start, &entry))) { + unlock_fn(map); + UVMHIST_LOG(maphist,"<- done (no entry)",0,0,0,0); + return false; + } + + if (__predict_false(entry->protection == VM_PROT_NONE)) { + unlock_fn(map); + UVMHIST_LOG(maphist,"<- done (PROT_NONE)",0,0,0,0); + return false; + } + + /* + * We have a fast path for the common case of "no COW resolution + * needed" whereby we have taken a read lock on the map and if + * we don't encounter any need to create a vm_anon then great! + * But if we do, we loop around again, instead taking an exclusive + * lock so that we can perform the fault. + * + * In the event that we have to resolve the fault, we do nearly the + * same work as uvm_map_pageable() does: + * + * 1: holding the write lock, we create any anonymous maps that need + * to be created. however, we do NOT need to clip the map entries + * in this case. + * + * 2: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the page (assuming the entry is not already wired). this + * is done because we need the vm_anon to be present. 
+ */ + if (__predict_true(!VM_MAPENT_ISWIRED(entry))) { + + bool need_fault = false; + + /* + * perform the action of vm_map_lookup that need the + * write lock on the map: create an anonymous map for + * a copy-on-write region, or an anonymous map for + * a zero-fill region. + */ + if (__predict_false(UVM_ET_ISSUBMAP(entry))) { + unlock_fn(map); + UVMHIST_LOG(maphist,"<- done (submap)",0,0,0,0); + return false; + } + if (__predict_false(UVM_ET_ISNEEDSCOPY(entry) && + ((entry->max_protection & VM_PROT_WRITE) || + (entry->object.uvm_obj == NULL)))) { + if (!exclusive) { + /* need to take the slow path */ + KASSERT(unlock_fn == vm_map_unlock_read); + vm_map_unlock_read(map); + exclusive = true; + goto lookup_again; + } + need_fault = true; + amap_copy(map, entry, 0, start, end); + /* XXXCDC: wait OK? */ + } + + /* + * do a quick check to see if the fault has already + * been resolved to the upper layer. + */ + if (__predict_true(entry->aref.ar_amap != NULL && + need_fault == false)) { + amap_lock(entry->aref.ar_amap, RW_WRITER); + anon = amap_lookup(&entry->aref, start - entry->start); + if (__predict_true(anon != NULL)) { + /* amap unlocked below */ + goto found_anon; + } + amap_unlock(entry->aref.ar_amap); + need_fault = true; + } + + /* + * we predict this test as false because if we reach + * this point, then we are likely dealing with a + * shared memory region backed by a uvm_object, in + * which case a fault to create the vm_anon is not + * necessary. + */ + if (__predict_false(need_fault)) { + if (exclusive) { + vm_map_busy(map); + vm_map_unlock(map); + unlock_fn = vm_map_unbusy; + } + + if (uvm_fault_wire(map, start, end, + entry->max_protection, 1)) { + /* wiring failed */ + unlock_fn(map); + UVMHIST_LOG(maphist,"<- done (wire failed)", + 0,0,0,0); + return false; + } + + /* + * now that we have resolved the fault, we can unwire + * the page. 
+ */ + if (exclusive) { + vm_map_lock(map); + vm_map_unbusy(map); + unlock_fn = vm_map_unlock; + } + + uvm_fault_unwire_locked(map, start, end); + } + } + + /* check the upper layer */ + if (entry->aref.ar_amap) { + amap_lock(entry->aref.ar_amap, RW_WRITER); + anon = amap_lookup(&entry->aref, start - entry->start); + if (anon) { + found_anon: KASSERT(anon->an_lock == entry->aref.ar_amap->am_lock); + anon->an_ref++; + KASSERT(anon->an_ref != 0); + voaddr->type = UVM_VOADDR_TYPE_ANON; + voaddr->anon = anon; + voaddr->offset = va & PAGE_MASK; + result = true; + } + amap_unlock(entry->aref.ar_amap); + } + + /* check the lower layer */ + if (!result && UVM_ET_ISOBJ(entry)) { + struct uvm_object *uobj = entry->object.uvm_obj; + + KASSERT(uobj != NULL); + (*uobj->pgops->pgo_reference)(uobj); + voaddr->type = UVM_VOADDR_TYPE_OBJECT; + voaddr->uobj = uobj; + voaddr->offset = entry->offset + (va - entry->start); + result = true; + } + + unlock_fn(map); + + if (result) { + UVMHIST_LOG(maphist, + "<- done OK (type=%jd,owner=#%jx,offset=%jx)", + voaddr->type, voaddr->uobj, voaddr->offset, 0); + } else { + UVMHIST_LOG(maphist,"<- done (failed)",0,0,0,0); + } + + return result; +} + +/* + * uvm_voaddr_release: release the references held by the + * virtual object address. 
+ */ +void +uvm_voaddr_release(struct uvm_voaddr * const voaddr) +{ + + switch (voaddr->type) { + case UVM_VOADDR_TYPE_OBJECT: { + struct uvm_object * const uobj = voaddr->uobj; + + KASSERT(uobj != NULL); + KASSERT(uobj->pgops->pgo_detach != NULL); + (*uobj->pgops->pgo_detach)(uobj); + break; + } + case UVM_VOADDR_TYPE_ANON: { + struct vm_anon * const anon = voaddr->anon; + + KASSERT(anon != NULL); + rw_enter(anon->an_lock, RW_WRITER); + KASSERT(anon->an_ref > 0); + anon->an_ref--; + if (anon->an_ref == 0) { + uvm_anon_release(anon); + } else { + rw_exit(anon->an_lock); + } + break; + } + default: + panic("uvm_voaddr_release: bad type"); + } + memset(voaddr, 0, sizeof(*voaddr)); +} + +/* + * uvm_voaddr_compare: compare two uvm_voaddr objects. + * + * => memcmp() semantics + */ +int +uvm_voaddr_compare(const struct uvm_voaddr * const voaddr1, + const struct uvm_voaddr * const voaddr2) +{ + + KASSERT(voaddr1->type == UVM_VOADDR_TYPE_OBJECT || + voaddr1->type == UVM_VOADDR_TYPE_ANON); + + KASSERT(voaddr2->type == UVM_VOADDR_TYPE_OBJECT || + voaddr2->type == UVM_VOADDR_TYPE_ANON); + + if (voaddr1->type < voaddr2->type) + return -1; + if (voaddr1->type > voaddr2->type) + return 1; + + /* These fields are unioned together. */ + CTASSERT(offsetof(struct uvm_voaddr, uobj) == + offsetof(struct uvm_voaddr, anon)); + const uintptr_t addr1 = (uintptr_t)voaddr1->uobj; + const uintptr_t addr2 = (uintptr_t)voaddr2->uobj; + + if (addr1 < addr2) + return -1; + if (addr1 > addr2) + return 1; + + if (voaddr1->offset < voaddr2->offset) + return -1; + if (voaddr1->offset > voaddr2->offset) + return 1; + + return 0; +} + #if defined(DDB) || defined(DEBUGPRINT) /*