Module Name:    src
Committed By:   thorpej
Date:           Sat Apr 18 03:27:13 UTC 2020

Modified Files:
        src/sys/uvm: uvm_extern.h uvm_map.c

Log Message:
Add an API to get a reference on the identity of an individual byte of
virtual memory, a "virtual object address".  This is not a reference to
a physical byte of memory, per se, but a reference to a byte residing
in a page, owned by a unique UVM object (either a uobj or an anon).  Two
separate address + address space tuples that reference the same byte in
an object (such as a location in a shared memory segment) will resolve
to equivalent virtual object addresses.  Even if the residency status
of the page changes, the virtual object address remains unchanged.

struct uvm_voaddr -- a structure that encapsulates this address reference.

uvm_voaddr_acquire() -- a function to acquire this address reference,
given a vm_map and a vaddr_t.

uvm_voaddr_release() -- a function to release this address reference.

uvm_voaddr_compare() -- a function to compare two such address references.

uvm_voaddr_acquire() resolves the COW status of the object address before
acquiring.
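
The following sketch is purely illustrative and not part of this commit;
the helper name and calling context are made up.  It shows how a kernel
subsystem might use the new API to decide whether two mappings (possibly
in two different address spaces) name the same byte of the same backing
object:

#include <sys/param.h>
#include <uvm/uvm_extern.h>

/*
 * Hypothetical helper: true if (map1, va1) and (map2, va2) resolve to
 * the same virtual object address, e.g. two mappings of one shared
 * memory segment.
 */
static bool
voaddr_same_byte(struct vm_map *map1, vaddr_t va1,
    struct vm_map *map2, vaddr_t va2)
{
	struct uvm_voaddr voa1, voa2;
	bool equal;

	/* Resolve each mapping to its owning uobj/anon plus offset. */
	if (!uvm_voaddr_acquire(map1, va1, &voa1))
		return false;
	if (!uvm_voaddr_acquire(map2, va2, &voa2)) {
		uvm_voaddr_release(&voa1);
		return false;
	}

	/* memcmp()-style result: 0 means equivalent addresses. */
	equal = (uvm_voaddr_compare(&voa1, &voa2) == 0);

	/* Drop the references taken on the owners. */
	uvm_voaddr_release(&voa2);
	uvm_voaddr_release(&voa1);

	return equal;
}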

In collaboration with riastradh@ and chs@.


To generate a diff of this commit:
cvs rdiff -u -r1.222 -r1.223 src/sys/uvm/uvm_extern.h
cvs rdiff -u -r1.378 -r1.379 src/sys/uvm/uvm_map.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/uvm/uvm_extern.h
diff -u src/sys/uvm/uvm_extern.h:1.222 src/sys/uvm/uvm_extern.h:1.223
--- src/sys/uvm/uvm_extern.h:1.222	Sun Mar 22 18:32:42 2020
+++ src/sys/uvm/uvm_extern.h	Sat Apr 18 03:27:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_extern.h,v 1.222 2020/03/22 18:32:42 ad Exp $	*/
+/*	$NetBSD: uvm_extern.h,v 1.223 2020/04/18 03:27:13 thorpej Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -606,6 +606,40 @@ extern struct vm_map *kernel_map;
 extern struct vm_map *phys_map;
 
 /*
+ *	uvm_voaddr:
+ *
+ *	This structure encapsulates UVM's unique virtual object address
+ *	for an individual byte inside a pageable page. Pageable pages can
+ *	be owned by either a uvm_object (UVM_VOADDR_TYPE_OBJECT) or a
+ *	vm_anon (UVM_VOADDR_TYPE_ANON).
+ *
+ *	In each case, the byte offset into the owning object
+ *	(uvm_object or vm_anon) is included in the ID, so that
+ *	two different offsets into the same page have distinct
+ *	IDs.
+ *
+ *	Note that the page does not necessarily have to be resident
+ *	in order to know the virtual object address.  However, it
+ *	is required that any pending copy-on-write is resolved.
+ *
+ *	When someone wants a virtual object address, an extra reference
+ *	is taken on the owner while the caller uses the ID.  This
+ *	ensures that the identity is stable for the duration of its
+ *	use.
+ */
+struct uvm_voaddr {
+	enum {
+		UVM_VOADDR_TYPE_OBJECT = 1,
+		UVM_VOADDR_TYPE_ANON = 2,
+	} type;
+	union {
+		struct uvm_object *uobj;
+		struct vm_anon *anon;
+	};
+	voff_t offset;
+};
+
+/*
  * macros
  */
 
@@ -710,6 +744,12 @@ void			uvmspace_free(struct vmspace *);
 void			uvmspace_share(struct proc *, struct proc *);
 void			uvmspace_unshare(struct lwp *);
 
+bool			uvm_voaddr_acquire(struct vm_map *, vaddr_t,
+			    struct uvm_voaddr *);
+void			uvm_voaddr_release(struct uvm_voaddr *);
+int			uvm_voaddr_compare(const struct uvm_voaddr *,
+			    const struct uvm_voaddr *);
+
 void			uvm_whatis(uintptr_t, void (*)(const char *, ...));
 
 /* uvm_meter.c */

Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.378 src/sys/uvm/uvm_map.c:1.379
--- src/sys/uvm/uvm_map.c:1.378	Fri Apr 10 17:26:46 2020
+++ src/sys/uvm/uvm_map.c	Sat Apr 18 03:27:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.378 2020/04/10 17:26:46 ad Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.379 2020/04/18 03:27:13 thorpej Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.378 2020/04/10 17:26:46 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.379 2020/04/18 03:27:13 thorpej Exp $");
 
 #include "opt_ddb.h"
 #include "opt_pax.h"
@@ -4781,6 +4781,270 @@ uvm_map_unlock_entry(struct vm_map_entry
 	}
 }
 
+/*
+ * uvm_voaddr_acquire: returns the virtual object address corresponding
+ * to the specified virtual address.
+ *
+ * => resolves COW so the true page identity is tracked.
+ *
+ * => acquires a reference on the page's owner (uvm_object or vm_anon)
+ */
+bool
+uvm_voaddr_acquire(struct vm_map * const map, vaddr_t const va,
+    struct uvm_voaddr * const voaddr)
+{
+	struct vm_map_entry *entry;
+	struct vm_anon *anon = NULL;
+	bool result = false;
+	bool exclusive = false;
+	void (*unlock_fn)(struct vm_map *);
+
+	UVMHIST_FUNC("uvm_voaddr_acquire"); UVMHIST_CALLED(maphist);
+	UVMHIST_LOG(maphist,"(map=%#jx,va=%jx)", (uintptr_t)map, va, 0, 0);
+
+	const vaddr_t start = trunc_page(va);
+	const vaddr_t end = round_page(va+1);
+
+ lookup_again:
+	if (__predict_false(exclusive)) {
+		vm_map_lock(map);
+		unlock_fn = vm_map_unlock;
+	} else {
+		vm_map_lock_read(map);
+		unlock_fn = vm_map_unlock_read;
+	}
+
+	if (__predict_false(!uvm_map_lookup_entry(map, start, &entry))) {
+		unlock_fn(map);
+		UVMHIST_LOG(maphist,"<- done (no entry)",0,0,0,0);
+		return false;
+	}
+
+	if (__predict_false(entry->protection == VM_PROT_NONE)) {
+		unlock_fn(map);
+		UVMHIST_LOG(maphist,"<- done (PROT_NONE)",0,0,0,0);
+		return false;
+	}
+
+	/*
+	 * We have a fast path for the common case of "no COW resolution
+	 * needed" whereby we have taken a read lock on the map and if
+	 * we don't encounter any need to create a vm_anon then great!
+	 * But if we do, we loop around again, instead taking an exclusive
+	 * lock so that we can perform the fault.
+	 *
+	 * In the event that we have to resolve the fault, we do nearly the
+	 * same work as uvm_map_pageable() does:
+	 *
+	 * 1: holding the write lock, we create any anonymous maps that need
+	 *    to be created.  however, we do NOT need to clip the map entries
+	 *    in this case.
+	 *
+	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
+	 *    in the page (assuming the entry is not already wired).  this
+	 *    is done because we need the vm_anon to be present.
+	 */
+	if (__predict_true(!VM_MAPENT_ISWIRED(entry))) {
+
+		bool need_fault = false;
+
+		/*
+		 * perform the actions of vm_map_lookup that need the
+		 * write lock on the map: create an anonymous map for
+		 * a copy-on-write region, or an anonymous map for
+		 * a zero-fill region.
+		 */
+		if (__predict_false(UVM_ET_ISSUBMAP(entry))) {
+			unlock_fn(map);
+			UVMHIST_LOG(maphist,"<- done (submap)",0,0,0,0);
+			return false;
+		}
+		if (__predict_false(UVM_ET_ISNEEDSCOPY(entry) &&
+		    ((entry->max_protection & VM_PROT_WRITE) ||
+		     (entry->object.uvm_obj == NULL)))) {
+			if (!exclusive) {
+				/* need to take the slow path */
+				KASSERT(unlock_fn == vm_map_unlock_read);
+				vm_map_unlock_read(map);
+				exclusive = true;
+				goto lookup_again;
+			}
+			need_fault = true;
+			amap_copy(map, entry, 0, start, end);
+			/* XXXCDC: wait OK? */
+		}
+
+		/*
+		 * do a quick check to see if the fault has already
+		 * been resolved to the upper layer.
+		 */
+		if (__predict_true(entry->aref.ar_amap != NULL &&
+				   need_fault == false)) {
+			amap_lock(entry->aref.ar_amap, RW_WRITER);
+			anon = amap_lookup(&entry->aref, start - entry->start);
+			if (__predict_true(anon != NULL)) {
+				/* amap unlocked below */
+				goto found_anon;
+			}
+			amap_unlock(entry->aref.ar_amap);
+			need_fault = true;
+		}
+
+		/*
+		 * we predict this test as false because if we reach
+		 * this point, then we are likely dealing with a
+		 * shared memory region backed by a uvm_object, in
+		 * which case a fault to create the vm_anon is not
+		 * necessary.
+		 */
+		if (__predict_false(need_fault)) {
+			if (exclusive) {
+				vm_map_busy(map);
+				vm_map_unlock(map);
+				unlock_fn = vm_map_unbusy;
+			}
+
+			if (uvm_fault_wire(map, start, end,
+					   entry->max_protection, 1)) {
+				/* wiring failed */
+				unlock_fn(map);
+				UVMHIST_LOG(maphist,"<- done (wire failed)",
+					    0,0,0,0);
+				return false;
+			}
+
+			/*
+			 * now that we have resolved the fault, we can unwire
+			 * the page.
+			 */
+			if (exclusive) {
+				vm_map_lock(map);
+				vm_map_unbusy(map);
+				unlock_fn = vm_map_unlock;
+			}
+
+			uvm_fault_unwire_locked(map, start, end);
+		}
+	}
+
+	/* check the upper layer */
+	if (entry->aref.ar_amap) {
+		amap_lock(entry->aref.ar_amap, RW_WRITER);
+		anon = amap_lookup(&entry->aref, start - entry->start);
+		if (anon) {
+ found_anon:		KASSERT(anon->an_lock == entry->aref.ar_amap->am_lock);
+			anon->an_ref++;
+			KASSERT(anon->an_ref != 0);
+			voaddr->type = UVM_VOADDR_TYPE_ANON;
+			voaddr->anon = anon;
+			voaddr->offset = va & PAGE_MASK;
+			result = true;
+		}
+		amap_unlock(entry->aref.ar_amap);
+	}
+
+	/* check the lower layer */
+	if (!result && UVM_ET_ISOBJ(entry)) {
+		struct uvm_object *uobj = entry->object.uvm_obj;
+
+		KASSERT(uobj != NULL);
+		(*uobj->pgops->pgo_reference)(uobj);
+		voaddr->type = UVM_VOADDR_TYPE_OBJECT;
+		voaddr->uobj = uobj;
+		voaddr->offset = entry->offset + (va - entry->start);
+		result = true;
+	}
+
+	unlock_fn(map);
+
+	if (result) {
+		UVMHIST_LOG(maphist,
+		    "<- done OK (type=%jd,owner=%#jx,offset=%jx)",
+		    voaddr->type, voaddr->uobj, voaddr->offset, 0);
+	} else {
+		UVMHIST_LOG(maphist,"<- done (failed)",0,0,0,0);
+	}
+
+	return result;
+}
+
+/*
+ * uvm_voaddr_release: release the references held by the
+ * virtual object address.
+ */
+void
+uvm_voaddr_release(struct uvm_voaddr * const voaddr)
+{
+
+	switch (voaddr->type) {
+	case UVM_VOADDR_TYPE_OBJECT: {
+		struct uvm_object * const uobj = voaddr->uobj;
+
+		KASSERT(uobj != NULL);
+		KASSERT(uobj->pgops->pgo_detach != NULL);
+		(*uobj->pgops->pgo_detach)(uobj);
+		break;
+	    }
+	case UVM_VOADDR_TYPE_ANON: {
+		struct vm_anon * const anon = voaddr->anon;
+
+		KASSERT(anon != NULL);
+		rw_enter(anon->an_lock, RW_WRITER);
+	    	KASSERT(anon->an_ref > 0);
+		anon->an_ref--;
+		if (anon->an_ref == 0) {
+			uvm_anon_release(anon);
+		} else {
+			rw_exit(anon->an_lock);
+		}
+	    	break;
+	    }
+	default:
+		panic("uvm_voaddr_release: bad type");
+	}
+	memset(voaddr, 0, sizeof(*voaddr));
+}
+
+/*
+ * uvm_voaddr_compare: compare two uvm_voaddr objects.
+ *
+ * => memcmp() semantics
+ */
+int
+uvm_voaddr_compare(const struct uvm_voaddr * const voaddr1,
+    const struct uvm_voaddr * const voaddr2)
+{
+
+	KASSERT(voaddr1->type == UVM_VOADDR_TYPE_OBJECT ||
+		voaddr1->type == UVM_VOADDR_TYPE_ANON);
+
+	KASSERT(voaddr2->type == UVM_VOADDR_TYPE_OBJECT ||
+		voaddr2->type == UVM_VOADDR_TYPE_ANON);
+	
+	if (voaddr1->type < voaddr2->type)
+		return -1;
+	if (voaddr1->type > voaddr2->type)
+		return 1;
+	
+	/* These fields are unioned together. */
+	CTASSERT(offsetof(struct uvm_voaddr, uobj) ==
+		 offsetof(struct uvm_voaddr, anon));
+	const uintptr_t addr1 = (uintptr_t)voaddr1->uobj;
+	const uintptr_t addr2 = (uintptr_t)voaddr2->uobj;
+
+	if (addr1 < addr2)
+		return -1;
+	if (addr1 > addr2)
+		return 1;
+	
+	if (voaddr1->offset < voaddr2->offset)
+		return -1;
+	if (voaddr1->offset > voaddr2->offset)
+		return 1;
+	
+	return 0;
+}
+
 #if defined(DDB) || defined(DEBUGPRINT)
 
 /*
