Stefan Kempf wrote:
> Hi,
>
> here comes a diff for vmm, and I'd like to ask people that are
> interested in our hypervisor to test this. If you are experimenting
> with vmm already, just do what you always do with vmm when running
> with this diff :-)
>
> [...]
>
> This diff will not go in at once. The first thing that should be
> committed is an addition to uvm. I'll post that one separately in this
> thread and ask for reviews.
Here are just the uvm parts:
This diff adds a new main function, uvm_share(), and a helper
function, uvm_mapent_share(). Nothing calls uvm_share() yet, but
vmm(4) will call it later. So this diff should have no impact on the
rest of uvm or the kernel.
uvm_share() takes two virtual address ranges, [A,B] in a source
address space and [C,D] in a destination address space, and makes
sure that both ranges get mapped to the same physical pages.
uvm already has the possibility to establish such shared mappings
(uvm_mapent_forkshared). I pulled out the common functionality
that uvm_share() needs as well into uvm_mapent_share() and made
uvm_mapent_clone a little more generic.
The only things that uvm_share() requires are that the source address
range [A,B] exists in the source address space, that it is
backed by memory (whether it's anon memory or whether it comes
from a file does not matter), and that the destination address range
[C,D] is still available in the destination address space.
Comments, oks?
Background:
vmm(4) creates a separate (virtual) address space
for the guest VM. These guest physical addresses are then mapped to
"real" physical RAM on the host. But the memory for the guest is
currently allocated within the kernel and not directly visible to vmd.
With this diff, we can later have vmd(8) allocate a large chunk of
memory via mmap(), and have this memory correspond to a guest physical
range in the guest VM.
The protection bits for the two address ranges can be different. That
way, the allocated memory in vmd(8) will be non-executable. In the
guest itself however, the memory is executable.
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.138
diff -u -p -r1.138 uvm_extern.h
--- uvm/uvm_extern.h 4 Apr 2016 16:34:16 -0000 1.138
+++ uvm/uvm_extern.h 6 Apr 2016 17:57:06 -0000
@@ -428,6 +428,8 @@ void uvmspace_exec(struct proc *,
vadd
struct vmspace *uvmspace_fork(struct process *);
void uvmspace_free(struct vmspace *);
struct vmspace *uvmspace_share(struct process *);
+int uvm_share(vm_map_t, vaddr_t, vm_prot_t,
+ vm_map_t, vaddr_t, vsize_t);
void uvm_meter(void);
int uvm_sysctl(int *, u_int, void *, size_t *,
void *, size_t, struct proc *);
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.211
diff -u -p -r1.211 uvm_map.c
--- uvm/uvm_map.c 4 Apr 2016 16:34:16 -0000 1.211
+++ uvm/uvm_map.c 6 Apr 2016 17:57:06 -0000
@@ -182,8 +182,12 @@ int uvm_mapent_bias(struct
vm_map*, s
* uvm_vmspace_fork helper functions.
*/
struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
- vsize_t, struct vm_map_entry*,
- struct uvm_map_deadq*, int, int);
+ vsize_t, vm_prot_t, vm_prot_t,
+ struct vm_map_entry*, struct uvm_map_deadq*, int,
+ int);
+struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
+ vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
+ struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
struct vm_map*, struct vm_map_entry*,
struct uvm_map_deadq*);
@@ -3364,6 +3368,98 @@ uvmspace_free(struct vmspace *vm)
}
/*
+ * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
+ * srcmap to the address range [dstaddr, dstaddr + sz) in
+ * dstmap.
+ *
+ * The whole address range in srcmap must be backed by an object
+ * (no holes).
+ *
+ * If successful, the address ranges share memory and the destination
+ * address range uses the protection flags in prot.
+ *
+ * This routine assumes that sz is a multiple of PAGE_SIZE and
+ * that dstaddr and srcaddr are page-aligned.
+ */
+int
+uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
+ struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
+{
+ int ret = 0;
+ vaddr_t unmap_end;
+ vaddr_t dstva;
+ vsize_t off, len, n = sz;
+ struct vm_map_entry *first = NULL, *last = NULL;
+ struct vm_map_entry *src_entry, *psrc_entry = NULL;
+ struct uvm_map_deadq dead;
+
+ if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
+ return EINVAL;
+
+ TAILQ_INIT(&dead);
+ vm_map_lock(dstmap);
+ vm_map_lock_read(srcmap);
+
+ if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
+ ret = ENOMEM;
+ goto exit_unlock;
+ }
+ if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
+ ret = EINVAL;
+ goto exit_unlock;
+ }
+
+ unmap_end = dstaddr;
+ for (; src_entry != NULL;
+ psrc_entry = src_entry,
+ src_entry = RB_NEXT(uvm_map_addr, &srcmap->addr, src_entry)) {
+ /* hole in address space, bail out */
+ if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
+ break;
+ if (src_entry->start >= srcaddr + sz)
+ break;
+
+ if (UVM_ET_ISSUBMAP(src_entry))
+ panic("uvm_share: encountered a submap (illegal)");
+ if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
+ UVM_ET_ISNEEDSCOPY(src_entry))
+ panic("uvm_share: non-copy_on_write map entries "
+ "marked needs_copy (illegal)");
+
+ dstva = dstaddr;
+ if (src_entry->start > srcaddr) {
+ dstva += src_entry->start - srcaddr;
+ off = 0;
+ } else
+ off = srcaddr - src_entry->start;
+
+ if (n < src_entry->end - src_entry->start)
+ len = n;
+ else
+ len = src_entry->end - src_entry->start;
+ n -= len;
+
+ if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot,
+ srcmap, src_entry, &dead) == NULL)
+ break;
+
+ unmap_end = dstva + len;
+ if (n == 0)
+ goto exit_unlock;
+ }
+
+ ret = EINVAL;
+ uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
+
+exit_unlock:
+ vm_map_unlock_read(srcmap);
+ vm_map_unlock(dstmap);
+ uvm_unmap_detach(&dead, 0);
+
+ return ret;
+}
+
+/*
* Clone map entry into other map.
*
* Mapping will be placed at dstaddr, for the same length.
@@ -3372,7 +3468,8 @@ uvmspace_free(struct vmspace *vm)
*/
struct vm_map_entry *
uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
- vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
+ vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
+ struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
int mapent_flags, int amap_share_flags)
{
struct vm_map_entry *new_entry, *first, *last;
@@ -3394,8 +3491,8 @@ uvm_mapent_clone(struct vm_map *dstmap,
new_entry->offset = old_entry->offset;
new_entry->aref = old_entry->aref;
new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
- new_entry->protection = old_entry->protection;
- new_entry->max_protection = old_entry->max_protection;
+ new_entry->protection = prot;
+ new_entry->max_protection = maxprot;
new_entry->inheritance = old_entry->inheritance;
new_entry->advice = old_entry->advice;
@@ -3417,34 +3514,48 @@ uvm_mapent_clone(struct vm_map *dstmap,
return new_entry;
}
-/*
- * share the mapping: this means we want the old and
- * new entries to share amaps and backing objects.
- */
struct vm_map_entry *
-uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
- struct vm_map *old_map,
+uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
+ vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
{
- struct vm_map_entry *new_entry;
-
/*
- * if the old_entry needs a new amap (due to prev fork)
- * then we need to allocate it now so that we have
- * something we own to share with the new_entry. [in
- * other words, we need to clear needs_copy]
+ * If old_entry refers to a copy-on-write region that has not yet been
+ * written to (needs_copy flag is set), then we need to allocate a new
+ * amap for old_entry.
+ *
+ * If we do not do this, and the process owning old_entry does a copy-on
+ * write later, old_entry and new_entry will refer to different memory
+ * regions, and the memory between the processes is no longer shared.
+ *
+ * [in other words, we need to clear needs_copy]
*/
if (UVM_ET_ISNEEDSCOPY(old_entry)) {
/* get our own amap, clears needs_copy */
amap_copy(old_map, old_entry, M_WAITOK, FALSE,
- 0, 0);
+ 0, 0);
/* XXXCDC: WAITOK??? */
}
- new_entry = uvm_mapent_clone(new_map, old_entry->start,
- old_entry->end - old_entry->start, 0, old_entry,
- dead, 0, AMAP_SHARED);
+ return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
+ prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
+}
+
+/*
+ * share the mapping: this means we want the old and
+ * new entries to share amaps and backing objects.
+ */
+struct vm_map_entry *
+uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
+ struct vm_map *old_map,
+ struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
+{
+ struct vm_map_entry *new_entry;
+
+ new_entry = uvm_mapent_share(new_map, old_entry->start,
+ old_entry->end - old_entry->start, 0, old_entry->protection,
+ old_entry->max_protection, old_map, old_entry, dead);
/*
* pmap_copy the mappings: this routine is optional
@@ -3474,8 +3585,8 @@ uvm_mapent_forkcopy(struct vmspace *new_
boolean_t protect_child;
new_entry = uvm_mapent_clone(new_map, old_entry->start,
- old_entry->end - old_entry->start, 0, old_entry,
- dead, 0, 0);
+ old_entry->end - old_entry->start, 0, old_entry->protection,
+ old_entry->max_protection, old_entry, dead, 0, 0);
new_entry->etype |=
(UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
@@ -3615,8 +3726,8 @@ uvm_mapent_forkzero(struct vmspace *new_
struct vm_map_entry *new_entry;
new_entry = uvm_mapent_clone(new_map, old_entry->start,
- old_entry->end - old_entry->start, 0, old_entry,
- dead, 0, 0);
+ old_entry->end - old_entry->start, 0, old_entry->protection,
+ old_entry->max_protection, old_entry, dead, 0, 0);
new_entry->etype |=
(UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
@@ -4116,6 +4227,7 @@ uvm_map_extract(struct vm_map *srcmap, v
newentry = uvm_mapent_clone(kernel_map,
cp_start - start + dstaddr, cp_len, cp_off,
+ entry->protection, entry->max_protection,
entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
if (newentry == NULL) {
error = ENOMEM;