Module Name:    src
Committed By:   maxv
Date:           Sun Apr 28 14:22:14 UTC 2019

Modified Files:
        src/lib/libnvmm: Makefile libnvmm.c libnvmm_x86.c nvmm.h
        src/sys/dev/nvmm: nvmm.c nvmm.h nvmm_internal.h nvmm_ioctl.h
        src/sys/dev/nvmm/x86: nvmm_x86.h nvmm_x86_svm.c nvmm_x86_vmx.c

Log Message:
Modify the communication layer between the kernel NVMM driver and libnvmm:
introduce a bidirectional "comm page", a page of memory shared between
the kernel and userland, used to transfer data in and out more
efficiently than with ioctls.

The comm page contains the VCPU state, plus three flags:

 - "wanted": the states the kernel must get/set when requested via ioctls
 - "cached": the states that are in the comm page
 - "commit": the states the kernel must set in vcpu_run

The idea is to avoid performing expensive syscalls by using the VCPU
state cached, either explicitly or speculatively, in the comm page. For
example, if the state is cached, we go directly from (1) to (5) with no
syscall:

          +---------------------------------------------+
          |                    Qemu                     |
          +---------------------------------------------+
               |                                   ^
               | (0) nvmm_vcpu_getstate            | (6) Done
               |                                   |
               V                                   |
             +---------------------------------------+
             |                libnvmm                |
             +---------------------------------------+
                  |   ^          |               ^
        (1) State |   | (2) No   | (3) Ioctl:    | (5) Ok, state
        cached?   |   |          | "please cache | fetched
                  |   |          |  the state"   |
                  V   |          |               |
              +-----------+      |               |
              | Comm Page |------+---------------+
              +-----------+      |
                       ^         |
          (4) "Alright |         V
               babe"   |     +--------+
                       +-----| Kernel |
                             +--------+
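
In code, the fast path is the check below; a condensed sketch of the new
nvmm_vcpu_getstate() in libnvmm.c, with the init and bounds checks
trimmed:

    int
    nvmm_vcpu_getstate(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
        void *state, uint64_t flags)
    {
            struct nvmm_ioc_vcpu_getstate args;
            struct nvmm_comm_page *comm = mach->pages[cpuid];

            /* (1)+(2): everything wanted already cached? Skip the ioctl. */
            if (__predict_true((flags & ~comm->state_cached) == 0))
                    goto out;

            /* (3)+(4): ask the kernel to cache only the missing states. */
            comm->state_wanted = flags & ~comm->state_cached;
            args.machid = mach->machid;
            args.cpuid = cpuid;
            if (ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args) == -1)
                    return -1;
    out:
            /* (5): fetch from the comm page, no copyout involved. */
            nvmm_arch_copystate(state, &comm->state, flags);
            return 0;
    }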

The main changes in behavior are:

 - nvmm_vcpu_getstate(): won't emit a syscall if the state is already
   cached in the comm page; it will just fetch from the comm page directly
 - nvmm_vcpu_setstate(): won't emit a syscall at all; it will just cache
   the wanted state in the comm page
 - nvmm_vcpu_run(): will commit the to-be-set state in the comm page,
   as previously requested by nvmm_vcpu_setstate() (sketched below)
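
Concretely, nvmm_vcpu_setstate() reduces to three lines in libnvmm, and
each kernel backend gains a commit helper invoked on entry to vcpu_run
(SVM shown; the VMX side is identical in structure):

    /* libnvmm: purely local, no syscall. */
    nvmm_arch_copystate(&comm->state, state, flags);
    comm->state_commit |= flags;
    comm->state_cached |= flags;

    /* kernel, at the start of svm_vcpu_run(): */
    static void
    svm_vcpu_state_commit(struct nvmm_cpu *vcpu)
    {
            vcpu->comm->state_wanted = vcpu->comm->state_commit;
            vcpu->comm->state_commit = 0;
            svm_vcpu_setstate(vcpu);  /* installs only the wanted states */
    }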

In addition to this, the kernel NVMM driver is changed to speculatively
cache certain states known to be of interest, so that future
nvmm_vcpu_getstate() calls performed by libnvmm or the emulator will use
the comm page rather than expensive syscalls. For example, if an I/O
VMEXIT occurs, the I/O Assist in libnvmm will want GPRS+SEGS+CRS+MSRS,
so the kernel now caches all of them in the comm page before returning
to userland.
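
The hook is a short call at the end of the relevant exit handlers; e.g.
the SVM I/O exit path in this commit does:

    /* svm_exit_io(): pre-cache what the I/O Assist will ask for. */
    svm_vcpu_state_provide(vcpu,
        NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
        NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);

where the helper simply reuses the getstate path to fill the comm page
and mark the states cached:

    static void
    svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
    {
            vcpu->comm->state_wanted = flags;
            svm_vcpu_getstate(vcpu);
    }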

Overall, in a normal run of Windows 10, this saves several million
syscalls. E.g. on a 4-CPU Intel host with 4 VCPUs, booting the Win10
install ISO goes from taking 1min35 to taking 1min16.

The libnvmm API is not changed, but the ABI is. If we changed the API,
it would be possible to avoid expensive memcpys on libnvmm's side; this
will be done in a future version. The comm page can also be extended to
implement future services.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/lib/libnvmm/Makefile
cvs rdiff -u -r1.9 -r1.10 src/lib/libnvmm/libnvmm.c src/lib/libnvmm/nvmm.h
cvs rdiff -u -r1.28 -r1.29 src/lib/libnvmm/libnvmm_x86.c
cvs rdiff -u -r1.18 -r1.19 src/sys/dev/nvmm/nvmm.c
cvs rdiff -u -r1.7 -r1.8 src/sys/dev/nvmm/nvmm.h
cvs rdiff -u -r1.9 -r1.10 src/sys/dev/nvmm/nvmm_internal.h
cvs rdiff -u -r1.5 -r1.6 src/sys/dev/nvmm/nvmm_ioctl.h
cvs rdiff -u -r1.12 -r1.13 src/sys/dev/nvmm/x86/nvmm_x86.h
cvs rdiff -u -r1.42 -r1.43 src/sys/dev/nvmm/x86/nvmm_x86_svm.c
cvs rdiff -u -r1.30 -r1.31 src/sys/dev/nvmm/x86/nvmm_x86_vmx.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libnvmm/Makefile
diff -u src/lib/libnvmm/Makefile:1.5 src/lib/libnvmm/Makefile:1.6
--- src/lib/libnvmm/Makefile:1.5	Tue Nov 13 09:24:37 2018
+++ src/lib/libnvmm/Makefile	Sun Apr 28 14:22:13 2019
@@ -1,11 +1,11 @@
-# $NetBSD: Makefile,v 1.5 2018/11/13 09:24:37 martin Exp $
+# $NetBSD: Makefile,v 1.6 2019/04/28 14:22:13 maxv Exp $
 
 .include <bsd.own.mk>
 
 LIB=		nvmm
 MAN=		libnvmm.3
 
-SRCS=		libnvmm.c libnvmm_x86.c
+SRCS=		libnvmm.c
 
 INCS=		nvmm.h
 INCSDIR=	/usr/include

Index: src/lib/libnvmm/libnvmm.c
diff -u src/lib/libnvmm/libnvmm.c:1.9 src/lib/libnvmm/libnvmm.c:1.10
--- src/lib/libnvmm/libnvmm.c:1.9	Wed Apr 10 18:49:04 2019
+++ src/lib/libnvmm/libnvmm.c	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: libnvmm.c,v 1.9 2019/04/10 18:49:04 maxv Exp $	*/
+/*	$NetBSD: libnvmm.c,v 1.10 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -40,10 +40,16 @@
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/queue.h>
+#include <machine/vmparam.h>
 
 #include "nvmm.h"
 
-struct nvmm_callbacks __callbacks;
+static struct nvmm_callbacks __callbacks;
+static struct nvmm_capability __capability;
+
+#ifdef __x86_64__
+#include "libnvmm_x86.c"
+#endif
 
 typedef struct __area {
 	LIST_ENTRY(__area) list;
@@ -159,6 +165,11 @@ nvmm_init(void)
 	nvmm_fd = open("/dev/nvmm", O_RDWR);
 	if (nvmm_fd == -1)
 		return -1;
+	if (nvmm_capability(&__capability) == -1) {
+		close(nvmm_fd);
+		nvmm_fd = -1;
+		return -1;
+	}
 	return 0;
 }
 
@@ -185,6 +196,7 @@ int
 nvmm_machine_create(struct nvmm_machine *mach)
 {
 	struct nvmm_ioc_machine_create args;
+	struct nvmm_comm_page **pages;
 	area_list_t *areas;
 	int ret;
 
@@ -196,16 +208,25 @@ nvmm_machine_create(struct nvmm_machine 
 	if (areas == NULL)
 		return -1;
 
+	pages = calloc(__capability.max_vcpus, sizeof(*pages));
+	if (pages == NULL) {
+		free(areas);
+		return -1;
+	}
+
 	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args);
 	if (ret == -1) {
 		free(areas);
 		return -1;
 	}
 
-	memset(mach, 0, sizeof(*mach));
 	LIST_INIT(areas);
-	mach->areas = areas;
+
+	memset(mach, 0, sizeof(*mach));
 	mach->machid = args.machid;
+	mach->pages = pages;
+	mach->npages = __capability.max_vcpus;
+	mach->areas = areas;
 
 	return 0;
 }
@@ -227,6 +248,7 @@ nvmm_machine_destroy(struct nvmm_machine
 		return -1;
 
 	__area_remove_all(mach);
+	free(mach->pages);
 
 	return 0;
 }
@@ -256,6 +278,7 @@ int
 nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
 {
 	struct nvmm_ioc_vcpu_create args;
+	struct nvmm_comm_page *comm;
 	int ret;
 
 	if (nvmm_init() == -1) {
@@ -269,6 +292,13 @@ nvmm_vcpu_create(struct nvmm_machine *ma
 	if (ret == -1)
 		return -1;
 
+	comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
+	    nvmm_fd, NVMM_COMM_OFF(mach->machid, cpuid));
+	if (comm == MAP_FAILED)
+		return -1;
+
+	mach->pages[cpuid] = comm;
+
 	return 0;
 }
 
@@ -276,6 +306,7 @@ int
 nvmm_vcpu_destroy(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
 {
 	struct nvmm_ioc_vcpu_destroy args;
+	struct nvmm_comm_page *comm;
 	int ret;
 
 	if (nvmm_init() == -1) {
@@ -289,6 +320,9 @@ nvmm_vcpu_destroy(struct nvmm_machine *m
 	if (ret == -1)
 		return -1;
 
+	comm = mach->pages[cpuid];
+	munmap(comm, PAGE_SIZE);
+
 	return 0;
 }
 
@@ -296,21 +330,20 @@ int
 nvmm_vcpu_setstate(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
     void *state, uint64_t flags)
 {
-	struct nvmm_ioc_vcpu_setstate args;
-	int ret;
+	struct nvmm_comm_page *comm;
 
 	if (nvmm_init() == -1) {
 		return -1;
 	}
 
-	args.machid = mach->machid;
-	args.cpuid = cpuid;
-	args.state = state;
-	args.flags = flags;
-
-	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_SETSTATE, &args);
-	if (ret == -1)
+	if (__predict_false(cpuid >= mach->npages)) {
 		return -1;
+	}
+	comm = mach->pages[cpuid];
+
+	nvmm_arch_copystate(&comm->state, state, flags);
+	comm->state_commit |= flags;
+	comm->state_cached |= flags;
 
 	return 0;
 }
@@ -320,21 +353,32 @@ nvmm_vcpu_getstate(struct nvmm_machine *
     void *state, uint64_t flags)
 {
 	struct nvmm_ioc_vcpu_getstate args;
+	struct nvmm_comm_page *comm;
 	int ret;
 
 	if (nvmm_init() == -1) {
 		return -1;
 	}
 
+	if (__predict_false(cpuid >= mach->npages)) {
+		return -1;
+	}
+	comm = mach->pages[cpuid];
+
+	if (__predict_true((flags & ~comm->state_cached) == 0)) {
+		goto out;
+	}
+	comm->state_wanted = flags & ~comm->state_cached;
+
 	args.machid = mach->machid;
 	args.cpuid = cpuid;
-	args.state = state;
-	args.flags = flags;
 
 	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args);
 	if (ret == -1)
 		return -1;
 
+out:
+	nvmm_arch_copystate(state, &comm->state, flags);
 	return 0;
 }
 
Index: src/lib/libnvmm/nvmm.h
diff -u src/lib/libnvmm/nvmm.h:1.9 src/lib/libnvmm/nvmm.h:1.10
--- src/lib/libnvmm/nvmm.h:1.9	Sat Apr 27 15:45:21 2019
+++ src/lib/libnvmm/nvmm.h	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm.h,v 1.9 2019/04/27 15:45:21 maxv Exp $	*/
+/*	$NetBSD: nvmm.h,v 1.10 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -40,6 +40,8 @@
 
 struct nvmm_machine {
 	nvmm_machid_t machid;
+	struct nvmm_comm_page **pages;
+	size_t npages;
 	void *areas; /* opaque */
 };
 

Index: src/lib/libnvmm/libnvmm_x86.c
diff -u src/lib/libnvmm/libnvmm_x86.c:1.28 src/lib/libnvmm/libnvmm_x86.c:1.29
--- src/lib/libnvmm/libnvmm_x86.c:1.28	Thu Apr  4 17:33:47 2019
+++ src/lib/libnvmm/libnvmm_x86.c	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: libnvmm_x86.c,v 1.28 2019/04/04 17:33:47 maxv Exp $	*/
+/*	$NetBSD: libnvmm_x86.c,v 1.29 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -43,14 +43,41 @@
 #include <machine/pte.h>
 #include <machine/psl.h>
 
-#include "nvmm.h"
-
 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
 #define __cacheline_aligned __attribute__((__aligned__(64)))
 
 #include <x86/specialreg.h>
 
-extern struct nvmm_callbacks __callbacks;
+/* -------------------------------------------------------------------------- */
+
+static void
+nvmm_arch_copystate(void *_dst, void *_src, uint64_t flags)
+{
+	struct nvmm_x64_state *src = _src;
+	struct nvmm_x64_state *dst = _dst;
+
+	if (flags & NVMM_X64_STATE_GPRS) {
+		memcpy(dst->gprs, src->gprs, sizeof(dst->gprs));
+	}
+	if (flags & NVMM_X64_STATE_SEGS) {
+		memcpy(dst->segs, src->segs, sizeof(dst->segs));
+	}
+	if (flags & NVMM_X64_STATE_CRS) {
+		memcpy(dst->crs, src->crs, sizeof(dst->crs));
+	}
+	if (flags & NVMM_X64_STATE_DRS) {
+		memcpy(dst->drs, src->drs, sizeof(dst->drs));
+	}
+	if (flags & NVMM_X64_STATE_MSRS) {
+		memcpy(dst->msrs, src->msrs, sizeof(dst->msrs));
+	}
+	if (flags & NVMM_X64_STATE_INTR) {
+		memcpy(&dst->intr, &src->intr, sizeof(dst->intr));
+	}
+	if (flags & NVMM_X64_STATE_FPU) {
+		memcpy(&dst->fpu, &src->fpu, sizeof(dst->fpu));
+	}
+}
 
 /* -------------------------------------------------------------------------- */
 

Index: src/sys/dev/nvmm/nvmm.c
diff -u src/sys/dev/nvmm/nvmm.c:1.18 src/sys/dev/nvmm/nvmm.c:1.19
--- src/sys/dev/nvmm/nvmm.c:1.18	Sat Apr 27 17:30:38 2019
+++ src/sys/dev/nvmm/nvmm.c	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $	*/
+/*	$NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.18 2019/04/27 17:30:38 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.19 2019/04/28 14:22:13 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -155,7 +155,7 @@ nvmm_vcpu_alloc(struct nvmm_machine *mac
 	}
 
 	vcpu->present = true;
-	vcpu->state = kmem_zalloc(nvmm_impl->state_size, KM_SLEEP);
+	vcpu->comm = NULL;
 	vcpu->hcpu_last = -1;
 	*ret = vcpu;
 	return 0;
@@ -166,7 +166,9 @@ nvmm_vcpu_free(struct nvmm_machine *mach
 {
 	KASSERT(mutex_owned(&vcpu->lock));
 	vcpu->present = false;
-	kmem_free(vcpu->state, nvmm_impl->state_size);
+	if (vcpu->comm != NULL) {
+		uvm_deallocate(kernel_map, (vaddr_t)vcpu->comm, PAGE_SIZE);
+	}
 }
 
 int
@@ -278,6 +280,9 @@ nvmm_machine_create(struct nvmm_owner *o
 	mach->gpa_end = NVMM_MAX_RAM;
 	mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false);
 
+	/* Create the comm uobj. */
+	mach->commuobj = uao_create(NVMM_MAX_VCPUS * PAGE_SIZE, 0);
+
 	(*nvmm_impl->machine_create)(mach);
 
 	args->machid = mach->machid;
@@ -377,6 +382,26 @@ nvmm_vcpu_create(struct nvmm_owner *owne
 	if (error)
 		goto out;
 
+	/* Allocate the comm page. */
+	uao_reference(mach->commuobj);
+	error = uvm_map(kernel_map, (vaddr_t *)&vcpu->comm, PAGE_SIZE,
+	    mach->commuobj, args->cpuid * PAGE_SIZE, 0, UVM_MAPFLAG(UVM_PROT_RW,
+	    UVM_PROT_RW, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
+	if (error) {
+		uao_detach(mach->commuobj);
+		nvmm_vcpu_free(mach, vcpu);
+		nvmm_vcpu_put(vcpu);
+		goto out;
+	}
+	error = uvm_map_pageable(kernel_map, (vaddr_t)vcpu->comm,
+	    (vaddr_t)vcpu->comm + PAGE_SIZE, false, 0);
+	if (error) {
+		nvmm_vcpu_free(mach, vcpu);
+		nvmm_vcpu_put(vcpu);
+		goto out;
+	}
+	memset(vcpu->comm, 0, PAGE_SIZE);
+
 	error = (*nvmm_impl->vcpu_create)(mach, vcpu);
 	if (error) {
 		nvmm_vcpu_free(mach, vcpu);
@@ -431,13 +456,7 @@ nvmm_vcpu_setstate(struct nvmm_owner *ow
 	if (error)
 		goto out;
 
-	error = copyin(args->state, vcpu->state, nvmm_impl->state_size);
-	if (error) {
-		nvmm_vcpu_put(vcpu);
-		goto out;
-	}
-
-	(*nvmm_impl->vcpu_setstate)(vcpu, vcpu->state, args->flags);
+	(*nvmm_impl->vcpu_setstate)(vcpu);
 	nvmm_vcpu_put(vcpu);
 
 out:
@@ -461,9 +480,8 @@ nvmm_vcpu_getstate(struct nvmm_owner *ow
 	if (error)
 		goto out;
 
-	(*nvmm_impl->vcpu_getstate)(vcpu, vcpu->state, args->flags);
+	(*nvmm_impl->vcpu_getstate)(vcpu);
 	nvmm_vcpu_put(vcpu);
-	error = copyout(vcpu->state, args->state, nvmm_impl->state_size);
 
 out:
 	nvmm_machine_put(mach);
@@ -945,6 +963,8 @@ const struct cdevsw nvmm_cdevsw = {
 
 static int nvmm_ioctl(file_t *, u_long, void *);
 static int nvmm_close(file_t *);
+static int nvmm_mmap(file_t *, off_t *, size_t, int, int *, int *,
+    struct uvm_object **, int *);
 
 const struct fileops nvmm_fileops = {
 	.fo_read = fbadop_read,
@@ -956,7 +976,7 @@ const struct fileops nvmm_fileops = {
 	.fo_close = nvmm_close,
 	.fo_kqfilter = fnullop_kqfilter,
 	.fo_restart = fnullop_restart,
-	.fo_mmap = NULL,
+	.fo_mmap = nvmm_mmap,
 };
 
 static int
@@ -992,6 +1012,40 @@ nvmm_close(file_t *fp)
 }
 
 static int
+nvmm_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
+    int *advicep, struct uvm_object **uobjp, int *maxprotp)
+{
+	struct nvmm_owner *owner = fp->f_data;
+	struct nvmm_machine *mach;
+	nvmm_machid_t machid;
+	nvmm_cpuid_t cpuid;
+	int error;
+
+	if (prot & PROT_EXEC)
+		return EACCES;
+	if (size != PAGE_SIZE)
+		return EINVAL;
+
+	cpuid = NVMM_COMM_CPUID(*offp);
+	if (__predict_false(cpuid >= NVMM_MAX_VCPUS))
+		return EINVAL;
+
+	machid = NVMM_COMM_MACHID(*offp);
+	error = nvmm_machine_get(owner, machid, &mach, false);
+	if (error)
+		return error;
+
+	uao_reference(mach->commuobj);
+	*uobjp = mach->commuobj;
+	*offp = cpuid * PAGE_SIZE;
+	*maxprotp = prot;
+	*advicep = UVM_ADV_RANDOM;
+
+	nvmm_machine_put(mach);
+	return 0;
+}
+
+static int
 nvmm_ioctl(file_t *fp, u_long cmd, void *data)
 {
 	struct nvmm_owner *owner = fp->f_data;

Index: src/sys/dev/nvmm/nvmm.h
diff -u src/sys/dev/nvmm/nvmm.h:1.7 src/sys/dev/nvmm/nvmm.h:1.8
--- src/sys/dev/nvmm/nvmm.h:1.7	Sat Apr 27 15:45:21 2019
+++ src/sys/dev/nvmm/nvmm.h	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm.h,v 1.7 2019/04/27 15:45:21 maxv Exp $	*/
+/*	$NetBSD: nvmm.h,v 1.8 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -94,4 +94,17 @@ struct nvmm_capability {
 	struct nvmm_cap_md arch;
 };
 
+/*
+ * Bits 20:27 -> machid
+ * Bits 12:19 -> cpuid
+ */
+#define NVMM_COMM_OFF(machid, cpuid)	\
+	((((uint64_t)machid & 0xFFULL) << 20) | (((uint64_t)cpuid & 0xFFULL) << 12))
+
+#define NVMM_COMM_MACHID(off)		\
+	((off >> 20) & 0xFF)
+
+#define NVMM_COMM_CPUID(off)		\
+	((off >> 12) & 0xFF)
+
 #endif

Index: src/sys/dev/nvmm/nvmm_internal.h
diff -u src/sys/dev/nvmm/nvmm_internal.h:1.9 src/sys/dev/nvmm/nvmm_internal.h:1.10
--- src/sys/dev/nvmm/nvmm_internal.h:1.9	Wed Apr 10 18:49:04 2019
+++ src/sys/dev/nvmm/nvmm_internal.h	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_internal.h,v 1.9 2019/04/10 18:49:04 maxv Exp $	*/
+/*	$NetBSD: nvmm_internal.h,v 1.10 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -47,8 +47,8 @@ struct nvmm_cpu {
 	nvmm_cpuid_t cpuid;
 	kmutex_t lock;
 
-	/* State buffer. */
-	void *state;
+	/* Comm page. */
+	struct nvmm_comm_page *comm;
 
 	/* Last host CPU on which the VCPU ran. */
 	int hcpu_last;
@@ -71,6 +71,9 @@ struct nvmm_machine {
 	struct nvmm_owner *owner;
 	krwlock_t lock;
 
+	/* Comm */
+	struct uvm_object *commuobj;
+
 	/* Kernel */
 	struct vmspace *vm;
 	gpaddr_t gpa_begin;
@@ -102,8 +105,8 @@ struct nvmm_impl {
 
 	int (*vcpu_create)(struct nvmm_machine *, struct nvmm_cpu *);
 	void (*vcpu_destroy)(struct nvmm_machine *, struct nvmm_cpu *);
-	void (*vcpu_setstate)(struct nvmm_cpu *, const void *, uint64_t);
-	void (*vcpu_getstate)(struct nvmm_cpu *, void *, uint64_t);
+	void (*vcpu_setstate)(struct nvmm_cpu *);
+	void (*vcpu_getstate)(struct nvmm_cpu *);
 	int (*vcpu_inject)(struct nvmm_machine *, struct nvmm_cpu *,
 	    struct nvmm_event *);
 	int (*vcpu_run)(struct nvmm_machine *, struct nvmm_cpu *,

Index: src/sys/dev/nvmm/nvmm_ioctl.h
diff -u src/sys/dev/nvmm/nvmm_ioctl.h:1.5 src/sys/dev/nvmm/nvmm_ioctl.h:1.6
--- src/sys/dev/nvmm/nvmm_ioctl.h:1.5	Wed Apr 10 18:49:04 2019
+++ src/sys/dev/nvmm/nvmm_ioctl.h	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_ioctl.h,v 1.5 2019/04/10 18:49:04 maxv Exp $	*/
+/*	$NetBSD: nvmm_ioctl.h,v 1.6 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -65,15 +65,11 @@ struct nvmm_ioc_vcpu_destroy {
 struct nvmm_ioc_vcpu_setstate {
 	nvmm_machid_t machid;
 	nvmm_cpuid_t cpuid;
-	uint64_t flags;
-	void *state;
 };
 
 struct nvmm_ioc_vcpu_getstate {
 	nvmm_machid_t machid;
 	nvmm_cpuid_t cpuid;
-	uint64_t flags;
-	void *state;
 };
 
 struct nvmm_ioc_vcpu_inject {

Index: src/sys/dev/nvmm/x86/nvmm_x86.h
diff -u src/sys/dev/nvmm/x86/nvmm_x86.h:1.12 src/sys/dev/nvmm/x86/nvmm_x86.h:1.13
--- src/sys/dev/nvmm/x86/nvmm_x86.h:1.12	Sat Apr 27 15:45:21 2019
+++ src/sys/dev/nvmm/x86/nvmm_x86.h	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_x86.h,v 1.12 2019/04/27 15:45:21 maxv Exp $	*/
+/*	$NetBSD: nvmm_x86.h,v 1.13 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -248,6 +248,13 @@ struct nvmm_x86_conf_cpuid {
 	} del;
 };
 
+struct nvmm_comm_page {
+	uint64_t state_wanted;
+	uint64_t state_cached;
+	uint64_t state_commit;
+	struct nvmm_x64_state state;
+};
+
 #ifdef _KERNEL
 struct nvmm_x86_cpuid_mask {
 	uint32_t eax;

Index: src/sys/dev/nvmm/x86/nvmm_x86_svm.c
diff -u src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.42 src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.43
--- src/sys/dev/nvmm/x86/nvmm_x86_svm.c:1.42	Sat Apr 27 15:45:21 2019
+++ src/sys/dev/nvmm/x86/nvmm_x86_svm.c	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_x86_svm.c,v 1.42 2019/04/27 15:45:21 maxv Exp $	*/
+/*	$NetBSD: nvmm_x86_svm.c,v 1.43 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.42 2019/04/27 15:45:21 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.43 2019/04/28 14:22:13 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -452,6 +452,9 @@ CTASSERT(offsetof(struct vmcb, state) ==
 
 /* -------------------------------------------------------------------------- */
 
+static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t);
+static void svm_vcpu_state_commit(struct nvmm_cpu *);
+
 struct svm_hsave {
 	paddr_t pa;
 };
@@ -954,6 +957,10 @@ svm_exit_io(struct nvmm_machine *mach, s
 	exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0;
 	exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0;
 	exit->u.io.npc = nextpc;
+
+	svm_vcpu_state_provide(vcpu,
+	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
+	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
 }
 
 static const uint64_t msr_ignore_list[] = {
@@ -1057,6 +1064,8 @@ svm_exit_msr(struct nvmm_machine *mach, 
 
 	exit->reason = NVMM_EXIT_MSR;
 	exit->u.msr.npc = cpudata->vmcb->ctrl.nrip;
+
+	svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS);
 }
 
 static void
@@ -1077,6 +1086,10 @@ svm_exit_npf(struct nvmm_machine *mach, 
 	exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len;
 	memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes,
 	    sizeof(exit->u.mem.inst_bytes));
+
+	svm_vcpu_state_provide(vcpu,
+	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
+	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
 }
 
 static void
@@ -1274,12 +1287,16 @@ static int
 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
     struct nvmm_exit *exit)
 {
+	struct nvmm_comm_page *comm = vcpu->comm;
 	struct svm_machdata *machdata = mach->machdata;
 	struct svm_cpudata *cpudata = vcpu->cpudata;
 	struct vmcb *vmcb = cpudata->vmcb;
 	uint64_t machgen;
 	int hcpu, s;
 
+	svm_vcpu_state_commit(vcpu);
+	comm->state_cached = 0;
+
 	kpreempt_disable();
 	hcpu = cpu_number();
 
@@ -1583,12 +1600,16 @@ svm_state_tlb_flush(const struct vmcb *v
 }
 
 static void
-svm_vcpu_setstate(struct nvmm_cpu *vcpu, const void *data, uint64_t flags)
+svm_vcpu_setstate(struct nvmm_cpu *vcpu)
 {
-	const struct nvmm_x64_state *state = data;
+	struct nvmm_comm_page *comm = vcpu->comm;
+	const struct nvmm_x64_state *state = &comm->state;
 	struct svm_cpudata *cpudata = vcpu->cpudata;
 	struct vmcb *vmcb = cpudata->vmcb;
 	struct fxsave *fpustate;
+	uint64_t flags;
+
+	flags = comm->state_wanted;
 
 	if (svm_state_tlb_flush(vmcb, state, flags)) {
 		cpudata->gtlb_want_flush = true;
@@ -1714,14 +1735,21 @@ svm_vcpu_setstate(struct nvmm_cpu *vcpu,
 	}
 
 	svm_vmcb_cache_update(vmcb, flags);
+
+	comm->state_wanted = 0;
+	comm->state_cached |= flags;
 }
 
 static void
-svm_vcpu_getstate(struct nvmm_cpu *vcpu, void *data, uint64_t flags)
+svm_vcpu_getstate(struct nvmm_cpu *vcpu)
 {
-	struct nvmm_x64_state *state = (struct nvmm_x64_state *)data;
+	struct nvmm_comm_page *comm = vcpu->comm;
+	struct nvmm_x64_state *state = &comm->state;
 	struct svm_cpudata *cpudata = vcpu->cpudata;
 	struct vmcb *vmcb = cpudata->vmcb;
+	uint64_t flags;
+
+	flags = comm->state_wanted;
 
 	if (flags & NVMM_X64_STATE_SEGS) {
 		svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS],
@@ -1810,6 +1838,24 @@ svm_vcpu_getstate(struct nvmm_cpu *vcpu,
 		memcpy(&state->fpu, cpudata->gfpu.xsh_fxsave,
 		    sizeof(state->fpu));
 	}
+
+	comm->state_wanted = 0;
+	comm->state_cached |= flags;
+}
+
+static void
+svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
+{
+	vcpu->comm->state_wanted = flags;
+	svm_vcpu_getstate(vcpu);
+}
+
+static void
+svm_vcpu_state_commit(struct nvmm_cpu *vcpu)
+{
+	vcpu->comm->state_wanted = vcpu->comm->state_commit;
+	vcpu->comm->state_commit = 0;
+	svm_vcpu_setstate(vcpu);
 }
 
 /* -------------------------------------------------------------------------- */
@@ -1982,7 +2028,11 @@ svm_vcpu_init(struct nvmm_machine *mach,
 	cpudata->sfmask = rdmsr(MSR_SFMASK);
 
 	/* Install the RESET state. */
-	svm_vcpu_setstate(vcpu, &nvmm_x86_reset_state, NVMM_X64_STATE_ALL);
+	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
+	    sizeof(nvmm_x86_reset_state));
+	vcpu->comm->state_wanted = NVMM_X64_STATE_ALL;
+	vcpu->comm->state_cached = 0;
+	svm_vcpu_setstate(vcpu);
 }
 
 static int

Index: src/sys/dev/nvmm/x86/nvmm_x86_vmx.c
diff -u src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.30 src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.31
--- src/sys/dev/nvmm/x86/nvmm_x86_vmx.c:1.30	Sat Apr 27 15:45:21 2019
+++ src/sys/dev/nvmm/x86/nvmm_x86_vmx.c	Sun Apr 28 14:22:13 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmm_x86_vmx.c,v 1.30 2019/04/27 15:45:21 maxv Exp $	*/
+/*	$NetBSD: nvmm_x86_vmx.c,v 1.31 2019/04/28 14:22:13 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.30 2019/04/27 15:45:21 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.31 2019/04/28 14:22:13 maxv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -554,6 +554,9 @@ vmx_vmclear(paddr_t *pa)
 
 /* -------------------------------------------------------------------------- */
 
+static void vmx_vcpu_state_provide(struct nvmm_cpu *, uint64_t);
+static void vmx_vcpu_state_commit(struct nvmm_cpu *);
+
 #define VMX_MSRLIST_STAR		0
 #define VMX_MSRLIST_LSTAR		1
 #define VMX_MSRLIST_CSTAR		2
@@ -1517,6 +1520,10 @@ vmx_exit_io(struct nvmm_machine *mach, s
 	inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH);
 	rip = vmx_vmread(VMCS_GUEST_RIP);
 	exit->u.io.npc = rip + inslen;
+
+	vmx_vcpu_state_provide(vcpu,
+	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
+	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
 }
 
 static const uint64_t msr_ignore_list[] = {
@@ -1625,6 +1632,8 @@ vmx_exit_msr(struct nvmm_machine *mach, 
 	inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH);
 	rip = vmx_vmread(VMCS_GUEST_RIP);
 	exit->u.msr.npc = rip + inslen;
+
+	vmx_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS);
 }
 
 static void
@@ -1679,6 +1688,10 @@ vmx_exit_epf(struct nvmm_machine *mach, 
 		exit->u.mem.prot = PROT_READ;
 	exit->u.mem.gpa = gpa;
 	exit->u.mem.inst_len = 0;
+
+	vmx_vcpu_state_provide(vcpu,
+	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
+	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
 }
 
 static void
@@ -1867,6 +1880,7 @@ static int
 vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
     struct nvmm_exit *exit)
 {
+	struct nvmm_comm_page *comm = vcpu->comm;
 	struct vmx_machdata *machdata = mach->machdata;
 	struct vmx_cpudata *cpudata = vcpu->cpudata;
 	struct vpid_desc vpid_desc;
@@ -1878,6 +1892,10 @@ vmx_vcpu_run(struct nvmm_machine *mach, 
 	bool launched;
 
 	vmx_vmcs_enter(vcpu);
+
+	vmx_vcpu_state_commit(vcpu);
+	comm->state_cached = 0;
+
 	ci = curcpu();
 	hcpu = cpu_number();
 	launched = cpudata->vmcs_launched;
@@ -2220,12 +2238,16 @@ vmx_state_tlb_flush(const struct nvmm_x6
 }
 
 static void
-vmx_vcpu_setstate(struct nvmm_cpu *vcpu, const void *data, uint64_t flags)
+vmx_vcpu_setstate(struct nvmm_cpu *vcpu)
 {
-	const struct nvmm_x64_state *state = data;
+	struct nvmm_comm_page *comm = vcpu->comm;
+	const struct nvmm_x64_state *state = &comm->state;
 	struct vmx_cpudata *cpudata = vcpu->cpudata;
 	struct fxsave *fpustate;
 	uint64_t ctls1, intstate;
+	uint64_t flags;
+
+	flags = comm->state_wanted;
 
 	vmx_vmcs_enter(vcpu);
 
@@ -2356,14 +2378,20 @@ vmx_vcpu_setstate(struct nvmm_cpu *vcpu,
 	}
 
 	vmx_vmcs_leave(vcpu);
+
+	comm->state_wanted = 0;
+	comm->state_cached |= flags;
 }
 
 static void
-vmx_vcpu_getstate(struct nvmm_cpu *vcpu, void *data, uint64_t flags)
+vmx_vcpu_getstate(struct nvmm_cpu *vcpu)
 {
-	struct nvmm_x64_state *state = (struct nvmm_x64_state *)data;
+	struct nvmm_comm_page *comm = vcpu->comm;
+	struct nvmm_x64_state *state = &comm->state;
 	struct vmx_cpudata *cpudata = vcpu->cpudata;
-	uint64_t intstate;
+	uint64_t intstate, flags;
+
+	flags = comm->state_wanted;
 
 	vmx_vmcs_enter(vcpu);
 
@@ -2448,6 +2476,24 @@ vmx_vcpu_getstate(struct nvmm_cpu *vcpu,
 	}
 
 	vmx_vmcs_leave(vcpu);
+
+	comm->state_wanted = 0;
+	comm->state_cached |= flags;
+}
+
+static void
+vmx_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags)
+{
+	vcpu->comm->state_wanted = flags;
+	vmx_vcpu_getstate(vcpu);
+}
+
+static void
+vmx_vcpu_state_commit(struct nvmm_cpu *vcpu)
+{
+	vcpu->comm->state_wanted = vcpu->comm->state_commit;
+	vcpu->comm->state_commit = 0;
+	vmx_vcpu_setstate(vcpu);
 }
 
 /* -------------------------------------------------------------------------- */
@@ -2614,7 +2660,11 @@ vmx_vcpu_init(struct nvmm_machine *mach,
 	cpudata->sfmask = rdmsr(MSR_SFMASK);
 
 	/* Install the RESET state. */
-	vmx_vcpu_setstate(vcpu, &nvmm_x86_reset_state, NVMM_X64_STATE_ALL);
+	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
+	    sizeof(nvmm_x86_reset_state));
+	vcpu->comm->state_wanted = NVMM_X64_STATE_ALL;
+	vcpu->comm->state_cached = 0;
+	vmx_vcpu_setstate(vcpu);
 
 	vmx_vmcs_leave(vcpu);
 }
