The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f892c97b6109639b8b03dfbccd4190389a88b067

commit f892c97b6109639b8b03dfbccd4190389a88b067
Author:     Bojan Novković <[email protected]>
AuthorDate: 2025-11-06 14:28:47 +0000
Commit:     Bojan Novković <[email protected]>
CommitDate: 2025-12-17 14:08:32 +0000

    bhyve: Introduce monitor mode
    
    This change introduces "monitor mode", a mechanism for automatically
    releasing virtual machine resources when bhyve dies, bringing us
    closer to making non-root bhyve viable.
    Under this regime bhyve will create a transient virtual machine using
    `vmmctl`'s `VMMCTL_CREATE_DESTROY_ON_CLOSE` flag and automatically
    reboot said virtual machine as long as it exits with "reboot" status.
    
    This is done by splitting bhyve into two processes. The parent process
    creates the virtual machine while the child process initializes and
    runs the virtual machine. When the child exits the parent inspects
    its exit status and either exits or forks again. `vmmctl` automatically
    destroys the underlying virtual machine once the parent process dies.
    
    Differential Revision:  https://reviews.freebsd.org/D53731
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    Sponsored by:   Klara, Inc.
    MFC after:      3 months
---
 usr.sbin/bhyve/aarch64/bhyverun_machdep.c |  6 ++-
 usr.sbin/bhyve/amd64/bhyverun_machdep.c   |  8 ++-
 usr.sbin/bhyve/bhyve.8                    | 12 ++++-
 usr.sbin/bhyve/bhyverun.c                 | 82 +++++++++++++++++++++++--------
 usr.sbin/bhyve/riscv/bhyverun_machdep.c   |  6 ++-
 5 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/usr.sbin/bhyve/aarch64/bhyverun_machdep.c b/usr.sbin/bhyve/aarch64/bhyverun_machdep.c
index 1d35f958ffb7..e099df0559a1 100644
--- a/usr.sbin/bhyve/aarch64/bhyverun_machdep.c
+++ b/usr.sbin/bhyve/aarch64/bhyverun_machdep.c
@@ -107,6 +107,7 @@ bhyve_usage(int code)
            "       -G: start a debug server\n"
            "       -h: help\n"
            "       -k: key=value flat config file\n"
+           "       -M: monitor mode\n"
            "       -m: memory size\n"
            "       -o: set config 'var' to 'value'\n"
            "       -p: pin 'vcpu' to 'hostcpu'\n"
@@ -125,7 +126,7 @@ bhyve_optparse(int argc, char **argv)
        const char *optstr;
        int c;
 
-       optstr = "hCDSWk:f:o:p:G:c:s:m:U:";
+       optstr = "hCDMSWk:f:o:p:G:c:s:m:U:";
        while ((c = getopt(argc, argv, optstr)) != -1) {
                switch (c) {
                case 'c':
@@ -149,6 +150,9 @@ bhyve_optparse(int argc, char **argv)
                case 'm':
                        set_config_value("memory.size", optarg);
                        break;
+               case 'M':
+                       set_config_bool("monitor", true);
+                       break;
                case 'o':
                        if (!bhyve_parse_config_option(optarg)) {
                                errx(EX_USAGE,
diff --git a/usr.sbin/bhyve/amd64/bhyverun_machdep.c b/usr.sbin/bhyve/amd64/bhyverun_machdep.c
index 4fd3c29bd86a..538d031fd1a4 100644
--- a/usr.sbin/bhyve/amd64/bhyverun_machdep.c
+++ b/usr.sbin/bhyve/amd64/bhyverun_machdep.c
@@ -90,6 +90,7 @@ bhyve_usage(int code)
            "       -k: key=value flat config file\n"
            "       -K: PS2 keyboard layout\n"
            "       -l: LPC device configuration\n"
+           "       -M: monitor mode\n"
            "       -m: memory size\n"
            "       -n: NUMA domain specification\n"
            "       -o: set config 'var' to 'value'\n"
@@ -118,9 +119,9 @@ bhyve_optparse(int argc, char **argv)
        int c;
 
 #ifdef BHYVE_SNAPSHOT
-       optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:";
+       optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:";
 #else
-       optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:";
+       optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:";
 #endif
        while ((c = getopt(argc, argv, optstr)) != -1) {
                switch (c) {
@@ -195,6 +196,9 @@ bhyve_optparse(int argc, char **argv)
                case 'm':
                        set_config_value("memory.size", optarg);
                        break;
+               case 'M':
+                       set_config_bool("monitor", true);
+                       break;
                case 'n':
                        if (bhyve_numa_parse(optarg) != 0)
                                errx(EX_USAGE,
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index 0893b0b719bc..7be1f7447adc 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd December 4, 2025
+.Dd December 17, 2025
 .Dt BHYVE 8
 .Os
 .Sh NAME
@@ -33,7 +33,7 @@
 .Nd "run a guest operating system inside a virtual machine"
 .Sh SYNOPSIS
 .Nm
-.Op Fl aCDeHhPSuWwxY
+.Op Fl aCDeHhMPSuWwxY
 .Oo
 .Sm off
 .Fl c\~
@@ -273,6 +273,14 @@ or
 to indicate a multiple of kilobytes, megabytes, gigabytes, or terabytes.
 If no suffix is given, the value is assumed to be in megabytes.
 The default is 256M.
+.It Fl M
+Run the VM in
+.Ql monitor
+mode.
+In this mode, a guest reboot does not cause the bhyve process to exit.
+Instead, bhyve will restart the VM.
+Once the bhyve process exits or is killed, the VM will be destroyed automatically.
+The underlying virtual machine is automatically destroyed when the bhyve process exits.
 .Pp
 .It Fl n Ar id Ns Cm \&, Ns Ar size Ns Cm \&, Ns Ar cpus Ns Op Cm \&, Ns Ar domain_policy
 Configure guest NUMA domains.
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 6190e0039c9d..9db62972467c 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -41,6 +41,7 @@
 #ifdef BHYVE_SNAPSHOT
 #include <sys/un.h>
 #endif
+#include <sys/wait.h>
 
 #include <machine/atomic.h>
 
@@ -687,9 +688,10 @@ static struct vmctx *
 do_open(const char *vmname)
 {
        struct vmctx *ctx;
-       int error;
-       bool romboot;
+       int error, flags;
+       bool romboot, monitor;
 
+       monitor = get_config_bool_default("monitor", false);
        romboot = bootrom_boot();
 
        /*
@@ -702,7 +704,10 @@ do_open(const char *vmname)
                        err(4, "vm_openf");
                if (!romboot)
                        errx(4, "no bootrom was configured");
-               ctx = vm_openf(vmname, VMMAPI_OPEN_CREATE);
+               flags = VMMAPI_OPEN_CREATE;
+               if (monitor)
+                       flags |= VMMAPI_OPEN_CREATE_DESTROY_ON_CLOSE;
+               ctx = vm_openf(vmname, flags);
                if (ctx == NULL)
                        err(4, "vm_openf");
        }
@@ -792,7 +797,7 @@ bhyve_parse_gdb_options(const char *opt)
 int
 main(int argc, char *argv[])
 {
-       int error;
+       int error, status;
        int max_vcpus, memflags;
        struct vcpu *bsp;
        struct vmctx *ctx;
@@ -859,6 +864,58 @@ main(int argc, char *argv[])
        }
 #endif
 
+       calc_mem_affinity(memsize);
+       memflags = 0;
+       if (get_config_bool_default("memory.wired", false))
+               memflags |= VM_MEM_F_WIRED;
+       if (get_config_bool_default("memory.guest_in_core", false))
+               memflags |= VM_MEM_F_INCORE;
+       vm_set_memflags(ctx, memflags);
+       error = vm_setup_memory_domains(ctx, VM_MMAP_ALL, guest_domains,
+           guest_ndomains);
+       if (error) {
+               fprintf(stderr, "Unable to setup memory (%d)\n", errno);
+               exit(BHYVE_EXIT_ERROR);
+       }
+
+       set_vcpu_affinities();
+       init_mem(guest_ncpus);
+       init_bootrom(ctx);
+
+       if (get_config_bool_default("monitor", false)) {
+               while (1) {
+                       pid_t child = fork();
+                       if (child == -1) {
+                               EPRINTLN("Monitor mode fork failed: %s",
+                                   strerror(errno));
+                               exit(BHYVE_EXIT_ERROR);
+                       }
+                       if (child == 0)
+                               break;
+                       while ((error = waitpid(child, &status, 0)) == -1 && 
errno == EINTR)
+                           ;
+                       if (error == -1) {
+                               EPRINTLN("Monitor mode wait failed: %s",
+                                   strerror(errno));
+                               exit(BHYVE_EXIT_ERROR);
+                       }
+                       if (WIFSIGNALED(status)) {
+                               EPRINTLN("Child process was killed by signal 
%d",
+                                   WTERMSIG(status));
+                               exit(BHYVE_EXIT_ERROR);
+                       } else {
+                               status = WEXITSTATUS(status);
+                               if (status != BHYVE_EXIT_RESET)
+                                       exit(status);
+                       }
+                       if (vm_reinit(ctx) != 0) {
+                               EPRINTLN("Monitor mode reinit failed: %s",
+                                   strerror(errno));
+                               exit(BHYVE_EXIT_ERROR);
+                       };
+               }
+       }
+
        bsp = vm_vcpu_open(ctx, BSP);
        max_vcpus = num_vcpus_allowed(ctx, bsp);
        if (guest_ncpus > max_vcpus) {
@@ -880,23 +937,6 @@ main(int argc, char *argv[])
                        vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid);
        }
 
-       calc_mem_affinity(memsize);
-       memflags = 0;
-       if (get_config_bool_default("memory.wired", false))
-               memflags |= VM_MEM_F_WIRED;
-       if (get_config_bool_default("memory.guest_in_core", false))
-               memflags |= VM_MEM_F_INCORE;
-       vm_set_memflags(ctx, memflags);
-       error = vm_setup_memory_domains(ctx, VM_MMAP_ALL, guest_domains,
-           guest_ndomains);
-       if (error) {
-               fprintf(stderr, "Unable to setup memory (%d)\n", errno);
-               exit(4);
-       }
-
-       set_vcpu_affinities();
-       init_mem(guest_ncpus);
-       init_bootrom(ctx);
        if (bhyve_init_platform(ctx, bsp) != 0)
                exit(BHYVE_EXIT_ERROR);
 
diff --git a/usr.sbin/bhyve/riscv/bhyverun_machdep.c b/usr.sbin/bhyve/riscv/bhyverun_machdep.c
index f4c248ee662a..a90a508901bc 100644
--- a/usr.sbin/bhyve/riscv/bhyverun_machdep.c
+++ b/usr.sbin/bhyve/riscv/bhyverun_machdep.c
@@ -101,6 +101,7 @@ bhyve_usage(int code)
            "       -D: destroy on power-off\n"
            "       -h: help\n"
            "       -k: key=value flat config file\n"
+           "       -M: monitor mode\n"
            "       -m: memory size\n"
            "       -o: set config 'var' to 'value'\n"
            "       -p: pin 'vcpu' to 'hostcpu'\n"
@@ -119,7 +120,7 @@ bhyve_optparse(int argc, char **argv)
        const char *optstr;
        int c;
 
-       optstr = "hCDSWk:f:o:p:c:s:m:U:";
+       optstr = "hCDMSWk:f:o:p:c:s:m:U:";
        while ((c = getopt(argc, argv, optstr)) != -1) {
                switch (c) {
                case 'c':
@@ -137,6 +138,9 @@ bhyve_optparse(int argc, char **argv)
                case 'k':
                        bhyve_parse_simple_config_file(optarg);
                        break;
+               case 'M':
+                       set_config_bool("monitor", true);
+                       break;
                case 'm':
                        set_config_value("memory.size", optarg);
                        break;

Reply via email to