On Wed, 17 Dec 2025 14:08:55 +0000 Bojan Novković <[email protected]> wrote:
> The branch main has been updated by bnovkov: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=1092ec8b337595ed8d52accf41c6904d75b3689d > > commit 1092ec8b337595ed8d52accf41c6904d75b3689d > Author: Bojan Novković <[email protected]> > AuthorDate: 2025-11-07 13:11:03 +0000 > Commit: Bojan Novković <[email protected]> > CommitDate: 2025-12-17 14:08:31 +0000 > > kern: Introduce RLIMIT_VMM > > This change introduces a new per-UID limit for controlling the > number of vmm instances, in anticipation of unprivileged bhyve. > This allows ut to limit the amount of kernel memory allocated > by the vmm driver and prevent potential memory exhaustion attacks. > > Differential Revision: https://reviews.freebsd.org/D53728 > Reviewed by: markj, olce, corvink > MFC after: 3 months > Sponsored by: The FreeBSD Foundation > Sponsored by: Klara, Inc. > --- > sys/dev/vmm/vmm_dev.c | 18 +++++++++++++++--- > sys/kern/kern_resource.c | 13 +++++++++++++ > sys/sys/resource.h | 4 +++- > sys/sys/resourcevar.h | 2 ++ > usr.bin/procstat/procstat_rlimit.c | 1 + > 5 files changed, 34 insertions(+), 4 deletions(-) > > diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c > index d6543bf6534e..3a86a8f966ef 100644 > --- a/sys/dev/vmm/vmm_dev.c > +++ b/sys/dev/vmm/vmm_dev.c > @@ -18,6 +18,7 @@ > #include <sys/priv.h> > #include <sys/proc.h> > #include <sys/queue.h> > +#include <sys/resourcevar.h> > #include <sys/smp.h> > #include <sys/sx.h> > #include <sys/sysctl.h> > @@ -96,6 +97,10 @@ u_int vm_maxcpu; > SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, > &vm_maxcpu, 0, "Maximum number of vCPUs"); > > +u_int vm_maxvmms; > +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN, > + &vm_maxvmms, 0, "Maximum number of VMM instances per user"); > + > static void devmem_destroy(void *arg); > static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem); > > @@ -870,6 +875,7 @@ vmmdev_destroy(struct vmmdev_softc *sc) > int error __diagused; > > KASSERT(sc->cdev 
== NULL, ("%s: cdev not free", __func__)); > + KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__)); > > /* > * Destroy all cdevs: > @@ -898,8 +904,8 @@ vmmdev_destroy(struct vmmdev_softc *sc) > if (sc->vm != NULL) > vm_destroy(sc->vm); > > - if (sc->ucred != NULL) > - crfree(sc->ucred); > + chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0); > + crfree(sc->ucred); > > sx_xlock(&vmmdev_mtx); > SLIST_REMOVE(&head, sc, vmmdev_softc, link); > @@ -1021,6 +1027,12 @@ vmmdev_create(const char *name, struct ucred *cred) > vmmdev_destroy(sc); > return (error); > } > + if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) { > + sx_xunlock(&vmmdev_mtx); > + destroy_dev(cdev); > + vmmdev_destroy(sc); > + return (ENOMEM); > + } > sc->cdev = cdev; > sx_xunlock(&vmmdev_mtx); > return (0); > @@ -1172,7 +1184,7 @@ vmm_handler(module_t mod, int what, void *arg) > } > if (vm_maxcpu == 0) > vm_maxcpu = 1; > - > + vm_maxvmms = 4 * mp_ncpus; > error = vmm_modinit(); > if (error == 0) > vmm_initialized = true; > diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c > index dcd38c6e6fbe..31f89bd41f6d 100644 > --- a/sys/kern/kern_resource.c > +++ b/sys/kern/kern_resource.c > @@ -895,6 +895,9 @@ getrlimitusage_one(struct proc *p, u_int which, int > flags, rlim_t *res) case RLIMIT_PIPEBUF: > *res = ui->ui_pipecnt; > break; > + case RLIMIT_VMM: > + *res = ui->ui_vmmcnt; > + break; > default: > error = EINVAL; > break; > @@ -1643,6 +1646,9 @@ uifree(struct uidinfo *uip) > if (uip->ui_inotifywatchcnt != 0) > printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n", > uip->ui_uid, uip->ui_inotifywatchcnt); > + if (uip->ui_vmmcnt != 0) > + printf("freeing vmmcnt: uid = %d, vmmcnt = %ld\n", > + uip->ui_uid, uip->ui_vmmcnt); > free(uip, M_UIDINFO); > } > > @@ -1763,6 +1769,13 @@ chginotifywatchcnt(struct uidinfo *uip, int diff, > rlim_t max) "inotifywatchcnt")); > } > > +int > +chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max) > +{ > + > + return (chglimit(uip, &uip->ui_vmmcnt, diff, 
max, "vmmcnt")); > +} > + > static int > sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS) > { > diff --git a/sys/sys/resource.h b/sys/sys/resource.h > index 2725aa1ef646..9e0635cdb328 100644 > --- a/sys/sys/resource.h > +++ b/sys/sys/resource.h > @@ -115,8 +115,9 @@ struct __wrusage { > #define RLIMIT_KQUEUES 13 /* kqueues allocated > */ #define RLIMIT_UMTXP 14 /* process-shared > umtx */ #define RLIMIT_PIPEBUF 15 /* pipes/fifos > buffers */ +#define RLIMIT_VMM 16 /* virtual > machines */ > -#define RLIM_NLIMITS 16 /* number of resource > limits */ +#define RLIM_NLIMITS 17 /* number of > resource limits */ > #define RLIM_INFINITY ((rlim_t)(((__uint64_t)1 << 63) - 1)) > #define RLIM_SAVED_MAX RLIM_INFINITY > @@ -144,6 +145,7 @@ static const char *rlimit_ident[] = { > "kqueues", > "umtx", > "pipebuf", > + "vmm", > }; > #endif > > diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h > index 61411890c85b..d5c4561eec66 100644 > --- a/sys/sys/resourcevar.h > +++ b/sys/sys/resourcevar.h > @@ -124,6 +124,7 @@ struct uidinfo { > long ui_pipecnt; /* (b) consumption of pipe > buffers */ long ui_inotifycnt; /* (b) number of inotify > descriptors */ long ui_inotifywatchcnt; /* (b) number of > inotify watches */ > + long ui_vmmcnt; /* (b) number of vmm instances > */ uid_t ui_uid; /* (a) uid */ > u_int ui_ref; /* (b) reference count */ > #ifdef RACCT > @@ -148,6 +149,7 @@ int chgumtxcnt(struct uidinfo *uip, int diff, > rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t > max); int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval); > int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval); > +int chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max); > int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int > which, struct rlimit *limp); > struct plimit > diff --git a/usr.bin/procstat/procstat_rlimit.c > b/usr.bin/procstat/procstat_rlimit.c index c34550295f05..f3132758e005 100644 > --- a/usr.bin/procstat/procstat_rlimit.c > 
+++ b/usr.bin/procstat/procstat_rlimit.c > @@ -64,6 +64,7 @@ static struct { > {"kqueues", " "}, > {"umtxp", " "}, > {"pipebuf", "B "}, > + {"virtual-machines", " "}, > }; > > _Static_assert(nitems(rlimit_param) == RLIM_NLIMITS, > After this commit - probably - my kernel pollutes the console with lots of messages like: pid XXXXX (limits), jid 0, uid 0: exited on signal 11 (core dumped). In particular, bind920 (named) fails immediately, the mouse surprisingly no longer works, and so on ... sshd also dies on startup - no access possible. Environment: customized kernel (mainly with unused drivers commented out and ZFS/IPFW in-kernel). Kind regards, oh
