On Wed, 17 Dec 2025 14:08:55 +0000
Bojan Novković <[email protected]> wrote:

> The branch main has been updated by bnovkov:
> 
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=1092ec8b337595ed8d52accf41c6904d75b3689d
> 
> commit 1092ec8b337595ed8d52accf41c6904d75b3689d
> Author:     Bojan Novković <[email protected]>
> AuthorDate: 2025-11-07 13:11:03 +0000
> Commit:     Bojan Novković <[email protected]>
> CommitDate: 2025-12-17 14:08:31 +0000
> 
>     kern: Introduce RLIMIT_VMM
>     
>     This change introduces a new per-UID limit for controlling the
>     number of vmm instances, in anticipation of unprivileged bhyve.
>     This allows us to limit the amount of kernel memory allocated
>     by the vmm driver and prevent potential memory exhaustion attacks.
>     
>     Differential Revision:  https://reviews.freebsd.org/D53728
>     Reviewed by:    markj, olce, corvink
>     MFC after:      3 months
>     Sponsored by:   The FreeBSD Foundation
>     Sponsored by:   Klara, Inc.
> ---
>  sys/dev/vmm/vmm_dev.c              | 18 +++++++++++++++---
>  sys/kern/kern_resource.c           | 13 +++++++++++++
>  sys/sys/resource.h                 |  4 +++-
>  sys/sys/resourcevar.h              |  2 ++
>  usr.bin/procstat/procstat_rlimit.c |  1 +
>  5 files changed, 34 insertions(+), 4 deletions(-)
> 
> diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
> index d6543bf6534e..3a86a8f966ef 100644
> --- a/sys/dev/vmm/vmm_dev.c
> +++ b/sys/dev/vmm/vmm_dev.c
> @@ -18,6 +18,7 @@
>  #include <sys/priv.h>
>  #include <sys/proc.h>
>  #include <sys/queue.h>
> +#include <sys/resourcevar.h>
>  #include <sys/smp.h>
>  #include <sys/sx.h>
>  #include <sys/sysctl.h>
> @@ -96,6 +97,10 @@ u_int vm_maxcpu;
>  SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
>      &vm_maxcpu, 0, "Maximum number of vCPUs");
>  
> +u_int vm_maxvmms;
> +SYSCTL_UINT(_hw_vmm, OID_AUTO, maxvmms, CTLFLAG_RWTUN,
> +    &vm_maxvmms, 0, "Maximum number of VMM instances per user");
> +
>  static void devmem_destroy(void *arg);
>  static int devmem_create_cdev(struct vmmdev_softc *sc, int id, char *devmem);
>  
> @@ -870,6 +875,7 @@ vmmdev_destroy(struct vmmdev_softc *sc)
>       int error __diagused;
>  
>       KASSERT(sc->cdev == NULL, ("%s: cdev not free", __func__));
> +     KASSERT(sc->ucred != NULL, ("%s: missing ucred", __func__));
>  
>       /*
>        * Destroy all cdevs:
> @@ -898,8 +904,8 @@ vmmdev_destroy(struct vmmdev_softc *sc)
>       if (sc->vm != NULL)
>               vm_destroy(sc->vm);
>  
> -     if (sc->ucred != NULL)
> -             crfree(sc->ucred);
> +     chgvmmcnt(sc->ucred->cr_ruidinfo, -1, 0);
> +     crfree(sc->ucred);
>  
>       sx_xlock(&vmmdev_mtx);
>       SLIST_REMOVE(&head, sc, vmmdev_softc, link);
> @@ -1021,6 +1027,12 @@ vmmdev_create(const char *name, struct ucred *cred)
>               vmmdev_destroy(sc);
>               return (error);
>       }
> +     if (!chgvmmcnt(cred->cr_ruidinfo, 1, vm_maxvmms)) {
> +             sx_xunlock(&vmmdev_mtx);
> +             destroy_dev(cdev);
> +             vmmdev_destroy(sc);
> +             return (ENOMEM);
> +     }
>       sc->cdev = cdev;
>       sx_xunlock(&vmmdev_mtx);
>       return (0);
> @@ -1172,7 +1184,7 @@ vmm_handler(module_t mod, int what, void *arg)
>               }
>               if (vm_maxcpu == 0)
>                       vm_maxcpu = 1;
> -
> +             vm_maxvmms = 4 * mp_ncpus;
>               error = vmm_modinit();
>               if (error == 0)
>                       vmm_initialized = true;
> diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
> index dcd38c6e6fbe..31f89bd41f6d 100644
> --- a/sys/kern/kern_resource.c
> +++ b/sys/kern/kern_resource.c
> @@ -895,6 +895,9 @@ getrlimitusage_one(struct proc *p, u_int which, int
> flags, rlim_t *res) case RLIMIT_PIPEBUF:
>               *res = ui->ui_pipecnt;
>               break;
> +     case RLIMIT_VMM:
> +             *res = ui->ui_vmmcnt;
> +             break;
>       default:
>               error = EINVAL;
>               break;
> @@ -1643,6 +1646,9 @@ uifree(struct uidinfo *uip)
>       if (uip->ui_inotifywatchcnt != 0)
>               printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n",
>                   uip->ui_uid, uip->ui_inotifywatchcnt);
> +     if (uip->ui_vmmcnt != 0)
> +             printf("freeing vmmcnt: uid = %d, vmmcnt = %ld\n",
> +                 uip->ui_uid, uip->ui_vmmcnt);
>       free(uip, M_UIDINFO);
>  }
>  
> @@ -1763,6 +1769,13 @@ chginotifywatchcnt(struct uidinfo *uip, int diff,
> rlim_t max) "inotifywatchcnt"));
>  }
>  
> +int
> +chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max)
> +{
> +
> +     return (chglimit(uip, &uip->ui_vmmcnt, diff, max, "vmmcnt"));
> +}
> +
>  static int
>  sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
>  {
> diff --git a/sys/sys/resource.h b/sys/sys/resource.h
> index 2725aa1ef646..9e0635cdb328 100644
> --- a/sys/sys/resource.h
> +++ b/sys/sys/resource.h
> @@ -115,8 +115,9 @@ struct __wrusage {
>  #define      RLIMIT_KQUEUES  13              /* kqueues allocated */
>  #define      RLIMIT_UMTXP    14              /* process-shared umtx */
>  #define      RLIMIT_PIPEBUF  15              /* pipes/fifos buffers */
> +#define      RLIMIT_VMM      16              /* virtual machines */
>  
> -#define      RLIM_NLIMITS    16              /* number of resource limits */
> +#define      RLIM_NLIMITS    17              /* number of resource limits */
>  
>  #define      RLIM_INFINITY   ((rlim_t)(((__uint64_t)1 << 63) - 1))
>  #define      RLIM_SAVED_MAX  RLIM_INFINITY
> @@ -144,6 +145,7 @@ static const char *rlimit_ident[] = {
>       "kqueues",
>       "umtx",
>       "pipebuf",
> +     "vmm",
>  };
>  #endif
>  
> diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
> index 61411890c85b..d5c4561eec66 100644
> --- a/sys/sys/resourcevar.h
> +++ b/sys/sys/resourcevar.h
> @@ -124,6 +124,7 @@ struct uidinfo {
>       long    ui_pipecnt;             /* (b) consumption of pipe buffers */
>       long    ui_inotifycnt;          /* (b) number of inotify descriptors */
>       long    ui_inotifywatchcnt;     /* (b) number of inotify watches */
> +     long    ui_vmmcnt;              /* (b) number of vmm instances */
>       uid_t   ui_uid;                 /* (a) uid */
>       u_int   ui_ref;                 /* (b) reference count */
>  #ifdef       RACCT
> @@ -148,6 +149,7 @@ int        chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
>  int   chgpipecnt(struct uidinfo *uip, int diff, rlim_t max);
>  int   chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
>  int   chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval);
> +int   chgvmmcnt(struct uidinfo *uip, int diff, rlim_t max);
>  int   kern_proc_setrlimit(struct thread *td, struct proc *p, u_int
> which, struct rlimit *limp);
>  struct plimit
> diff --git a/usr.bin/procstat/procstat_rlimit.c
> b/usr.bin/procstat/procstat_rlimit.c index c34550295f05..f3132758e005 100644
> --- a/usr.bin/procstat/procstat_rlimit.c
> +++ b/usr.bin/procstat/procstat_rlimit.c
> @@ -64,6 +64,7 @@ static struct {
>       {"kqueues",          "   "},
>       {"umtxp",            "   "},
>       {"pipebuf",          "B  "},
> +     {"virtual-machines", "   "},
>  };
>  
>  _Static_assert(nitems(rlimit_param) == RLIM_NLIMITS,
> 

After this commit (probably), my kernel floods the console with many messages like:

pid XXXXX (limits), jid 0, uid 0: exited on signal 11 (core dumped)

In particular, bind920 (named) fails immediately; surprisingly, the mouse no
longer works, and so on. sshd also dies on startup, so no access is
possible.

Environment: customized kernel (notably with unused drivers commented out and
ZFS/IPFW built into the kernel).

Kind regards,

oh

 

Reply via email to