On Fri, Jan 16, 2026 at 05:39:33PM -0800, Nathan Chen via Devel wrote: > From: Nathan Chen <[email protected]> > > Implement the IOMMU_OPTION_RLIMIT_MODE > ioctl to set per-process memory accounting for > iommufd. This prevents ENOMEM errors from the > default per-user memory accounting when multiple > VMs under the libvirt-qemu user have their pinned > memory summed and checked against a per-process > RLIMIT_MEMLOCK limit. > > Signed-off-by: Nathan Chen <[email protected]> > --- > meson.build | 1 + > po/POTFILES | 1 + > src/libvirt_private.syms | 3 ++ > src/util/meson.build | 1 + > src/util/viriommufd.c | 111 +++++++++++++++++++++++++++++++++++++++ > src/util/viriommufd.h | 25 +++++++++ > 6 files changed, 142 insertions(+) > create mode 100644 src/util/viriommufd.c > create mode 100644 src/util/viriommufd.h > > diff --git a/meson.build b/meson.build > index 964d1fa4e1..a6db70f13e 100644 > --- a/meson.build > +++ b/meson.build > @@ -732,6 +732,7 @@ headers = [ > 'ifaddrs.h', > 'libtasn1.h', > 'linux/kvm.h', > + 'linux/iommufd.h', > 'mntent.h', > 'net/ethernet.h', > 'net/if.h', > diff --git a/po/POTFILES b/po/POTFILES > index f0aad35c8c..c78d2b8000 100644 > --- a/po/POTFILES > +++ b/po/POTFILES > @@ -303,6 +303,7 @@ src/util/virhostuptime.c > src/util/viridentity.c > src/util/virinhibitor.c > src/util/virinitctl.c > +src/util/viriommufd.c > src/util/viriscsi.c > src/util/virjson.c > src/util/virlease.c > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index 6bffd2eb6d..7fa76a1ec3 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -2646,6 +2646,9 @@ virInhibitorRelease; > virInitctlFifos; > virInitctlSetRunLevel; > > +# util/viriommufd.h > +virIOMMUFDSetRLimitMode; > + > # util/viriscsi.h > virISCSIConnectionLogin; > virISCSIConnectionLogout; > diff --git a/src/util/meson.build b/src/util/meson.build > index 4950a795cc..9fb0aa0fe7 100644 > --- a/src/util/meson.build > +++ b/src/util/meson.build > @@ -46,6 +46,7 @@ 
util_sources = [ > 'viridentity.c', > 'virinhibitor.c', > 'virinitctl.c', > + 'viriommufd.c', > 'viriscsi.c', > 'virjson.c', > 'virkeycode.c', > diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c > new file mode 100644 > index 0000000000..225c76f4b2 > --- /dev/null > +++ b/src/util/viriommufd.c > @@ -0,0 +1,111 @@ > +#include <config.h> > + > +#include "viriommufd.h" > +#include "virlog.h" > +#include "virerror.h" > +#include "virfile.h" > + > +#ifdef __linux__ > + > +# include <sys/ioctl.h> > +# include <linux/types.h> > + > +# ifdef HAVE_LINUX_IOMMUFD_H > +# include <linux/iommufd.h> > +# endif > + > +# define VIR_FROM_THIS VIR_FROM_NONE > + > +VIR_LOG_INIT("util.iommufd"); > + > +# ifndef IOMMU_OPTION > + > +enum iommufd_option { > + IOMMU_OPTION_RLIMIT_MODE = 0, > + IOMMU_OPTION_HUGE_PAGES = 1, > +}; > + > +enum iommufd_option_ops { > + IOMMU_OPTION_OP_SET = 0, > + IOMMU_OPTION_OP_GET = 1, > +}; > + > +struct iommu_option { > + __u32 size; > + __u32 option_id; > + __u16 op; > + __u16 __reserved; > + __u32 object_id; > + __aligned_u64 val64; > +}; > + > +# define IOMMUFD_TYPE (';') > +# define IOMMUFD_CMD_OPTION 0x87 > +# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) > + > +# endif > + > +/** > + * virIOMMUFDSetRLimitMode: > + * @fd: iommufd file descriptor > + * @processAccounting: true for per-process, false for per-user > + * > + * Set RLIMIT_MEMLOCK accounting mode for the iommufd. > + * > + * Returns: 0 on success, -1 on error > + */ > +int > +virIOMMUFDSetRLimitMode(int fd, bool processAccounting) > +{ > + struct iommu_option option = { > + .size = sizeof(struct iommu_option), > + .option_id = IOMMU_OPTION_RLIMIT_MODE, > + .op = IOMMU_OPTION_OP_SET, > + .__reserved = 0, > + .object_id = 0, > + .val64 = processAccounting ? 
1 : 0, > + }; > + > + if (ioctl(fd, IOMMU_OPTION, &option) < 0) { > + switch (errno) { > + case ENOTTY: > + VIR_WARN("IOMMU_OPTION ioctl not supported"); > + return 0; > + > + case EOPNOTSUPP: > + VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel"); > + return 0; > + > + case EINVAL: > + virReportSystemError(errno, "%s", > + _("invalid iommufd option parameters")); > + return -1; > + > + case EPERM: > + VIR_WARN("Permission denied for IOMMU_OPTION ioctl. " > + "Per-user-based memory accounting to be used by > default."); > + return 0; > + > + default: > + virReportSystemError(errno, "%s", > + _("failed to set iommufd option")); > + return -1; > + } > + }
In my previous testing this part of the code was not used, so no rlimit was
configured for the Grace Hopper GPU that was assigned to a VM.
The VM OS was able to see the GPU and I was able to run cuda-samples
with most of them passing. This setup didn't use vCMDQ or EGM. When I
tried patches that add support for vCMDQ, I was no longer able to use the
GPU inside the VM unless this code was called or I set
"setcap cap_ipc_lock=ep" on the qemu binary, even though the GPU was still
detected inside the VM and the VM started successfully.
So is this required for all devices that want to use iommufd in order
for them to work correctly inside the VM? Or is it necessary only when
specific features are used?
I wonder if we should allow a VM to start if we know the device will not
actually work correctly.
Basically, if the IOMMU_OPTION ioctl or IOMMU_OPTION_RLIMIT_MODE is not
supported, or we get permission denied, we return 0 and let the VM
start.
Pavel
> +
> + VIR_DEBUG("Set iommufd rlimit mode to %s-based accounting",
> + processAccounting ? "process" : "user");
> + return 0;
> +}
> +
> +#else
> +
> +int virIOMMUFDSetRLimitMode(int fd G_GNUC_UNUSED,
> + bool processAccounting G_GNUC_UNUSED)
> +{
> + virReportError(VIR_ERR_NO_SUPPORT, "%s",
> + _("IOMMUFD is not supported on this platform"));
> + return -1;
> +}
> +
> +#endif
> diff --git a/src/util/viriommufd.h b/src/util/viriommufd.h
> new file mode 100644
> index 0000000000..ebecfe3633
> --- /dev/null
> +++ b/src/util/viriommufd.h
> @@ -0,0 +1,25 @@
> +/*
> + * viriommufd.h: iommufd helpers
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> + * <http://www.gnu.org/licenses/>.
> + */
> +
> +#pragma once
> +
> +#include "internal.h"
> +
> +#define VIR_IOMMU_DEV_PATH "/dev/iommu"
> +
> +int virIOMMUFDSetRLimitMode(int fd, bool processAccounting);
> --
> 2.43.0
>
signature.asc
Description: PGP signature
