On 3/2/26 14:14, Pavel Hrdina via Devel wrote:
> From: Pavel Hrdina <[email protected]>
> 
> When IOMMUFD support was introduced it incorrectly tried to lable
> `/dev/iommu` and `/dev/vfio/devices/vfioX` but they are not added to
> QEMU namespace because libvirt opens FDs and passes these FDs to QEMU.
> 
> We need to label these FDs instead.
> 
> Fixes: 7d2f91f9cb572ab95d0916bdd1a46dd198874529
> Signed-off-by: Pavel Hrdina <[email protected]>
> ---
>  src/qemu/qemu_hotplug.c          |  2 +-
>  src/qemu/qemu_process.c          | 16 ++++++++++++----
>  src/qemu/qemu_process.h          |  3 ++-
>  src/security/security_apparmor.c | 12 ------------
>  src/security/security_dac.c      | 27 ---------------------------
>  src/security/security_selinux.c  | 23 -----------------------
>  6 files changed, 15 insertions(+), 68 deletions(-)
> 
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 40489b84db..b3f2a173a8 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1613,7 +1613,7 @@ qemuDomainAttachHostPCIDevice(virQEMUDriver *driver,
>      }
>  
>      if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) {
> -        if (qemuProcessOpenVfioDeviceFd(hostdev) < 0)
> +        if (qemuProcessOpenVfioDeviceFd(vm, hostdev) < 0)
>              goto error;
>  
>          if (!priv->iommufdState) {
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index a82ee4b15e..ab7cf03c0e 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -7728,13 +7728,16 @@ int
>  qemuProcessOpenIommuFd(virDomainObj *vm)
>  {
>      qemuDomainObjPrivate *priv = vm->privateData;
> -    int iommufd;
> +    VIR_AUTOCLOSE iommufd = -1;
>  
>      VIR_DEBUG("Opening IOMMU FD for domain %s", vm->def->name);
>  
>      if ((iommufd = virIOMMUFDOpenDevice()) < 0)
>          return -1;
>  
> +    if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, 
> iommufd) < 0)
> +        return -1;
> +
>      priv->iommufd = qemuFDPassDirectNew("iommufd", &iommufd);
>  
>      return 0;
> @@ -7749,16 +7752,21 @@ qemuProcessOpenIommuFd(virDomainObj *vm)
>   * Returns: 0 on success, -1 on failure
>   */
>  int
> -qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev)
> +qemuProcessOpenVfioDeviceFd(virDomainObj *vm,
> +                            virDomainHostdevDef *hostdev)
>  {
> +    qemuDomainObjPrivate *priv = vm->privateData;
>      qemuDomainHostdevPrivate *hostdevPriv = 
> QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
>      virDomainHostdevSubsysPCI *pci = &hostdev->source.subsys.u.pci;
>      g_autofree char *name = g_strdup_printf("hostdev-%s-fd", 
> hostdev->info->alias);
> -    int vfioDeviceFd;
> +    VIR_AUTOCLOSE vfioDeviceFd = -1;
>  
>      if ((vfioDeviceFd = virPCIDeviceOpenVfioFd(&pci->addr)) < 0)
>          return -1;
>  
> +    if (qemuSecuritySetImageFDLabel(priv->driver->securityManager, vm->def, 
> vfioDeviceFd) < 0)
> +        return -1;
> +
>      hostdevPriv->vfioDeviceFd = qemuFDPassDirectNew(name, &vfioDeviceFd);
>  
>      return 0;
> @@ -7776,7 +7784,7 @@ qemuProcessPrepareHostHostdev(virDomainObj *vm)
>          case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
>              if (virHostdevIsPCIDeviceWithIOMMUFD(hostdev)) {
>                  /* Open VFIO device FD */
> -                if (qemuProcessOpenVfioDeviceFd(hostdev) < 0)
> +                if (qemuProcessOpenVfioDeviceFd(vm, hostdev) < 0)
>                      return -1;
>              }
>              break;
> diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
> index fccd41e1a6..5874214596 100644
> --- a/src/qemu/qemu_process.h
> +++ b/src/qemu/qemu_process.h
> @@ -136,7 +136,8 @@ int 
> qemuProcessPrepareHostBackendChardevHotplug(virDomainObj *vm,
>  
>  int qemuProcessOpenIommuFd(virDomainObj *vm);
>  
> -int qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev);
> +int qemuProcessOpenVfioDeviceFd(virDomainObj *vm,
> +                                virDomainHostdevDef *hostdev);
>  
>  int qemuProcessPrepareHost(virQEMUDriver *driver,
>                             virDomainObj *vm,

ACK to these hunk above. BUT...

> diff --git a/src/security/security_apparmor.c 
> b/src/security/security_apparmor.c
> index 1c3496893c..40f13ec1a5 100644
> --- a/src/security/security_apparmor.c
> +++ b/src/security/security_apparmor.c
> @@ -45,7 +45,6 @@
>  #include "virstring.h"
>  #include "virscsi.h"
>  #include "virmdev.h"
> -#include "viriommufd.h"
>  
>  #define VIR_FROM_THIS VIR_FROM_SECURITY
>  
> @@ -856,17 +855,6 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr,
>  
>                  if (AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr) < 0)
>                      return -1;
> -            } else {
> -                g_autofree char *vfiofdDev = NULL;
> -
> -                if (virPCIDeviceGetVfioPath(pci, &vfiofdDev) < 0)
> -                    return -1;
> -
> -                if (AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr) < 0)
> -                    return -1;
> -
> -                if (AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, 
> ptr) < 0)
> -                    return -1;

I don't think removing these hunks (trimmed) is justified. If you'd take
a look at virHostdevIsPCIDeviceWithIOMMUFD() it looks like this:

    return virHostdevIsPCIDevice(hostdev) &&
        hostdev->source.subsys.u.pci.driver.name == 
VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
        hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES;

Now, consider the following <hostdev/> (either as a part of domain XML or being 
hotplugged):

    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x00' slot='0x14' function='0x3'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' 
function='0x0'/>
    </hostdev>

The ->source.subsys.u.pci.driver.name is autofilled to _VFIO, BUT
->source.subsys.u.pci.driver.iommufd is NOT expanded to _YES.

I do not run a machine with SELinux, but in my testing, I can imitate the 
problem by starting
a domain with +77:+77 DAC seclabel, and hotplugging aforementioned hostdev. I 
set a breakpoint
to qemuDomainNamespaceSetupHostdev() so that I can mangle perms of 
/dev/vfio/vfio INSIDE the
namespace (once the function finishes and nodes are created inside the 
namespace). Then I
observe virSecurityDACSetHostdevLabel() (well qemuSecuritySetHostdevLabel()) 
changing perms
only to /dev/vfio/8 (my hostdev is in IOMMU group #8) only to see QEMU fail:

error: internal error: unable to execute QEMU command 'device_add': vfio 
0000:00:14.3: failed to setup container for group 8: Could not open 
'/dev/vfio/vfio': Permission denied

Michal

Reply via email to